504 lines
8.6 KiB
Zig
Executable File
504 lines
8.6 KiB
Zig
Executable File
const coral = @import("coral");
|
|
|
|
/// A single lexical unit produced by `Tokenizer.step`.
///
/// The `identifier`, `builtin`, `number` and `string` variants carry slices that `Tokenizer.step` cuts directly
/// out of its source buffer, so they are only valid for as long as that buffer is.
pub const Token = union(enum) {
    /// Emitted once the tokenizer has run out of source text.
    end,
    /// A byte that does not start any recognized token.
    unknown: coral.io.Byte,
    /// A line break.
    newline,
    /// A name that is not one of the reserved keywords.
    identifier: []const coral.io.Byte,
    /// The name following an `@`, with the `@` itself excluded.
    builtin: []const coral.io.Byte,

    symbol_plus,
    symbol_minus,
    symbol_asterisk,
    symbol_forward_slash,
    symbol_paren_left,
    symbol_paren_right,
    symbol_bang,
    symbol_comma,
    symbol_at,
    symbol_brace_left,
    symbol_brace_right,
    symbol_bracket_left,
    symbol_bracket_right,
    symbol_period,
    symbol_colon,
    symbol_less_than,
    symbol_less_equals,
    symbol_greater_than,
    symbol_greater_equals,
    symbol_equals,
    symbol_double_equals,

    /// The raw digits (and optional fractional part) of a numeric literal.
    number: []const coral.io.Byte,
    /// The contents of a string literal, with the surrounding quotes excluded.
    string: []const coral.io.Byte,

    keyword_nil,
    keyword_false,
    keyword_true,
    keyword_return,
    keyword_self,
    keyword_const,
    keyword_if,
    keyword_do,
    keyword_end,
    keyword_while,
    keyword_else,
    keyword_elif,
    keyword_var,
    keyword_let,
    keyword_lambda,

    /// Every possible byte value stored at the index equal to itself.
    ///
    /// `text` slices into this table so that the spelling of an `unknown` token lives in static memory rather
    /// than in the stack frame of the call that produced it.
    const byte_texts = generate: {
        var table: [1 << @bitSizeOf(coral.io.Byte)]coral.io.Byte = undefined;

        for (&table, 0 ..) |*entry, value| {
            entry.* = @intCast(value);
        }

        break :generate table;
    };

    /// Returns a printable spelling of `self`.
    ///
    /// Payload-carrying variants return their payload, `unknown` returns a one-byte slice holding the offending
    /// byte, and every other variant returns a fixed string. Note that `end` and `keyword_end` both spell as
    /// `"end"`.
    ///
    /// The returned slice is never owned by the caller: it is either static data or the slice already held by
    /// the token.
    pub fn text(self: Token) []const coral.io.Byte {
        return switch (self) {
            .end => "end",
            // Taking the address of the captured byte would hand back a pointer into this call's frame, which
            // dangles as soon as the function returns. Index the static table instead.
            .unknown => |unknown| byte_texts[@as(usize, unknown) ..][0 .. 1],
            .newline => "newline",

            .identifier => |identifier| identifier,
            .builtin => |identifier| identifier,

            .symbol_plus => "+",
            .symbol_minus => "-",
            .symbol_asterisk => "*",
            .symbol_forward_slash => "/",
            .symbol_paren_left => "(",
            .symbol_paren_right => ")",
            .symbol_bang => "!",
            .symbol_comma => ",",
            .symbol_at => "@",
            .symbol_brace_left => "{",
            .symbol_brace_right => "}",
            .symbol_bracket_left => "[",
            .symbol_bracket_right => "]",
            .symbol_period => ".",
            .symbol_colon => ":",
            .symbol_less_than => "<",
            .symbol_less_equals => "<=",
            .symbol_greater_than => ">",
            .symbol_greater_equals => ">=",
            .symbol_equals => "=",
            .symbol_double_equals => "==",

            .number => |literal| literal,
            .string => |literal| literal,

            .keyword_const => "const",
            .keyword_nil => "nil",
            .keyword_false => "false",
            .keyword_true => "true",
            .keyword_return => "return",
            .keyword_self => "self",
            .keyword_if => "if",
            .keyword_do => "do",
            .keyword_end => "end",
            .keyword_while => "while",
            .keyword_elif => "elif",
            .keyword_else => "else",
            .keyword_var => "var",
            .keyword_let => "let",
            .keyword_lambda => "lambda",
        };
    }
};
|
|
|
|
/// Incremental lexer that splits `source` into `Token`s, one per call to `step`.
///
/// The tokenizer never copies: tokens carrying text hold slices of the original `source` buffer, and `source`
/// itself is re-sliced on every step so that it always begins at the first byte not yet consumed.
pub const Tokenizer = struct {
    /// Remaining, not yet tokenized, source text.
    source: []const coral.io.Byte,
    /// Starts at `1` and is incremented every time a `newline` token is produced.
    lines_stepped: usize = 1,
    /// The token produced by the most recent call to `step`.
    token: Token = .newline,

    /// Pairs the exact spelling of a reserved word with the token it lexes to.
    const Keyword = struct {
        text: []const coral.io.Byte,
        token: Token,
    };

    /// Every reserved word of the language.
    const keywords = [_]Keyword{
        .{.text = "const", .token = .keyword_const},
        .{.text = "do", .token = .keyword_do},
        .{.text = "elif", .token = .keyword_elif},
        .{.text = "else", .token = .keyword_else},
        .{.text = "end", .token = .keyword_end},
        .{.text = "false", .token = .keyword_false},
        .{.text = "if", .token = .keyword_if},
        .{.text = "lambda", .token = .keyword_lambda},
        .{.text = "let", .token = .keyword_let},
        .{.text = "nil", .token = .keyword_nil},
        .{.text = "return", .token = .keyword_return},
        .{.text = "self", .token = .keyword_self},
        .{.text = "true", .token = .keyword_true},
        .{.text = "var", .token = .keyword_var},
        .{.text = "while", .token = .keyword_while},
    };

    /// Returns `true` when `a` and `b` have the same length and the same bytes.
    fn bytes_equal(a: []const coral.io.Byte, b: []const coral.io.Byte) bool {
        if (a.len != b.len) {
            return false;
        }

        for (a, b) |a_byte, b_byte| {
            if (a_byte != b_byte) {
                return false;
            }
        }

        return true;
    }

    /// Returns the keyword token spelled exactly by `word`, or an `identifier` token wrapping `word` otherwise.
    ///
    /// The comparison is over the whole word. Matching only the first byte and a suffix would misclassify
    /// ordinary names such as `demo`, `itself` or `shelf` as the keywords `do`, `if` and `self`.
    fn word_token(word: []const coral.io.Byte) Token {
        for (keywords) |keyword| {
            if (bytes_equal(word, keyword.text)) {
                return keyword.token;
            }
        }

        return .{.identifier = word};
    }

    /// Returns the index of the first byte at or after `start` that is not an ASCII digit, or `source.len` when
    /// there is none.
    fn skip_digits(source: []const coral.io.Byte, start: usize) usize {
        var index = start;

        while (index < source.len) switch (source[index]) {
            '0' ... '9' => index += 1,
            else => break,
        };

        return index;
    }

    /// Returns the index of the first byte at or after `start` that is not an ASCII letter, digit or underscore,
    /// or `source.len` when there is none.
    fn skip_word_bytes(source: []const coral.io.Byte, start: usize) usize {
        var index = start;

        while (index < source.len) switch (source[index]) {
            '0' ... '9', 'A' ... 'Z', 'a' ... 'z', '_' => index += 1,
            else => break,
        };

        return index;
    }

    /// Steps once unconditionally and then keeps stepping for as long as the current token is a `newline`.
    pub fn skip_newlines(self: *Tokenizer) void {
        self.step();

        while (self.token == .newline) {
            self.step();
        }
    }

    /// Consumes the next token from the front of `source` and stores it in `token`.
    ///
    /// Spaces, tabs and `#` comments (which run up to, but do not include, the next line feed) are skipped.
    /// Once the source is exhausted `token` becomes `end`, and stays `end` on every further call.
    pub fn step(self: *Tokenizer) void {
        var cursor = @as(usize, 0);

        // Runs on every exit path with the final value of `cursor`, dropping whatever was consumed.
        defer self.source = self.source[cursor ..];

        while (cursor < self.source.len) {
            switch (self.source[cursor]) {
                '#' => {
                    cursor += 1;

                    // Stop on the line feed rather than after it so the comment's line still yields a newline.
                    while (cursor < self.source.len and self.source[cursor] != '\n') {
                        cursor += 1;
                    }
                },

                ' ', '\t' => cursor += 1,

                '\n' => {
                    cursor += 1;
                    self.token = .newline;
                    self.lines_stepped += 1;

                    return;
                },

                '0' ... '9' => {
                    const begin = cursor;

                    cursor = skip_digits(self.source, cursor + 1);

                    // An optional fractional part. A trailing `.` with no digits after it is still taken as part
                    // of the number.
                    if (cursor < self.source.len and self.source[cursor] == '.') {
                        cursor = skip_digits(self.source, cursor + 1);
                    }

                    self.token = .{.number = self.source[begin .. cursor]};

                    return;
                },

                'A' ... 'Z', 'a' ... 'z', '_' => {
                    const begin = cursor;

                    cursor = skip_word_bytes(self.source, cursor + 1);
                    self.token = word_token(self.source[begin .. cursor]);

                    return;
                },

                '@' => {
                    // The builtin name starts after the `@`.
                    const begin = cursor + 1;

                    cursor = skip_word_bytes(self.source, begin);

                    // NOTE(review): a bare `@` is reported as `unknown` rather than `symbol_at` - confirm that
                    // is intended.
                    self.token = if (begin == cursor) .{.unknown = '@'} else .{.builtin = self.source[begin .. cursor]};

                    return;
                },

                '"' => {
                    // The contents start after the opening quote. `begin` may equal `source.len` when the quote
                    // is the last byte, which still yields a valid empty slice.
                    const begin = cursor + 1;

                    cursor = begin;

                    while (cursor < self.source.len and self.source[cursor] != '"') {
                        cursor += 1;
                    }

                    self.token = .{.string = self.source[begin .. cursor]};

                    // Only step over the closing quote when there is one. An unterminated string runs to the end
                    // of the source and must not push the cursor past it.
                    if (cursor < self.source.len) {
                        cursor += 1;
                    }

                    return;
                },

                '{' => {
                    cursor += 1;
                    self.token = .symbol_brace_left;

                    return;
                },

                '}' => {
                    cursor += 1;
                    self.token = .symbol_brace_right;

                    return;
                },

                '[' => {
                    cursor += 1;
                    self.token = .symbol_bracket_left;

                    return;
                },

                ']' => {
                    cursor += 1;
                    self.token = .symbol_bracket_right;

                    return;
                },

                ',' => {
                    cursor += 1;
                    self.token = .symbol_comma;

                    return;
                },

                '!' => {
                    cursor += 1;
                    self.token = .symbol_bang;

                    return;
                },

                ')' => {
                    cursor += 1;
                    self.token = .symbol_paren_right;

                    return;
                },

                '(' => {
                    cursor += 1;
                    self.token = .symbol_paren_left;

                    return;
                },

                '/' => {
                    cursor += 1;
                    self.token = .symbol_forward_slash;

                    return;
                },

                '*' => {
                    cursor += 1;
                    self.token = .symbol_asterisk;

                    return;
                },

                '-' => {
                    cursor += 1;
                    self.token = .symbol_minus;

                    return;
                },

                '+' => {
                    cursor += 1;
                    self.token = .symbol_plus;

                    return;
                },

                ':' => {
                    cursor += 1;
                    self.token = .symbol_colon;

                    return;
                },

                '.' => {
                    cursor += 1;
                    self.token = .symbol_period;

                    return;
                },

                '=' => {
                    cursor += 1;

                    if (cursor < self.source.len and self.source[cursor] == '=') {
                        cursor += 1;
                        self.token = .symbol_double_equals;
                    } else {
                        self.token = .symbol_equals;
                    }

                    return;
                },

                '<' => {
                    cursor += 1;

                    if (cursor < self.source.len and self.source[cursor] == '=') {
                        cursor += 1;
                        self.token = .symbol_less_equals;
                    } else {
                        self.token = .symbol_less_than;
                    }

                    return;
                },

                '>' => {
                    cursor += 1;

                    if (cursor < self.source.len and self.source[cursor] == '=') {
                        cursor += 1;
                        self.token = .symbol_greater_equals;
                    } else {
                        self.token = .symbol_greater_than;
                    }

                    return;
                },

                else => {
                    self.token = .{.unknown = self.source[cursor]};
                    cursor += 1;

                    return;
                },
            }
        }

        self.token = .end;
    }
};
|