ona/source/ona/kym/tokens.zig

504 lines
8.6 KiB
Zig
Executable File

const coral = @import("coral");
/// A single lexical unit of the scripting language tokenized in this file.
///
/// Variants with a slice payload (`identifier`, `builtin`, `number`, `string`) carry the text of
/// the token; the `Tokenizer` in this file fills them with sub-slices of its source, so they are
/// only valid for as long as that source memory is.
pub const Token = union(enum) {
    /// No more input.
    end,
    /// A byte that does not begin any recognized token.
    unknown: coral.io.Byte,
    newline,
    identifier: []const coral.io.Byte,
    /// Name that followed an `@`, without the `@` itself.
    builtin: []const coral.io.Byte,
    symbol_plus,
    symbol_minus,
    symbol_asterisk,
    symbol_forward_slash,
    symbol_paren_left,
    symbol_paren_right,
    symbol_bang,
    symbol_comma,
    symbol_at,
    symbol_brace_left,
    symbol_brace_right,
    symbol_bracket_left,
    symbol_bracket_right,
    symbol_period,
    symbol_colon,
    symbol_less_than,
    symbol_less_equals,
    symbol_greater_than,
    symbol_greater_equals,
    symbol_equals,
    symbol_double_equals,
    number: []const coral.io.Byte,
    /// Contents of a string literal, without the surrounding quotes.
    string: []const coral.io.Byte,
    keyword_nil,
    keyword_false,
    keyword_true,
    keyword_return,
    keyword_self,
    keyword_const,
    keyword_if,
    keyword_do,
    keyword_end,
    keyword_while,
    keyword_else,
    keyword_elif,
    keyword_var,
    keyword_let,
    keyword_lambda,

    /// Every possible byte value, stored at the index equal to that value.
    ///
    /// Gives `text` a statically allocated one-byte string to return for `.unknown` tokens so that
    /// it never has to hand out a pointer into its own stack frame.
    const unknown_texts = init: {
        var table: [1 << @bitSizeOf(coral.io.Byte)]coral.io.Byte = undefined;

        for (&table, 0..) |*entry, value| entry.* = @intCast(value);

        break :init table;
    };

    /// Returns a human-readable spelling of the token.
    ///
    /// Payload-carrying variants return their payload as-is (for `unknown`, a one-byte string
    /// holding the offending byte); every other variant returns a fixed string. Note that both
    /// `.end` and `.keyword_end` are spelled `"end"`.
    ///
    /// The returned slice is either static or the token's own payload slice, so it does not
    /// depend on the lifetime of `self`.
    pub fn text(self: Token) []const coral.io.Byte {
        return switch (self) {
            .end => "end",
            // `self` is a by-value copy, so slicing the captured byte directly would return a
            // pointer to memory that dies when this function returns. Slice the static table
            // instead.
            .unknown => |unknown| unknown_texts[@as(usize, unknown)..][0..1],
            .newline => "newline",
            .identifier => |identifier| identifier,
            .builtin => |identifier| identifier,
            .symbol_plus => "+",
            .symbol_minus => "-",
            .symbol_asterisk => "*",
            .symbol_forward_slash => "/",
            .symbol_paren_left => "(",
            .symbol_paren_right => ")",
            .symbol_bang => "!",
            .symbol_comma => ",",
            .symbol_at => "@",
            .symbol_brace_left => "{",
            .symbol_brace_right => "}",
            .symbol_bracket_left => "[",
            .symbol_bracket_right => "]",
            .symbol_period => ".",
            .symbol_colon => ":",
            .symbol_less_than => "<",
            .symbol_less_equals => "<=",
            .symbol_greater_than => ">",
            .symbol_greater_equals => ">=",
            .symbol_equals => "=",
            .symbol_double_equals => "==",
            .number => |literal| literal,
            .string => |literal| literal,
            .keyword_const => "const",
            .keyword_nil => "nil",
            .keyword_false => "false",
            .keyword_true => "true",
            .keyword_return => "return",
            .keyword_self => "self",
            .keyword_if => "if",
            .keyword_do => "do",
            .keyword_end => "end",
            .keyword_while => "while",
            .keyword_elif => "elif",
            .keyword_else => "else",
            .keyword_var => "var",
            .keyword_let => "let",
            .keyword_lambda => "lambda",
        };
    }
};
/// Incremental lexer over a slice of source text.
///
/// Each call to `step` reads one token from the front of `source`, stores it in `token` and
/// shrinks `source` so that it starts just after the consumed text. Slice payloads of the produced
/// tokens point into the original source memory.
pub const Tokenizer = struct {
    /// Source text that has not been tokenized yet.
    source: []const coral.io.Byte,

    /// Starts at 1 and grows by one for every `.newline` token produced. Line breaks inside
    /// string literals do not produce a `.newline` token and are therefore not counted.
    lines_stepped: usize = 1,

    /// The token produced by the most recent call to `step`.
    token: Token = .newline,

    /// Pairs the exact spelling of a reserved word with the token it lexes to.
    const Keyword = struct {
        text: []const coral.io.Byte,
        token: Token,
    };

    /// Every reserved word of the language.
    const keywords = [_]Keyword{
        .{ .text = "const", .token = .keyword_const },
        .{ .text = "do", .token = .keyword_do },
        .{ .text = "else", .token = .keyword_else },
        .{ .text = "elif", .token = .keyword_elif },
        .{ .text = "end", .token = .keyword_end },
        .{ .text = "false", .token = .keyword_false },
        .{ .text = "if", .token = .keyword_if },
        .{ .text = "lambda", .token = .keyword_lambda },
        .{ .text = "let", .token = .keyword_let },
        .{ .text = "nil", .token = .keyword_nil },
        .{ .text = "return", .token = .keyword_return },
        .{ .text = "self", .token = .keyword_self },
        .{ .text = "true", .token = .keyword_true },
        .{ .text = "var", .token = .keyword_var },
        .{ .text = "while", .token = .keyword_while },
    };

    /// Steps once and then keeps stepping for as long as the current token is `.newline`.
    pub fn skip_newlines(self: *Tokenizer) void {
        self.step();

        while (self.token == .newline) {
            self.step();
        }
    }

    /// Reads the next token from `source` into `token` and advances `source` past it.
    ///
    /// Spaces, tabs and `#` comments (which run up to, but not including, the next line feed) are
    /// skipped. Once the input is exhausted `token` becomes `.end`, and stays so on further calls.
    ///
    /// Literal forms:
    ///  * numbers are a run of digits, optionally followed by `.` and a (possibly empty) run of
    ///    digits;
    ///  * strings run from `"` to the next `"` with no escape sequences; an unterminated string
    ///    extends to the end of the source;
    ///  * `@` followed by at least one word character is a `.builtin`; a lone `@` is `.unknown`.
    pub fn step(self: *Tokenizer) void {
        var cursor: usize = 0;

        // Evaluated on every exit path with the final value of `cursor`, dropping exactly what
        // this call consumed.
        defer self.source = self.source[cursor..];

        while (cursor < self.source.len) {
            const byte = self.source[cursor];

            switch (byte) {
                '#' => {
                    cursor += 1;

                    // Leave the line feed in place so it is still reported as a newline token.
                    while (cursor < self.source.len and self.source[cursor] != '\n') {
                        cursor += 1;
                    }
                },

                ' ', '\t' => cursor += 1,

                '\n' => {
                    cursor += 1;
                    self.token = .newline;
                    self.lines_stepped += 1;

                    return;
                },

                '0'...'9' => {
                    const begin = cursor;

                    cursor = self.skip_digits(cursor + 1);

                    if (self.accept(&cursor, '.')) {
                        cursor = self.skip_digits(cursor);
                    }

                    self.token = .{ .number = self.source[begin..cursor] };

                    return;
                },

                'A'...'Z', 'a'...'z', '_' => {
                    const begin = cursor;

                    cursor = self.skip_word(cursor + 1);
                    self.token = classify_word(self.source[begin..cursor]);

                    return;
                },

                '@' => {
                    const begin = cursor + 1;

                    cursor = self.skip_word(begin);

                    self.token = if (begin == cursor)
                        .{ .unknown = '@' }
                    else
                        .{ .builtin = self.source[begin..cursor] };

                    return;
                },

                '"' => {
                    const begin = cursor + 1;

                    cursor = begin;

                    while (cursor < self.source.len and self.source[cursor] != '"') {
                        cursor += 1;
                    }

                    self.token = .{ .string = self.source[begin..cursor] };

                    // Only step over a closing quote that is actually there; an unterminated
                    // literal must not push the cursor past the end of the source.
                    _ = self.accept(&cursor, '"');

                    return;
                },

                '=' => {
                    cursor += 1;
                    self.token = if (self.accept(&cursor, '=')) .symbol_double_equals else .symbol_equals;

                    return;
                },

                '<' => {
                    cursor += 1;
                    self.token = if (self.accept(&cursor, '=')) .symbol_less_equals else .symbol_less_than;

                    return;
                },

                '>' => {
                    cursor += 1;
                    self.token = if (self.accept(&cursor, '=')) .symbol_greater_equals else .symbol_greater_than;

                    return;
                },

                // Everything left is a single byte: either a one-character symbol or an
                // unrecognized byte.
                else => {
                    cursor += 1;

                    self.token = switch (byte) {
                        '{' => .symbol_brace_left,
                        '}' => .symbol_brace_right,
                        '[' => .symbol_bracket_left,
                        ']' => .symbol_bracket_right,
                        '(' => .symbol_paren_left,
                        ')' => .symbol_paren_right,
                        ',' => .symbol_comma,
                        '!' => .symbol_bang,
                        '/' => .symbol_forward_slash,
                        '*' => .symbol_asterisk,
                        '-' => .symbol_minus,
                        '+' => .symbol_plus,
                        ':' => .symbol_colon,
                        '.' => .symbol_period,
                        else => .{ .unknown = byte },
                    };

                    return;
                },
            }
        }

        self.token = .end;
    }

    /// Returns the keyword token spelled exactly by `word`, or an `.identifier` holding `word`
    /// when it is not a reserved word.
    fn classify_word(word: []const coral.io.Byte) Token {
        for (keywords) |keyword| {
            // A suffix test between equal-length slices is an equality test. Checking the whole
            // word (rather than first letter plus suffix) keeps identifiers such as `extend` or
            // `demo` from being mistaken for `end` or `do`.
            if (word.len == keyword.text.len and coral.io.ends_with(word, keyword.text)) {
                return keyword.token;
            }
        }

        return .{ .identifier = word };
    }

    /// Advances `cursor` past `expected` if that is the next unread byte, reporting whether it
    /// did so.
    fn accept(self: Tokenizer, cursor: *usize, expected: coral.io.Byte) bool {
        if (cursor.* < self.source.len and self.source[cursor.*] == expected) {
            cursor.* += 1;

            return true;
        }

        return false;
    }

    /// Returns the index of the first byte at or after `start` that is not a decimal digit.
    fn skip_digits(self: Tokenizer, start: usize) usize {
        var index = start;

        while (index < self.source.len) : (index += 1) {
            switch (self.source[index]) {
                '0'...'9' => {},
                else => break,
            }
        }

        return index;
    }

    /// Returns the index of the first byte at or after `start` that is not a letter, digit or
    /// underscore.
    fn skip_word(self: Tokenizer, start: usize) usize {
        var index = start;

        while (index < self.source.len) : (index += 1) {
            switch (self.source[index]) {
                '0'...'9', 'A'...'Z', 'a'...'z', '_' => {},
                else => break,
            }
        }

        return index;
    }
};