ona/source/kym/tokens.zig

298 lines
5.7 KiB
Zig
Raw Normal View History

2023-05-06 03:49:10 +02:00
const coral = @import("coral");
/// A single lexical token, as produced by `Tokenizer`.
///
/// Variants with a payload carry either the offending byte (`unknown`) or the text of the
/// identifier / literal. All other variants are pure tags.
pub const Token = union(enum) {
    unknown: u8,
    newline,
    global_identifier: []const u8,
    local_identifier: []const u8,
    symbol_assign,
    symbol_plus,
    symbol_dash,
    symbol_asterisk,
    symbol_forward_slash,
    symbol_paren_left,
    symbol_paren_right,
    symbol_bang,
    symbol_comma,
    symbol_at,
    symbol_brace_left,
    symbol_brace_right,
    symbol_bracket_left,
    symbol_bracket_right,
    symbol_period,
    symbol_arrow,
    integer_literal: []const u8,
    real_literal: []const u8,
    string_literal: []const u8,
    keyword_nil,
    keyword_false,
    keyword_true,
    keyword_return,
    keyword_self,

    /// Error returned by `expect` and `expect_any` when the token does not match.
    pub const ExpectError = error{
        UnexpectedToken,
    };

    /// Identity table of every byte value, built at compile time.
    ///
    /// `text` slices into this table for `unknown` tokens so that the returned slice refers to
    /// static memory rather than to a local copy of the payload.
    const unknown_bytes = init: {
        var bytes: [256]u8 = undefined;
        var value: u8 = 0;

        while (true) {
            bytes[value] = value;

            // Stop before incrementing past the largest `u8` value.
            if (value == 255) break;

            value += 1;
        }

        break :init bytes;
    };

    /// Succeeds if the active tag of `self` is `tag`, otherwise returns `error.UnexpectedToken`.
    pub fn expect(self: Token, tag: coral.io.Tag(Token)) ExpectError!void {
        if (self != tag) return error.UnexpectedToken;
    }

    /// Succeeds if the active tag of `self` equals any entry of `tags`, otherwise returns
    /// `error.UnexpectedToken`. An empty `tags` always fails.
    pub fn expect_any(self: Token, tags: []const coral.io.Tag(Token)) ExpectError!void {
        for (tags) |tag| {
            if (self == tag) return;
        }

        return error.UnexpectedToken;
    }

    /// Returns a textual form of `self`.
    ///
    /// Identifier and literal tokens return their payload slice unchanged, symbols and keywords
    /// return their fixed spelling, `newline` returns the word "newline", and `unknown` returns
    /// a one-byte slice holding the unrecognized byte.
    pub fn text(self: Token) []const u8 {
        return switch (self) {
            .unknown => |unknown| {
                // Widen before adding one so that byte 255 does not overflow `u8`.
                const index: usize = unknown;

                return unknown_bytes[index .. index + 1];
            },

            .newline => "newline",
            .global_identifier => |identifier| identifier,
            .local_identifier => |identifier| identifier,
            .symbol_assign => "=",
            .symbol_plus => "+",
            .symbol_dash => "-",
            .symbol_asterisk => "*",
            .symbol_forward_slash => "/",
            .symbol_paren_left => "(",
            .symbol_paren_right => ")",
            .symbol_bang => "!",
            .symbol_comma => ",",
            .symbol_at => "@",
            .symbol_brace_left => "{",
            .symbol_brace_right => "}",
            .symbol_bracket_left => "[",
            .symbol_bracket_right => "]",
            .symbol_period => ".",
            .symbol_arrow => "=>",
            .integer_literal => |literal| literal,
            .real_literal => |literal| literal,
            .string_literal => |literal| literal,
            .keyword_nil => "nil",
            .keyword_false => "false",
            .keyword_true => "true",
            .keyword_return => "return",
            .keyword_self => "self",
        };
    }
};
/// Splits `source` into `Token`s one call at a time, tracking progress in `cursor`.
///
/// Slices carried by the returned tokens point into `source`.
pub const Tokenizer = struct {
    source: []const u8,
    cursor: usize = 0,

    /// Returns `true` while `cursor` has not reached the end of `source`.
    pub fn has_next(self: Tokenizer) bool {
        return self.cursor < self.source.len;
    }

    /// Returns the next token, or `null` once only spaces / tabs (or nothing) remain.
    ///
    /// Bytes that do not start any recognized token are returned one at a time as `unknown`.
    pub fn next(self: *Tokenizer) ?Token {
        while (self.has_next()) switch (self.source[self.cursor]) {
            ' ', '\t' => self.cursor += 1,
            '\n' => return self.advance(.newline),
            '0'...'9' => return self.read_number(),

            'A'...'Z', 'a'...'z', '_' => {
                const identifier = self.read_identifier();

                coral.debug.assert(identifier.len != 0);

                // The length check makes this an exact match; a suffix test alone would also
                // accept longer identifiers such as `nostril` or `tree_true`.
                switch (identifier[0]) {
                    'n' => if (identifier.len == "nil".len and coral.io.ends_with(identifier, "il")) return .keyword_nil,
                    'f' => if (identifier.len == "false".len and coral.io.ends_with(identifier, "alse")) return .keyword_false,
                    't' => if (identifier.len == "true".len and coral.io.ends_with(identifier, "rue")) return .keyword_true,
                    'r' => if (identifier.len == "return".len and coral.io.ends_with(identifier, "eturn")) return .keyword_return,
                    's' => if (identifier.len == "self".len and coral.io.ends_with(identifier, "elf")) return .keyword_self,
                    else => {},
                }

                return Token{ .local_identifier = identifier };
            },

            '@' => {
                self.cursor += 1;

                // `@name` and `@"quoted name"` are global identifiers; a bare `@` is a symbol.
                if (self.has_next()) switch (self.source[self.cursor]) {
                    'A'...'Z', 'a'...'z', '_' => return Token{ .global_identifier = self.read_identifier() },
                    '"' => return Token{ .global_identifier = self.read_quoted() },
                    else => {},
                };

                return .symbol_at;
            },

            '"' => return Token{ .string_literal = self.read_quoted() },

            '=' => {
                self.cursor += 1;

                // `=>` is a single arrow token; a lone `=` is assignment.
                if (self.has_next() and self.source[self.cursor] == '>') {
                    return self.advance(.symbol_arrow);
                }

                return .symbol_assign;
            },

            '{' => return self.advance(.symbol_brace_left),
            '}' => return self.advance(.symbol_brace_right),
            '[' => return self.advance(.symbol_bracket_left),
            ']' => return self.advance(.symbol_bracket_right),
            '(' => return self.advance(.symbol_paren_left),
            ')' => return self.advance(.symbol_paren_right),
            ',' => return self.advance(.symbol_comma),
            '!' => return self.advance(.symbol_bang),
            '/' => return self.advance(.symbol_forward_slash),
            '*' => return self.advance(.symbol_asterisk),
            '-' => return self.advance(.symbol_dash),
            '+' => return self.advance(.symbol_plus),
            '.' => return self.advance(.symbol_period),

            // The argument is evaluated before `advance` moves the cursor.
            else => return self.advance(Token{ .unknown = self.source[self.cursor] }),
        };

        return null;
    }

    /// Moves the cursor forward by one byte and returns `token`.
    fn advance(self: *Tokenizer, token: Token) Token {
        self.cursor += 1;

        return token;
    }

    /// Consumes an identifier whose first byte is at the cursor and returns its text.
    ///
    /// The first byte is taken as-is (the caller has already classified it); subsequent bytes
    /// are consumed while they are ASCII letters, digits or underscores.
    fn read_identifier(self: *Tokenizer) []const u8 {
        const begin = self.cursor;

        self.cursor += 1;

        while (self.has_next()) switch (self.source[self.cursor]) {
            '0'...'9', 'A'...'Z', 'a'...'z', '_' => self.cursor += 1,
            else => break,
        };

        return self.source[begin..self.cursor];
    }

    /// Consumes a numeric literal whose first digit is at the cursor.
    ///
    /// Digits up to the first `.` form an integer literal. If a `.` follows, it and any digits
    /// after it are consumed as a real literal - including the case where no digits follow it.
    fn read_number(self: *Tokenizer) Token {
        const begin = self.cursor;

        self.cursor += 1;

        while (self.has_next()) switch (self.source[self.cursor]) {
            '0'...'9' => self.cursor += 1,

            '.' => {
                self.cursor += 1;

                while (self.has_next()) switch (self.source[self.cursor]) {
                    '0'...'9' => self.cursor += 1,
                    else => break,
                };

                return Token{ .real_literal = self.source[begin..self.cursor] };
            },

            else => break,
        };

        return Token{ .integer_literal = self.source[begin..self.cursor] };
    }

    /// Consumes double-quoted text whose opening quote is at the cursor and returns the bytes
    /// between the quotes (possibly empty).
    ///
    /// If the closing quote is missing, everything up to the end of `source` is returned and the
    /// cursor stops at the end of `source`. No escape sequences are interpreted.
    fn read_quoted(self: *Tokenizer) []const u8 {
        // Step over the opening quote.
        self.cursor += 1;

        const begin = self.cursor;

        while (self.has_next() and self.source[self.cursor] != '"') self.cursor += 1;

        const quoted = self.source[begin..self.cursor];

        // Step over the closing quote only when there is one.
        if (self.has_next()) self.cursor += 1;

        return quoted;
    }
};