2023-04-19 01:25:35 +02:00
|
|
|
const coral = @import("coral");
|
|
|
|
|
|
|
|
///
/// Smallest unit of syntax that `Tokenizer.next` hands out.
///
/// Variants carrying a `[]const u8` payload borrow that slice from whatever buffer it was cut from;
/// the token does not own or copy the bytes.
///
pub const Token = union(enum) {
    /// A single byte that does not start any recognized token.
    unknown: u8,

    /// A line feed.
    newline,

    /// Identifier text that followed an `@` in the source.
    global_identifier: []const u8,

    /// Identifier text that is not one of the keywords below.
    local_identifier: []const u8,

    symbol_assign,
    symbol_plus,
    symbol_dash,
    symbol_asterisk,
    symbol_forward_slash,
    symbol_paren_left,
    symbol_paren_right,
    symbol_bang,
    symbol_comma,
    symbol_at,
    symbol_brace_left,
    symbol_brace_right,
    symbol_bracket_left,
    symbol_bracket_right,
    symbol_period,

    /// Digits only, still in textual form.
    integer_literal: []const u8,

    /// Digits containing a decimal point, still in textual form.
    real_literal: []const u8,

    /// Text of a string literal.
    string_literal: []const u8,

    keyword_nil,
    keyword_false,
    keyword_true,
    keyword_return,
    keyword_self,

    ///
    /// Failure reported by [expect] and [expect_any].
    ///
    pub const ExpectError = error {
        UnexpectedToken,
    };

    ///
    /// Every byte value stored at the index equal to that value.
    ///
    /// [text] slices into this table for `.unknown` tokens so that the returned slice refers to
    /// static storage instead of a temporary copy of the payload.
    ///
    const byte_values = init: {
        var bytes: [256]u8 = undefined;
        var value: u8 = 0;

        while (true) {
            bytes[value] = value;

            // Checked before incrementing so that `value` never overflows past 255.
            if (value == 255) break;

            value += 1;
        }

        break :init bytes;
    };

    ///
    /// Checks that the active variant of `self` is `tag`.
    ///
    /// Returns [ExpectError.UnexpectedToken] when it is not.
    ///
    pub fn expect(self: Token, tag: coral.io.Tag(Token)) ExpectError!void {
        if (self != tag) return error.UnexpectedToken;
    }

    ///
    /// Checks that the active variant of `self` is one of `tags`.
    ///
    /// Returns [ExpectError.UnexpectedToken] when none of them match, which includes the case of
    /// `tags` being empty.
    ///
    pub fn expect_any(self: Token, tags: []const coral.io.Tag(Token)) ExpectError!void {
        for (tags) |tag| {
            if (self == tag) return;
        }

        return error.UnexpectedToken;
    }

    ///
    /// Returns a textual form of `self`.
    ///
    /// Identifier and literal variants return their payload slice unchanged. Symbols and keywords
    /// return their fixed spelling. `.newline` returns the word `newline` rather than a line feed.
    /// `.unknown` returns a one-byte slice holding the offending byte; that slice points at static
    /// storage and stays valid for the lifetime of the program.
    ///
    pub fn text(self: Token) []const u8 {
        return switch (self) {
            // Taking the address of the captured payload here would yield a pointer into this
            // function's own frame, so the byte is looked up in a static table instead.
            .unknown => |unknown| byte_values[unknown .. @as(usize, unknown) + 1],
            .newline => "newline",
            .global_identifier => |identifier| identifier,
            .local_identifier => |identifier| identifier,

            .symbol_assign => "=",
            .symbol_plus => "+",
            .symbol_dash => "-",
            .symbol_asterisk => "*",
            .symbol_forward_slash => "/",
            .symbol_paren_left => "(",
            .symbol_paren_right => ")",
            .symbol_bang => "!",
            .symbol_comma => ",",
            .symbol_at => "@",
            .symbol_brace_left => "{",
            .symbol_brace_right => "}",
            .symbol_bracket_left => "[",
            .symbol_bracket_right => "]",
            .symbol_period => ".",

            .integer_literal => |literal| literal,
            .real_literal => |literal| literal,
            .string_literal => |literal| literal,

            .keyword_nil => "nil",
            .keyword_false => "false",
            .keyword_true => "true",
            .keyword_return => "return",
            .keyword_self => "self",
        };
    }
};
|
|
|
|
|
|
|
|
///
/// Cuts `source` into [Token]s one call at a time.
///
/// Slices carried by the returned tokens point into `source`, so `source` has to stay alive for as
/// long as those tokens are in use.
///
pub const Tokenizer = struct {
    /// Text being tokenized.
    source: []const u8,

    /// Index into `source` of the next unread byte.
    cursor: usize = 0,

    ///
    /// Returns `true` while unread bytes remain in the source.
    ///
    pub fn has_next(self: Tokenizer) bool {
        return self.cursor < self.source.len;
    }

    ///
    /// Consumes an identifier whose first byte is at the cursor and returns its text.
    ///
    /// The caller has already checked that the first byte may start an identifier, so it is taken
    /// without being inspected again.
    ///
    fn identifier_text(self: *Tokenizer) []const u8 {
        const begin = self.cursor;

        self.cursor += 1;

        while (self.has_next()) switch (self.source[self.cursor]) {
            '0'...'9', 'A'...'Z', 'a'...'z', '_' => self.cursor += 1,
            else => break,
        };

        return self.source[begin..self.cursor];
    }

    ///
    /// Consumes a double-quoted run whose opening quote is at the cursor and returns the bytes found
    /// between the quotes, which may be none at all.
    ///
    /// When the source ends before a closing quote is found, everything up to the end of the source
    /// is returned.
    ///
    fn quoted_text(self: *Tokenizer) []const u8 {
        // Step over the opening quote only; the byte after it is examined by the loop below so that
        // an immediately following closing quote is recognized.
        self.cursor += 1;

        const begin = self.cursor;

        while (self.has_next() and self.source[self.cursor] != '"') self.cursor += 1;

        const contents = self.source[begin..self.cursor];

        // Step over the closing quote, which is missing when the source ran out first.
        if (self.has_next()) self.cursor += 1;

        return contents;
    }

    ///
    /// Reads the next token from the source and moves the cursor past it.
    ///
    /// Spaces and tabs are skipped without producing a token. Returns `null` once nothing but
    /// skippable bytes remain.
    ///
    pub fn next(self: *Tokenizer) ?Token {
        while (self.has_next()) switch (self.source[self.cursor]) {
            ' ', '\t' => self.cursor += 1,

            '\n' => {
                self.cursor += 1;

                return .newline;
            },

            '0' ... '9' => {
                const begin = self.cursor;

                self.cursor += 1;

                while (self.has_next()) switch (self.source[self.cursor]) {
                    '0' ... '9' => self.cursor += 1,

                    '.' => {
                        // The first period turns the number into a real literal, even when no
                        // digits follow it.
                        self.cursor += 1;

                        while (self.has_next()) switch (self.source[self.cursor]) {
                            '0' ... '9' => self.cursor += 1,
                            else => break,
                        };

                        return Token{.real_literal = self.source[begin .. self.cursor]};
                    },

                    else => break,
                };

                return Token{.integer_literal = self.source[begin .. self.cursor]};
            },

            'A' ... 'Z', 'a' ... 'z', '_' => {
                const identifier = self.identifier_text();

                coral.debug.assert(identifier.len != 0);

                // A keyword has to match in full: the first byte selects the candidate, and the
                // length check together with the suffix check pins down every remaining byte.
                // Without the length check, names such as `nail` or `shelf` would be mistaken for
                // `nil` and `self`.
                switch (identifier[0]) {
                    'n' => if (identifier.len == "nil".len and
                        coral.io.ends_with(identifier, "il")) return .keyword_nil,

                    'f' => if (identifier.len == "false".len and
                        coral.io.ends_with(identifier, "alse")) return .keyword_false,

                    't' => if (identifier.len == "true".len and
                        coral.io.ends_with(identifier, "rue")) return .keyword_true,

                    'r' => if (identifier.len == "return".len and
                        coral.io.ends_with(identifier, "eturn")) return .keyword_return,

                    's' => if (identifier.len == "self".len and
                        coral.io.ends_with(identifier, "elf")) return .keyword_self,

                    else => {},
                }

                return Token{.local_identifier = identifier};
            },

            '@' => {
                self.cursor += 1;

                // `@name` and `@"any text"` both name a global; a bare `@` is a symbol on its own.
                if (self.has_next()) switch (self.source[self.cursor]) {
                    'A'...'Z', 'a'...'z', '_' =>
                        return Token{.global_identifier = self.identifier_text()},

                    '"' => return Token{.global_identifier = self.quoted_text()},
                    else => {},
                };

                return .symbol_at;
            },

            '"' => return Token{.string_literal = self.quoted_text()},

            '{' => {
                self.cursor += 1;

                return .symbol_brace_left;
            },

            '}' => {
                self.cursor += 1;

                return .symbol_brace_right;
            },

            '[' => {
                self.cursor += 1;

                return .symbol_bracket_left;
            },

            ']' => {
                self.cursor += 1;

                return .symbol_bracket_right;
            },

            ',' => {
                self.cursor += 1;

                return .symbol_comma;
            },

            '!' => {
                self.cursor += 1;

                return .symbol_bang;
            },

            ')' => {
                self.cursor += 1;

                return .symbol_paren_right;
            },

            '(' => {
                self.cursor += 1;

                return .symbol_paren_left;
            },

            '/' => {
                self.cursor += 1;

                return .symbol_forward_slash;
            },

            '*' => {
                self.cursor += 1;

                return .symbol_asterisk;
            },

            '-' => {
                self.cursor += 1;

                return .symbol_dash;
            },

            '+' => {
                self.cursor += 1;

                return .symbol_plus;
            },

            '=' => {
                self.cursor += 1;

                return .symbol_assign;
            },

            '.' => {
                self.cursor += 1;

                return .symbol_period;
            },

            else => {
                // The byte is read for the return value first; the deferred increment then moves
                // past it so the following call makes progress.
                defer self.cursor += 1;

                return Token{.unknown = self.source[self.cursor]};
            },
        };

        return null;
    }
};
|