const coral = @import("coral");

/// A single lexical unit, as produced by `Tokenizer.next`.
///
/// Slice payloads created by `Tokenizer` are views into the tokenizer's source buffer; nothing is copied.
pub const Token = union(enum) {
    /// A byte that starts no recognized token.
    unknown: u8,
    newline,
    /// Name following an `@`, either bare (`@name`) or quoted (`@"any text"`); sigil and quotes excluded.
    global_identifier: []const u8,
    local_identifier: []const u8,

    symbol_assign,
    symbol_plus,
    symbol_dash,
    symbol_asterisk,
    symbol_forward_slash,
    symbol_paren_left,
    symbol_paren_right,
    symbol_bang,
    symbol_comma,
    symbol_at,
    symbol_brace_left,
    symbol_brace_right,
    symbol_bracket_left,
    symbol_bracket_right,
    symbol_period,

    integer_literal: []const u8,
    real_literal: []const u8,
    /// Contents of a double-quoted string, quotes excluded.
    string_literal: []const u8,

    keyword_nil,
    keyword_false,
    keyword_true,
    keyword_return,
    keyword_self,

    pub const ExpectError = error{
        UnexpectedToken,
    };

    /// Every byte value stored at its own index, built at compile time.
    ///
    /// Gives `text` statically-allocated storage to slice for `.unknown` tokens, so the returned slice never refers
    /// to memory local to the call.
    const single_bytes = blk: {
        var table: [256]u8 = undefined;
        var byte: u8 = 0;

        while (true) {
            table[byte] = byte;

            if (byte == 255) break;

            byte += 1;
        }

        break :blk table;
    };

    /// Returns `error.UnexpectedToken` unless the active variant of `self` is `tag`.
    pub fn expect(self: Token, tag: coral.io.Tag(Token)) ExpectError!void {
        if (self != tag) return error.UnexpectedToken;
    }

    /// Returns `error.UnexpectedToken` unless the active variant of `self` is one of `tags`.
    ///
    /// An empty `tags` slice therefore always fails.
    pub fn expect_any(self: Token, tags: []const coral.io.Tag(Token)) ExpectError!void {
        for (tags) |tag| {
            if (self == tag) return;
        }

        return error.UnexpectedToken;
    }

    /// Returns a textual form of `self`: the payload for identifiers and literals, the fixed spelling for symbols
    /// and keywords, the word "newline" for `.newline`, and the offending byte itself for `.unknown`.
    ///
    /// The result is either the token's own payload slice or statically-allocated data; it is never owned by the
    /// caller and needs no freeing.
    pub fn text(self: Token) []const u8 {
        return switch (self) {
            // Slice the static table rather than taking the address of the by-value capture.
            .unknown => |unknown| single_bytes[unknown .. @as(usize, unknown) + 1],
            .newline => "newline",
            .global_identifier => |identifier| identifier,
            .local_identifier => |identifier| identifier,

            .symbol_assign => "=",
            .symbol_plus => "+",
            .symbol_dash => "-",
            .symbol_asterisk => "*",
            .symbol_forward_slash => "/",
            .symbol_paren_left => "(",
            .symbol_paren_right => ")",
            .symbol_bang => "!",
            .symbol_comma => ",",
            .symbol_at => "@",
            .symbol_brace_left => "{",
            .symbol_brace_right => "}",
            .symbol_bracket_left => "[",
            .symbol_bracket_right => "]",
            .symbol_period => ".",

            .integer_literal => |literal| literal,
            .real_literal => |literal| literal,
            .string_literal => |literal| literal,

            .keyword_nil => "nil",
            .keyword_false => "false",
            .keyword_true => "true",
            .keyword_return => "return",
            .keyword_self => "self",
        };
    }
};

/// Splits `source` into `Token`s on demand, tracking progress in `cursor`.
pub const Tokenizer = struct {
    source: []const u8,
    cursor: usize = 0,

    /// Returns `true` while there are unread bytes left in `source`.
    ///
    /// Trailing spaces and tabs count as unread bytes, so `next` may still return `null` after this returns `true`.
    pub fn has_next(self: Tokenizer) bool {
        return self.cursor < self.source.len;
    }

    /// Returns `true` if `byte` may appear after the first byte of an identifier.
    fn is_identifier_tail(byte: u8) bool {
        return switch (byte) {
            '0'...'9', 'A'...'Z', 'a'...'z', '_' => true,
            else => false,
        };
    }

    /// Returns `true` only if `identifier` is exactly `keyword`.
    ///
    /// Equal length plus a matching suffix is equality, so a name that merely ends like a keyword ("shelf", "nail")
    /// is not mistaken for one. NOTE(review): relies on `coral.io.ends_with` being a plain byte-wise suffix test.
    fn is_keyword(identifier: []const u8, keyword: []const u8) bool {
        return identifier.len == keyword.len and coral.io.ends_with(identifier, keyword);
    }

    /// Advances the cursor past every consecutive identifier-tail byte.
    fn skip_identifier_tail(self: *Tokenizer) void {
        while (self.has_next() and is_identifier_tail(self.source[self.cursor])) self.cursor += 1;
    }

    /// Consumes a double-quoted span and returns what lies between the quotes.
    ///
    /// Must be called with the cursor on the opening quote. If the source ends before a closing quote is found, the
    /// rest of the source is returned and the cursor stops at the end of the source.
    fn read_quoted(self: *Tokenizer) []const u8 {
        // Step over the opening quote.
        self.cursor += 1;

        const begin = self.cursor;

        while (self.has_next() and self.source[self.cursor] != '"') self.cursor += 1;

        const contents = self.source[begin..self.cursor];

        // Only a closing quote that is actually present gets skipped, keeping the cursor within bounds.
        if (self.has_next()) self.cursor += 1;

        return contents;
    }

    /// Consumes and returns the next token, or `null` once only spaces and tabs (or nothing) remain.
    ///
    /// Spaces and tabs are skipped; any other unrecognized byte is returned as `Token.unknown`.
    pub fn next(self: *Tokenizer) ?Token {
        while (self.has_next()) switch (self.source[self.cursor]) {
            ' ', '\t' => self.cursor += 1,

            '\n' => {
                self.cursor += 1;

                return .newline;
            },

            '0'...'9' => {
                const begin = self.cursor;

                self.cursor += 1;

                while (self.has_next()) switch (self.source[self.cursor]) {
                    '0'...'9' => self.cursor += 1,

                    '.' => {
                        // The first period turns the number into a real; take the digits that follow it.
                        self.cursor += 1;

                        while (self.has_next()) switch (self.source[self.cursor]) {
                            '0'...'9' => self.cursor += 1,
                            else => break,
                        };

                        return Token{ .real_literal = self.source[begin..self.cursor] };
                    },

                    else => break,
                };

                return Token{ .integer_literal = self.source[begin..self.cursor] };
            },

            'A'...'Z', 'a'...'z', '_' => {
                const begin = self.cursor;

                self.cursor += 1;
                self.skip_identifier_tail();

                const identifier = self.source[begin..self.cursor];

                coral.debug.assert(identifier.len != 0);

                // Dispatch on the first byte so at most one keyword comparison runs per identifier.
                switch (identifier[0]) {
                    'n' => if (is_keyword(identifier, "nil")) return .keyword_nil,
                    'f' => if (is_keyword(identifier, "false")) return .keyword_false,
                    't' => if (is_keyword(identifier, "true")) return .keyword_true,
                    'r' => if (is_keyword(identifier, "return")) return .keyword_return,
                    's' => if (is_keyword(identifier, "self")) return .keyword_self,
                    else => {},
                }

                return Token{ .local_identifier = identifier };
            },

            '@' => {
                self.cursor += 1;

                if (self.has_next()) switch (self.source[self.cursor]) {
                    'A'...'Z', 'a'...'z', '_' => {
                        const begin = self.cursor;

                        self.cursor += 1;
                        self.skip_identifier_tail();

                        return Token{ .global_identifier = self.source[begin..self.cursor] };
                    },

                    '"' => return Token{ .global_identifier = self.read_quoted() },

                    else => {},
                };

                // An `@` followed by anything else is a symbol in its own right.
                return .symbol_at;
            },

            '"' => return Token{ .string_literal = self.read_quoted() },

            '{' => {
                self.cursor += 1;

                return .symbol_brace_left;
            },

            '}' => {
                self.cursor += 1;

                return .symbol_brace_right;
            },

            ',' => {
                self.cursor += 1;

                return .symbol_comma;
            },

            '!' => {
                self.cursor += 1;

                return .symbol_bang;
            },

            ')' => {
                self.cursor += 1;

                return .symbol_paren_right;
            },

            '(' => {
                self.cursor += 1;

                return .symbol_paren_left;
            },

            '/' => {
                self.cursor += 1;

                return .symbol_forward_slash;
            },

            '*' => {
                self.cursor += 1;

                return .symbol_asterisk;
            },

            '-' => {
                self.cursor += 1;

                return .symbol_dash;
            },

            '+' => {
                self.cursor += 1;

                return .symbol_plus;
            },

            '=' => {
                self.cursor += 1;

                return .symbol_assign;
            },

            '.' => {
                self.cursor += 1;

                return .symbol_period;
            },

            else => {
                const byte = self.source[self.cursor];

                self.cursor += 1;

                return Token{ .unknown = byte };
            },
        };

        return null;
    }
};