From 440d63447f8da3c8622512b693a63fed5b5fc96a Mon Sep 17 00:00:00 2001 From: kayomn Date: Fri, 14 Jul 2023 20:00:11 +0100 Subject: [PATCH] Hotfix parser and runtime bugs --- source/ona/kym.zig | 32 ++++--- source/ona/kym/Ast.zig | 185 ++++++++++++++++++++------------------ source/ona/kym/State.zig | 14 ++- source/ona/kym/tokens.zig | 166 ++++++++++++++++++---------------- 4 files changed, 214 insertions(+), 183 deletions(-) diff --git a/source/ona/kym.zig b/source/ona/kym.zig index 5e7825b..e535e49 100644 --- a/source/ona/kym.zig +++ b/source/ona/kym.zig @@ -46,17 +46,21 @@ const Compiler = struct { } fn resolve(self: *Self, local_identifier: []const coral.io.Byte) ?u8 { - var index = @as(u8, self.count); + if (self.count == 0) { + return null; + } - while (index != 0) { - index -= 1; + var index = @as(u8, self.count - 1); + while (true) : (index -= 1) { if (coral.io.equals(local_identifier, self.buffer[index])) { return index; } - } - return null; + if (index == 0) { + return null; + } + } } }, @@ -350,64 +354,64 @@ pub const RuntimeEnv = struct { }, .add => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(.{.number = lhs_number + rhs_number}); }, .sub => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(.{.number = lhs_number - rhs_number}); }, .mul => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(.{.number = lhs_number * rhs_number}); }, .div => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(.{.number = lhs_number / rhs_number}); }, .eql => { - const lhs = try self.state.pop_value(); const rhs = try self.state.pop_value(); + const lhs = try self.state.pop_value(); try self.state.push_value(if (lhs.equals(rhs)) .true else .false); }, .cgt => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(if (lhs_number > rhs_number) .true else .false); }, .clt => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(if (lhs_number < rhs_number) .true else .false); }, .cge => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(if (lhs_number >= rhs_number) .true else .false); }, .cle => { - const lhs_number = try to_number(self, try self.state.pop_value()); const rhs_number = try to_number(self, try self.state.pop_value()); + const lhs_number = try to_number(self, try self.state.pop_value()); try self.state.push_value(if (lhs_number <= rhs_number) .true else .false); }, diff --git a/source/ona/kym/Ast.zig b/source/ona/kym/Ast.zig index 82829b3..cf1bf71 100755 --- a/source/ona/kym/Ast.zig +++ b/source/ona/kym/Ast.zig @@ -99,10 +99,13 @@ fn binary_operation_parser(comptime parse_next: ExpressionParser, comptime opera inline for (operators) |operator| { const token = comptime operator.token(); - if (tokenizer.current_token == coral.io.tag_of(token)) { - try self.check_syntax( - tokenizer.step(.{.include_newlines = true}), - "expected right-hand side of expression after `" ++ comptime token.text() ++ "`"); + if (tokenizer.is_token(coral.io.tag_of(token))) { + tokenizer.step(); + + if (tokenizer.token == null) { + return self.raise( + "expected right-hand side of expression after `" ++ comptime token.text() ++ "`"); + } expression = .{ .binary_operation = .{ @@ -120,20 +123,6 @@ fn binary_operation_parser(comptime parse_next: ExpressionParser, comptime opera }.parse; } -fn check_syntax(self: *Self, condition: bool, message: []const u8) ParseError!void { - if (condition) { - return; - } - - return self.fail_syntax(message); -} - -fn fail_syntax(self: *Self, message: []const u8) ParseError { - self.error_message = message; - - return error.BadSyntax; -} - pub fn free(self: *Self) void { self.arena.free(); self.statements.free(); @@ -148,6 +137,12 @@ pub fn make(allocator: coral.io.Allocator) Self { }; } +fn raise(self: *Self, message: []const u8) ParseError { + self.error_message = message; + + return error.BadSyntax; +} + pub fn list_statements(self: Self) []const Statement { return self.statements.values; } @@ -157,22 +152,24 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void { var has_returned = false; - while (tokenizer.step(.{.include_newlines = false})) { - switch (tokenizer.current_token) { + while (true) { + tokenizer.skip(.newline); + + switch (tokenizer.token orelse return) { .keyword_return => { - try self.check_syntax(!has_returned, "multiple returns in function scope but expected only one"); + if (has_returned) { + return self.raise("multiple returns in function scope but expected only one"); + } try self.statements.push_one(get_statement: { - if (tokenizer.step(.{.include_newlines = true})) { - if (tokenizer.current_token != .newline) { - break: get_statement .{.return_expression = try self.parse_expression(tokenizer)}; - } + tokenizer.step(); - if (tokenizer.step(.{.include_newlines = true})) { - try self.check_syntax( - tokenizer.current_token == .newline, - "expected end of declaration after return expression"); - } + if (!tokenizer.is_token_null_or(.newline)) { + break: get_statement .{.return_expression = try self.parse_expression(tokenizer)}; + } + + if (!tokenizer.is_token_null_or(.newline)) { + return self.raise("unexpected token after return"); } break: get_statement .return_nothing; @@ -182,33 +179,37 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void { }, .local => |identifier| { - try self.check_syntax(tokenizer.step(.{.include_newlines = true}), "statement has no effect"); + tokenizer.step(); + + const no_effect_message = "statement has no effect"; + + switch (tokenizer.token orelse return self.raise(no_effect_message)) { + .newline => return self.raise(no_effect_message), - switch (tokenizer.current_token) { .symbol_equals => { - try self.check_syntax( - tokenizer.step(.{.include_newlines = true}), - "expected expression after `=`"); + tokenizer.step(); + + if (tokenizer.token == null) { + return self.raise("expected expression after `=`"); + } try self.statements.push_one(.{ .set_local = .{ + .expression = try self.parse_expression(tokenizer), .identifier = identifier, - .expression = try self.parse_expression(tokenizer) - } + }, }); - if (tokenizer.step(.{.include_newlines = true})) { - try self.check_syntax( - tokenizer.current_token == .newline, - "expected end of declaration after variable assignment"); + if (!tokenizer.is_token_null_or(.newline)) { + return self.raise("unexpected token after assignment"); } }, - else => return self.fail_syntax("expected `=` after local"), + else => return self.raise("expected `=` after local"), } }, - else => return self.fail_syntax("invalid statement"), + else => return self.raise("invalid statement"), } } } @@ -230,111 +231,125 @@ const parse_expression = binary_operation_parser(parse_equality, &.{ }); fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression { - switch (tokenizer.current_token) { + const allocator = self.arena.as_allocator(); + + switch (tokenizer.token orelse return self.raise("expected operand after operator")) { .symbol_paren_left => { - try self.check_syntax(tokenizer.step(.{.include_newlines = false}), "expected an expression after `(`"); + tokenizer.skip(.newline); + + if (tokenizer.token == null) { + return self.raise("expected an expression after `(`"); + } const expression = try self.parse_expression(tokenizer); - try self.check_syntax( - tokenizer.step(.{.include_newlines = false}) and tokenizer.current_token == .symbol_paren_right, - "expected a closing `)` after expression"); + if (!tokenizer.is_token(.symbol_paren_right)) { + return self.raise("expected a closing `)` after expression"); + } - return Expression{.grouped_expression = try coral.io.allocate_one(self.arena.as_allocator(), expression)}; + tokenizer.step(); + + return Expression{.grouped_expression = try coral.io.allocate_one(allocator, expression)}; }, .number => |value| { + tokenizer.step(); + return Expression{.number_literal = value}; }, .string => |value| { + tokenizer.step(); + return Expression{.string_literal = value}; }, .local => |identifier| { + tokenizer.step(); + return Expression{.get_local = identifier}; }, .symbol_brace_left => { - try self.check_syntax(tokenizer.step(.{.include_newlines = false}), "unexpected end of table literal"); + var table_fields = Expression.NamedList.make(allocator); - var expression = Expression{.table_literal = Expression.NamedList.make(self.arena.as_allocator())}; - - coral.debug.assert(expression == .table_literal); + tokenizer.skip(.newline); while (true) { - switch (tokenizer.current_token) { + switch (tokenizer.token orelse return self.raise("unexpected end of table literal")) { .symbol_brace_right => { - _ = tokenizer.step(.{.include_newlines = false}); + tokenizer.step(); - return expression; + return Expression{.table_literal = table_fields}; }, .local => |identifier| { - try self.check_syntax( - tokenizer.step(.{.include_newlines = false}) and tokenizer.current_token == .symbol_equals, - "expected `=` after identifier"); + tokenizer.skip(.newline); - try self.check_syntax(tokenizer.step(.{.include_newlines = false}), "unexpected end after `=`"); + if (!tokenizer.is_token(.symbol_equals)) { + return self.raise("expected `=` after identifier"); + } - try expression.table_literal.push_one(.{ - .identifier = identifier, + tokenizer.skip(.newline); + + if (tokenizer.token == null) { + return self.raise("unexpected end after `=`"); + } + + try table_fields.push_one(.{ .expression = try self.parse_expression(tokenizer), + .identifier = identifier, }); - try self.check_syntax(tokenizer.step(.{.include_newlines = false}), "unexpected end of table"); - - switch (tokenizer.current_token) { - .symbol_comma => _ = tokenizer.step(.{.include_newlines = false}), + switch (tokenizer.token orelse return self.raise("unexpected end of table")) { + .symbol_comma => tokenizer.skip(.newline), .symbol_brace_right => { - _ = tokenizer.step(.{.include_newlines = false}); + tokenizer.step(); - return expression; + return Expression{.table_literal = table_fields}; }, - else => return self.fail_syntax("expected `,` or `}` after expression"), + else => return self.raise("expected `,` or `}` after expression"), } }, - else => return self.fail_syntax("expected `}` or fields in table literal"), + else => return self.raise("expected `}` or fields in table literal"), } } }, .symbol_minus => { - try self.check_syntax( - tokenizer.step(.{.include_newlines = false}), - "expected expression after numeric negation (`-`)"); + tokenizer.skip(.newline); + + if (tokenizer.token == null) { + return self.raise("expected expression after numeric negation (`-`)"); + } return Expression{ .unary_operation = .{ - .expression = try coral.io.allocate_one( - self.arena.as_allocator(), - try self.parse_factor(tokenizer)), - + .expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)), .operator = .numeric_negation, }, }; }, .symbol_bang => { - try self.check_syntax( - tokenizer.step(.{.include_newlines = false}), - "expected expression after numeric negation (`!`)"); + tokenizer.skip(.newline); + + if (tokenizer.token == null) { + return self.raise("expected expression after boolean negation (`!`)"); + } return Expression{ .unary_operation = .{ - .expression = try coral.io.allocate_one( - self.arena.as_allocator(), - try self.parse_factor(tokenizer)), - + .expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)), .operator = .boolean_negation, }, }; }, - else => return self.fail_syntax("unexpected token in expression"), + else => return self.raise("unexpected token in expression"), } } diff --git a/source/ona/kym/State.zig b/source/ona/kym/State.zig index 7f2688a..7f29d16 100644 --- a/source/ona/kym/State.zig +++ b/source/ona/kym/State.zig @@ -100,12 +100,8 @@ pub fn free(self: *Self) void { self.interned.free(); } -pub fn get_value(self: *Self, tail_index: usize) Variant { - if (tail_index >= self.values.values.len) { - return .nil; - } - - return self.values.values[self.values.values.len - (1 + tail_index)]; +pub fn get_value(self: *Self, index: u8) Variant { + return if (index < self.values.values.len) self.values.values[index] else .nil; } pub fn make(allocator: coral.io.Allocator) Self { @@ -141,12 +137,12 @@ pub fn release(self: *Self, object: *Object) void { } } -pub fn set_value(self: *Self, tail_index: usize, value: Variant) bool { - if (tail_index >= self.values.values.len) { +pub fn set_value(self: *Self, index: u8, value: Variant) bool { + if (index >= self.values.values.len) { return false; } - self.values.values[self.values.values.len - (1 + tail_index)] = value; + self.values.values[index] = value; return true; } diff --git a/source/ona/kym/tokens.zig b/source/ona/kym/tokens.zig index a04cf80..43f4d2c 100755 --- a/source/ona/kym/tokens.zig +++ b/source/ona/kym/tokens.zig @@ -85,13 +85,31 @@ pub const Token = union(enum) { pub const Tokenizer = struct { source: []const u8, lines_stepped: usize = 1, - current_token: Token = .{.unknown = 0}, + token: ?Token = null, - const StepOptions = struct { - include_newlines: bool, - }; + const TokenTag = coral.io.Tag(Token); - pub fn step(self: *Tokenizer, options: StepOptions) bool { + pub fn is_token(self: *Tokenizer, token_tag: TokenTag) bool { + return if (self.token) |token| token == token_tag else false; + } + + pub fn is_token_null_or(self: *Tokenizer, token_tag: TokenTag) bool { + return if (self.token) |token| token == token_tag else true; + } + + pub fn skip(self: *Tokenizer, skip_token_tag: TokenTag) void { + self.step(); + + while (self.token) |token| { + if (token != skip_token_tag) { + return; + } + + self.step(); + } + } + + pub fn step(self: *Tokenizer) void { var cursor = @as(usize, 0); defer self.source = self.source[cursor ..]; @@ -110,12 +128,10 @@ pub const Tokenizer = struct { '\n' => { cursor += 1; - self.current_token = .newline; + self.token = .newline; self.lines_stepped += 1; - if (options.include_newlines) { - return true; - } + return; }, '0' ... '9' => { @@ -134,17 +150,17 @@ pub const Tokenizer = struct { else => break, }; - self.current_token = .{.number = self.source[begin .. cursor]}; + self.token = .{.number = self.source[begin .. cursor]}; - return true; + return; }, else => break, }; - self.current_token = .{.number = self.source[begin .. cursor]}; + self.token = .{.number = self.source[begin .. cursor]}; - return true; + return; }, 'A' ... 'Z', 'a' ... 'z', '_' => { @@ -163,47 +179,47 @@ pub const Tokenizer = struct { switch (identifier[0]) { 'c' => if (coral.io.ends_with(identifier, "onst")) { - self.current_token = .keyword_const; + self.token = .keyword_const; - return true; + return; }, 'n' => if (coral.io.ends_with(identifier, "il")) { - self.current_token = .keyword_nil; + self.token = .keyword_nil; - return true; + return; }, 'f' => if (coral.io.ends_with(identifier, "alse")) { - self.current_token = .keyword_false; + self.token = .keyword_false; - return true; + return; }, 't' => if (coral.io.ends_with(identifier, "rue")) { - self.current_token = .keyword_true; + self.token = .keyword_true; - return true; + return; }, 'r' => if (coral.io.ends_with(identifier, "eturn")) { - self.current_token = .keyword_return; + self.token = .keyword_return; - return true; + return; }, 's' => if (coral.io.ends_with(identifier, "elf")) { - self.current_token = .keyword_self; + self.token = .keyword_self; - return true; + return; }, else => {}, } - self.current_token = .{.local = identifier}; + self.token = .{.local = identifier}; - return true; + return; }, '@' => { @@ -220,9 +236,9 @@ pub const Tokenizer = struct { else => break, }; - self.current_token = .{.global = self.source[begin .. cursor]}; + self.token = .{.global = self.source[begin .. cursor]}; - return true; + return; }, '"' => { @@ -237,18 +253,18 @@ pub const Tokenizer = struct { else => cursor += 1, }; - self.current_token = .{.global = self.source[begin .. cursor]}; + self.token = .{.global = self.source[begin .. cursor]}; cursor += 1; - return true; + return; }, else => {}, }; - self.current_token = .symbol_at; + self.token = .symbol_at; - return true; + return; }, '"' => { @@ -263,80 +279,80 @@ pub const Tokenizer = struct { else => cursor += 1, }; - self.current_token = .{.string = self.source[begin .. cursor]}; + self.token = .{.string = self.source[begin .. cursor]}; cursor += 1; - return true; + return; }, '{' => { - self.current_token = .symbol_brace_left; + self.token = .symbol_brace_left; cursor += 1; - return true; + return; }, '}' => { - self.current_token = .symbol_brace_right; + self.token = .symbol_brace_right; cursor += 1; - return true; + return; }, ',' => { - self.current_token = .symbol_comma; + self.token = .symbol_comma; cursor += 1; - return true; + return; }, '!' => { - self.current_token = .symbol_bang; + self.token = .symbol_bang; cursor += 1; - return true; + return; }, ')' => { - self.current_token = .symbol_paren_right; + self.token = .symbol_paren_right; cursor += 1; - return true; + return; }, '(' => { - self.current_token = .symbol_paren_left; + self.token = .symbol_paren_left; cursor += 1; - return true; + return; }, '/' => { - self.current_token = .symbol_forward_slash; + self.token = .symbol_forward_slash; cursor += 1; - return true; + return; }, '*' => { - self.current_token = .symbol_asterisk; + self.token = .symbol_asterisk; cursor += 1; - return true; + return; }, '-' => { - self.current_token = .symbol_minus; + self.token = .symbol_minus; cursor += 1; - return true; + return; }, '+' => { - self.current_token = .symbol_plus; + self.token = .symbol_plus; cursor += 1; - return true; + return; }, '=' => { @@ -346,25 +362,25 @@ pub const Tokenizer = struct { switch (self.source[cursor]) { '=' => { cursor += 1; - self.current_token = .symbol_double_equals; + self.token = .symbol_double_equals; - return true; + return; }, '>' => { cursor += 1; - self.current_token = .symbol_lambda; + self.token = .symbol_lambda; - return true; + return; }, else => {}, } } - self.current_token = .symbol_equals; + self.token = .symbol_equals; - return true; + return; }, '<' => { @@ -372,14 +388,14 @@ pub const Tokenizer = struct { if (cursor < self.source.len and (self.source[cursor] == '=')) { cursor += 1; - self.current_token = .symbol_less_equals; + self.token = .symbol_less_equals; - return true; + return; } - self.current_token = .symbol_less_than; + self.token = .symbol_less_than; - return true; + return; }, '>' => { @@ -387,34 +403,34 @@ pub const Tokenizer = struct { if (cursor < self.source.len and (self.source[cursor] == '=')) { cursor += 1; - self.current_token = .symbol_greater_equals; + self.token = .symbol_greater_equals; - return true; + return; } - self.current_token = .symbol_greater_than; + self.token = .symbol_greater_than; - return true; + return; }, '.' => { - self.current_token = .symbol_period; + self.token = .symbol_period; cursor += 1; - return true; + return; }, else => { - self.current_token = .{.unknown = self.source[cursor]}; + self.token = .{.unknown = self.source[cursor]}; cursor += 1; - return true; + return; }, } } - self.current_token = .newline; + self.token = null; - return false; + return; } };