Change KYM tokenizer to be internal to the AST.
continuous-integration/drone/push Build is passing Details
continuous-integration/drone/pr Build is passing Details

This commit is contained in:
kayomn 2023-07-22 15:06:39 +01:00
parent 62a318e69b
commit 51732a9bf5
2 changed files with 93 additions and 97 deletions

View File

@ -8,8 +8,6 @@ const coral = @import("coral");
const file = @import("./file.zig"); const file = @import("./file.zig");
const tokens = @import("./kym/tokens.zig");
pub const Any = union (enum) { pub const Any = union (enum) {
nil, nil,
boolean: bool, boolean: bool,
@ -204,14 +202,10 @@ pub const RuntimeEnv = struct {
defer ast.free(); defer ast.free();
{ ast.parse(data) catch |parse_error| switch (parse_error) {
var tokenizer = tokens.Tokenizer{.source = data}; error.BadSyntax => return self.raise(error.BadSyntax, ast.error_message()),
error.OutOfMemory => return error.OutOfMemory,
ast.parse(&tokenizer) catch |parse_error| switch (parse_error) { };
error.BadSyntax => return self.raise(error.BadSyntax, ast.error_message()),
error.OutOfMemory => return error.OutOfMemory,
};
}
var chunk = Chunk.make(self); var chunk = Chunk.make(self);

View File

@ -7,6 +7,7 @@ allocator: coral.io.Allocator,
arena: coral.arena.Stacking, arena: coral.arena.Stacking,
statements: Statement.List, statements: Statement.List,
error_buffer: coral.list.ByteStack, error_buffer: coral.list.ByteStack,
tokenizer: tokens.Tokenizer,
pub const Expression = union (enum) { pub const Expression = union (enum) {
nil_literal, nil_literal,
@ -68,7 +69,7 @@ pub const Expression = union (enum) {
pub const List = coral.list.Stack(Expression); pub const List = coral.list.Stack(Expression);
}; };
const ExpressionParser = fn (self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression; const ExpressionParser = fn (self: *Self) ParseError!Expression;
pub const ParseError = error { pub const ParseError = error {
OutOfMemory, OutOfMemory,
@ -104,25 +105,25 @@ fn binary_operation_parser(
comptime operators: []const Expression.BinaryOperator) ExpressionParser { comptime operators: []const Expression.BinaryOperator) ExpressionParser {
const BinaryOperationParser = struct { const BinaryOperationParser = struct {
fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression { fn parse(self: *Self) ParseError!Expression {
const allocator = self.arena.as_allocator(); const allocator = self.arena.as_allocator();
var expression = try parse_next(self, tokenizer); var expression = try parse_next(self);
inline for (operators) |operator| { inline for (operators) |operator| {
const token = comptime operator.token(); const token = comptime operator.token();
if (tokenizer.is_token(coral.io.tag_of(token))) { if (self.tokenizer.is_token(coral.io.tag_of(token))) {
tokenizer.step(); self.tokenizer.step();
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "expected other half of expression after `" ++ comptime token.text() ++ "`"); return self.report("expected other half of expression after `" ++ comptime token.text() ++ "`");
} }
expression = .{ expression = .{
.binary_operation = .{ .binary_operation = .{
.operator = operator, .operator = operator,
.lhs_expression = try coral.io.allocate_one(allocator, expression), .lhs_expression = try coral.io.allocate_one(allocator, expression),
.rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self, tokenizer)), .rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self)),
}, },
}; };
} }
@ -150,15 +151,16 @@ pub fn make(allocator: coral.io.Allocator, ast_name: []const coral.io.Byte) Self
.arena = coral.arena.Stacking.make(allocator, 4096), .arena = coral.arena.Stacking.make(allocator, 4096),
.error_buffer = coral.list.ByteStack.make(allocator), .error_buffer = coral.list.ByteStack.make(allocator),
.statements = Statement.List.make(allocator), .statements = Statement.List.make(allocator),
.tokenizer = .{.source = ""},
.allocator = allocator, .allocator = allocator,
.name = ast_name, .name = ast_name,
}; };
} }
fn report(self: *Self, tokenizer: *tokens.Tokenizer, message: []const coral.io.Byte) ParseError { fn report(self: *Self, message: []const coral.io.Byte) ParseError {
coral.utf8.print_formatted(coral.list.stack_as_writer(&self.error_buffer), "{name}@{line}: {message}", .{ coral.utf8.print_formatted(coral.list.stack_as_writer(&self.error_buffer), "{name}@{line}: {message}", .{
.name = self.name, .name = self.name,
.line = tokenizer.lines_stepped, .line = self.tokenizer.lines_stepped,
.message = message, .message = message,
}) catch return error.OutOfMemory; }) catch return error.OutOfMemory;
@ -169,30 +171,30 @@ pub fn list_statements(self: Self) []const Statement {
return self.statements.values; return self.statements.values;
} }
pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void { pub fn parse(self: *Self, data: []const coral.io.Byte) ParseError!void {
self.free(); self.tokenizer = .{.source = data};
const allocator = self.arena.as_allocator(); const allocator = self.arena.as_allocator();
var has_returned = false; var has_returned = false;
while (true) { while (true) {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
switch (tokenizer.token orelse return) { switch (self.tokenizer.token orelse return) {
.keyword_return => { .keyword_return => {
if (has_returned) { if (has_returned) {
return self.report(tokenizer, "multiple returns in function scope but expected only one"); return self.report("multiple returns in function scope but expected only one");
} }
try self.statements.push_one(get_statement: { try self.statements.push_one(get_statement: {
tokenizer.step(); self.tokenizer.step();
if (!tokenizer.is_token_null_or(.newline)) { if (!self.tokenizer.is_token_null_or(.newline)) {
break: get_statement .{.return_expression = try self.parse_expression(tokenizer)}; break: get_statement .{.return_expression = try self.parse_expression()};
} }
if (!tokenizer.is_token_null_or(.newline)) { if (!self.tokenizer.is_token_null_or(.newline)) {
return self.report(tokenizer, "unexpected token after return"); return self.report("unexpected token after return");
} }
break: get_statement .return_nothing; break: get_statement .return_nothing;
@ -202,64 +204,64 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
}, },
.identifier => |identifier| { .identifier => |identifier| {
tokenizer.step(); self.tokenizer.step();
const no_effect_message = "statement has no effect"; const no_effect_message = "statement has no effect";
switch (tokenizer.token orelse return self.report(tokenizer, no_effect_message)) { switch (self.tokenizer.token orelse return self.report(no_effect_message)) {
.newline => return self.report(tokenizer, no_effect_message), .newline => return self.report(no_effect_message),
.symbol_equals => { .symbol_equals => {
tokenizer.step(); self.tokenizer.step();
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "expected expression after `=`"); return self.report("expected expression after `=`");
} }
try self.statements.push_one(.{ try self.statements.push_one(.{
.set_local = .{ .set_local = .{
.expression = try self.parse_expression(tokenizer), .expression = try self.parse_expression(),
.identifier = identifier, .identifier = identifier,
}, },
}); });
if (!tokenizer.is_token_null_or(.newline)) { if (!self.tokenizer.is_token_null_or(.newline)) {
return self.report(tokenizer, "unexpected token after assignment"); return self.report("unexpected token after assignment");
} }
}, },
else => return self.report(tokenizer, "expected `=` after local"), else => return self.report("expected `=` after local"),
} }
}, },
.special_identifier => |identifier| { .special_identifier => |identifier| {
tokenizer.step(); self.tokenizer.step();
const missing_arguments_message = "system call is missing arguments"; const missing_arguments_message = "system call is missing arguments";
switch (tokenizer.token orelse return self.report(tokenizer, missing_arguments_message)) { switch (self.tokenizer.token orelse return self.report(missing_arguments_message)) {
.newline => return self.report(tokenizer, missing_arguments_message), .newline => return self.report(missing_arguments_message),
.symbol_paren_left => { .symbol_paren_left => {
tokenizer.step(); self.tokenizer.step();
var expressions_list = Expression.List.make(allocator); var expressions_list = Expression.List.make(allocator);
while (true) { while (true) {
if (tokenizer.is_token(.symbol_paren_right)) { if (self.tokenizer.is_token(.symbol_paren_right)) {
break; break;
} }
try expressions_list.push_one(try self.parse_expression(tokenizer)); try expressions_list.push_one(try self.parse_expression());
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end after after `(`")) { switch (self.tokenizer.token orelse return self.report("unexpected end after after `(`")) {
.symbol_comma => continue, .symbol_comma => continue,
.symbol_paren_right => break, .symbol_paren_right => break,
else => return self.report(tokenizer, "expected `)` or argument after `(`"), else => return self.report("expected `)` or argument after `(`"),
} }
} }
tokenizer.step(); self.tokenizer.step();
try self.statements.push_one(.{ try self.statements.push_one(.{
.call_system = .{ .call_system = .{
@ -269,11 +271,11 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
}); });
}, },
else => return self.report(tokenizer, "expected `=` after local"), else => return self.report("expected `=` after local"),
} }
}, },
else => return self.report(tokenizer, "invalid statement"), else => return self.report("invalid statement"),
} }
} }
} }
@ -294,67 +296,67 @@ const parse_expression = binary_operation_parser(parse_equality, &.{
.subtraction, .subtraction,
}); });
fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression { fn parse_factor(self: *Self) ParseError!Expression {
const allocator = self.arena.as_allocator(); const allocator = self.arena.as_allocator();
switch (tokenizer.token orelse return self.report(tokenizer, "expected operand after operator")) { switch (self.tokenizer.token orelse return self.report("expected operand after operator")) {
.symbol_paren_left => { .symbol_paren_left => {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "expected an expression after `(`"); return self.report("expected an expression after `(`");
} }
const expression = try self.parse_expression(tokenizer); const expression = try self.parse_expression();
if (!tokenizer.is_token(.symbol_paren_right)) { if (!self.tokenizer.is_token(.symbol_paren_right)) {
return self.report(tokenizer, "expected a closing `)` after expression"); return self.report("expected a closing `)` after expression");
} }
tokenizer.step(); self.tokenizer.step();
return Expression{.grouped_expression = try coral.io.allocate_one(allocator, expression)}; return Expression{.grouped_expression = try coral.io.allocate_one(allocator, expression)};
}, },
.keyword_nil => { .keyword_nil => {
tokenizer.step(); self.tokenizer.step();
return .nil_literal; return .nil_literal;
}, },
.keyword_true => { .keyword_true => {
tokenizer.step(); self.tokenizer.step();
return .true_literal; return .true_literal;
}, },
.keyword_false => { .keyword_false => {
tokenizer.step(); self.tokenizer.step();
return .false_literal; return .false_literal;
}, },
.number => |value| { .number => |value| {
tokenizer.step(); self.tokenizer.step();
return Expression{.number_literal = value}; return Expression{.number_literal = value};
}, },
.string => |value| { .string => |value| {
tokenizer.step(); self.tokenizer.step();
return Expression{.string_literal = value}; return Expression{.string_literal = value};
}, },
.special_identifier => |identifier| { .special_identifier => |identifier| {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
var expression_list = Expression.List.make(allocator); var expression_list = Expression.List.make(allocator);
while (true) { while (true) {
switch (tokenizer.token orelse return self.report(tokenizer, "expected expression or `)` after `(`")) { switch (self.tokenizer.token orelse return self.report("expected expression or `)` after `(`")) {
.symbol_paren_right => { .symbol_paren_right => {
tokenizer.step(); self.tokenizer.step();
return Expression{ return Expression{
.call_system = .{ .call_system = .{
@ -365,13 +367,13 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
}, },
else => { else => {
try expression_list.push_one(try self.parse_expression(tokenizer)); try expression_list.push_one(try self.parse_expression());
switch (tokenizer.token orelse return self.report(tokenizer, "expected `,` or `)` after argument")) { switch (self.tokenizer.token orelse return self.report("expected `,` or `)` after argument")) {
.symbol_comma => continue, .symbol_comma => continue,
.symbol_paren_right => { .symbol_paren_right => {
tokenizer.step(); self.tokenizer.step();
return Expression{ return Expression{
.call_system = .{ .call_system = .{
@ -381,7 +383,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
}; };
}, },
else => return self.report(tokenizer, "expected `,` or `)` after argument"), else => return self.report("expected `,` or `)` after argument"),
} }
}, },
} }
@ -389,7 +391,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
}, },
.identifier => |identifier| { .identifier => |identifier| {
tokenizer.step(); self.tokenizer.step();
return Expression{.get_local = identifier}; return Expression{.get_local = identifier};
}, },
@ -397,83 +399,83 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
.symbol_brace_left => { .symbol_brace_left => {
var table_fields = Expression.NamedList.make(allocator); var table_fields = Expression.NamedList.make(allocator);
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
while (true) { while (true) {
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table literal")) { switch (self.tokenizer.token orelse return self.report("unexpected end of table literal")) {
.symbol_brace_right => { .symbol_brace_right => {
tokenizer.step(); self.tokenizer.step();
return Expression{.table_literal = table_fields}; return Expression{.table_literal = table_fields};
}, },
.identifier => |identifier| { .identifier => |identifier| {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
if (!tokenizer.is_token(.symbol_equals)) { if (!self.tokenizer.is_token(.symbol_equals)) {
return self.report(tokenizer, "expected `=` after identifier"); return self.report("expected `=` after identifier");
} }
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "unexpected end after `=`"); return self.report("unexpected end after `=`");
} }
try table_fields.push_one(.{ try table_fields.push_one(.{
.expression = try self.parse_expression(tokenizer), .expression = try self.parse_expression(),
.identifier = identifier, .identifier = identifier,
}); });
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table")) { switch (self.tokenizer.token orelse return self.report("unexpected end of table")) {
.symbol_comma => tokenizer.skip(.newline), .symbol_comma => self.tokenizer.skip(.newline),
.symbol_brace_right => { .symbol_brace_right => {
tokenizer.step(); self.tokenizer.step();
return Expression{.table_literal = table_fields}; return Expression{.table_literal = table_fields};
}, },
else => return self.report(tokenizer, "expected `,` or `}` after expression"), else => return self.report("expected `,` or `}` after expression"),
} }
}, },
else => return self.report(tokenizer, "expected `}` or fields in table literal"), else => return self.report("expected `}` or fields in table literal"),
} }
} }
}, },
.symbol_minus => { .symbol_minus => {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "expected expression after numeric negation (`-`)"); return self.report("expected expression after numeric negation (`-`)");
} }
return Expression{ return Expression{
.unary_operation = .{ .unary_operation = .{
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)), .expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
.operator = .numeric_negation, .operator = .numeric_negation,
}, },
}; };
}, },
.symbol_bang => { .symbol_bang => {
tokenizer.skip(.newline); self.tokenizer.skip(.newline);
if (tokenizer.token == null) { if (self.tokenizer.token == null) {
return self.report(tokenizer, "expected expression after boolean negation (`!`)"); return self.report("expected expression after boolean negation (`!`)");
} }
return Expression{ return Expression{
.unary_operation = .{ .unary_operation = .{
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)), .expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
.operator = .boolean_negation, .operator = .boolean_negation,
}, },
}; };
}, },
else => return self.report(tokenizer, "unexpected token in expression"), else => return self.report("unexpected token in expression"),
} }
} }