Change KYM tokenizer to be internal to the AST.
This commit is contained in:
parent
62a318e69b
commit
51732a9bf5
|
@ -8,8 +8,6 @@ const coral = @import("coral");
|
|||
|
||||
const file = @import("./file.zig");
|
||||
|
||||
const tokens = @import("./kym/tokens.zig");
|
||||
|
||||
pub const Any = union (enum) {
|
||||
nil,
|
||||
boolean: bool,
|
||||
|
@ -204,14 +202,10 @@ pub const RuntimeEnv = struct {
|
|||
|
||||
defer ast.free();
|
||||
|
||||
{
|
||||
var tokenizer = tokens.Tokenizer{.source = data};
|
||||
|
||||
ast.parse(&tokenizer) catch |parse_error| switch (parse_error) {
|
||||
error.BadSyntax => return self.raise(error.BadSyntax, ast.error_message()),
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
};
|
||||
}
|
||||
ast.parse(data) catch |parse_error| switch (parse_error) {
|
||||
error.BadSyntax => return self.raise(error.BadSyntax, ast.error_message()),
|
||||
error.OutOfMemory => return error.OutOfMemory,
|
||||
};
|
||||
|
||||
var chunk = Chunk.make(self);
|
||||
|
||||
|
|
|
@ -7,6 +7,7 @@ allocator: coral.io.Allocator,
|
|||
arena: coral.arena.Stacking,
|
||||
statements: Statement.List,
|
||||
error_buffer: coral.list.ByteStack,
|
||||
tokenizer: tokens.Tokenizer,
|
||||
|
||||
pub const Expression = union (enum) {
|
||||
nil_literal,
|
||||
|
@ -68,7 +69,7 @@ pub const Expression = union (enum) {
|
|||
pub const List = coral.list.Stack(Expression);
|
||||
};
|
||||
|
||||
const ExpressionParser = fn (self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression;
|
||||
const ExpressionParser = fn (self: *Self) ParseError!Expression;
|
||||
|
||||
pub const ParseError = error {
|
||||
OutOfMemory,
|
||||
|
@ -104,25 +105,25 @@ fn binary_operation_parser(
|
|||
comptime operators: []const Expression.BinaryOperator) ExpressionParser {
|
||||
|
||||
const BinaryOperationParser = struct {
|
||||
fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression {
|
||||
fn parse(self: *Self) ParseError!Expression {
|
||||
const allocator = self.arena.as_allocator();
|
||||
var expression = try parse_next(self, tokenizer);
|
||||
var expression = try parse_next(self);
|
||||
|
||||
inline for (operators) |operator| {
|
||||
const token = comptime operator.token();
|
||||
|
||||
if (tokenizer.is_token(coral.io.tag_of(token))) {
|
||||
tokenizer.step();
|
||||
if (self.tokenizer.is_token(coral.io.tag_of(token))) {
|
||||
self.tokenizer.step();
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "expected other half of expression after `" ++ comptime token.text() ++ "`");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("expected other half of expression after `" ++ comptime token.text() ++ "`");
|
||||
}
|
||||
|
||||
expression = .{
|
||||
.binary_operation = .{
|
||||
.operator = operator,
|
||||
.lhs_expression = try coral.io.allocate_one(allocator, expression),
|
||||
.rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self, tokenizer)),
|
||||
.rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self)),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
@ -150,15 +151,16 @@ pub fn make(allocator: coral.io.Allocator, ast_name: []const coral.io.Byte) Self
|
|||
.arena = coral.arena.Stacking.make(allocator, 4096),
|
||||
.error_buffer = coral.list.ByteStack.make(allocator),
|
||||
.statements = Statement.List.make(allocator),
|
||||
.tokenizer = .{.source = ""},
|
||||
.allocator = allocator,
|
||||
.name = ast_name,
|
||||
};
|
||||
}
|
||||
|
||||
fn report(self: *Self, tokenizer: *tokens.Tokenizer, message: []const coral.io.Byte) ParseError {
|
||||
fn report(self: *Self, message: []const coral.io.Byte) ParseError {
|
||||
coral.utf8.print_formatted(coral.list.stack_as_writer(&self.error_buffer), "{name}@{line}: {message}", .{
|
||||
.name = self.name,
|
||||
.line = tokenizer.lines_stepped,
|
||||
.line = self.tokenizer.lines_stepped,
|
||||
.message = message,
|
||||
}) catch return error.OutOfMemory;
|
||||
|
||||
|
@ -169,30 +171,30 @@ pub fn list_statements(self: Self) []const Statement {
|
|||
return self.statements.values;
|
||||
}
|
||||
|
||||
pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
|
||||
self.free();
|
||||
pub fn parse(self: *Self, data: []const coral.io.Byte) ParseError!void {
|
||||
self.tokenizer = .{.source = data};
|
||||
|
||||
const allocator = self.arena.as_allocator();
|
||||
var has_returned = false;
|
||||
|
||||
while (true) {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
switch (tokenizer.token orelse return) {
|
||||
switch (self.tokenizer.token orelse return) {
|
||||
.keyword_return => {
|
||||
if (has_returned) {
|
||||
return self.report(tokenizer, "multiple returns in function scope but expected only one");
|
||||
return self.report("multiple returns in function scope but expected only one");
|
||||
}
|
||||
|
||||
try self.statements.push_one(get_statement: {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
if (!tokenizer.is_token_null_or(.newline)) {
|
||||
break: get_statement .{.return_expression = try self.parse_expression(tokenizer)};
|
||||
if (!self.tokenizer.is_token_null_or(.newline)) {
|
||||
break: get_statement .{.return_expression = try self.parse_expression()};
|
||||
}
|
||||
|
||||
if (!tokenizer.is_token_null_or(.newline)) {
|
||||
return self.report(tokenizer, "unexpected token after return");
|
||||
if (!self.tokenizer.is_token_null_or(.newline)) {
|
||||
return self.report("unexpected token after return");
|
||||
}
|
||||
|
||||
break: get_statement .return_nothing;
|
||||
|
@ -202,64 +204,64 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
|
|||
},
|
||||
|
||||
.identifier => |identifier| {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
const no_effect_message = "statement has no effect";
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, no_effect_message)) {
|
||||
.newline => return self.report(tokenizer, no_effect_message),
|
||||
switch (self.tokenizer.token orelse return self.report(no_effect_message)) {
|
||||
.newline => return self.report(no_effect_message),
|
||||
|
||||
.symbol_equals => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "expected expression after `=`");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("expected expression after `=`");
|
||||
}
|
||||
|
||||
try self.statements.push_one(.{
|
||||
.set_local = .{
|
||||
.expression = try self.parse_expression(tokenizer),
|
||||
.expression = try self.parse_expression(),
|
||||
.identifier = identifier,
|
||||
},
|
||||
});
|
||||
|
||||
if (!tokenizer.is_token_null_or(.newline)) {
|
||||
return self.report(tokenizer, "unexpected token after assignment");
|
||||
if (!self.tokenizer.is_token_null_or(.newline)) {
|
||||
return self.report("unexpected token after assignment");
|
||||
}
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "expected `=` after local"),
|
||||
else => return self.report("expected `=` after local"),
|
||||
}
|
||||
},
|
||||
|
||||
.special_identifier => |identifier| {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
const missing_arguments_message = "system call is missing arguments";
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, missing_arguments_message)) {
|
||||
.newline => return self.report(tokenizer, missing_arguments_message),
|
||||
switch (self.tokenizer.token orelse return self.report(missing_arguments_message)) {
|
||||
.newline => return self.report(missing_arguments_message),
|
||||
|
||||
.symbol_paren_left => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
var expressions_list = Expression.List.make(allocator);
|
||||
|
||||
while (true) {
|
||||
if (tokenizer.is_token(.symbol_paren_right)) {
|
||||
if (self.tokenizer.is_token(.symbol_paren_right)) {
|
||||
break;
|
||||
}
|
||||
|
||||
try expressions_list.push_one(try self.parse_expression(tokenizer));
|
||||
try expressions_list.push_one(try self.parse_expression());
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end after after `(`")) {
|
||||
switch (self.tokenizer.token orelse return self.report("unexpected end after after `(`")) {
|
||||
.symbol_comma => continue,
|
||||
.symbol_paren_right => break,
|
||||
else => return self.report(tokenizer, "expected `)` or argument after `(`"),
|
||||
else => return self.report("expected `)` or argument after `(`"),
|
||||
}
|
||||
}
|
||||
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
try self.statements.push_one(.{
|
||||
.call_system = .{
|
||||
|
@ -269,11 +271,11 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
|
|||
});
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "expected `=` after local"),
|
||||
else => return self.report("expected `=` after local"),
|
||||
}
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "invalid statement"),
|
||||
else => return self.report("invalid statement"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -294,67 +296,67 @@ const parse_expression = binary_operation_parser(parse_equality, &.{
|
|||
.subtraction,
|
||||
});
|
||||
|
||||
fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression {
|
||||
fn parse_factor(self: *Self) ParseError!Expression {
|
||||
const allocator = self.arena.as_allocator();
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "expected operand after operator")) {
|
||||
switch (self.tokenizer.token orelse return self.report("expected operand after operator")) {
|
||||
.symbol_paren_left => {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "expected an expression after `(`");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("expected an expression after `(`");
|
||||
}
|
||||
|
||||
const expression = try self.parse_expression(tokenizer);
|
||||
const expression = try self.parse_expression();
|
||||
|
||||
if (!tokenizer.is_token(.symbol_paren_right)) {
|
||||
return self.report(tokenizer, "expected a closing `)` after expression");
|
||||
if (!self.tokenizer.is_token(.symbol_paren_right)) {
|
||||
return self.report("expected a closing `)` after expression");
|
||||
}
|
||||
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.grouped_expression = try coral.io.allocate_one(allocator, expression)};
|
||||
},
|
||||
|
||||
.keyword_nil => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return .nil_literal;
|
||||
},
|
||||
|
||||
.keyword_true => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return .true_literal;
|
||||
},
|
||||
|
||||
.keyword_false => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return .false_literal;
|
||||
},
|
||||
|
||||
.number => |value| {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.number_literal = value};
|
||||
},
|
||||
|
||||
.string => |value| {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.string_literal = value};
|
||||
},
|
||||
|
||||
.special_identifier => |identifier| {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
var expression_list = Expression.List.make(allocator);
|
||||
|
||||
while (true) {
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "expected expression or `)` after `(`")) {
|
||||
switch (self.tokenizer.token orelse return self.report("expected expression or `)` after `(`")) {
|
||||
.symbol_paren_right => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{
|
||||
.call_system = .{
|
||||
|
@ -365,13 +367,13 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
|
|||
},
|
||||
|
||||
else => {
|
||||
try expression_list.push_one(try self.parse_expression(tokenizer));
|
||||
try expression_list.push_one(try self.parse_expression());
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "expected `,` or `)` after argument")) {
|
||||
switch (self.tokenizer.token orelse return self.report("expected `,` or `)` after argument")) {
|
||||
.symbol_comma => continue,
|
||||
|
||||
.symbol_paren_right => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{
|
||||
.call_system = .{
|
||||
|
@ -381,7 +383,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
|
|||
};
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "expected `,` or `)` after argument"),
|
||||
else => return self.report("expected `,` or `)` after argument"),
|
||||
}
|
||||
},
|
||||
}
|
||||
|
@ -389,7 +391,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
|
|||
},
|
||||
|
||||
.identifier => |identifier| {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.get_local = identifier};
|
||||
},
|
||||
|
@ -397,83 +399,83 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
|
|||
.symbol_brace_left => {
|
||||
var table_fields = Expression.NamedList.make(allocator);
|
||||
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
while (true) {
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table literal")) {
|
||||
switch (self.tokenizer.token orelse return self.report("unexpected end of table literal")) {
|
||||
.symbol_brace_right => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.table_literal = table_fields};
|
||||
},
|
||||
|
||||
.identifier => |identifier| {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
if (!tokenizer.is_token(.symbol_equals)) {
|
||||
return self.report(tokenizer, "expected `=` after identifier");
|
||||
if (!self.tokenizer.is_token(.symbol_equals)) {
|
||||
return self.report("expected `=` after identifier");
|
||||
}
|
||||
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "unexpected end after `=`");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("unexpected end after `=`");
|
||||
}
|
||||
|
||||
try table_fields.push_one(.{
|
||||
.expression = try self.parse_expression(tokenizer),
|
||||
.expression = try self.parse_expression(),
|
||||
.identifier = identifier,
|
||||
});
|
||||
|
||||
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table")) {
|
||||
.symbol_comma => tokenizer.skip(.newline),
|
||||
switch (self.tokenizer.token orelse return self.report("unexpected end of table")) {
|
||||
.symbol_comma => self.tokenizer.skip(.newline),
|
||||
|
||||
.symbol_brace_right => {
|
||||
tokenizer.step();
|
||||
self.tokenizer.step();
|
||||
|
||||
return Expression{.table_literal = table_fields};
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "expected `,` or `}` after expression"),
|
||||
else => return self.report("expected `,` or `}` after expression"),
|
||||
}
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "expected `}` or fields in table literal"),
|
||||
else => return self.report("expected `}` or fields in table literal"),
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
.symbol_minus => {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "expected expression after numeric negation (`-`)");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("expected expression after numeric negation (`-`)");
|
||||
}
|
||||
|
||||
return Expression{
|
||||
.unary_operation = .{
|
||||
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)),
|
||||
.expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
|
||||
.operator = .numeric_negation,
|
||||
},
|
||||
};
|
||||
},
|
||||
|
||||
.symbol_bang => {
|
||||
tokenizer.skip(.newline);
|
||||
self.tokenizer.skip(.newline);
|
||||
|
||||
if (tokenizer.token == null) {
|
||||
return self.report(tokenizer, "expected expression after boolean negation (`!`)");
|
||||
if (self.tokenizer.token == null) {
|
||||
return self.report("expected expression after boolean negation (`!`)");
|
||||
}
|
||||
|
||||
return Expression{
|
||||
.unary_operation = .{
|
||||
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)),
|
||||
.expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
|
||||
.operator = .boolean_negation,
|
||||
},
|
||||
};
|
||||
},
|
||||
|
||||
else => return self.report(tokenizer, "unexpected token in expression"),
|
||||
else => return self.report("unexpected token in expression"),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue