Change KYM tokenizer to be internal to the AST.
continuous-integration/drone/push Build is passing Details
continuous-integration/drone/pr Build is passing Details

This commit is contained in:
kayomn 2023-07-22 15:06:39 +01:00
parent 62a318e69b
commit 51732a9bf5
2 changed files with 93 additions and 97 deletions

View File

@ -8,8 +8,6 @@ const coral = @import("coral");
const file = @import("./file.zig");
const tokens = @import("./kym/tokens.zig");
pub const Any = union (enum) {
nil,
boolean: bool,
@ -204,14 +202,10 @@ pub const RuntimeEnv = struct {
defer ast.free();
{
var tokenizer = tokens.Tokenizer{.source = data};
ast.parse(&tokenizer) catch |parse_error| switch (parse_error) {
ast.parse(data) catch |parse_error| switch (parse_error) {
error.BadSyntax => return self.raise(error.BadSyntax, ast.error_message()),
error.OutOfMemory => return error.OutOfMemory,
};
}
var chunk = Chunk.make(self);

View File

@ -7,6 +7,7 @@ allocator: coral.io.Allocator,
arena: coral.arena.Stacking,
statements: Statement.List,
error_buffer: coral.list.ByteStack,
tokenizer: tokens.Tokenizer,
pub const Expression = union (enum) {
nil_literal,
@ -68,7 +69,7 @@ pub const Expression = union (enum) {
pub const List = coral.list.Stack(Expression);
};
const ExpressionParser = fn (self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression;
const ExpressionParser = fn (self: *Self) ParseError!Expression;
pub const ParseError = error {
OutOfMemory,
@ -104,25 +105,25 @@ fn binary_operation_parser(
comptime operators: []const Expression.BinaryOperator) ExpressionParser {
const BinaryOperationParser = struct {
fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression {
fn parse(self: *Self) ParseError!Expression {
const allocator = self.arena.as_allocator();
var expression = try parse_next(self, tokenizer);
var expression = try parse_next(self);
inline for (operators) |operator| {
const token = comptime operator.token();
if (tokenizer.is_token(coral.io.tag_of(token))) {
tokenizer.step();
if (self.tokenizer.is_token(coral.io.tag_of(token))) {
self.tokenizer.step();
if (tokenizer.token == null) {
return self.report(tokenizer, "expected other half of expression after `" ++ comptime token.text() ++ "`");
if (self.tokenizer.token == null) {
return self.report("expected other half of expression after `" ++ comptime token.text() ++ "`");
}
expression = .{
.binary_operation = .{
.operator = operator,
.lhs_expression = try coral.io.allocate_one(allocator, expression),
.rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self, tokenizer)),
.rhs_expression = try coral.io.allocate_one(allocator, try parse_next(self)),
},
};
}
@ -150,15 +151,16 @@ pub fn make(allocator: coral.io.Allocator, ast_name: []const coral.io.Byte) Self
.arena = coral.arena.Stacking.make(allocator, 4096),
.error_buffer = coral.list.ByteStack.make(allocator),
.statements = Statement.List.make(allocator),
.tokenizer = .{.source = ""},
.allocator = allocator,
.name = ast_name,
};
}
fn report(self: *Self, tokenizer: *tokens.Tokenizer, message: []const coral.io.Byte) ParseError {
fn report(self: *Self, message: []const coral.io.Byte) ParseError {
coral.utf8.print_formatted(coral.list.stack_as_writer(&self.error_buffer), "{name}@{line}: {message}", .{
.name = self.name,
.line = tokenizer.lines_stepped,
.line = self.tokenizer.lines_stepped,
.message = message,
}) catch return error.OutOfMemory;
@ -169,30 +171,30 @@ pub fn list_statements(self: Self) []const Statement {
return self.statements.values;
}
pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
self.free();
pub fn parse(self: *Self, data: []const coral.io.Byte) ParseError!void {
self.tokenizer = .{.source = data};
const allocator = self.arena.as_allocator();
var has_returned = false;
while (true) {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
switch (tokenizer.token orelse return) {
switch (self.tokenizer.token orelse return) {
.keyword_return => {
if (has_returned) {
return self.report(tokenizer, "multiple returns in function scope but expected only one");
return self.report("multiple returns in function scope but expected only one");
}
try self.statements.push_one(get_statement: {
tokenizer.step();
self.tokenizer.step();
if (!tokenizer.is_token_null_or(.newline)) {
break: get_statement .{.return_expression = try self.parse_expression(tokenizer)};
if (!self.tokenizer.is_token_null_or(.newline)) {
break: get_statement .{.return_expression = try self.parse_expression()};
}
if (!tokenizer.is_token_null_or(.newline)) {
return self.report(tokenizer, "unexpected token after return");
if (!self.tokenizer.is_token_null_or(.newline)) {
return self.report("unexpected token after return");
}
break: get_statement .return_nothing;
@ -202,64 +204,64 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
},
.identifier => |identifier| {
tokenizer.step();
self.tokenizer.step();
const no_effect_message = "statement has no effect";
switch (tokenizer.token orelse return self.report(tokenizer, no_effect_message)) {
.newline => return self.report(tokenizer, no_effect_message),
switch (self.tokenizer.token orelse return self.report(no_effect_message)) {
.newline => return self.report(no_effect_message),
.symbol_equals => {
tokenizer.step();
self.tokenizer.step();
if (tokenizer.token == null) {
return self.report(tokenizer, "expected expression after `=`");
if (self.tokenizer.token == null) {
return self.report("expected expression after `=`");
}
try self.statements.push_one(.{
.set_local = .{
.expression = try self.parse_expression(tokenizer),
.expression = try self.parse_expression(),
.identifier = identifier,
},
});
if (!tokenizer.is_token_null_or(.newline)) {
return self.report(tokenizer, "unexpected token after assignment");
if (!self.tokenizer.is_token_null_or(.newline)) {
return self.report("unexpected token after assignment");
}
},
else => return self.report(tokenizer, "expected `=` after local"),
else => return self.report("expected `=` after local"),
}
},
.special_identifier => |identifier| {
tokenizer.step();
self.tokenizer.step();
const missing_arguments_message = "system call is missing arguments";
switch (tokenizer.token orelse return self.report(tokenizer, missing_arguments_message)) {
.newline => return self.report(tokenizer, missing_arguments_message),
switch (self.tokenizer.token orelse return self.report(missing_arguments_message)) {
.newline => return self.report(missing_arguments_message),
.symbol_paren_left => {
tokenizer.step();
self.tokenizer.step();
var expressions_list = Expression.List.make(allocator);
while (true) {
if (tokenizer.is_token(.symbol_paren_right)) {
if (self.tokenizer.is_token(.symbol_paren_right)) {
break;
}
try expressions_list.push_one(try self.parse_expression(tokenizer));
try expressions_list.push_one(try self.parse_expression());
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end after after `(`")) {
switch (self.tokenizer.token orelse return self.report("unexpected end after after `(`")) {
.symbol_comma => continue,
.symbol_paren_right => break,
else => return self.report(tokenizer, "expected `)` or argument after `(`"),
else => return self.report("expected `)` or argument after `(`"),
}
}
tokenizer.step();
self.tokenizer.step();
try self.statements.push_one(.{
.call_system = .{
@ -269,11 +271,11 @@ pub fn parse(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!void {
});
},
else => return self.report(tokenizer, "expected `=` after local"),
else => return self.report("expected `=` after local"),
}
},
else => return self.report(tokenizer, "invalid statement"),
else => return self.report("invalid statement"),
}
}
}
@ -294,67 +296,67 @@ const parse_expression = binary_operation_parser(parse_equality, &.{
.subtraction,
});
fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression {
fn parse_factor(self: *Self) ParseError!Expression {
const allocator = self.arena.as_allocator();
switch (tokenizer.token orelse return self.report(tokenizer, "expected operand after operator")) {
switch (self.tokenizer.token orelse return self.report("expected operand after operator")) {
.symbol_paren_left => {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
if (tokenizer.token == null) {
return self.report(tokenizer, "expected an expression after `(`");
if (self.tokenizer.token == null) {
return self.report("expected an expression after `(`");
}
const expression = try self.parse_expression(tokenizer);
const expression = try self.parse_expression();
if (!tokenizer.is_token(.symbol_paren_right)) {
return self.report(tokenizer, "expected a closing `)` after expression");
if (!self.tokenizer.is_token(.symbol_paren_right)) {
return self.report("expected a closing `)` after expression");
}
tokenizer.step();
self.tokenizer.step();
return Expression{.grouped_expression = try coral.io.allocate_one(allocator, expression)};
},
.keyword_nil => {
tokenizer.step();
self.tokenizer.step();
return .nil_literal;
},
.keyword_true => {
tokenizer.step();
self.tokenizer.step();
return .true_literal;
},
.keyword_false => {
tokenizer.step();
self.tokenizer.step();
return .false_literal;
},
.number => |value| {
tokenizer.step();
self.tokenizer.step();
return Expression{.number_literal = value};
},
.string => |value| {
tokenizer.step();
self.tokenizer.step();
return Expression{.string_literal = value};
},
.special_identifier => |identifier| {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
var expression_list = Expression.List.make(allocator);
while (true) {
switch (tokenizer.token orelse return self.report(tokenizer, "expected expression or `)` after `(`")) {
switch (self.tokenizer.token orelse return self.report("expected expression or `)` after `(`")) {
.symbol_paren_right => {
tokenizer.step();
self.tokenizer.step();
return Expression{
.call_system = .{
@ -365,13 +367,13 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
},
else => {
try expression_list.push_one(try self.parse_expression(tokenizer));
try expression_list.push_one(try self.parse_expression());
switch (tokenizer.token orelse return self.report(tokenizer, "expected `,` or `)` after argument")) {
switch (self.tokenizer.token orelse return self.report("expected `,` or `)` after argument")) {
.symbol_comma => continue,
.symbol_paren_right => {
tokenizer.step();
self.tokenizer.step();
return Expression{
.call_system = .{
@ -381,7 +383,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
};
},
else => return self.report(tokenizer, "expected `,` or `)` after argument"),
else => return self.report("expected `,` or `)` after argument"),
}
},
}
@ -389,7 +391,7 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
},
.identifier => |identifier| {
tokenizer.step();
self.tokenizer.step();
return Expression{.get_local = identifier};
},
@ -397,83 +399,83 @@ fn parse_factor(self: *Self, tokenizer: *tokens.Tokenizer) ParseError!Expression
.symbol_brace_left => {
var table_fields = Expression.NamedList.make(allocator);
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
while (true) {
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table literal")) {
switch (self.tokenizer.token orelse return self.report("unexpected end of table literal")) {
.symbol_brace_right => {
tokenizer.step();
self.tokenizer.step();
return Expression{.table_literal = table_fields};
},
.identifier => |identifier| {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
if (!tokenizer.is_token(.symbol_equals)) {
return self.report(tokenizer, "expected `=` after identifier");
if (!self.tokenizer.is_token(.symbol_equals)) {
return self.report("expected `=` after identifier");
}
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
if (tokenizer.token == null) {
return self.report(tokenizer, "unexpected end after `=`");
if (self.tokenizer.token == null) {
return self.report("unexpected end after `=`");
}
try table_fields.push_one(.{
.expression = try self.parse_expression(tokenizer),
.expression = try self.parse_expression(),
.identifier = identifier,
});
switch (tokenizer.token orelse return self.report(tokenizer, "unexpected end of table")) {
.symbol_comma => tokenizer.skip(.newline),
switch (self.tokenizer.token orelse return self.report("unexpected end of table")) {
.symbol_comma => self.tokenizer.skip(.newline),
.symbol_brace_right => {
tokenizer.step();
self.tokenizer.step();
return Expression{.table_literal = table_fields};
},
else => return self.report(tokenizer, "expected `,` or `}` after expression"),
else => return self.report("expected `,` or `}` after expression"),
}
},
else => return self.report(tokenizer, "expected `}` or fields in table literal"),
else => return self.report("expected `}` or fields in table literal"),
}
}
},
.symbol_minus => {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
if (tokenizer.token == null) {
return self.report(tokenizer, "expected expression after numeric negation (`-`)");
if (self.tokenizer.token == null) {
return self.report("expected expression after numeric negation (`-`)");
}
return Expression{
.unary_operation = .{
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)),
.expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
.operator = .numeric_negation,
},
};
},
.symbol_bang => {
tokenizer.skip(.newline);
self.tokenizer.skip(.newline);
if (tokenizer.token == null) {
return self.report(tokenizer, "expected expression after boolean negation (`!`)");
if (self.tokenizer.token == null) {
return self.report("expected expression after boolean negation (`!`)");
}
return Expression{
.unary_operation = .{
.expression = try coral.io.allocate_one(allocator, try self.parse_factor(tokenizer)),
.expression = try coral.io.allocate_one(allocator, try self.parse_factor()),
.operator = .boolean_negation,
},
};
},
else => return self.report(tokenizer, "unexpected token in expression"),
else => return self.report("unexpected token in expression"),
}
}