Add Kym parsing support for array literals
continuous-integration/drone/push Build is passing Details
continuous-integration/drone/pr Build is passing Details

This commit is contained in:
kayomn 2023-05-29 01:37:26 +00:00
parent 5e52fcc795
commit 27b809f2c5
3 changed files with 173 additions and 115 deletions

View File

@ -25,6 +25,7 @@ const Opcode = enum (u8) {
push_integer, push_integer,
push_float, push_float,
push_object, push_object,
push_array,
push_table, push_table,
not, not,
@ -126,19 +127,32 @@ pub fn compile_expression(self: *Self, expression: ast.Expression) types.Runtime
try self.emit_object(try self.intern(literal)); try self.emit_object(try self.intern(literal));
}, },
.table_literal => |literal| { .array_literal => |elements| {
if (literal.values.len > coral.math.max_int(@typeInfo(types.Integer).Int)) { if (elements.values.len > coral.math.max_int(@typeInfo(types.Integer).Int)) {
return error.OutOfMemory; return error.OutOfMemory;
} }
for (literal.values) |field| { for (elements.values) |element_expression| {
try self.compile_expression(field.expression.*); try self.compile_expression(element_expression);
}
try self.emit_opcode(.push_array);
try self.emit_integer(@intCast(types.Integer, elements.values.len));
},
.table_literal => |fields| {
if (fields.values.len > coral.math.max_int(@typeInfo(types.Integer).Int)) {
return error.OutOfMemory;
}
for (fields.values) |field| {
try self.compile_expression(field.expression);
try self.emit_opcode(.push_object); try self.emit_opcode(.push_object);
try self.emit_object(try self.intern(field.identifier)); try self.emit_object(try self.intern(field.identifier));
} }
try self.emit_opcode(.push_table); try self.emit_opcode(.push_table);
try self.emit_integer(@intCast(types.Integer, literal.values.len)); try self.emit_integer(@intCast(types.Integer, fields.values.len));
}, },
.binary_operation => |operation| { .binary_operation => |operation| {

View File

@ -4,6 +4,8 @@ const tokens = @import("./tokens.zig");
const types = @import("./types.zig"); const types = @import("./types.zig");
const ArrayElements = coral.list.Stack(Expression);
pub const BinaryOperation = enum { pub const BinaryOperation = enum {
addition, addition,
subtraction, subtraction,
@ -33,7 +35,7 @@ pub const ParsedExpression = union (enum) {
}; };
if (tokenizer.current_token == .symbol_plus) { if (tokenizer.current_token == .symbol_plus) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `+`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `+`"};
} }
@ -55,7 +57,7 @@ pub const ParsedExpression = union (enum) {
} }
if (tokenizer.current_token == .symbol_minus) { if (tokenizer.current_token == .symbol_minus) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `-`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `-`"};
} }
@ -101,7 +103,7 @@ pub const ParsedExpression = union (enum) {
}; };
if (tokenizer.current_token == .symbol_greater_than) { if (tokenizer.current_token == .symbol_greater_than) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `>`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `>`"};
} }
@ -123,7 +125,7 @@ pub const ParsedExpression = union (enum) {
} }
if (tokenizer.current_token == .symbol_greater_equals) { if (tokenizer.current_token == .symbol_greater_equals) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `>=`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `>=`"};
} }
@ -145,7 +147,7 @@ pub const ParsedExpression = union (enum) {
} }
if (tokenizer.current_token == .symbol_less_than) { if (tokenizer.current_token == .symbol_less_than) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `<`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `<`"};
} }
@ -167,7 +169,7 @@ pub const ParsedExpression = union (enum) {
} }
if (tokenizer.current_token == .symbol_less_equals) { if (tokenizer.current_token == .symbol_less_equals) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `<=`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `<=`"};
} }
@ -213,7 +215,7 @@ pub const ParsedExpression = union (enum) {
}; };
if (tokenizer.current_token == .symbol_double_equals) { if (tokenizer.current_token == .symbol_double_equals) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `==`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `==`"};
} }
@ -249,7 +251,7 @@ pub const ParsedExpression = union (enum) {
switch (tokenizer.current_token) { switch (tokenizer.current_token) {
.symbol_paren_left => { .symbol_paren_left => {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected an expression after `(`"}; return ParsedExpression{.invalid = "expected an expression after `(`"};
} }
@ -263,7 +265,7 @@ pub const ParsedExpression = union (enum) {
expression.deinit(allocator); expression.deinit(allocator);
}; };
if ((!tokenizer.step()) or (tokenizer.current_token != .symbol_paren_right)) { if ((!tokenizer.step(.ignore_newlines)) or (tokenizer.current_token != .symbol_paren_right)) {
return ParsedExpression{.invalid = "expected a closing `)` after expression"}; return ParsedExpression{.invalid = "expected a closing `)` after expression"};
} }
@ -278,55 +280,87 @@ pub const ParsedExpression = union (enum) {
} }
}, },
.integer => |value| { .integer => |value| return ParsedExpression{
defer _ = tokenizer.step(); .valid = .{
.integer_literal = coral.utf8.parse_int(
@typeInfo(types.Integer).Int,
value, .{}) catch |parse_error| {
return ParsedExpression{ return ParsedExpression{
.valid = .{ .invalid = switch (parse_error) {
.integer_literal = coral.utf8.parse_int( error.BadSyntax => "invalid integer literal",
@typeInfo(types.Integer).Int, error.IntOverflow => "integer literal is too big",
value, .{}) catch |parse_error| { }
};
return ParsedExpression{
.invalid = switch (parse_error) {
error.BadSyntax => "invalid integer literal",
error.IntOverflow => "integer literal is too big",
}
};
},
}, },
}; },
}, },
.real => |value| { .real => |value| return ParsedExpression{
defer _ = tokenizer.step(); .valid = .{
.float_literal = coral.utf8.parse_float(
@typeInfo(types.Float).Float,
value) catch |parse_error| {
return ParsedExpression{ return ParsedExpression{
.valid = .{ .invalid = switch (parse_error) {
.float_literal = coral.utf8.parse_float( error.BadSyntax => "invalid float literal",
@typeInfo(types.Float).Float, },
value) catch |parse_error| { };
},
},
},
.string => |value| return ParsedExpression{
.valid = .{.string_literal = value},
},
.symbol_bracket_left => {
if (tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "unexpected end of array literal"};
}
var is_invalid = true;
var array_elements = try ArrayElements.init(allocator, 0);
defer if (is_invalid) {
array_elements.deinit(allocator);
};
while (true) {
switch (tokenizer.current_token) {
.symbol_bracket_right => {
_ = tokenizer.step(.ignore_newlines);
is_invalid = false;
return ParsedExpression{ return ParsedExpression{
.invalid = switch (parse_error) { .valid = .{.array_literal = array_elements},
error.BadSyntax => "invalid float literal", };
},
else => {
if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected `]` or expression after `[`"};
}
var parsed_expression = try ParsedExpression.init(allocator, tokenizer);
switch (parsed_expression) {
.valid => |*expression| {
errdefer expression.deinit(allocator);
try array_elements.push_one(allocator, expression.*);
}, },
};
.invalid => |detail| return ParsedExpression{.invalid = detail},
}
}, },
}, }
}; }
},
.string => |value| {
defer _ = tokenizer.step();
return ParsedExpression{
.valid = .{.string_literal = value},
};
}, },
.symbol_brace_left => { .symbol_brace_left => {
if (tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "unexpected end of table literal"}; return ParsedExpression{.invalid = "unexpected end of table literal"};
} }
@ -340,7 +374,7 @@ pub const ParsedExpression = union (enum) {
while (true) { while (true) {
switch (tokenizer.current_token) { switch (tokenizer.current_token) {
.symbol_brace_right => { .symbol_brace_right => {
_ = tokenizer.step(); _ = tokenizer.step(.ignore_newlines);
is_invalid = false; is_invalid = false;
return ParsedExpression{ return ParsedExpression{
@ -351,11 +385,11 @@ pub const ParsedExpression = union (enum) {
.local => |identifier| { .local => |identifier| {
const key = identifier; const key = identifier;
if (!tokenizer.step() or tokenizer.current_token != .symbol_equals) { if (!tokenizer.step(.ignore_newlines) or tokenizer.current_token != .symbol_equals) {
return ParsedExpression{.invalid = "expected `=` after identifier"}; return ParsedExpression{.invalid = "expected `=` after identifier"};
} }
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "unexpected end after `=`"}; return ParsedExpression{.invalid = "unexpected end after `=`"};
} }
@ -367,48 +401,40 @@ pub const ParsedExpression = union (enum) {
try table_fields.push_one(allocator, .{ try table_fields.push_one(allocator, .{
.identifier = key, .identifier = key,
.expression = expression, .expression = expression.*,
}); });
}, },
.invalid => |details| return ParsedExpression{.invalid = details}, .invalid => |details| return ParsedExpression{.invalid = details},
} }
},
.string => |identifier| { if (!tokenizer.step(.ignore_newlines)) {
const key = identifier; return ParsedExpression{.invalid = "unexpected end after expression"};
if (!tokenizer.step() or tokenizer.current_token != .symbol_equals) {
return ParsedExpression{.invalid = "expected `=` after identifier"};
} }
if (!tokenizer.step()) { switch (tokenizer.current_token) {
return ParsedExpression{.invalid = "unexpected end after `=`"}; .symbol_comma => _ = tokenizer.step(.ignore_newlines),
}
var parsed_expression = try init(allocator, tokenizer); .symbol_brace_right => {
_ = tokenizer.step(.ignore_newlines);
is_invalid = false;
switch (parsed_expression) { return ParsedExpression{
.valid => |*expression| { .valid = .{.table_literal = table_fields},
errdefer expression.deinit(allocator); };
try table_fields.push_one(allocator, .{
.identifier = key,
.expression = expression,
});
}, },
.invalid => |details| return ParsedExpression{.invalid = details}, else => return ParsedExpression{.invalid = "expected `,` or `}` after expression"},
} }
}, },
else => return ParsedExpression{.invalid = "expected `}` or fields in table expression"} else => return ParsedExpression{.invalid = "expected `}` or fields in table literal"},
} }
} }
}, },
.symbol_minus => { .symbol_minus => {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected expression after numeric negation (`-`)"}; return ParsedExpression{.invalid = "expected expression after numeric negation (`-`)"};
} }
@ -431,7 +457,7 @@ pub const ParsedExpression = union (enum) {
}, },
.symbol_bang => { .symbol_bang => {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected expression after boolean negation (`!`)"}; return ParsedExpression{.invalid = "expected expression after boolean negation (`!`)"};
} }
@ -470,7 +496,7 @@ pub const ParsedExpression = union (enum) {
}; };
if (tokenizer.current_token == .symbol_asterisk) { if (tokenizer.current_token == .symbol_asterisk) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `*`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `*`"};
} }
@ -492,7 +518,7 @@ pub const ParsedExpression = union (enum) {
} }
if (tokenizer.current_token == .symbol_forward_slash) { if (tokenizer.current_token == .symbol_forward_slash) {
if (!tokenizer.step()) { if (!tokenizer.step(.ignore_newlines)) {
return ParsedExpression{.invalid = "expected right-hand side of expression after `/`"}; return ParsedExpression{.invalid = "expected right-hand side of expression after `/`"};
} }
@ -540,16 +566,14 @@ pub const ParsedStatements = union (enum) {
statements_list.deinit(allocator); statements_list.deinit(allocator);
}; };
while (tokenizer.step()) { while (tokenizer.step(.ignore_newlines)) {
switch (tokenizer.current_token) { switch (tokenizer.current_token) {
.newline => {},
.keyword_return => { .keyword_return => {
if (has_returned) { if (has_returned) {
return ParsedStatements{.invalid = "cannot return more than once per function scope"}; return ParsedStatements{.invalid = "cannot return more than once per function scope"};
} }
if (tokenizer.step() and (tokenizer.current_token != .newline)) { if (tokenizer.step(.include_newlines) and (tokenizer.current_token != .newline)) {
var parsed_expression = try ParsedExpression.init(allocator, tokenizer); var parsed_expression = try ParsedExpression.init(allocator, tokenizer);
switch (parsed_expression) { switch (parsed_expression) {
@ -569,16 +593,14 @@ pub const ParsedStatements = union (enum) {
try statements_list.push_one(allocator, .return_nothing); try statements_list.push_one(allocator, .return_nothing);
} }
if (tokenizer.step() and tokenizer.current_token != .newline) { if (tokenizer.step(.ignore_newlines) and tokenizer.current_token != .newline) {
return ParsedStatements{.invalid = "expected newline after expression"}; return ParsedStatements{.invalid = "expected newline"};
} }
has_returned = true; has_returned = true;
}, },
else => { else => return ParsedStatements{.invalid = "invalid statement"},
return ParsedStatements{.invalid = "invalid statement"};
},
} }
} }
@ -599,6 +621,7 @@ pub const Expression = union (enum) {
integer_literal: types.Integer, integer_literal: types.Integer,
float_literal: types.Float, float_literal: types.Float,
string_literal: []const u8, string_literal: []const u8,
array_literal: ArrayElements,
table_literal: TableFields, table_literal: TableFields,
grouped_expression: *Expression, grouped_expression: *Expression,
@ -617,13 +640,20 @@ pub const Expression = union (enum) {
switch (self.*) { switch (self.*) {
.nil_literal, .true_literal, .false_literal, .integer_literal, .float_literal, .string_literal => {}, .nil_literal, .true_literal, .false_literal, .integer_literal, .float_literal, .string_literal => {},
.table_literal => |*literal| { .array_literal => |*elements| {
for (literal.values) |field| { for (elements.values) |*element_expression| {
field.expression.deinit(allocator); element_expression.deinit(allocator);
coral.io.deallocate(allocator, field.expression);
} }
literal.deinit(allocator); elements.deinit(allocator);
},
.table_literal => |*fields| {
for (fields.values) |*field| {
field.expression.deinit(allocator);
}
fields.deinit(allocator);
}, },
.grouped_expression => |expression| { .grouped_expression => |expression| {
@ -716,7 +746,7 @@ pub const Statements = struct {
const TableFields = coral.list.Stack(struct { const TableFields = coral.list.Stack(struct {
identifier: []const u8, identifier: []const u8,
expression: *Expression, expression: Expression,
}); });
pub const UnaryOperation = enum { pub const UnaryOperation = enum {

View File

@ -43,7 +43,6 @@ pub const Token = union(enum) {
pub fn text(self: Token) []const u8 { pub fn text(self: Token) []const u8 {
return switch (self) { return switch (self) {
.unknown => |unknown| @ptrCast([*]const u8, &unknown)[0 .. 1], .unknown => |unknown| @ptrCast([*]const u8, &unknown)[0 .. 1],
.newline => "newline",
.identifier_global => |identifier| identifier, .identifier_global => |identifier| identifier,
.identifier_local => |identifier| identifier, .identifier_local => |identifier| identifier,
@ -83,22 +82,18 @@ pub const Tokenizer = struct {
previous_token: Token = .newline, previous_token: Token = .newline,
current_token: Token = .newline, current_token: Token = .newline,
pub fn has_next(self: Tokenizer) bool { pub fn step(self: *Tokenizer, newline_rules: enum { ignore_newlines, include_newlines }) bool {
return self.source.len != 0;
}
pub fn step(self: *Tokenizer) bool {
self.previous_token = self.current_token; self.previous_token = self.current_token;
var cursor = @as(usize, 0); var cursor = @as(usize, 0);
defer self.source = self.source[cursor ..]; defer self.source = self.source[cursor ..];
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'#' => { '#' => {
cursor += 1; cursor += 1;
while (self.has_next() and (self.source[cursor] == '\n')) { while (cursor < self.source.len and self.source[cursor] == '\n') {
cursor += 1; cursor += 1;
} }
}, },
@ -107,10 +102,17 @@ pub const Tokenizer = struct {
'\n' => { '\n' => {
cursor += 1; cursor += 1;
self.lines_stepped += 1;
self.current_token = .newline;
return true; switch (newline_rules) {
.include_newlines => {
self.lines_stepped += 1;
self.current_token = .newline;
return true;
},
else => {},
}
}, },
'0' ... '9' => { '0' ... '9' => {
@ -118,13 +120,13 @@ pub const Tokenizer = struct {
cursor += 1; cursor += 1;
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'0' ... '9' => cursor += 1, '0' ... '9' => cursor += 1,
'.' => { '.' => {
cursor += 1; cursor += 1;
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'0' ... '9' => cursor += 1, '0' ... '9' => cursor += 1,
else => break, else => break,
}; };
@ -159,40 +161,52 @@ pub const Tokenizer = struct {
switch (identifier[0]) { switch (identifier[0]) {
'n' => if (coral.io.ends_with(identifier, "il")) { 'n' => if (coral.io.ends_with(identifier, "il")) {
self.current_token = .keyword_nil; self.current_token = .keyword_nil;
return true;
}, },
'f' => if (coral.io.ends_with(identifier, "alse")) { 'f' => if (coral.io.ends_with(identifier, "alse")) {
self.current_token = .keyword_false; self.current_token = .keyword_false;
return true;
}, },
't' => if (coral.io.ends_with(identifier, "rue")) { 't' => if (coral.io.ends_with(identifier, "rue")) {
self.current_token = .keyword_true; self.current_token = .keyword_true;
return true;
}, },
'r' => if (coral.io.ends_with(identifier, "eturn")) { 'r' => if (coral.io.ends_with(identifier, "eturn")) {
self.current_token = .keyword_return; self.current_token = .keyword_return;
return true;
}, },
's' => if (coral.io.ends_with(identifier, "elf")) { 's' => if (coral.io.ends_with(identifier, "elf")) {
self.current_token = .keyword_self; self.current_token = .keyword_self;
return true;
}, },
else => self.current_token = .{.local = identifier}, else => {},
} }
self.current_token = .{.local = identifier};
return true; return true;
}, },
'@' => { '@' => {
cursor += 1; cursor += 1;
if (self.has_next()) switch (self.source[cursor]) { if (cursor < self.source.len) switch (self.source[cursor]) {
'A'...'Z', 'a'...'z', '_' => { 'A'...'Z', 'a'...'z', '_' => {
const begin = cursor; const begin = cursor;
cursor += 1; cursor += 1;
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'0'...'9', 'A'...'Z', 'a'...'z', '_' => cursor += 1, '0'...'9', 'A'...'Z', 'a'...'z', '_' => cursor += 1,
else => break, else => break,
}; };
@ -209,7 +223,7 @@ pub const Tokenizer = struct {
cursor += 1; cursor += 1;
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'"' => break, '"' => break,
else => cursor += 1, else => cursor += 1,
}; };
@ -235,7 +249,7 @@ pub const Tokenizer = struct {
cursor += 1; cursor += 1;
while (self.has_next()) switch (self.source[cursor]) { while (cursor < self.source.len) switch (self.source[cursor]) {
'"' => break, '"' => break,
else => cursor += 1, else => cursor += 1,
}; };
@ -319,7 +333,7 @@ pub const Tokenizer = struct {
'=' => { '=' => {
cursor += 1; cursor += 1;
if (self.has_next()) { if (cursor < self.source.len) {
switch (self.source[cursor]) { switch (self.source[cursor]) {
'=' => { '=' => {
cursor += 1; cursor += 1;
@ -347,7 +361,7 @@ pub const Tokenizer = struct {
'<' => { '<' => {
cursor += 1; cursor += 1;
if (self.has_next() and (self.source[cursor] == '=')) { if (cursor < self.source.len and (self.source[cursor] == '=')) {
cursor += 1; cursor += 1;
self.current_token = .symbol_less_equals; self.current_token = .symbol_less_equals;
@ -362,7 +376,7 @@ pub const Tokenizer = struct {
'>' => { '>' => {
cursor += 1; cursor += 1;
if (self.has_next() and (self.source[cursor] == '=')) { if (cursor < self.source.len and (self.source[cursor] == '=')) {
cursor += 1; cursor += 1;
self.current_token = .symbol_greater_equals; self.current_token = .symbol_greater_equals;