From 0974cb016b8af3665be9f999c1541df48f0d165d Mon Sep 17 00:00:00 2001 From: kayomn Date: Sun, 23 Apr 2023 15:53:50 +0100 Subject: [PATCH] Tidy up Kym implementation --- source/kym/bytecode.zig | 1289 ++++++++++++++++++++++----------------- source/kym/kym.zig | 5 +- source/kym/tokens.zig | 35 +- 3 files changed, 766 insertions(+), 563 deletions(-) mode change 100644 => 100755 source/kym/bytecode.zig diff --git a/source/kym/bytecode.zig b/source/kym/bytecode.zig old mode 100644 new mode 100755 index 1b7324a..446c035 --- a/source/kym/bytecode.zig +++ b/source/kym/bytecode.zig @@ -1,552 +1,737 @@ -const coral = @import("coral"); - -const tokens = @import("./tokens.zig"); - -pub const Chunk = struct { - constant_buffer: Buffer, - bytecode_buffer: Buffer, - constants: Constants, - - const Buffer = coral.stack.Dense(u8); - - const Constants = coral.stack.Dense(Constant); - - pub fn compile(self: *Chunk, script: []const u8) !void { - self.reset(); - - var tokenizer = tokens.Tokenizer{.source = script}; - - var parser = Parser{ - .chunk = self, - .tokenizer = &tokenizer, - }; - - errdefer self.reset(); - - try parser.parse_statement(); - } - - pub fn deinit(self: *Chunk) void { - self.bytecode_buffer.deinit(); - self.constant_buffer.deinit(); - self.constants.deinit(); - } - - pub fn emit_byte(self: *Chunk, byte: u8) !void { - return self.bytecode_buffer.push_one(byte); - } - - pub fn emit_opcode(self: *Chunk, opcode: Opcode) !void { - return self.bytecode_buffer.push_one(@enumToInt(opcode)); - } - - pub fn emit_operand(self: *Chunk, operand: Operand) !void { - return self.bytecode_buffer.push_all(coral.io.bytes_of(&operand)); - } - - pub fn intern_string(self: *Chunk, string: []const u8) !u64 { - var constant_slot = @as(u64, 0); - - for (self.constants.values) |interned_constant| { - switch (interned_constant) { - .string => |interned_string| if (coral.io.equals(interned_string, string)) return constant_slot, - } - - constant_slot += 1; - } - - const constant_allocator = coral.stack.as_dense_allocator(&self.constant_buffer); - const allocation = constant_allocator.allocate_many(u8, string.len + 1) orelse return error.OutOfMemory; - - errdefer constant_allocator.deallocate(allocation); - - // Zero-terminate string. - allocation[string.len] = 0; - - // Write string contents. - { - const allocated_string = allocation[0 .. string.len]; - - coral.io.copy(allocated_string, string); - try self.constants.push_one(.{.string = @ptrCast([:0]u8, allocated_string)}); - } - - return constant_slot; - } - - pub fn fetch_byte(self: Chunk, cursor: *usize) ?u8 { - if (cursor.* >= self.bytecode_buffer.values.len) return null; - - defer cursor.* += 1; - - return self.bytecode_buffer.values[cursor.*]; - } - - pub fn fetch_constant(self: Chunk, cursor: *usize) ?*Constant { - return &self.constants.values[self.fetch_operand(cursor) orelse return null]; - } - - pub fn fetch_opcode(self: Chunk, cursor: *usize) ?Opcode { - return @intToEnum(Opcode, self.fetch_byte(cursor) orelse return null); - } - - pub fn fetch_operand(self: Chunk, cursor: *usize) ?Operand { - const operand_size = @sizeOf(Operand); - const updated_cursor = cursor.* + operand_size; - - if (updated_cursor > self.bytecode_buffer.values.len) return null; - - var operand_bytes align(@alignOf(Operand)) = [_]u8{0} ** operand_size; - - coral.io.copy(&operand_bytes, self.bytecode_buffer.values[cursor.* .. updated_cursor]); - - cursor.* = updated_cursor; - - return @bitCast(Operand, operand_bytes); - } - - pub fn init(allocator: coral.io.MemoryAllocator) !Chunk { - const page_size = 1024; - var constant_buffer = try Buffer.init(allocator, page_size); - - errdefer constant_buffer.deinit(); - - const assumed_average_bytecode_size = 1024; - var bytecode_buffer = try Buffer.init(allocator, assumed_average_bytecode_size); - - errdefer bytecode_buffer.deinit(); - - const assumed_average_constant_count = 512; - var constants = try Constants.init(allocator, assumed_average_constant_count); - - errdefer constants.deinit(); - - return Chunk{ - .constant_buffer = constant_buffer, - .bytecode_buffer = bytecode_buffer, - .constants = constants, - }; - } - - pub fn reset(self: *Chunk) void { - self.bytecode_buffer.clear(); - self.constant_buffer.clear(); - } -}; - -pub const Constant = union (enum) { - string: [:0]u8, -}; - -pub const Opcode = enum(u8) { - push_nil, - push_true, - push_false, - push_zero, - push_integer, - push_float, - push_string, - push_array, - push_table, - - not, - neg, - add, - sub, - div, - mul, - - call, - get_field, - set_field, - get_x, - set_x, - get_y, - set_y, - get_global, - set_global, - get_local, -}; - -pub const Operand = u64; - -const ParseError = SyntaxError || error{ - OutOfMemory, -}; - -const Parser = struct { - tokenizer: *tokens.Tokenizer, - scope_depth: u16 = 0, - chunk: *Chunk, - locals: SmallStack(Local, Local.empty) = .{}, - - const Local = struct { - name: []const u8, - depth: u16, - - const empty = Local{ .name = "", .depth = 0 }; - }; - - const Operations = SmallStack(Operator, .not); - - const Operator = enum { - not, - negate, - add, - subtract, - divide, - multiply, - - fn opcode(self: Operator) Opcode { - return switch (self) { - .not => .not, - .negate => .neg, - .add => .add, - .subtract => .sub, - .multiply => .mul, - .divide => .div, - }; - } - - fn precedence(self: Operator) isize { - return switch (self) { - .not => 13, - .negate => 13, - .add => 11, - .subtract => 11, - .divide => 12, - .multiply => 12, - }; - } - }; - - fn declare_local(self: *Parser, name: []const u8) !void { - return self.locals.push(.{ - .name = name, - .depth = self.scope_depth, - }); - } - - fn error_unexpected_end(self: *Parser) SyntaxError { - _ = self; - - return error.BadSyntax; - } - - fn error_unexpected_token(self: *Parser, token: tokens.Token) SyntaxError { - _ = self; - _ = token; - // _ = self.error_writer.write("unexpected token `") catch {}; - // _ = self.error_writer.write(token.text()) catch {}; - // _ = self.error_writer.write("`") catch {}; - - return error.BadSyntax; - } - - fn error_integer_overflow(self: *Parser, integer_literal: []const u8) SyntaxError { - // TODO: Implement. - _ = self; - _ = integer_literal; - - return error.BadSyntax; - } - - pub fn parse_expression(self: *Parser, initial_token: tokens.Token) ParseError!void { - var operations = Operations{}; - var previous_token = initial_token; - - while (self.tokenizer.next()) |current_token| { - switch (current_token) { - .newline => { - previous_token = current_token; - - break; - }, - - else => previous_token = try self.parse_operation(&operations, previous_token, current_token), - } - } - - while (operations.pop()) |operator| try self.chunk.emit_opcode(operator.opcode()); - } - - fn parse_arguments(self: *Parser) ParseError!tokens.Token { - var operations = Operations{}; - var previous_token = @as(tokens.Token, .symbol_paren_left); - var argument_count = @as(Operand, 0); - - while (self.tokenizer.next()) |current_token| { - switch (current_token) { - .symbol_paren_right => { - while (operations.pop()) |operator| try self.chunk.emit_opcode(operator.opcode()); - - try self.chunk.emit_opcode(.call); - try self.chunk.emit_operand(argument_count); - - return .symbol_paren_right; - }, - - .symbol_comma => { - while (operations.pop()) |operator| try self.chunk.emit_opcode(operator.opcode()); - - previous_token = current_token; - - argument_count += 1; - }, - - else => previous_token = try self.parse_operation(&operations, previous_token, current_token), - } - } - - - - return previous_token; - } - - fn parse_group(_: *Parser) ParseError!tokens.Token { - return error.BadSyntax; - } - - pub fn parse_operation(self: *Parser, operations: *Operations, - previous_token: tokens.Token, current_token: tokens.Token) ParseError!tokens.Token { - - switch (current_token) { - .integer_literal => |literal| { - const value = coral.utf8.parse_signed(@bitSizeOf(i64), literal) catch |err| switch (err) { - error.BadSyntax => unreachable, - error.IntOverflow => return self.error_integer_overflow(literal), - }; - - if (value == 0) { - try self.chunk.emit_opcode(.push_zero); - } else { - try self.chunk.emit_opcode(.push_integer); - try self.chunk.emit_operand(@bitCast(u64, value)); - } - }, - - .real_literal => |literal| { - try self.chunk.emit_operand(@bitCast(u64, coral.utf8.parse_float(@bitSizeOf(f64), literal) catch |err| { - switch (err) { - // Already validated to be a real by the tokenizer so this cannot fail, as real syntax is a - // subset of float syntax. - error.BadSyntax => unreachable, - } - })); - }, - - .string_literal => |literal| { - try self.chunk.emit_opcode(.push_string); - try self.chunk.emit_operand(try self.chunk.intern_string(literal)); - }, - - .global_identifier => |identifier| { - try self.chunk.emit_opcode(.get_global); - try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); - }, - - .local_identifier => |identifier| { - if (self.resolve_local(identifier)) |local| { - try self.chunk.emit_opcode(.get_local); - try self.chunk.emit_byte(local); - } else { - try self.chunk.emit_opcode(.push_nil); - } - }, - - .symbol_bang => try operations.push(.not), - - .symbol_plus => while (operations.pop()) |operator| { - if (Operator.add.precedence() < operator.precedence()) break try operations.push(operator); - - try self.chunk.emit_opcode(operator.opcode()); - }, - - .symbol_dash => while (operations.pop()) |operator| { - if (Operator.subtract.precedence() < operator.precedence()) break try operations.push(operator); - - try self.chunk.emit_opcode(operator.opcode()); - }, - - .symbol_asterisk => while (operations.pop()) |operator| { - if (Operator.multiply.precedence() < operator.precedence()) break try operations.push(operator); - - try self.chunk.emit_opcode(operator.opcode()); - }, - - .symbol_forward_slash => while (operations.pop()) |operator| { - if (Operator.divide.precedence() < operator.precedence()) break try operations.push(operator); - - try self.chunk.emit_opcode(operator.opcode()); - }, - - .symbol_period => { - const field_token = self.tokenizer.next() orelse return self.error_unexpected_end(); - - switch (field_token) { - .local_identifier => |identifier| { - try self.chunk.emit_opcode(.get_field); - try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); - - return field_token; - }, - - else => return self.error_unexpected_token(field_token), - } - }, - - .symbol_paren_left => return try switch (previous_token) { - .local_identifier, .global_identifier => self.parse_arguments(), - else => self.parse_group(), - }, - - .symbol_brace_left => { - try self.parse_table(); - - switch (previous_token) { - .local_identifier, .global_identifier => { - // Created as function argument. - try self.chunk.emit_opcode(.call); - try self.chunk.emit_operand(1); - }, - - else => {}, - } - - return .symbol_brace_right; - }, - else => return self.error_unexpected_token(current_token), - } - - return current_token; - } - - pub fn parse_statement(self: *Parser) ParseError!void { - // TODO: Implement. - return self.error_unexpected_end(); - } - - fn parse_table(self: *Parser) ParseError!void { - var field_count = @as(Operand, 0); - - while (self.tokenizer.next()) |field_token| { - switch (field_token) { - .newline => {}, - - .local_identifier => |field_identifier| { - const operation_token = self.tokenizer.next() orelse return self.error_unexpected_end(); - const interned_identifier = try self.chunk.intern_string(field_identifier); - - field_count += 1; - - switch (operation_token) { - .symbol_assign => { - var operations = Operations{}; - var previous_token = @as(tokens.Token, .symbol_assign); - - while (self.tokenizer.next()) |token| : (previous_token = token) switch (token) { - .newline => {}, - .symbol_comma => break, - - .symbol_brace_right => { - try self.chunk.emit_opcode(.push_string); - try self.chunk.emit_operand(interned_identifier); - try self.chunk.emit_opcode(.push_table); - try self.chunk.emit_operand(field_count); - - return; - }, - - else => previous_token = try self.parse_operation(&operations, previous_token, token), - }; - - while (operations.pop()) |operator| try self.chunk.emit_opcode(operator.opcode()); - - try self.chunk.emit_opcode(.push_string); - try self.chunk.emit_operand(interned_identifier); - }, - - .symbol_comma => { - try self.chunk.emit_opcode(.push_string); - try self.chunk.emit_operand(interned_identifier); - }, - - .symbol_brace_right => { - try self.chunk.emit_opcode(.push_string); - try self.chunk.emit_operand(interned_identifier); - try self.chunk.emit_opcode(.push_table); - try self.chunk.emit_operand(field_count); - - return; - }, - - else => return self.error_unexpected_token(operation_token), - } - }, - - .symbol_brace_right => { - try self.chunk.emit_opcode(.push_table); - try self.chunk.emit_operand(field_count); - - return; - }, - - else => return self.error_unexpected_token(field_token), - } - } - - return self.error_unexpected_end(); - } - - fn resolve_local(self: *Parser, name: []const u8) ?u8 { - var count = @as(u8, self.locals.buffer.len); - - while (count != 0) { - const index = count - 1; - - if (coral.io.equals(name, self.locals.buffer[index].name)) return index; - - count = index; - } - - return null; - } -}; - -fn SmallStack(comptime Element: type, comptime default: Element) type { - const maximum = 255; - - return struct { - buffer: [maximum]Element = [_]Element{default} ** maximum, - count: u8 = 0, - - const Self = @This(); - - fn peek(self: Self) ?Element { - if (self.count == 0) return null; - - return self.buffer[self.count - 1]; - } - - fn pop(self: *Self) ?Element { - if (self.count == 0) return null; - - self.count -= 1; - - return self.buffer[self.count]; - } - - fn push(self: *Self, local: Element) !void { - if (self.count == maximum) return error.OutOfMemory; - - self.buffer[self.count] = local; - self.count += 1; - } - }; -} - -const SymbolTable = coral.table.Hashed(coral.table.string_key, usize); - -const SyntaxError = error{ - BadSyntax, -}; +const coral = @import("coral"); + +const tokens = @import("./tokens.zig"); + +pub const Chunk = struct { + constant_buffer: Buffer, + bytecode_buffer: Buffer, + constants: Constants, + locals: SmallStack(Local, .{.name = "", .depth = 0}) = .{}, + + const Buffer = coral.stack.Dense(u8); + + const Constants = coral.stack.Dense(Constant); + + const Local = struct { + name: []const u8, + depth: u16, + + const empty = Local{ .name = "", .depth = 0 }; + }; + + pub fn compile(self: *Chunk, script: []const u8) ParseError!void { + self.reset(); + + var tokenizer = tokens.Tokenizer{.source = script}; + + errdefer self.reset(); + + var parser = Parser{ + .chunk = self, + .tokenizer = &tokenizer, + }; + + while (true) { + parser.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + + try parser.parse_statement(); + } + } + + fn declare_local(self: *Chunk, name: []const u8) !void { + return self.locals.push(.{ + .name = name, + .depth = 0, + }); + } + + pub fn deinit(self: *Chunk) void { + self.bytecode_buffer.deinit(); + self.constant_buffer.deinit(); + self.constants.deinit(); + } + + fn emit_byte(self: *Chunk, byte: u8) !void { + return self.bytecode_buffer.push_one(byte); + } + + fn emit_opcode(self: *Chunk, opcode: Opcode) !void { + return self.bytecode_buffer.push_one(@enumToInt(opcode)); + } + + fn emit_operand(self: *Chunk, operand: Operand) !void { + return self.bytecode_buffer.push_all(coral.io.bytes_of(&operand)); + } + + pub fn fetch_byte(self: Chunk, cursor: *usize) ?u8 { + if (cursor.* >= self.bytecode_buffer.values.len) return null; + + defer cursor.* += 1; + + return self.bytecode_buffer.values[cursor.*]; + } + + pub fn fetch_constant(self: Chunk, cursor: *usize) ?*Constant { + return &self.constants.values[self.fetch_operand(cursor) orelse return null]; + } + + pub fn fetch_opcode(self: Chunk, cursor: *usize) ?Opcode { + return @intToEnum(Opcode, self.fetch_byte(cursor) orelse return null); + } + + pub fn fetch_operand(self: Chunk, cursor: *usize) ?Operand { + const operand_size = @sizeOf(Operand); + const updated_cursor = cursor.* + operand_size; + + if (updated_cursor > self.bytecode_buffer.values.len) return null; + + var operand_bytes align(@alignOf(Operand)) = [_]u8{0} ** operand_size; + + coral.io.copy(&operand_bytes, self.bytecode_buffer.values[cursor.* .. updated_cursor]); + + cursor.* = updated_cursor; + + return @bitCast(Operand, operand_bytes); + } + + pub fn init(allocator: coral.io.MemoryAllocator) !Chunk { + const page_size = 1024; + var constant_buffer = try Buffer.init(allocator, page_size); + + errdefer constant_buffer.deinit(); + + const assumed_average_bytecode_size = 1024; + var bytecode_buffer = try Buffer.init(allocator, assumed_average_bytecode_size); + + errdefer bytecode_buffer.deinit(); + + const assumed_average_constant_count = 512; + var constants = try Constants.init(allocator, assumed_average_constant_count); + + errdefer constants.deinit(); + + return Chunk{ + .constant_buffer = constant_buffer, + .bytecode_buffer = bytecode_buffer, + .constants = constants, + }; + } + + fn intern_string(self: *Chunk, string: []const u8) !u64 { + var constant_slot = @as(u64, 0); + + for (self.constants.values) |interned_constant| { + switch (interned_constant) { + .string => |interned_string| if (coral.io.equals(interned_string, string)) return constant_slot, + } + + constant_slot += 1; + } + + const constant_allocator = coral.stack.as_dense_allocator(&self.constant_buffer); + const allocation = constant_allocator.allocate_many(u8, string.len + 1) orelse return error.OutOfMemory; + + errdefer constant_allocator.deallocate(allocation); + + // Zero-terminate string. + allocation[string.len] = 0; + + // Write string contents. + { + const allocated_string = allocation[0 .. string.len]; + + coral.io.copy(allocated_string, string); + try self.constants.push_one(.{.string = @ptrCast([:0]u8, allocated_string)}); + } + + return constant_slot; + } + + pub fn reset(self: *Chunk) void { + self.bytecode_buffer.clear(); + self.constant_buffer.clear(); + } + + pub fn resolve_local(self: *Chunk, name: []const u8) ?u8 { + var count = @as(u8, self.locals.buffer.len); + + while (count != 0) { + const index = count - 1; + + if (coral.io.equals(name, self.locals.buffer[index].name)) return index; + + count = index; + } + + return null; + } +}; + +pub const Constant = union (enum) { + string: [:0]u8, +}; + +pub const Opcode = enum(u8) { + pop, + push_nil, + push_true, + push_false, + push_zero, + push_integer, + push_float, + push_string, + push_array, + push_table, + + not, + neg, + add, + sub, + div, + mul, + + call, + get_index, + set_index, + get_x, + set_x, + get_y, + set_y, + get_global, + set_global, + get_local, + set_local, +}; + +pub const Operand = u64; + +pub const ParseError = Parser.StepError || tokens.Token.ExpectError || error { + OutOfMemory, + IntOverflow, + UndefinedLocal, +}; + +const Parser = struct { + chunk: *Chunk, + tokenizer: *tokens.Tokenizer, + current_token: tokens.Token = .newline, + previous_token: tokens.Token = .newline, + + const Operator = enum { + not, + negate, + add, + subtract, + divide, + multiply, + + const Self = @This(); + + fn opcode(self: Self) Opcode { + return switch (self) { + .not => .not, + .negate => .neg, + .add => .add, + .subtract => .sub, + .multiply => .mul, + .divide => .div, + }; + } + + fn precedence(self: Self) isize { + return switch (self) { + .not => 13, + .negate => 13, + .add => 11, + .subtract => 11, + .divide => 12, + .multiply => 12, + }; + } + }; + + const OperatorStack = SmallStack(Operator, .not); + + const StepError = error { + UnexpectedEnd, + }; + + const operator_tokens = &.{.symbol_assign, .symbol_plus, .symbol_dash, .symbol_asterisk, .symbol_forward_slash}; + + fn parse_expression(self: *Parser) ParseError!void { + var operators = OperatorStack{}; + var local_depth = @as(usize, 0); + + while (true) { + switch (self.current_token) { + .keyword_nil => { + try self.previous_token.expect_any(operator_tokens); + try self.chunk.emit_opcode(.push_nil); + + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + }, + + .keyword_true => { + try self.previous_token.expect_any(operator_tokens); + try self.chunk.emit_opcode(.push_true); + + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + }, + + .keyword_false => { + try self.previous_token.expect_any(operator_tokens); + try self.chunk.emit_opcode(.push_false); + + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + }, + + .integer_literal => |literal| { + try self.previous_token.expect_any(operator_tokens); + + const value = coral.utf8.parse_signed(@bitSizeOf(i64), literal) + catch |parse_error| switch (parse_error) { + error.BadSyntax => unreachable, + error.IntOverflow => return error.IntOverflow, + }; + + if (value == 0) { + try self.chunk.emit_opcode(.push_zero); + } else { + try self.chunk.emit_opcode(.push_integer); + try self.chunk.emit_operand(@bitCast(u64, value)); + } + + try self.step(); + }, + + .real_literal => |literal| { + try self.previous_token.expect_any(operator_tokens); + + try self.chunk.emit_operand(@bitCast(u64, coral.utf8.parse_float(@bitSizeOf(f64), literal) + catch |parse_error| switch (parse_error) { + // Already validated to be a real by the tokenizer so this cannot fail, as real syntax is a + // subset of float syntax. + error.BadSyntax => unreachable, + })); + + try self.step(); + }, + + .string_literal => |literal| { + try self.previous_token.expect_any(operator_tokens); + try self.chunk.emit_opcode(.push_string); + try self.chunk.emit_operand(try self.chunk.intern_string(literal)); + try self.step(); + }, + + .global_identifier, .local_identifier => { + try self.previous_token.expect_any(&.{.symbol_assign, .symbol_plus, + .symbol_dash, .symbol_asterisk, .symbol_forward_slash, .symbol_period}); + + try self.step(); + }, + + .symbol_bang => { + try self.previous_token.expect_any(operator_tokens); + try operators.push(.not); + try self.step(); + + local_depth = 0; + }, + + .symbol_plus => { + try self.parse_operator(&operators, .add); + + local_depth = 0; + }, + + .symbol_dash => { + try self.parse_operator(&operators, .subtract); + + local_depth = 0; + }, + + .symbol_asterisk => { + try self.parse_operator(&operators, .multiply); + + local_depth = 0; + }, + + .symbol_forward_slash => { + try self.parse_operator(&operators, .divide); + + local_depth = 0; + }, + + .symbol_period => { + switch (self.previous_token) { + .global_identifier => |identifier| { + try self.chunk.emit_opcode(.get_global); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + }, + + .local_identifier => |identifier| { + if (local_depth == 0) { + try self.chunk.emit_byte(self.chunk.resolve_local(identifier) orelse { + return error.UndefinedLocal; + }); + } else { + try self.chunk.emit_opcode(.get_index); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + } + }, + + else => return error.UnexpectedToken, + } + + try self.step(); + + local_depth += 1; + }, + + .symbol_paren_left => { + switch (self.previous_token) { + .local_identifier => |identifier| { + if (local_depth == 0) { + try self.chunk.emit_byte(self.chunk.resolve_local(identifier) orelse { + return error.UndefinedLocal; + }); + } else { + try self.chunk.emit_opcode(.get_index); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + } + }, + + .global_identifier => |identifier| { + try self.chunk.emit_opcode(.get_global); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + }, + + else => { + try self.parse_expression(); + try self.previous_token.expect(.symbol_paren_right); + try self.step(); + + local_depth = 0; + + continue; + }, + } + + local_depth += 1; + + var argument_count = @as(Operand, 0); + + while (true) { + try self.step(); + + try switch (self.current_token) { + .symbol_paren_right => break, + else => self.parse_expression(), + }; + + switch (self.previous_token) { + .symbol_paren_right => break, + .symbol_comma => {}, + else => return error.UnexpectedToken, + } + + argument_count += 1; + } + + try self.chunk.emit_opcode(.call); + try self.chunk.emit_operand(argument_count); + try self.step(); + + local_depth = 0; + }, + + .symbol_brace_left => { + const is_call_argument = switch (self.previous_token) { + .local_identifier, .global_identifier => true, + else => false, + }; + + var field_count = @as(Operand, 0); + + while (true) { + try self.step(); + + switch (self.current_token) { + .newline => {}, + + .local_identifier => { + // Create local copy of identifier because step() will overwrite captures. + const interned_identifier = + try self.chunk.intern_string(self.current_token.local_identifier); + + try self.chunk.emit_opcode(.push_string); + try self.chunk.emit_operand(interned_identifier); + try self.step(); + + switch (self.current_token) { + .symbol_assign => { + try self.parse_expression(); + + field_count += 1; + }, + + .symbol_brace_right => { + try self.chunk.emit_opcode(.push_string); + try self.chunk.emit_operand(interned_identifier); + + field_count += 1; + + break; + }, + + .symbol_comma => { + try self.chunk.emit_opcode(.push_string); + try self.chunk.emit_operand(interned_identifier); + + field_count += 1; + }, + + else => return error.UnexpectedToken, + } + }, + + .symbol_brace_right => break, + else => return error.UnexpectedToken, + } + } + + if (is_call_argument) { + try self.chunk.emit_opcode(.call); + try self.chunk.emit_operand(1); + } + }, + + else => { + try self.previous_token.expect_any(&.{.keyword_nil, .keyword_true, .keyword_false, .integer_literal, + .real_literal, .string_literal, .global_identifier, .local_identifier, .symbol_brace_right, + .symbol_paren_right}); + + while (operators.pop()) |operator| { + try self.chunk.emit_opcode(operator.opcode()); + } + + return; + }, + } + } + } + + fn parse_operator(self: *Parser, operators: *OperatorStack, rhs_operator: Operator) ParseError!void { + try self.previous_token.expect_any(operator_tokens); + + while (operators.pop()) |lhs_operator| { + if (rhs_operator.precedence() < lhs_operator.precedence()) break try operators.push(lhs_operator); + + try self.chunk.emit_opcode(lhs_operator.opcode()); + } + + try operators.push(rhs_operator); + try self.step(); + } + + fn parse_statement(self: *Parser) ParseError!void { + var local_depth = @as(usize, 0); + + while (true) { + switch (self.current_token) { + .newline => self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }, + + .keyword_return => { + try self.previous_token.expect(.newline); + + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + + try self.parse_expression(); + + while (true) { + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + + try self.current_token.expect(.newline); + } + }, + + .local_identifier => { + try self.previous_token.expect_any(&.{.newline, .symbol_period}); + try self.step(); + }, + + .global_identifier => { + try self.previous_token.expect(.newline); + try self.step(); + }, + + .symbol_period => switch (self.previous_token) { + .global_identifier => { + // Create local copy of identifier because step() will overwrite captures. + const identifier = self.previous_token.local_identifier; + + try self.step(); + try self.current_token.expect(.local_identifier); + try self.chunk.emit_opcode(.get_global); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + + local_depth += 1; + }, + + .local_identifier => { + // Create local copy of identifier because step() will overwrite captures. + const identifier = self.previous_token.global_identifier; + + try self.step(); + try self.current_token.expect(.local_identifier); + + if (local_depth == 0) { + try self.chunk.emit_byte(self.chunk.resolve_local(identifier) orelse { + return error.UndefinedLocal; + }); + } else { + try self.chunk.emit_opcode(.get_index); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + } + + local_depth += 1; + }, + + else => return error.UnexpectedToken, + }, + + .symbol_assign => { + try self.previous_token.expect(.local_identifier); + + const identifier = self.previous_token.local_identifier; + + if (local_depth == 0) { + if (self.chunk.resolve_local(identifier)) |local_slot| { + try self.chunk.emit_opcode(.set_local); + try self.chunk.emit_byte(local_slot); + } else { + try self.chunk.declare_local(identifier); + } + } else { + try self.chunk.emit_opcode(.set_index); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + } + + try self.step(); + try self.parse_expression(); + + local_depth = 0; + }, + + .symbol_paren_left => { + switch (self.previous_token) { + .local_identifier => |identifier| { + if (local_depth == 0) { + try self.chunk.emit_byte(self.chunk.resolve_local(identifier) orelse { + return error.UndefinedLocal; + }); + } else { + try self.chunk.emit_opcode(.get_index); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + } + }, + + .global_identifier => |identifier| { + try self.chunk.emit_opcode(.get_global); + try self.chunk.emit_operand(try self.chunk.intern_string(identifier)); + }, + + else => return error.UnexpectedToken, + } + + var argument_count = @as(Operand, 0); + + while (true) { + try self.step(); + + try switch (self.current_token) { + .symbol_paren_right => break, + else => self.parse_expression(), + }; + + argument_count += 1; + + switch (self.current_token) { + .symbol_paren_right => break, + .symbol_comma => {}, + else => return error.UnexpectedToken, + } + } + + try self.chunk.emit_opcode(.call); + try self.chunk.emit_operand(argument_count); + try self.chunk.emit_opcode(.pop); + + self.step() catch |step_error| switch (step_error) { + error.UnexpectedEnd => return, + }; + + local_depth = 0; + }, + + else => return error.UnexpectedToken, + } + } + } + + fn step(self: *Parser) StepError!void { + self.previous_token = self.current_token; + self.current_token = self.tokenizer.next() orelse return error.UnexpectedEnd; + + @import("std").debug.print("{s}\n", .{self.current_token.text()}); + } +}; + +fn SmallStack(comptime Element: type, comptime default: Element) type { + const maximum = 255; + + return struct { + buffer: [maximum]Element = [_]Element{default} ** maximum, + count: u8 = 0, + + const Self = @This(); + + fn peek(self: Self) ?Element { + if (self.count == 0) return null; + + return self.buffer[self.count - 1]; + } + + fn pop(self: *Self) ?Element { + if (self.count == 0) return null; + + self.count -= 1; + + return self.buffer[self.count]; + } + + fn push(self: *Self, element: Element) !void { + if (self.count == maximum) return error.OutOfMemory; + + self.buffer[self.count] = element; + self.count += 1; + } + }; +} + +const SymbolTable = coral.table.Hashed(coral.table.string_key, usize); diff --git a/source/kym/kym.zig b/source/kym/kym.zig index 730789d..f875648 100644 --- a/source/kym/kym.zig +++ b/source/kym/kym.zig @@ -144,10 +144,7 @@ pub const Vm = struct { } }, - pub const CompileError = error { - BadSyntax, - OutOfMemory, - }; + pub const CompileError = bytecode.ParseError; const HeapAllocation = union(enum) { next_free: u32, diff --git a/source/kym/tokens.zig b/source/kym/tokens.zig index aa05a70..df1d228 100644 --- a/source/kym/tokens.zig +++ b/source/kym/tokens.zig @@ -33,6 +33,22 @@ pub const Token = union(enum) { keyword_return, keyword_self, + pub const ExpectError = error { + UnexpectedToken, + }; + + pub fn expect(self: Token, tag: coral.io.Tag(Token)) ExpectError!void { + if (self != tag) return error.UnexpectedToken; + } + + pub fn expect_any(self: Token, tags: []const coral.io.Tag(Token)) ExpectError!void { + for (tags) |tag| { + if (self == tag) return; + } + + return error.UnexpectedToken; + } + pub fn text(self: Token) []const u8 { return switch (self) { .unknown => |unknown| @ptrCast([*]const u8, &unknown)[0 .. 1], @@ -64,6 +80,7 @@ pub const Token = union(enum) { .keyword_false => "false", .keyword_true => "true", .keyword_return => "return", + .keyword_self => "self", }; } }; @@ -72,8 +89,12 @@ pub const Tokenizer = struct { source: []const u8, cursor: usize = 0, + pub fn has_next(self: Tokenizer) bool { + return self.cursor < self.source.len; + } + pub fn next(self: *Tokenizer) ?Token { - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { ' ', '\t' => self.cursor += 1, '\n' => { @@ -87,13 +108,13 @@ pub const Tokenizer = struct { self.cursor += 1; - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { '0' ... '9' => self.cursor += 1, '.' => { self.cursor += 1; - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { '0' ... '9' => self.cursor += 1, else => break, }; @@ -136,13 +157,13 @@ pub const Tokenizer = struct { '@' => { self.cursor += 1; - if (self.cursor < self.source.len) switch (self.source[self.cursor]) { + if (self.has_next()) switch (self.source[self.cursor]) { 'A'...'Z', 'a'...'z', '_' => { const begin = self.cursor; self.cursor += 1; - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { '0'...'9', 'A'...'Z', 'a'...'z', '_' => self.cursor += 1, else => break, }; @@ -157,7 +178,7 @@ pub const Tokenizer = struct { self.cursor += 1; - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { '"' => break, else => self.cursor += 1, }; @@ -180,7 +201,7 @@ pub const Tokenizer = struct { self.cursor += 1; - while (self.cursor < self.source.len) switch (self.source[self.cursor]) { + while (self.has_next()) switch (self.source[self.cursor]) { '"' => break, else => self.cursor += 1, };