const coral = @import("coral");

const tokens = @import("./tokens.zig");

/// Compiled form of a script: a bytecode stream plus the constants and local-variable slots it refers to.
pub const Chunk = struct {
    /// Backing storage for the bytes of interned string constants.
    constant_buffer: Buffer,

    /// Emitted opcodes and operands.
    bytecode_buffer: Buffer,

    /// Constant table; entries of the `string` variant point into `constant_buffer`.
    constants: Constants,

    /// Local variables declared so far by the script being compiled.
    locals: SmallStack(Local, .{.name = "", .depth = 0}) = .{},

    const Buffer = coral.stack.Dense(u8);

    const Constants = coral.stack.Dense(Constant);

    const Local = struct {
        name: []const u8,
        depth: u16,

        const empty = Local{ .name = "", .depth = 0 };
    };

    /// Compiles `script` into `self`, discarding whatever `self` held before.
    ///
    /// On failure `self` is reset again so that no partially compiled state remains.
    pub fn compile(self: *Chunk, script: []const u8) ParseError!void {
        self.reset();

        var tokenizer = tokens.Tokenizer{.source = script};

        errdefer self.reset();

        var parser = Parser{
            .chunk = self,
            .tokenizer = &tokenizer,
        };

        while (true) {
            // Running out of tokens between statements is the normal way for compilation to finish.
            parser.step() catch |step_error| switch (step_error) {
                error.UnexpectedEnd => return,
            };

            try parser.parse_statement();
        }
    }

    /// Registers `name` as a new local variable at depth `0`.
    fn declare_local(self: *Chunk, name: []const u8) !void {
        return self.locals.push(.{
            .name = name,
            .depth = 0,
        });
    }

    /// Releases the buffers owned by `self`.
    pub fn deinit(self: *Chunk) void {
        self.bytecode_buffer.deinit();
        self.constant_buffer.deinit();
        self.constants.deinit();
    }

    /// Appends a single raw `byte` to the bytecode stream.
    fn emit_byte(self: *Chunk, byte: u8) !void {
        return self.bytecode_buffer.push_one(byte);
    }

    /// Appends `opcode` to the bytecode stream as one byte.
    fn emit_opcode(self: *Chunk, opcode: Opcode) !void {
        return self.bytecode_buffer.push_one(@enumToInt(opcode));
    }

    /// Appends the in-memory bytes of `operand` to the bytecode stream.
    fn emit_operand(self: *Chunk, operand: Operand) !void {
        return self.bytecode_buffer.push_all(coral.io.bytes_of(&operand));
    }

    /// Reads the byte at `cursor` and advances `cursor` past it, or returns `null` at the end of the bytecode.
    pub fn fetch_byte(self: Chunk, cursor: *usize) ?u8 {
        if (cursor.* >= self.bytecode_buffer.values.len) return null;

        defer cursor.* += 1;

        return self.bytecode_buffer.values[cursor.*];
    }

    /// Reads an operand at `cursor` and returns the constant stored in that slot.
    ///
    /// Returns `null` if the operand cannot be read or does not name an existing constant slot.
    pub fn fetch_constant(self: Chunk, cursor: *usize) ?*Constant {
        const constant_slot = self.fetch_operand(cursor) orelse return null;

        // Guard against bytecode that references a slot past the end of the constant table.
        if (constant_slot >= self.constants.values.len) return null;

        return &self.constants.values[constant_slot];
    }

    /// Reads the byte at `cursor` as an `Opcode`, or returns `null` at the end of the bytecode.
    ///
    /// The byte is converted with `@intToEnum` without being validated first.
    pub fn fetch_opcode(self: Chunk, cursor: *usize) ?Opcode {
        return @intToEnum(Opcode, self.fetch_byte(cursor) orelse return null);
    }

    /// Reads `@sizeOf(Operand)` bytes at `cursor` as an `Operand` and advances `cursor` past them.
    ///
    /// Returns `null`, leaving `cursor` untouched, if fewer bytes than that remain.
    pub fn fetch_operand(self: Chunk, cursor: *usize) ?Operand {
        const operand_size = @sizeOf(Operand);
        const updated_cursor = cursor.* + operand_size;

        if (updated_cursor > self.bytecode_buffer.values.len) return null;

        // Copy into an aligned temporary, as the bytecode stream gives no alignment guarantee.
        var operand_bytes align(@alignOf(Operand)) = [_]u8{0} ** operand_size;

        coral.io.copy(&operand_bytes, self.bytecode_buffer.values[cursor.* .. updated_cursor]);

        cursor.* = updated_cursor;

        return @bitCast(Operand, operand_bytes);
    }

    /// Creates an empty chunk whose buffers are allocated from `allocator`.
    pub fn init(allocator: coral.io.MemoryAllocator) !Chunk {
        const page_size = 1024;

        var constant_buffer = try Buffer.init(allocator, page_size);

        errdefer constant_buffer.deinit();

        const assumed_average_bytecode_size = 1024;

        var bytecode_buffer = try Buffer.init(allocator, assumed_average_bytecode_size);

        errdefer bytecode_buffer.deinit();

        const assumed_average_constant_count = 512;

        var constants = try Constants.init(allocator, assumed_average_constant_count);

        errdefer constants.deinit();

        return Chunk{
            .constant_buffer = constant_buffer,
            .bytecode_buffer = bytecode_buffer,
            .constants = constants,
        };
    }

    /// Returns the constant slot holding `string`, adding a zero-terminated copy of it to the constant table
    /// if no equal string has been interned yet.
    fn intern_string(self: *Chunk, string: []const u8) !u64 {
        var constant_slot = @as(u64, 0);

        for (self.constants.values) |interned_constant| {
            switch (interned_constant) {
                .string => |interned_string| if (coral.io.equals(interned_string, string)) return constant_slot,
            }

            constant_slot += 1;
        }

        const constant_allocator = coral.stack.as_dense_allocator(&self.constant_buffer);
        const allocation = constant_allocator.allocate_many(u8, string.len + 1) orelse return error.OutOfMemory;

        errdefer constant_allocator.deallocate(allocation);

        // Zero-terminate string.
        allocation[string.len] = 0;

        // Write string contents.
        coral.io.copy(allocation[0 .. string.len], string);

        // Sentinel slicing yields the `[:0]u8` type directly (and checks the terminator in safe builds).
        try self.constants.push_one(.{.string = allocation[0 .. string.len :0]});

        return constant_slot;
    }

    /// Discards all compiled state - bytecode, constants and locals - while keeping the buffers allocated.
    pub fn reset(self: *Chunk) void {
        self.bytecode_buffer.clear();

        // The constant table points into the constant buffer, so the two must always be cleared together.
        self.constants.clear();
        self.constant_buffer.clear();

        // Local names are slices of the previously compiled script and must not outlive it.
        self.locals = .{};
    }

    /// Returns the slot of the most recently declared local called `name`, or `null` if there is none.
    pub fn resolve_local(self: *Chunk, name: []const u8) ?u8 {
        // Only the first `count` entries are declared locals; the rest of the buffer is default filler.
        var count = self.locals.count;

        while (count != 0) {
            const index = count - 1;

            if (coral.io.equals(name, self.locals.buffer[index].name)) return index;

            count = index;
        }

        return null;
    }
};

/// Value stored in the constant table of a `Chunk`.
pub const Constant = union (enum) {
    string: [:0]u8,
};

/// Instruction identifiers, each encoded as a single byte of bytecode.
pub const Opcode = enum(u8) {
    pop,

    push_nil,
    push_true,
    push_false,
    push_zero,
    push_integer,
    push_float,
    push_string,
    push_array,
    push_table,

    not,
    neg,

    add,
    sub,
    div,
    mul,

    call,
    get_index,
    set_index,
    get_x,
    set_x,
    get_y,
    set_y,
    get_global,
    set_global,
    get_local,
    set_local,
};

/// Fixed-width argument that follows some opcodes in the bytecode stream.
pub const Operand = u64;

/// Every error that compiling a script can produce.
pub const ParseError = Parser.StepError || tokens.Token.ExpectError || error {
    OutOfMemory,
    IntOverflow,
    UndefinedLocal,
};

/// Single-pass compiler that reads tokens from `tokenizer` and emits bytecode into `chunk`.
const Parser = struct {
    chunk: *Chunk,
    tokenizer: *tokens.Tokenizer,
    current_token: tokens.Token = .newline,
    previous_token: tokens.Token = .newline,

    const Operator = enum {
        not,
        negate,
        add,
        subtract,
        divide,
        multiply,

        const Self = @This();

        /// Returns the opcode that performs this operator.
        fn opcode(self: Self) Opcode {
            return switch (self) {
                .not => .not,
                .negate => .neg,
                .add => .add,
                .subtract => .sub,
                .multiply => .mul,
                .divide => .div,
            };
        }

        /// Returns the binding strength of this operator; larger values bind more tightly.
        fn precedence(self: Self) isize {
            return switch (self) {
                .not => 13,
                .negate => 13,
                .add => 11,
                .subtract => 11,
                .divide => 12,
                .multiply => 12,
            };
        }
    };

    const OperatorStack = SmallStack(Operator, .not);

    const StepError = error {
        UnexpectedEnd,
    };

    const operator_tokens = &.{.symbol_assign, .symbol_plus, .symbol_dash, .symbol_asterisk, .symbol_forward_slash};

    /// Emits the instructions that load the global called `identifier`.
    fn emit_global_load(self: *Parser, identifier: []const u8) ParseError!void {
        try self.chunk.emit_opcode(.get_global);
        try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
    }

    /// Emits the instructions that load `identifier`: as a local variable when `local_depth` is zero, otherwise
    /// as an index into the value loaded before it.
    fn emit_local_load(self: *Parser, identifier: []const u8, local_depth: usize) ParseError!void {
        if (local_depth == 0) {
            // The slot byte is the argument of `get_local`, mirroring how `set_local` is emitted.
            try self.chunk.emit_opcode(.get_local);
            try self.chunk.emit_byte(self.chunk.resolve_local(identifier) orelse return error.UndefinedLocal);
        } else {
            try self.chunk.emit_opcode(.get_index);
            try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
        }
    }

    /// Compiles a comma-separated argument list and returns how many arguments it contained.
    ///
    /// Expects the current token to be the opening parenthesis and returns with the current token being the
    /// closing one.
    fn parse_arguments(self: *Parser) ParseError!Operand {
        var argument_count = @as(Operand, 0);

        while (true) {
            try self.step();

            try switch (self.current_token) {
                .symbol_paren_right => break,
                else => self.parse_expression(),
            };

            argument_count += 1;

            // parse_expression() stops on, but does not consume, the token that ended the argument.
            switch (self.current_token) {
                .symbol_paren_right => break,
                .symbol_comma => {},
                else => return error.UnexpectedToken,
            }
        }

        return argument_count;
    }

    /// Compiles tokens starting at the current one as an expression, emitting operands as they are read and
    /// operators in postfix order.
    ///
    /// Returns upon reaching a token that cannot continue the expression, leaving it as the current token.
    ///
    /// NOTE(review): operand tokens require the previous token to be one of `operator_tokens`, which looks
    /// like it rejects an operand that directly follows `(`, `,` or `return` - confirm against
    /// `tokens.Token.expect_any`.
    fn parse_expression(self: *Parser) ParseError!void {
        var operators = OperatorStack{};
        var local_depth = @as(usize, 0);

        while (true) {
            switch (self.current_token) {
                .keyword_nil => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_nil);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };
                },

                .keyword_true => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_true);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };
                },

                .keyword_false => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_false);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };
                },

                .integer_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);

                    const value = coral.utf8.parse_signed(@bitSizeOf(i64), literal) catch |parse_error| switch (parse_error) {
                        error.BadSyntax => unreachable,
                        error.IntOverflow => return error.IntOverflow,
                    };

                    if (value == 0) {
                        try self.chunk.emit_opcode(.push_zero);
                    } else {
                        try self.chunk.emit_opcode(.push_integer);
                        try self.chunk.emit_operand(@bitCast(u64, value));
                    }

                    try self.step();
                },

                .real_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);

                    // The operand is only meaningful as the argument of a push instruction.
                    try self.chunk.emit_opcode(.push_float);

                    try self.chunk.emit_operand(@bitCast(u64, coral.utf8.parse_float(@bitSizeOf(f64), literal) catch |parse_error| switch (parse_error) {
                        // Already validated to be a real by the tokenizer so this cannot fail, as real syntax is a
                        // subset of float syntax.
                        error.BadSyntax => unreachable,
                    }));

                    try self.step();
                },

                .string_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_string);
                    try self.chunk.emit_operand(try self.chunk.intern_string(literal));
                    try self.step();
                },

                .global_identifier, .local_identifier => {
                    // Nothing is emitted yet; the token that follows decides how the identifier is used.
                    try self.previous_token.expect_any(&.{.symbol_assign, .symbol_plus, .symbol_dash, .symbol_asterisk, .symbol_forward_slash, .symbol_period});
                    try self.step();
                },

                .symbol_bang => {
                    try self.previous_token.expect_any(operator_tokens);
                    try operators.push(.not);
                    try self.step();

                    local_depth = 0;
                },

                .symbol_plus => {
                    try self.parse_operator(&operators, .add);

                    local_depth = 0;
                },

                .symbol_dash => {
                    try self.parse_operator(&operators, .subtract);

                    local_depth = 0;
                },

                .symbol_asterisk => {
                    try self.parse_operator(&operators, .multiply);

                    local_depth = 0;
                },

                .symbol_forward_slash => {
                    try self.parse_operator(&operators, .divide);

                    local_depth = 0;
                },

                .symbol_period => {
                    switch (self.previous_token) {
                        .global_identifier => |identifier| try self.emit_global_load(identifier),
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        else => return error.UnexpectedToken,
                    }

                    try self.step();

                    local_depth += 1;
                },

                .symbol_paren_left => {
                    switch (self.previous_token) {
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        .global_identifier => |identifier| try self.emit_global_load(identifier),

                        else => {
                            // Parenthesized sub-expression: consume the opening parenthesis first so that the
                            // recursive call makes progress, then require and consume the closing one.
                            try self.step();
                            try self.parse_expression();
                            try self.current_token.expect(.symbol_paren_right);
                            try self.step();

                            local_depth = 0;

                            continue;
                        },
                    }

                    const argument_count = try self.parse_arguments();

                    try self.chunk.emit_opcode(.call);
                    try self.chunk.emit_operand(argument_count);
                    try self.step();

                    local_depth = 0;
                },

                .symbol_brace_left => {
                    // NOTE(review): this branch neither consumes the closing brace nor steps past `=` before
                    // parsing a field value - left unchanged, confirm the intended table literal grammar.
                    const is_call_argument = switch (self.previous_token) {
                        .local_identifier, .global_identifier => true,
                        else => false,
                    };

                    var field_count = @as(Operand, 0);

                    while (true) {
                        try self.step();

                        switch (self.current_token) {
                            .newline => {},

                            .local_identifier => {
                                // Create local copy of identifier because step() will overwrite captures.
                                const interned_identifier = try self.chunk.intern_string(self.current_token.local_identifier);

                                try self.chunk.emit_opcode(.push_string);
                                try self.chunk.emit_operand(interned_identifier);
                                try self.step();

                                switch (self.current_token) {
                                    .symbol_assign => {
                                        try self.parse_expression();

                                        field_count += 1;
                                    },

                                    .symbol_brace_right => {
                                        try self.chunk.emit_opcode(.push_string);
                                        try self.chunk.emit_operand(interned_identifier);

                                        field_count += 1;

                                        break;
                                    },

                                    .symbol_comma => {
                                        try self.chunk.emit_opcode(.push_string);
                                        try self.chunk.emit_operand(interned_identifier);

                                        field_count += 1;
                                    },

                                    else => return error.UnexpectedToken,
                                }
                            },

                            .symbol_brace_right => break,
                            else => return error.UnexpectedToken,
                        }
                    }

                    try self.chunk.emit_opcode(.push_table);
                    try self.chunk.emit_operand(field_count);

                    if (is_call_argument) {
                        try self.chunk.emit_opcode(.call);
                        try self.chunk.emit_operand(1);
                    }
                },

                else => {
                    // Any other token ends the expression, which must not end on a dangling operator.
                    try self.previous_token.expect_any(&.{.keyword_nil, .keyword_true, .keyword_false, .integer_literal,
                        .real_literal, .string_literal, .global_identifier, .local_identifier, .symbol_brace_right,
                        .symbol_paren_right});

                    while (operators.pop()) |operator| {
                        try self.chunk.emit_opcode(operator.opcode());
                    }

                    return;
                },
            }
        }
    }

    /// Handles the binary operator `rhs_operator` found at the current token: emits every pending operator
    /// that binds at least as tightly, pushes `rhs_operator` onto `operators`, then steps to the next token.
    fn parse_operator(self: *Parser, operators: *OperatorStack, rhs_operator: Operator) ParseError!void {
        try self.previous_token.expect_any(operator_tokens);

        while (operators.pop()) |lhs_operator| {
            if (rhs_operator.precedence() > lhs_operator.precedence()) {
                // The pending operator binds more loosely, so it has to wait for the new one's operands.
                try operators.push(lhs_operator);

                break;
            }

            try self.chunk.emit_opcode(lhs_operator.opcode());
        }

        try operators.push(rhs_operator);
        try self.step();
    }

    /// Compiles statements starting at the current token until the tokenizer runs out of tokens.
    fn parse_statement(self: *Parser) ParseError!void {
        var local_depth = @as(usize, 0);

        while (true) {
            switch (self.current_token) {
                .newline => self.step() catch |step_error| switch (step_error) {
                    error.UnexpectedEnd => return,
                },

                .keyword_return => {
                    try self.previous_token.expect(.newline);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };

                    try self.parse_expression();

                    // Only newlines may follow a return statement.
                    while (true) {
                        self.step() catch |step_error| switch (step_error) {
                            error.UnexpectedEnd => return,
                        };

                        try self.current_token.expect(.newline);
                    }
                },

                .local_identifier => {
                    try self.previous_token.expect_any(&.{.newline, .symbol_period});
                    try self.step();
                },

                .global_identifier => {
                    try self.previous_token.expect(.newline);
                    try self.step();
                },

                .symbol_period => switch (self.previous_token) {
                    .global_identifier => {
                        // Create local copy of identifier because step() will overwrite captures.
                        const identifier = self.previous_token.global_identifier;

                        try self.step();
                        try self.current_token.expect(.local_identifier);
                        try self.emit_global_load(identifier);

                        local_depth += 1;
                    },

                    .local_identifier => {
                        // Create local copy of identifier because step() will overwrite captures.
                        const identifier = self.previous_token.local_identifier;

                        try self.step();
                        try self.current_token.expect(.local_identifier);
                        try self.emit_local_load(identifier, local_depth);

                        local_depth += 1;
                    },

                    else => return error.UnexpectedToken,
                },

                .symbol_assign => {
                    try self.previous_token.expect(.local_identifier);

                    const identifier = self.previous_token.local_identifier;

                    if (local_depth == 0) {
                        if (self.chunk.resolve_local(identifier)) |local_slot| {
                            try self.chunk.emit_opcode(.set_local);
                            try self.chunk.emit_byte(local_slot);
                        } else {
                            // First assignment to an unknown name declares it.
                            try self.chunk.declare_local(identifier);
                        }
                    } else {
                        try self.chunk.emit_opcode(.set_index);
                        try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
                    }

                    try self.step();
                    try self.parse_expression();

                    local_depth = 0;
                },

                .symbol_paren_left => {
                    switch (self.previous_token) {
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        .global_identifier => |identifier| try self.emit_global_load(identifier),
                        else => return error.UnexpectedToken,
                    }

                    const argument_count = try self.parse_arguments();

                    try self.chunk.emit_opcode(.call);
                    try self.chunk.emit_operand(argument_count);

                    // A call used as a statement discards its result.
                    try self.chunk.emit_opcode(.pop);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };

                    local_depth = 0;
                },

                else => return error.UnexpectedToken,
            }
        }
    }

    /// Makes the current token the previous one and reads the next token from the tokenizer.
    ///
    /// Returns `error.UnexpectedEnd` once the tokenizer has no tokens left; `previous_token` has already been
    /// updated by then.
    fn step(self: *Parser) StepError!void {
        self.previous_token = self.current_token;
        self.current_token = self.tokenizer.next() orelse return error.UnexpectedEnd;
    }
};

/// Returns a fixed-capacity stack type holding up to 255 values of `Element` inline, with unused entries set
/// to `default`.
fn SmallStack(comptime Element: type, comptime default: Element) type {
    const maximum = 255;

    return struct {
        buffer: [maximum]Element = [_]Element{default} ** maximum,
        count: u8 = 0,

        const Self = @This();

        /// Returns the top element without removing it, or `null` if the stack is empty.
        fn peek(self: Self) ?Element {
            if (self.count == 0) return null;

            return self.buffer[self.count - 1];
        }

        /// Removes and returns the top element, or returns `null` if the stack is empty.
        fn pop(self: *Self) ?Element {
            if (self.count == 0) return null;

            self.count -= 1;

            return self.buffer[self.count];
        }

        /// Places `element` on top of the stack, failing with `error.OutOfMemory` if the stack is full.
        fn push(self: *Self, element: Element) !void {
            if (self.count == maximum) return error.OutOfMemory;

            self.buffer[self.count] = element;
            self.count += 1;
        }
    };
}

const SymbolTable = coral.table.Hashed(coral.table.string_key, usize);