const coral = @import("coral");

const tokens = @import("./tokens.zig");

///
/// Compiled script unit: a bytecode stream plus the table of constants that the bytecode refers to by slot.
///
pub const Chunk = struct {
    /// Backing storage for the bytes of interned constants.
    constant_buffer: Buffer,

    /// Emitted opcodes and operands, in execution order.
    bytecode_buffer: Buffer,

    /// Constant table. Operands of constant-referencing opcodes are indices into `constants.values`.
    constants: Constants,

    const Buffer = coral.stack.Dense(u8);

    const Constants = coral.stack.Dense(Constant);

    ///
    /// Discards whatever `self` currently holds and compiles `script` into it.
    ///
    /// On failure `self` is reset again so that no partially compiled output is left behind, and the error is
    /// returned to the caller.
    ///
    pub fn compile(self: *Chunk, script: []const u8) !void {
        self.reset();

        var tokenizer = tokens.Tokenizer{.source = script};

        var parser = Parser{
            .chunk = self,
            .tokenizer = &tokenizer,
        };

        errdefer self.reset();

        try parser.parse_statement();
    }

    ///
    /// Releases all three buffers owned by `self`.
    ///
    pub fn deinit(self: *Chunk) void {
        self.bytecode_buffer.deinit();
        self.constant_buffer.deinit();
        self.constants.deinit();
    }

    ///
    /// Appends the raw `byte` to the bytecode stream.
    ///
    pub fn emit_byte(self: *Chunk, byte: u8) !void {
        return self.bytecode_buffer.push_one(byte);
    }

    ///
    /// Appends `opcode` to the bytecode stream as its single-byte integer value.
    ///
    pub fn emit_opcode(self: *Chunk, opcode: Opcode) !void {
        return self.bytecode_buffer.push_one(@enumToInt(opcode));
    }

    ///
    /// Appends the in-memory bytes of `operand` to the bytecode stream.
    ///
    pub fn emit_operand(self: *Chunk, operand: Operand) !void {
        return self.bytecode_buffer.push_all(coral.io.bytes_of(&operand));
    }

    ///
    /// Returns the constant slot holding a string equal to `string`, adding a new zero-terminated copy of it to
    /// the constant table when no equal string has been interned yet.
    ///
    /// Returns `error.OutOfMemory` when the copy cannot be allocated; failure to grow the constant table is
    /// propagated as well.
    ///
    pub fn intern_string(self: *Chunk, string: []const u8) !u64 {
        var constant_slot = @as(u64, 0);

        // Linear search for an already interned equal string.
        for (self.constants.values) |interned_constant| {
            switch (interned_constant) {
                .string => |interned_string| if (coral.io.equals(interned_string, string)) return constant_slot,
            }

            constant_slot += 1;
        }

        // NOTE(review): interned slices point into `constant_buffer`; this presumably relies on that storage not
        // moving while the chunk is alive - confirm against `coral.stack.as_dense_allocator`.
        const constant_allocator = coral.stack.as_dense_allocator(&self.constant_buffer);
        const allocation = constant_allocator.allocate_many(u8, string.len + 1) orelse return error.OutOfMemory;

        errdefer constant_allocator.deallocate(allocation);

        // Zero-terminate string.
        allocation[string.len] = 0;

        // Write string contents.
        coral.io.copy(allocation[0 .. string.len], string);

        // Sentinel slicing (rather than a pointer cast) has the terminator written above verified in safe builds.
        try self.constants.push_one(.{.string = allocation[0 .. string.len :0]});

        return constant_slot;
    }

    ///
    /// Reads the byte at `cursor` and advances `cursor` past it, or returns `null` (leaving `cursor` untouched)
    /// when `cursor` is at or beyond the end of the bytecode.
    ///
    pub fn fetch_byte(self: Chunk, cursor: *usize) ?u8 {
        if (cursor.* >= self.bytecode_buffer.values.len) return null;

        defer cursor.* += 1;

        return self.bytecode_buffer.values[cursor.*];
    }

    ///
    /// Reads an operand at `cursor` and returns the constant stored in the slot it names.
    ///
    /// Returns `null` when the operand cannot be read or when it names a slot outside of the constant table.
    ///
    pub fn fetch_constant(self: Chunk, cursor: *usize) ?*Constant {
        const constant_slot = self.fetch_operand(cursor) orelse return null;

        // The operand comes from bytecode, so it is validated rather than trusted as an index.
        if (constant_slot >= self.constants.values.len) return null;

        return &self.constants.values[constant_slot];
    }

    ///
    /// Reads the byte at `cursor` as an `Opcode`, or returns `null` when no byte is left.
    ///
    pub fn fetch_opcode(self: Chunk, cursor: *usize) ?Opcode {
        return @intToEnum(Opcode, self.fetch_byte(cursor) orelse return null);
    }

    ///
    /// Reads `@sizeOf(Operand)` bytes at `cursor` as an `Operand` and advances `cursor` past them, or returns
    /// `null` (leaving `cursor` untouched) when fewer bytes than that remain.
    ///
    pub fn fetch_operand(self: Chunk, cursor: *usize) ?Operand {
        const operand_size = @sizeOf(Operand);
        const updated_cursor = cursor.* + operand_size;

        if (updated_cursor > self.bytecode_buffer.values.len) return null;

        // Bytecode offsets carry no alignment guarantee, so the bytes are copied into aligned storage first.
        var operand_bytes align(@alignOf(Operand)) = [_]u8{0} ** operand_size;

        coral.io.copy(&operand_bytes, self.bytecode_buffer.values[cursor.* .. updated_cursor]);

        cursor.* = updated_cursor;

        return @bitCast(Operand, operand_bytes);
    }

    ///
    /// Creates an empty chunk whose three buffers are initialized with `allocator`.
    ///
    /// Buffers that were already initialized are released again if a later initialization fails.
    ///
    pub fn init(allocator: coral.io.MemoryAllocator) !Chunk {
        const page_size = 1024;
        var constant_buffer = try Buffer.init(allocator, page_size);

        errdefer constant_buffer.deinit();

        const assumed_average_bytecode_size = 1024;
        var bytecode_buffer = try Buffer.init(allocator, assumed_average_bytecode_size);

        errdefer bytecode_buffer.deinit();

        const assumed_average_constant_count = 512;
        var constants = try Constants.init(allocator, assumed_average_constant_count);

        errdefer constants.deinit();

        return Chunk{
            .constant_buffer = constant_buffer,
            .bytecode_buffer = bytecode_buffer,
            .constants = constants,
        };
    }

    ///
    /// Empties the chunk so that it can be compiled into again.
    ///
    pub fn reset(self: *Chunk) void {
        self.bytecode_buffer.clear();

        // The constant table is cleared together with the storage its entries point into; otherwise stale
        // entries would survive and `intern_string` could match them.
        self.constants.clear();
        self.constant_buffer.clear();
    }
};

///
/// Value stored in the constant table of a `Chunk`.
///
pub const Constant = union (enum) {
    string: [:0]u8,
};

///
/// Instruction identifiers, each encoded in the bytecode stream as a single byte.
///
pub const Opcode = enum(u8) {
    push_nil,
    push_true,
    push_false,
    push_zero,
    push_integer,
    push_float,
    push_string,
    push_array,
    push_table,

    not,
    neg,

    add,
    sub,
    div,
    mul,

    call,
    get_field,
    set_field,
    get_x,
    set_x,
    get_y,
    set_y,
    get_global,
    set_global,
    get_local,
};

///
/// Fixed-width argument that follows some opcodes in the bytecode stream.
///
pub const Operand = u64;

const ParseError = SyntaxError || error{
    OutOfMemory,
};

///
/// Reads tokens from `tokenizer` and emits the corresponding bytecode into `chunk`.
///
const Parser = struct {
    tokenizer: *tokens.Tokenizer,
    scope_depth: u16 = 0,
    chunk: *Chunk,
    locals: SmallStack(Local, Local.empty) = .{},

    const Local = struct {
        name: []const u8,
        depth: u16,

        const empty = Local{ .name = "", .depth = 0 };
    };

    const Operations = SmallStack(Operator, .not);

    const Operator = enum {
        not,
        negate,
        add,
        subtract,
        divide,
        multiply,

        /// Returns the opcode that performs `self`.
        fn opcode(self: Operator) Opcode {
            return switch (self) {
                .not => .not,
                .negate => .neg,
                .add => .add,
                .subtract => .sub,
                .multiply => .mul,
                .divide => .div,
            };
        }

        /// Returns the binding strength of `self`; larger values bind tighter.
        fn precedence(self: Operator) isize {
            return switch (self) {
                .not => 13,
                .negate => 13,
                .add => 11,
                .subtract => 11,
                .divide => 12,
                .multiply => 12,
            };
        }
    };

    ///
    /// Records `name` as a local variable at the current scope depth.
    ///
    fn declare_local(self: *Parser, name: []const u8) !void {
        return self.locals.push(.{
            .name = name,
            .depth = self.scope_depth,
        });
    }

    ///
    /// Pops every operator still waiting in `operations` and emits its opcode, most recently pushed first.
    ///
    fn emit_pending_operations(self: *Parser, operations: *Operations) ParseError!void {
        while (operations.pop()) |operator| try self.chunk.emit_opcode(operator.opcode());
    }

    fn error_unexpected_end(self: *Parser) SyntaxError {
        _ = self;

        return error.BadSyntax;
    }

    fn error_unexpected_token(self: *Parser, token: tokens.Token) SyntaxError {
        // TODO: Report the text of the offending token once the parser has somewhere to write diagnostics to.
        _ = self;
        _ = token;

        return error.BadSyntax;
    }

    fn error_integer_overflow(self: *Parser, integer_literal: []const u8) SyntaxError {
        // TODO: Implement.
        _ = self;
        _ = integer_literal;

        return error.BadSyntax;
    }

    ///
    /// Parses tokens up to the next newline (or the end of input) as one expression, with `initial_token` acting
    /// as the token that preceded it.
    ///
    pub fn parse_expression(self: *Parser, initial_token: tokens.Token) ParseError!void {
        var operations = Operations{};
        var previous_token = initial_token;

        while (self.tokenizer.next()) |current_token| {
            switch (current_token) {
                .newline => {
                    previous_token = current_token;

                    break;
                },

                else => previous_token = try self.parse_operation(&operations, previous_token, current_token),
            }
        }

        try self.emit_pending_operations(&operations);
    }

    ///
    /// Parses a comma-separated argument list up to its closing parenthesis, then emits `call` followed by the
    /// number of arguments as its operand.
    ///
    /// The opening parenthesis must already have been consumed. Returns `error.BadSyntax` when input ends
    /// before the list is closed.
    ///
    fn parse_arguments(self: *Parser) ParseError!tokens.Token {
        var operations = Operations{};
        var previous_token = @as(tokens.Token, .symbol_paren_left);
        var argument_count = @as(Operand, 0);

        while (self.tokenizer.next()) |current_token| {
            switch (current_token) {
                .symbol_paren_right => {
                    try self.emit_pending_operations(&operations);

                    // Commas only count the arguments they terminate, so a final argument that is closed by the
                    // parenthesis instead is counted here.
                    switch (previous_token) {
                        .symbol_paren_left, .symbol_comma => {},
                        else => argument_count += 1,
                    }

                    try self.chunk.emit_opcode(.call);
                    try self.chunk.emit_operand(argument_count);

                    return .symbol_paren_right;
                },

                .symbol_comma => {
                    try self.emit_pending_operations(&operations);

                    previous_token = current_token;
                    argument_count += 1;
                },

                else => previous_token = try self.parse_operation(&operations, previous_token, current_token),
            }
        }

        // Input ended without a closing parenthesis.
        return self.error_unexpected_end();
    }

    fn parse_group(_: *Parser) ParseError!tokens.Token {
        // TODO: Implement parenthesized sub-expressions.
        return error.BadSyntax;
    }

    ///
    /// Handles the single token `current_token` of an expression, emitting bytecode for operands and managing
    /// `operations` for operators.
    ///
    /// `previous_token` decides how `(` and `{` are interpreted: after an identifier they start a call.
    /// Returns the token that the caller should treat as the previous token from here on, which differs from
    /// `current_token` whenever further tokens were consumed.
    ///
    pub fn parse_operation(self: *Parser, operations: *Operations,
        previous_token: tokens.Token, current_token: tokens.Token) ParseError!tokens.Token {

        switch (current_token) {
            .integer_literal => |literal| {
                const value = coral.utf8.parse_signed(@bitSizeOf(i64), literal) catch |err| switch (err) {
                    error.BadSyntax => unreachable,
                    error.IntOverflow => return self.error_integer_overflow(literal),
                };

                if (value == 0) {
                    try self.chunk.emit_opcode(.push_zero);
                } else {
                    try self.chunk.emit_opcode(.push_integer);
                    try self.chunk.emit_operand(@bitCast(u64, value));
                }
            },

            .real_literal => |literal| {
                const value = coral.utf8.parse_float(@bitSizeOf(f64), literal) catch |err| switch (err) {
                    // Already validated to be a real by the tokenizer so this cannot fail, as real syntax is a
                    // subset of float syntax.
                    error.BadSyntax => unreachable,
                };

                // The operand is only meaningful when preceded by the opcode that consumes it.
                try self.chunk.emit_opcode(.push_float);
                try self.chunk.emit_operand(@bitCast(u64, value));
            },

            .string_literal => |literal| {
                try self.chunk.emit_opcode(.push_string);
                try self.chunk.emit_operand(try self.chunk.intern_string(literal));
            },

            .global_identifier => |identifier| {
                try self.chunk.emit_opcode(.get_global);
                try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
            },

            .local_identifier => |identifier| {
                if (self.resolve_local(identifier)) |local| {
                    try self.chunk.emit_opcode(.get_local);
                    try self.chunk.emit_byte(local);
                } else {
                    // Undeclared locals evaluate to nil.
                    try self.chunk.emit_opcode(.push_nil);
                }
            },

            .symbol_bang => try operations.push(.not),
            .symbol_plus => try self.push_binary_operator(operations, .add),
            .symbol_dash => try self.push_binary_operator(operations, .subtract),
            .symbol_asterisk => try self.push_binary_operator(operations, .multiply),
            .symbol_forward_slash => try self.push_binary_operator(operations, .divide),

            .symbol_period => {
                const field_token = self.tokenizer.next() orelse return self.error_unexpected_end();

                switch (field_token) {
                    .local_identifier => |identifier| {
                        try self.chunk.emit_opcode(.get_field);
                        try self.chunk.emit_operand(try self.chunk.intern_string(identifier));

                        return field_token;
                    },

                    else => return self.error_unexpected_token(field_token),
                }
            },

            .symbol_paren_left => return switch (previous_token) {
                .local_identifier, .global_identifier => self.parse_arguments(),
                else => self.parse_group(),
            },

            .symbol_brace_left => {
                try self.parse_table();

                switch (previous_token) {
                    .local_identifier, .global_identifier => {
                        // Created as function argument.
                        try self.chunk.emit_opcode(.call);
                        try self.chunk.emit_operand(1);
                    },

                    else => {},
                }

                return .symbol_brace_right;
            },

            else => return self.error_unexpected_token(current_token),
        }

        return current_token;
    }

    ///
    /// Parses a single statement.
    ///
    pub fn parse_statement(self: *Parser) ParseError!void {
        // TODO: Implement.
        return self.error_unexpected_end();
    }

    ///
    /// Parses the fields of a table literal up to its closing brace, then emits `push_table` followed by the
    /// number of fields as its operand.
    ///
    /// The opening brace must already have been consumed. For a field with an assigned value, the bytecode of
    /// the value is emitted before the `push_string` of the field name. Returns `error.BadSyntax` when input
    /// ends before the table is closed.
    ///
    fn parse_table(self: *Parser) ParseError!void {
        var field_count = @as(Operand, 0);

        while (self.tokenizer.next()) |field_token| {
            switch (field_token) {
                .newline => {},

                .local_identifier => |field_identifier| {
                    const operation_token = self.tokenizer.next() orelse return self.error_unexpected_end();
                    const interned_identifier = try self.chunk.intern_string(field_identifier);

                    field_count += 1;

                    switch (operation_token) {
                        .symbol_assign => {
                            var operations = Operations{};
                            var previous_token = @as(tokens.Token, .symbol_assign);

                            while (self.tokenizer.next()) |token| {
                                switch (token) {
                                    .newline => previous_token = token,
                                    .symbol_comma => break,

                                    .symbol_brace_right => {
                                        // The value must be complete before its name and the table follow it.
                                        try self.emit_pending_operations(&operations);
                                        try self.chunk.emit_opcode(.push_string);
                                        try self.chunk.emit_operand(interned_identifier);
                                        try self.chunk.emit_opcode(.push_table);
                                        try self.chunk.emit_operand(field_count);

                                        return;
                                    },

                                    // The returned token is kept as-is because it may differ from `token`
                                    // when further tokens were consumed while handling it.
                                    else => previous_token =
                                        try self.parse_operation(&operations, previous_token, token),
                                }
                            }

                            try self.emit_pending_operations(&operations);
                            try self.chunk.emit_opcode(.push_string);
                            try self.chunk.emit_operand(interned_identifier);
                        },

                        .symbol_comma => {
                            try self.chunk.emit_opcode(.push_string);
                            try self.chunk.emit_operand(interned_identifier);
                        },

                        .symbol_brace_right => {
                            try self.chunk.emit_opcode(.push_string);
                            try self.chunk.emit_operand(interned_identifier);
                            try self.chunk.emit_opcode(.push_table);
                            try self.chunk.emit_operand(field_count);

                            return;
                        },

                        else => return self.error_unexpected_token(operation_token),
                    }
                },

                .symbol_brace_right => {
                    try self.chunk.emit_opcode(.push_table);
                    try self.chunk.emit_operand(field_count);

                    return;
                },

                else => return self.error_unexpected_token(field_token),
            }
        }

        return self.error_unexpected_end();
    }

    ///
    /// Schedules the binary `operator`, first emitting every pending operator that binds at least as tightly so
    /// that evaluation is left-associative and honors precedence.
    ///
    fn push_binary_operator(self: *Parser, operations: *Operations, operator: Operator) ParseError!void {
        while (operations.peek()) |pending_operator| {
            // Looser-binding operators must wait until the right-hand side of `operator` has been emitted.
            if (pending_operator.precedence() < operator.precedence()) break;

            _ = operations.pop();

            try self.chunk.emit_opcode(pending_operator.opcode());
        }

        try operations.push(operator);
    }

    ///
    /// Returns the index of the most recently declared local called `name`, or `null` when there is none.
    ///
    fn resolve_local(self: *Parser, name: []const u8) ?u8 {
        // Only slots below `count` hold declared locals; the rest of the buffer is default filler.
        var count = self.locals.count;

        while (count != 0) {
            const index = count - 1;

            if (coral.io.equals(name, self.locals.buffer[index].name)) return index;

            count = index;
        }

        return null;
    }
};

///
/// Returns a fixed-capacity stack type of up to 255 `Element`s held inline, with unused slots set to `default`.
///
fn SmallStack(comptime Element: type, comptime default: Element) type {
    const maximum = 255;

    return struct {
        buffer: [maximum]Element = [_]Element{default} ** maximum,
        count: u8 = 0,

        const Self = @This();

        /// Returns the top element without removing it, or `null` when the stack is empty.
        fn peek(self: Self) ?Element {
            if (self.count == 0) return null;

            return self.buffer[self.count - 1];
        }

        /// Removes and returns the top element, or returns `null` when the stack is empty.
        fn pop(self: *Self) ?Element {
            if (self.count == 0) return null;

            self.count -= 1;

            return self.buffer[self.count];
        }

        /// Places `element` on top of the stack, or returns `error.OutOfMemory` when the stack is full.
        fn push(self: *Self, element: Element) !void {
            if (self.count == maximum) return error.OutOfMemory;

            self.buffer[self.count] = element;
            self.count += 1;
        }
    };
}

const SymbolTable = coral.table.Hashed(coral.table.string_key, usize);

const SyntaxError = error{
    BadSyntax,
};