ona/source/kym/bytecode.zig

738 lines
18 KiB
Zig
Raw Normal View History

2023-04-23 16:53:50 +02:00
const coral = @import("coral");
const tokens = @import("./tokens.zig");
/// A unit of compiled Kym bytecode together with the constants and locals it refers to.
///
/// A chunk is created with `init`, filled by `compile`, read back through the `fetch_*` functions and
/// released with `deinit`.
pub const Chunk = struct {
    /// Byte storage that interned constant data (string bytes) is allocated from.
    constant_buffer: Buffer,

    /// Emitted opcode bytes and their operands, in emission order.
    bytecode_buffer: Buffer,

    /// Constant table; `intern_string` returns indices ("slots") into it.
    constants: Constants,

    /// Locals declared so far by the script being compiled. Only the first `locals.count` entries are live.
    locals: SmallStack(Local, Local.empty) = .{},

    const Buffer = coral.stack.Dense(u8);

    const Constants = coral.stack.Dense(Constant);

    /// A named local variable slot.
    const Local = struct {
        name: []const u8,
        depth: u16,

        /// Placeholder used to fill unused slots of the locals stack.
        const empty = Local{ .name = "", .depth = 0 };
    };

    /// Discards any previously compiled state and compiles `script` into the chunk.
    ///
    /// Statements are parsed one at a time until the tokenizer runs out of tokens. If parsing fails, the
    /// chunk is reset again before the error is returned so that no partial output is left behind.
    pub fn compile(self: *Chunk, script: []const u8) ParseError!void {
        self.reset();

        var tokenizer = tokens.Tokenizer{.source = script};

        errdefer self.reset();

        var parser = Parser{
            .chunk = self,
            .tokenizer = &tokenizer,
        };

        while (true) {
            // Running out of tokens between statements is the normal way for compilation to finish.
            parser.step() catch |step_error| switch (step_error) {
                error.UnexpectedEnd => return,
            };

            try parser.parse_statement();
        }
    }

    /// Registers `name` as a new local at depth zero, failing if the locals stack is full.
    ///
    /// `name` is stored as-is (not copied), so it must stay valid for as long as the local is looked up.
    fn declare_local(self: *Chunk, name: []const u8) !void {
        return self.locals.push(.{
            .name = name,
            .depth = 0,
        });
    }

    /// Releases the buffers owned by the chunk.
    pub fn deinit(self: *Chunk) void {
        self.bytecode_buffer.deinit();
        self.constant_buffer.deinit();
        self.constants.deinit();
    }

    /// Appends a single raw byte to the bytecode.
    fn emit_byte(self: *Chunk, byte: u8) !void {
        return self.bytecode_buffer.push_one(byte);
    }

    /// Appends `opcode` to the bytecode as its one-byte integer value.
    fn emit_opcode(self: *Chunk, opcode: Opcode) !void {
        return self.bytecode_buffer.push_one(@enumToInt(opcode));
    }

    /// Appends the raw bytes of `operand` to the bytecode.
    fn emit_operand(self: *Chunk, operand: Operand) !void {
        return self.bytecode_buffer.push_all(coral.io.bytes_of(&operand));
    }

    /// Reads the byte at `cursor` and advances the cursor past it, or returns `null` (leaving the cursor
    /// untouched) when the cursor is at or beyond the end of the bytecode.
    pub fn fetch_byte(self: Chunk, cursor: *usize) ?u8 {
        if (cursor.* >= self.bytecode_buffer.values.len) return null;

        defer cursor.* += 1;

        return self.bytecode_buffer.values[cursor.*];
    }

    /// Reads an operand at `cursor` and returns the constant stored in that slot.
    ///
    /// Returns `null` when the bytecode ends before a full operand could be read or when the operand does
    /// not name an existing constant slot.
    pub fn fetch_constant(self: Chunk, cursor: *usize) ?*Constant {
        const constant_slot = self.fetch_operand(cursor) orelse return null;

        // Guard against operands that do not refer to an interned constant instead of indexing out of bounds.
        if (constant_slot >= self.constants.values.len) return null;

        return &self.constants.values[constant_slot];
    }

    /// Reads the byte at `cursor` as an opcode and advances the cursor, or returns `null` at the end of the
    /// bytecode.
    ///
    /// The byte is converted with `@intToEnum`, so it must hold a valid `Opcode` value.
    pub fn fetch_opcode(self: Chunk, cursor: *usize) ?Opcode {
        return @intToEnum(Opcode, self.fetch_byte(cursor) orelse return null);
    }

    /// Reads `@sizeOf(Operand)` bytes at `cursor` as an operand and advances the cursor past them.
    ///
    /// Returns `null`, leaving the cursor untouched, when fewer bytes than that remain.
    pub fn fetch_operand(self: Chunk, cursor: *usize) ?Operand {
        const operand_size = @sizeOf(Operand);
        const updated_cursor = cursor.* + operand_size;

        if (updated_cursor > self.bytecode_buffer.values.len) return null;

        // Operands are not necessarily aligned inside the bytecode, so copy them into aligned storage first.
        var operand_bytes align(@alignOf(Operand)) = [_]u8{0} ** operand_size;

        coral.io.copy(&operand_bytes, self.bytecode_buffer.values[cursor.* .. updated_cursor]);

        cursor.* = updated_cursor;

        return @bitCast(Operand, operand_bytes);
    }

    /// Creates an empty chunk whose buffers are allocated from `allocator`.
    ///
    /// The initial capacities are only starting sizes. Buffers that were already created are released
    /// again if a later allocation fails.
    pub fn init(allocator: coral.io.MemoryAllocator) !Chunk {
        const page_size = 1024;
        var constant_buffer = try Buffer.init(allocator, page_size);

        errdefer constant_buffer.deinit();

        const assumed_average_bytecode_size = 1024;
        var bytecode_buffer = try Buffer.init(allocator, assumed_average_bytecode_size);

        errdefer bytecode_buffer.deinit();

        const assumed_average_constant_count = 512;
        var constants = try Constants.init(allocator, assumed_average_constant_count);

        errdefer constants.deinit();

        return Chunk{
            .constant_buffer = constant_buffer,
            .bytecode_buffer = bytecode_buffer,
            .constants = constants,
        };
    }

    /// Returns the constant slot holding `string`, interning a zero-terminated copy of it first if no equal
    /// string constant exists yet.
    fn intern_string(self: *Chunk, string: []const u8) !u64 {
        var constant_slot = @as(u64, 0);

        // Reuse an existing slot when the same string was interned before. When nothing matches,
        // `constant_slot` ends up equal to the number of constants, which is the slot of the next push.
        for (self.constants.values) |interned_constant| {
            switch (interned_constant) {
                .string => |interned_string| if (coral.io.equals(interned_string, string)) return constant_slot,
            }

            constant_slot += 1;
        }

        const constant_allocator = coral.stack.as_dense_allocator(&self.constant_buffer);
        const allocation = constant_allocator.allocate_many(u8, string.len + 1) orelse return error.OutOfMemory;

        errdefer constant_allocator.deallocate(allocation);

        // Write string contents followed by the zero terminator.
        coral.io.copy(allocation[0 .. string.len], string);

        allocation[string.len] = 0;

        // Sentinel slicing produces the `[:0]u8` view directly and has the terminator verified in safe
        // builds, which a pointer cast would not do.
        try self.constants.push_one(.{.string = allocation[0 .. string.len :0]});

        return constant_slot;
    }

    /// Returns the chunk to its empty state so that it can be compiled into again.
    ///
    /// The constant table and the locals are cleared together with the byte buffers: constants point into
    /// `constant_buffer`, so keeping them after the buffer is cleared would leave them referring to storage
    /// that is about to be reused.
    pub fn reset(self: *Chunk) void {
        self.bytecode_buffer.clear();
        self.constant_buffer.clear();
        self.constants.clear();

        self.locals.count = 0;
    }

    /// Looks up the most recently declared local called `name` and returns its slot, or `null` if there is
    /// no such local.
    pub fn resolve_local(self: *Chunk, name: []const u8) ?u8 {
        // Only the first `count` entries have been declared; the rest of the buffer is placeholder data.
        var count = self.locals.count;

        // Walk backwards so that later declarations shadow earlier ones.
        while (count != 0) {
            const index = count - 1;

            if (coral.io.equals(name, self.locals.buffer[index].name)) return index;

            count = index;
        }

        return null;
    }
};
/// A value stored in a chunk's constant table and referred to from bytecode by its slot index.
pub const Constant = union (enum) {
/// Zero-terminated string data. `Chunk.intern_string` allocates these from the chunk's constant buffer.
string: [:0]u8,
};
/// Instruction set of the bytecode.
///
/// Each opcode is written to the bytecode as its integer value (see `Chunk.emit_opcode`), so the
/// declaration order below determines the encoding and must not be rearranged casually.
pub const Opcode = enum(u8) {
/// Emitted after a call that is used as a statement; presumably discards the call's result.
pop,
/// Emitted for the `nil`, `true` and `false` keywords respectively; no operand follows.
push_nil,
push_true,
push_false,
/// Emitted for an integer literal equal to zero; no operand follows.
push_zero,
/// Followed by an `Operand` holding the bits of the `i64` literal value.
push_integer,
/// NOTE(review): presumably followed by an `Operand` holding the bits of an `f64` - confirm against the
/// emitter and the virtual machine.
push_float,
/// Followed by an `Operand` holding the constant slot of the interned string.
push_string,
push_array,
push_table,
/// Operators; emitted with no operand once both the operator and its operands have been parsed.
not,
neg,
add,
sub,
div,
mul,
/// Followed by an `Operand` holding the number of arguments.
call,
/// Followed by an `Operand` holding the constant slot of the interned field name.
get_index,
set_index,
get_x,
set_x,
get_y,
set_y,
/// `get_global` is followed by an `Operand` holding the constant slot of the interned global name.
get_global,
set_global,
/// `set_local` is followed by a single byte holding the local slot.
/// NOTE(review): `get_local` presumably uses the same one-byte encoding - confirm.
get_local,
set_local,
};
/// Fixed-width argument that follows some opcodes in the bytecode. `Chunk.emit_operand` writes its raw
/// bytes and `Chunk.fetch_operand` reads `@sizeOf(Operand)` bytes back.
pub const Operand = u64;
/// Every error that compiling a script can produce: running out of tokens mid-construct, a token
/// expectation failing, or one of the compiler's own failures listed below.
pub const ParseError = Parser.StepError || tokens.Token.ExpectError || error {
// A buffer or fixed-size stack could not take another element.
OutOfMemory,
// An integer literal does not fit into 64 bits.
IntOverflow,
// A local identifier was read before being declared.
UndefinedLocal,
};
/// Single-pass compiler front end: pulls tokens from `tokenizer` and emits bytecode straight into `chunk`.
///
/// The parser only ever looks at two tokens, the current one and the one before it, and uses the
/// previous token to decide whether the current one is allowed in its position.
const Parser = struct {
    chunk: *Chunk,
    tokenizer: *tokens.Tokenizer,
    current_token: tokens.Token = .newline,
    previous_token: tokens.Token = .newline,

    /// Operators understood by the expression parser.
    const Operator = enum {
        not,
        negate,
        add,
        subtract,
        divide,
        multiply,

        const Self = @This();

        /// Returns the opcode that performs the operator.
        fn opcode(self: Self) Opcode {
            return switch (self) {
                .not => .not,
                .negate => .neg,
                .add => .add,
                .subtract => .sub,
                .multiply => .mul,
                .divide => .div,
            };
        }

        /// Returns the binding strength of the operator; larger values bind tighter.
        fn precedence(self: Self) isize {
            return switch (self) {
                .not => 13,
                .negate => 13,
                .add => 11,
                .subtract => 11,
                .divide => 12,
                .multiply => 12,
            };
        }
    };

    const OperatorStack = SmallStack(Operator, .not);

    const StepError = error {
        UnexpectedEnd,
    };

    /// Tokens after which an operand (literal, identifier or unary operator) is accepted.
    const operator_tokens = &.{.symbol_assign, .symbol_plus, .symbol_dash, .symbol_asterisk, .symbol_forward_slash};

    /// Emits `get_global` followed by the interned name of the global.
    fn emit_global_load(self: *Parser, identifier: []const u8) ParseError!void {
        try self.chunk.emit_opcode(.get_global);
        try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
    }

    /// Emits the load for `identifier` in a chain of accesses.
    ///
    /// At depth zero the identifier names a local variable and is loaded by slot, failing with
    /// `error.UndefinedLocal` if it was never declared. Deeper in the chain it names a field and is loaded
    /// by its interned name.
    fn emit_local_load(self: *Parser, identifier: []const u8, local_depth: usize) ParseError!void {
        if (local_depth == 0) {
            const local_slot = self.chunk.resolve_local(identifier) orelse return error.UndefinedLocal;

            // The slot byte is only meaningful after its opcode, mirroring how `set_local` is emitted.
            try self.chunk.emit_opcode(.get_local);
            try self.chunk.emit_byte(local_slot);
        } else {
            try self.chunk.emit_opcode(.get_index);
            try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
        }
    }

    /// Emits every operator still waiting on `operators`, most recently pushed first.
    fn emit_pending_operators(self: *Parser, operators: *OperatorStack) ParseError!void {
        while (operators.pop()) |operator| {
            try self.chunk.emit_opcode(operator.opcode());
        }
    }

    /// Parses an expression starting at the current token and emits it in postfix order.
    ///
    /// Operands are emitted as soon as they are seen while operators wait on a stack until an operator of
    /// equal or lower precedence, or the end of the expression, forces them out. Parsing stops at the
    /// first token that cannot be part of an expression, which is left as the current token.
    fn parse_expression(self: *Parser) ParseError!void {
        var operators = OperatorStack{};

        // Number of accesses already made in the current `a.b.c` / `a(...)` chain.
        var local_depth = @as(usize, 0);

        while (true) {
            switch (self.current_token) {
                .keyword_nil => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_nil);

                    // The script may end right after the literal; operators that were still waiting for
                    // it as their operand have to be emitted before returning.
                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return self.emit_pending_operators(&operators),
                    };
                },

                .keyword_true => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_true);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return self.emit_pending_operators(&operators),
                    };
                },

                .keyword_false => {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_false);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return self.emit_pending_operators(&operators),
                    };
                },

                .integer_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);

                    const value = coral.utf8.parse_signed(@bitSizeOf(i64), literal)
                        catch |parse_error| switch (parse_error) {
                            // Asserted impossible: the token is expected to already be valid integer syntax.
                            error.BadSyntax => unreachable,
                            error.IntOverflow => return error.IntOverflow,
                        };

                    if (value == 0) {
                        try self.chunk.emit_opcode(.push_zero);
                    } else {
                        try self.chunk.emit_opcode(.push_integer);
                        try self.chunk.emit_operand(@bitCast(u64, value));
                    }

                    try self.step();
                },

                .real_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);

                    const value = coral.utf8.parse_float(@bitSizeOf(f64), literal)
                        catch |parse_error| switch (parse_error) {
                            // Already validated to be a real by the tokenizer so this cannot fail, as real
                            // syntax is a subset of float syntax.
                            error.BadSyntax => unreachable,
                        };

                    // The operand bits are only meaningful after their opcode, as with `push_integer`.
                    try self.chunk.emit_opcode(.push_float);
                    try self.chunk.emit_operand(@bitCast(u64, value));
                    try self.step();
                },

                .string_literal => |literal| {
                    try self.previous_token.expect_any(operator_tokens);
                    try self.chunk.emit_opcode(.push_string);
                    try self.chunk.emit_operand(try self.chunk.intern_string(literal));
                    try self.step();
                },

                // Identifiers emit nothing by themselves; the `.` or `(` that follows emits their load.
                .global_identifier, .local_identifier => {
                    try self.previous_token.expect_any(&.{.symbol_assign, .symbol_plus,
                        .symbol_dash, .symbol_asterisk, .symbol_forward_slash, .symbol_period});

                    try self.step();
                },

                .symbol_bang => {
                    try self.previous_token.expect_any(operator_tokens);
                    try operators.push(.not);
                    try self.step();

                    local_depth = 0;
                },

                .symbol_plus => {
                    try self.parse_operator(&operators, .add);

                    local_depth = 0;
                },

                .symbol_dash => {
                    try self.parse_operator(&operators, .subtract);

                    local_depth = 0;
                },

                .symbol_asterisk => {
                    try self.parse_operator(&operators, .multiply);

                    local_depth = 0;
                },

                .symbol_forward_slash => {
                    try self.parse_operator(&operators, .divide);

                    local_depth = 0;
                },

                .symbol_period => {
                    switch (self.previous_token) {
                        .global_identifier => |identifier| try self.emit_global_load(identifier),
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        else => return error.UnexpectedToken,
                    }

                    try self.step();

                    local_depth += 1;
                },

                .symbol_paren_left => {
                    switch (self.previous_token) {
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        .global_identifier => |identifier| try self.emit_global_load(identifier),

                        else => {
                            // Grouping parenthesis rather than a call. The `(` has to be consumed before
                            // recursing, otherwise the nested parser would see the very same token again
                            // and recurse without end.
                            try self.step();
                            try self.parse_expression();
                            try self.previous_token.expect(.symbol_paren_right);
                            try self.step();

                            local_depth = 0;

                            continue;
                        },
                    }

                    local_depth += 1;

                    var argument_count = @as(Operand, 0);

                    // NOTE(review): this loop inspects `previous_token` and counts an argument only after
                    // its separator, whereas the equivalent loop in `parse_statement` inspects
                    // `current_token` and counts before it - confirm which of the two is intended.
                    while (true) {
                        try self.step();

                        try switch (self.current_token) {
                            .symbol_paren_right => break,
                            else => self.parse_expression(),
                        };

                        switch (self.previous_token) {
                            .symbol_paren_right => break,
                            .symbol_comma => {},
                            else => return error.UnexpectedToken,
                        }

                        argument_count += 1;
                    }

                    try self.chunk.emit_opcode(.call);
                    try self.chunk.emit_operand(argument_count);
                    try self.step();

                    local_depth = 0;
                },

                .symbol_brace_left => {
                    // A table literal directly after an identifier is a call with the table as its only
                    // argument.
                    const is_call_argument = switch (self.previous_token) {
                        .local_identifier, .global_identifier => true,
                        else => false,
                    };

                    var field_count = @as(Operand, 0);

                    while (true) {
                        try self.step();

                        switch (self.current_token) {
                            .newline => {},

                            .local_identifier => {
                                // Create local copy of identifier because step() will overwrite captures.
                                const interned_identifier =
                                    try self.chunk.intern_string(self.current_token.local_identifier);

                                try self.chunk.emit_opcode(.push_string);
                                try self.chunk.emit_operand(interned_identifier);
                                try self.step();

                                switch (self.current_token) {
                                    .symbol_assign => {
                                        try self.parse_expression();

                                        field_count += 1;
                                    },

                                    // A field name without a value pushes the name a second time.
                                    .symbol_brace_right => {
                                        try self.chunk.emit_opcode(.push_string);
                                        try self.chunk.emit_operand(interned_identifier);

                                        field_count += 1;

                                        break;
                                    },

                                    .symbol_comma => {
                                        try self.chunk.emit_opcode(.push_string);
                                        try self.chunk.emit_operand(interned_identifier);

                                        field_count += 1;
                                    },

                                    else => return error.UnexpectedToken,
                                }
                            },

                            .symbol_brace_right => break,
                            else => return error.UnexpectedToken,
                        }
                    }

                    if (is_call_argument) {
                        try self.chunk.emit_opcode(.call);
                        try self.chunk.emit_operand(1);
                    }
                },

                else => {
                    // Any other token ends the expression, which is only valid directly after an operand.
                    try self.previous_token.expect_any(&.{.keyword_nil, .keyword_true, .keyword_false, .integer_literal,
                        .real_literal, .string_literal, .global_identifier, .local_identifier, .symbol_brace_right,
                        .symbol_paren_right});

                    return self.emit_pending_operators(&operators);
                },
            }
        }
    }

    /// Handles the binary operator `rhs_operator` found at the current token.
    ///
    /// Operators already waiting on `operators` that bind at least as tightly as `rhs_operator` are
    /// emitted first, which makes `2 * 3 + 4` come out as `2 3 * 4 +` and keeps operators of equal
    /// precedence left-associative. `rhs_operator` is then pushed and the parser advances.
    fn parse_operator(self: *Parser, operators: *OperatorStack, rhs_operator: Operator) ParseError!void {
        try self.previous_token.expect_any(operator_tokens);

        while (operators.pop()) |lhs_operator| {
            if (rhs_operator.precedence() > lhs_operator.precedence()) {
                // The new operator binds tighter, so the waiting one has to stay on the stack until the
                // new one has been applied.
                try operators.push(lhs_operator);

                break;
            }

            try self.chunk.emit_opcode(lhs_operator.opcode());
        }

        try operators.push(rhs_operator);
        try self.step();
    }

    /// Parses statements starting at the current token until the token stream ends.
    ///
    /// Running out of tokens at a statement boundary ends parsing normally; running out of them elsewhere
    /// surfaces as `error.UnexpectedEnd`.
    fn parse_statement(self: *Parser) ParseError!void {
        // Number of accesses already made in the current `a.b.c` chain.
        var local_depth = @as(usize, 0);

        while (true) {
            switch (self.current_token) {
                .newline => self.step() catch |step_error| switch (step_error) {
                    error.UnexpectedEnd => return,
                },

                .keyword_return => {
                    try self.previous_token.expect(.newline);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };

                    try self.parse_expression();

                    // Nothing but newlines may follow the returned expression.
                    while (true) {
                        self.step() catch |step_error| switch (step_error) {
                            error.UnexpectedEnd => return,
                        };

                        try self.current_token.expect(.newline);
                    }
                },

                // Identifiers emit nothing by themselves; the `.`, `=` or `(` that follows decides what
                // happens with them.
                .local_identifier => {
                    try self.previous_token.expect_any(&.{.newline, .symbol_period});
                    try self.step();
                },

                .global_identifier => {
                    try self.previous_token.expect(.newline);
                    try self.step();
                },

                .symbol_period => switch (self.previous_token) {
                    .global_identifier => {
                        // Create local copy of identifier because step() will overwrite captures.
                        const identifier = self.previous_token.global_identifier;

                        try self.step();
                        try self.current_token.expect(.local_identifier);
                        try self.emit_global_load(identifier);

                        local_depth += 1;
                    },

                    .local_identifier => {
                        // Create local copy of identifier because step() will overwrite captures.
                        const identifier = self.previous_token.local_identifier;

                        try self.step();
                        try self.current_token.expect(.local_identifier);
                        try self.emit_local_load(identifier, local_depth);

                        local_depth += 1;
                    },

                    else => return error.UnexpectedToken,
                },

                .symbol_assign => {
                    try self.previous_token.expect(.local_identifier);

                    const identifier = self.previous_token.local_identifier;

                    if (local_depth == 0) {
                        // Assigning to a bare name either stores into an existing local or declares it.
                        if (self.chunk.resolve_local(identifier)) |local_slot| {
                            try self.chunk.emit_opcode(.set_local);
                            try self.chunk.emit_byte(local_slot);
                        } else {
                            try self.chunk.declare_local(identifier);
                        }
                    } else {
                        try self.chunk.emit_opcode(.set_index);
                        try self.chunk.emit_operand(try self.chunk.intern_string(identifier));
                    }

                    try self.step();
                    try self.parse_expression();

                    local_depth = 0;
                },

                .symbol_paren_left => {
                    switch (self.previous_token) {
                        .local_identifier => |identifier| try self.emit_local_load(identifier, local_depth),
                        .global_identifier => |identifier| try self.emit_global_load(identifier),
                        else => return error.UnexpectedToken,
                    }

                    var argument_count = @as(Operand, 0);

                    while (true) {
                        try self.step();

                        try switch (self.current_token) {
                            .symbol_paren_right => break,
                            else => self.parse_expression(),
                        };

                        argument_count += 1;

                        switch (self.current_token) {
                            .symbol_paren_right => break,
                            .symbol_comma => {},
                            else => return error.UnexpectedToken,
                        }
                    }

                    try self.chunk.emit_opcode(.call);
                    try self.chunk.emit_operand(argument_count);

                    // A call used as a statement is followed by `pop`.
                    try self.chunk.emit_opcode(.pop);

                    self.step() catch |step_error| switch (step_error) {
                        error.UnexpectedEnd => return,
                    };

                    local_depth = 0;
                },

                else => return error.UnexpectedToken,
            }
        }
    }

    /// Advances to the next token, remembering the current one as `previous_token`.
    ///
    /// Fails with `error.UnexpectedEnd` once the tokenizer has no more tokens; `previous_token` has
    /// already been updated by then while `current_token` keeps its old value.
    fn step(self: *Parser) StepError!void {
        self.previous_token = self.current_token;
        self.current_token = self.tokenizer.next() orelse return error.UnexpectedEnd;
    }
};
/// Returns a fixed-capacity, inline (non-allocating) stack type holding up to 255 `Element`s.
///
/// Unused slots of the backing array are filled with `default`.
fn SmallStack(comptime Element: type, comptime default: Element) type {
    const capacity = 255;

    return struct {
        buffer: [capacity]Element = [_]Element{default} ** capacity,
        count: u8 = 0,

        const Self = @This();

        /// Returns the most recently pushed element without removing it, or `null` if the stack is empty.
        fn peek(self: Self) ?Element {
            return if (self.count != 0) self.buffer[self.count - 1] else null;
        }

        /// Removes and returns the most recently pushed element, or returns `null` if the stack is empty.
        fn pop(self: *Self) ?Element {
            const top = self.peek() orelse return null;

            self.count -= 1;

            return top;
        }

        /// Places `element` on top of the stack, failing with `error.OutOfMemory` once the stack is full.
        fn push(self: *Self, element: Element) !void {
            if (self.count == capacity) return error.OutOfMemory;

            defer self.count += 1;

            self.buffer[self.count] = element;
        }
    };
}
/// Hashed table type mapping `coral.table.string_key` keys to `usize` values.
/// NOTE(review): not referenced anywhere in the visible part of this file - confirm whether it is still needed.
const SymbolTable = coral.table.Hashed(coral.table.string_key, usize);