const debug = @import("./debug.zig");

const io = @import("./io.zig");

const math = @import("./math.zig");

///
/// Hash type used by tables and their associated structures.
///
pub const Hash = u64;

///
/// Returns a table type of `Key`-`Value` pairs implementing a hash-only approach to key-value storage.
///
/// Entries are hashed using the `keyer` and collisions are resolved by probing forward, one slot at a time, for the
/// next vacant slot. Once the load factor reaches the implementation-defined load maximum, the next write rehashes
/// the table into a larger allocation before storing the new entry.
///
pub fn Hashed(comptime Key: type, comptime Value: type, comptime keyer: Keyer(Key)) type {
    // Load factor at (or above) which a write grows the table first.
    const load_max = 0.75;

    // Fraction of the current capacity that is *added* on growth (new capacity = old * (1 + growth_factor)).
    const growth_factor = 0.6;

    // Number of slots allocated by `init`.
    const initial_capacity = 4;

    return struct {
        count: usize,
        table: []?Entry,

        ///
        /// Key-value pair bundling.
        ///
        pub const Entry = struct {
            key: Key,
            value: Value,

            ///
            /// Attempts to write `self` into `entry_table`, returning `true` if no entry with an identical key
            /// already existed, otherwise `false` (in which case `entry_table` is left untouched).
            ///
            /// Note that this never reallocates `entry_table`, meaning that empty or completely filled entry
            /// tables cannot perform the write at all and will invoke safety-checked behavior.
            ///
            fn write_into(self: Entry, entry_table: []?Entry) bool {
                debug.assert(entry_table.len != 0);

                var slot = first_slot(self.key, entry_table.len);
                var probes = @as(usize, 0);

                while (true) : (probes += 1) {
                    // Every slot has been visited without finding a vacancy or a match: the table is full.
                    debug.assert(probes < entry_table.len);

                    if (entry_table[slot]) |occupant| {
                        if (keyer.comparer(occupant.key, self.key) == 0) {
                            return false;
                        }
                    } else {
                        entry_table[slot] = self;

                        return true;
                    }

                    slot = (slot + 1) % entry_table.len;
                }
            }
        };

        ///
        /// Table type.
        ///
        const Self = @This();

        ///
        /// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation
        /// strategy, overwriting any value stored with a matching `key` and returning the replaced entry if one
        /// existed, otherwise `null`.
        ///
        /// The function returns [AllocationError] instead if `allocator` cannot commit the memory required to grow
        /// the entry table of `self` when necessary.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn assign(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!?Entry {
            try self.reserve_slot(allocator);

            var slot = first_slot(key, self.table.len);

            // `reserve_slot` guarantees at least one vacant slot, so this probe always terminates.
            while (true) : (slot = (slot + 1) % self.table.len) {
                // Capture by pointer so the overwrite lands in the table rather than in a temporary copy.
                if (self.table[slot]) |*occupant| {
                    if (keyer.comparer(occupant.key, key) == 0) {
                        const replaced = occupant.*;

                        occupant.* = .{
                            .key = key,
                            .value = value,
                        };

                        return replaced;
                    }
                } else {
                    self.table[slot] = .{
                        .key = key,
                        .value = value,
                    };

                    self.count += 1;

                    return null;
                }
            }
        }

        ///
        /// Returns the calculated load factor of `self` at the moment.
        ///
        /// A table without any capacity reports a load factor of `1` so that it is always treated as full.
        ///
        pub fn calculate_load_factor(self: Self) f32 {
            if (self.table.len == 0) {
                return 1;
            }

            return @intToFloat(f32, self.count) / @intToFloat(f32, self.table.len);
        }

        ///
        /// Clears all entries from `self`, resetting the count to `0`.
        ///
        /// To clean up memory allocations made by the table and deinitialize it, see [deinit] instead.
        ///
        pub fn clear(self: *Self) void {
            for (self.table) |*slot| {
                slot.* = null;
            }

            self.count = 0;
        }

        ///
        /// Deinitializes `self` and sets it to an invalid state, freeing all memory allocated by `allocator`.
        ///
        /// To clear all items from the table while preserving the current capacity, see [clear] instead.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn deinit(self: *Self, allocator: io.Allocator) void {
            // A zero-length table is the static empty slice assigned below, which was never allocated.
            if (self.table.len != 0) {
                io.deallocate(allocator, self.table);
            }

            self.table = &.{};
            self.count = 0;
        }

        ///
        /// Attempts to allocate and return an empty table with an implementation-defined initial capacity using
        /// `allocator` as the memory allocation strategy.
        ///
        /// The function returns [AllocationError] instead if `allocator` cannot commit the memory required for the
        /// table capacity size.
        ///
        pub fn init(allocator: io.Allocator) io.AllocationError!Self {
            const table = try io.allocate_many(?Entry, initial_capacity, allocator);

            for (table) |*slot| {
                slot.* = null;
            }

            return Self{
                .table = table,
                .count = 0,
            };
        }

        ///
        /// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation
        /// strategy, if no value already exists with a matching `key`, returning `true` if it was inserted,
        /// otherwise `false`.
        ///
        /// The function returns [AllocationError] instead if `allocator` cannot commit the memory required to grow
        /// the entry table of `self` when necessary.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn insert(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!bool {
            try self.reserve_slot(allocator);

            const was_inserted = (Entry{
                .key = key,
                .value = value,
            }).write_into(self.table);

            if (was_inserted) {
                self.count += 1;
            }

            return was_inserted;
        }

        ///
        /// Attempts to find an entry in `self` matching `key`, returning its value or `null` if no matching entry
        /// was found.
        ///
        pub fn lookup(self: Self, key: Key) ?Value {
            if (self.count == 0) {
                return null;
            }

            var slot = first_slot(key, self.table.len);
            var probes = @as(usize, 0);

            // Bounded by the capacity so that a completely filled table cannot probe forever on a missing key.
            while (probes < self.table.len) : (probes += 1) {
                const entry = self.table[slot] orelse return null;

                if (keyer.comparer(entry.key, key) == 0) {
                    return entry.value;
                }

                slot = (slot + 1) % self.table.len;
            }

            return null;
        }

        ///
        /// Attempts to reallocate and regenerate the table capacity in `self` using `allocator` to be equal to or
        /// greater than `requested_range`, returning [io.AllocationError] if `allocator` cannot commit the memory
        /// required for the table capacity size.
        ///
        /// The new capacity is always large enough to hold every current entry plus one vacant slot. `self` is left
        /// unmodified if the allocation fails.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn rehash(self: *Self, allocator: io.Allocator, requested_range: usize) io.AllocationError!void {
            const new_table = try io.allocate_many(?Entry, math.max(requested_range, self.count + 1), allocator);

            for (new_table) |*slot| {
                slot.* = null;
            }

            for (self.table) |maybe_entry| {
                if (maybe_entry) |entry| {
                    // Performed outside of the assertion so the write never depends on how asserts are compiled.
                    const was_written = entry.write_into(new_table);

                    debug.assert(was_written);
                }
            }

            // A zero-length table is the static empty slice left behind by `deinit`, which was never allocated.
            if (self.table.len != 0) {
                io.deallocate(allocator, self.table);
            }

            self.table = new_table;
        }

        ///
        /// Returns the slot index in a table of `capacity` slots at which probing for `key` begins.
        ///
        /// `capacity` must not be `0`.
        ///
        fn first_slot(key: Key, capacity: usize) usize {
            return @intCast(usize, keyer.hasher(key) % @intCast(Hash, capacity));
        }

        ///
        /// Grows the entry table of `self` using `allocator` if the load maximum has been reached, guaranteeing
        /// on success that at least one vacant slot is available for the write that follows.
        ///
        /// The function returns [AllocationError] instead if the grown capacity is not representable or
        /// `allocator` cannot commit the memory required for it.
        ///
        fn reserve_slot(self: *Self, allocator: io.Allocator) io.AllocationError!void {
            if (self.calculate_load_factor() >= load_max) {
                const grown_capacity = @intToFloat(f64, self.table.len) * (1 + growth_factor);

                if (grown_capacity > math.max_int(@typeInfo(usize).Int)) {
                    return error.OutOfMemory;
                }

                try self.rehash(allocator, @floatToInt(usize, grown_capacity));
            }

            debug.assert(self.table.len > self.count);
        }
    };
}

///
/// Returns a function group for defining table keying operations performable on `Key`.
///
/// `hasher` produces the [Hash] of a key, while `comparer` returns `0` when two keys are considered identical.
///
pub fn Keyer(comptime Key: type) type {
    return struct {
        hasher: fn (key: Key) Hash,
        comparer: fn (key_a: Key, key_b: Key) isize,
    };
}

///
/// A standard [Keyer] for `[]const u8` types that provides general-purpose string keying.
///
pub const string_keyer = Keyer([]const u8){
    .hasher = hash_string,
    .comparer = io.compare,
};

///
/// Returns a general-purpose, non-cryptographically safe hash value for `string`.
///
/// The hash starts at `5381` and, for every byte, is multiplied by `33` and has the byte added to it. All arithmetic
/// wraps on overflow, so strings of any length may be hashed without invoking safety-checked behavior.
///
pub fn hash_string(string: []const u8) Hash {
    var hash_code = @as(Hash, 5381);

    for (string) |byte| {
        hash_code = ((hash_code << 5) +% hash_code) +% byte;
    }

    return hash_code;
}