const debug = @import("./debug.zig");

const io = @import("./io.zig");

const math = @import("./math.zig");

///
/// Hash type used by tables and their associated structures.
///
pub const Hash = u64;

///
/// Returns a table type of `Key`-`Value` pairs implementing a hash-only approach to key-value storage.
///
/// Entries are hashed using the `keyer` and collisions are resolved by looking for another empty space nearby. This
/// repeats until the load factor reaches the implementation-defined load maximum, at which point the table will rehash
/// itself to acquire more capacity.
///
pub fn Hashed(comptime Key: type, comptime Value: type, comptime keyer: Keyer(Key)) type {
    const hash_info = @typeInfo(Hash).Int;

    // Load factor at (or above) which a write triggers a rehash into a larger table.
    const load_max = 0.75;

    // Fraction of the current capacity that is *added* to the table whenever it grows.
    const growth_factor = 0.6;

    return struct {
        count: usize = 0,
        table: []?Entry = &.{},

        ///
        /// Key-value pair bundling.
        ///
        pub const Entry = struct {
            key: Key,
            value: Value,

            ///
            /// Attempts to write `self` into `entry_table`, returning `true` if no identical entry already existed,
            /// otherwise `false`.
            ///
            /// Note that this never grows or reallocates `entry_table`, meaning that completely filled entry tables
            /// cannot perform the write at all and will invoke safety-checked behavior.
            ///
            fn write_into(self: Entry, entry_table: []?Entry) bool {
                const hash_max = math.min(math.max_int(hash_info), entry_table.len);

                // NOTE(review): `math.wrap` is presumed to fold the hash into a valid index below `hash_max` -
                // confirm against `./math.zig`.
                var hashed_key = math.wrap(keyer.hasher(self.key), math.min_int(hash_info), hash_max);
                var iterations = @as(usize, 0);

                while (true) : (iterations += 1) {
                    // Having probed every slot without finding a free one means the table is full.
                    debug.assert(iterations < entry_table.len);

                    const table_entry = &(entry_table[hashed_key] orelse {
                        entry_table[hashed_key] = .{
                            .key = self.key,
                            .value = self.value,
                        };

                        return true;
                    });

                    if (keyer.comparer(table_entry.key, self.key) == 0) {
                        return false;
                    }

                    // Linear probing: step to the next slot, wrapping around at the end of the table.
                    hashed_key = (hashed_key +% 1) % hash_max;
                }
            }
        };

        ///
        /// Table type.
        ///
        const Self = @This();

        ///
        /// Grows the entry table of `self` using `allocator` if the current load factor has reached the load maximum,
        /// otherwise does nothing.
        ///
        /// The table always grows by at least one slot so that an empty table gains capacity on the first write.
        ///
        fn grow_if_loaded(self: *Self, allocator: io.Allocator) io.AllocationError!void {
            if (self.calculate_load_factor() < load_max) {
                return;
            }

            const growth_amount = @intToFloat(f64, self.table.len) * growth_factor;
            const min_growth = 1;

            try self.rehash(allocator, self.table.len + math.max(min_growth, @floatToInt(usize, growth_amount)));
        }

        ///
        /// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation strategy,
        /// and overwriting any value stored with a matching `key` and returning it if one existed.
        ///
        /// `null` is returned when `key` was not previously present, in which case the entry count of `self` is
        /// incremented.
        ///
        /// The function returns [io.AllocationError] instead if `allocator` cannot commit the memory required to grow
        /// the entry table of `self` when necessary.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn assign(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!?Entry {
            try self.grow_if_loaded(allocator);

            // At least one free slot must exist or the probe loop below could never terminate for a new key.
            debug.assert(self.table.len > self.count);

            const hash_max = math.min(math.max_int(hash_info), self.table.len);
            var hashed_key = math.wrap(keyer.hasher(key), math.min_int(hash_info), hash_max);

            while (true) {
                const entry = &(self.table[hashed_key] orelse {
                    self.table[hashed_key] = .{
                        .key = key,
                        .value = value,
                    };

                    // A previously empty slot was filled, so the table now holds one more entry.
                    self.count += 1;

                    return null;
                });

                if (keyer.comparer(entry.key, key) == 0) {
                    const original_entry = entry.*;

                    entry.* = .{
                        .key = key,
                        .value = value,
                    };

                    return original_entry;
                }

                hashed_key = (hashed_key +% 1) % hash_max;
            }
        }

        ///
        /// Returns the calculated load factor of `self` at the moment.
        ///
        /// A table without any capacity reports a load factor of `1` so that the first write always grows it.
        ///
        pub fn calculate_load_factor(self: Self) f32 {
            return if (self.table.len == 0) 1 else @intToFloat(f32, self.count) / @intToFloat(f32, self.table.len);
        }

        ///
        /// Clears all entries from `self`, resetting the count to `0`.
        ///
        /// To clean up memory allocations made by the table and deinitialize it, see [deinit] instead.
        ///
        pub fn clear(self: *Self) void {
            for (self.table) |*entry| {
                entry.* = null;
            }

            self.count = 0;
        }

        ///
        /// Deinitializes `self`, freeing the entry table memory through `allocator` and resetting `self` to an empty
        /// table without any capacity.
        ///
        /// To clear all items from the table while preserving the current capacity, see [clear] instead.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn deinit(self: *Self, allocator: io.Allocator) void {
            // An empty table still points at the default, non-allocated slice and must not be freed.
            if (self.table.len == 0) {
                return;
            }

            io.deallocate(allocator, self.table);

            self.table = &.{};
            self.count = 0;
        }

        ///
        /// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation strategy,
        /// if no value already exists with a matching `key`, returning `true` if it was inserted, otherwise `false`.
        ///
        /// The entry count of `self` only changes when the pair was actually inserted.
        ///
        /// The function returns [io.AllocationError] instead if `allocator` cannot commit the memory required to grow
        /// the entry table of `self` when necessary.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn insert(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!bool {
            try self.grow_if_loaded(allocator);

            debug.assert(self.table.len > self.count);

            const was_inserted = (Entry{
                .key = key,
                .value = value,
            }).write_into(self.table);

            if (was_inserted) {
                self.count += 1;
            }

            return was_inserted;
        }

        ///
        /// Attempts to find an entry in `self` matching `key`, returning its value or `null` if no matching entry was
        /// found.
        ///
        pub fn lookup(self: Self, key: Key) ?Value {
            if (self.count == 0) {
                return null;
            }

            const hash_max = math.min(math.max_int(hash_info), self.table.len);
            var hashed_key = math.wrap(keyer.hasher(key), math.min_int(hash_info), hash_max);
            var iterations = @as(usize, 0);

            // No probe sequence can pass through more occupied slots than there are entries in the table.
            while (iterations < self.count) : (iterations += 1) {
                const entry = &(self.table[hashed_key] orelse return null);

                if (keyer.comparer(entry.key, key) == 0) {
                    return entry.value;
                }

                hashed_key = (hashed_key +% 1) % hash_max;
            }

            return null;
        }

        ///
        /// Attempts to reallocate and regenerate the table capacity in `self` using `allocator` to be equal to or
        /// greater than `requested_range`, returning [io.AllocationError] if `allocator` cannot commit the memory
        /// required for the table capacity size.
        ///
        /// The capacity never drops below the current entry count, and `self` is left untouched if the allocation
        /// fails.
        ///
        /// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
        /// `self`.
        ///
        pub fn rehash(self: *Self, allocator: io.Allocator, requested_range: usize) io.AllocationError!void {
            const new_table = try io.allocate_many(allocator, math.max(requested_range, self.count), ?Entry);

            for (new_table) |*entry| {
                entry.* = null;
            }

            for (self.table) |maybe_entry| {
                if (maybe_entry) |entry| {
                    // Perform the write outside of the assertion so it can never be elided along with the check.
                    const was_written = entry.write_into(new_table);

                    debug.assert(was_written);
                }
            }

            // An empty table still points at the default, non-allocated slice and must not be freed.
            if (self.table.len != 0) {
                io.deallocate(allocator, self.table);
            }

            self.table = new_table;
        }
    };
}

///
/// Returns a function group for defining table keying operations performable on `Key`.
///
/// `hasher` produces the [Hash] of a key, while `comparer` returns `0` for keys that are considered identical.
///
pub fn Keyer(comptime Key: type) type {
    return struct {
        hasher: fn (key: Key) Hash,
        comparer: fn (key_a: Key, key_b: Key) isize,
    };
}

///
/// A standard [Keyer] for `[]const u8` types that provides general-purpose string keying.
///
pub const string_keyer = Keyer([]const u8){
    .hasher = hash_string,
    .comparer = io.compare,
};

///
/// Returns a general-purpose, non-cryptographically safe hash value for `string`.
///
/// The hash starts from `5381` and folds every byte in as `hash * 33 + byte` (the djb2 scheme), using wrapping
/// arithmetic so that strings of any length hash without tripping integer overflow safety checks.
///
pub fn hash_string(string: []const u8) Hash {
    var hash_code = @as(Hash, 5381);

    for (string) |byte| {
        // `(hash << 5) + hash` is `hash * 33`; `+%` wraps on overflow instead of trapping like `+` does.
        hash_code = ((hash_code << 5) +% hash_code) +% byte;
    }

    return hash_code;
}