Implement hash table type in Coral
This commit is contained in:
parent
b94e03f829
commit
a0c30163a2
|
@ -1,64 +1,303 @@
|
||||||
|
const debug = @import("./debug.zig");
|
||||||
|
|
||||||
const io = @import("./io.zig");
|
const io = @import("./io.zig");
|
||||||
|
|
||||||
const math = @import("./math.zig");
|
const math = @import("./math.zig");
|
||||||
|
|
||||||
pub fn Hashed(comptime key: Key, comptime Element: type) type {
|
///
|
||||||
const Entry = struct {
|
/// Hash type used by tables and their associated structures.
|
||||||
key: key.Element,
|
///
|
||||||
value: Element,
|
pub const Hash = u64;
|
||||||
};
|
|
||||||
|
///
|
||||||
|
/// Returns a table type of `Key`-`Value` pairs implementing a hash-only approach to key-value storage.
|
||||||
|
///
|
||||||
|
/// Entries are hashed using the `keyer` and collisions are resolved by looking for another empty space nearby. This
|
||||||
|
/// repeats until the load factor exceeds the implementation-defined load maximum, at which point the table will rehash
|
||||||
|
/// itself to acquire more capacity.
|
||||||
|
///
|
||||||
|
pub fn Hashed(comptime Key: type, comptime Value: type, comptime keyer: Keyer(Key)) type {
|
||||||
|
const hash_info = @typeInfo(Hash).Int;
|
||||||
|
const load_max = 0.75;
|
||||||
|
const growth_factor = 0.6;
|
||||||
|
|
||||||
return struct {
|
return struct {
|
||||||
entries: []?Entry = &.{},
|
count: usize,
|
||||||
|
table: []?Entry,
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Key-value pair bundling.
|
||||||
|
///
|
||||||
|
pub const Entry = struct {
    /// Key the pair is stored and looked up under.
    key: Key,

    /// Value associated with `key`.
    value: Value,

    ///
    /// Probes `entry_table` for a slot to hold `self`, starting at the slot derived from the hash of the key
    /// and stepping forward one slot at a time with wrap-around.
    ///
    /// Returns `true` once `self` has been copied into the first empty slot found, or `false` if a slot
    /// holding a key that `keyer.comparer` reports as equal is reached first, in which case nothing is
    /// written.
    ///
    /// `entry_table` is never grown or reallocated here, so a table with no empty slot and no matching key
    /// trips the `debug.assert` on the probe count.
    ///
    fn write_into(self: Entry, entry_table: []?Entry) bool {
        const slot_count = math.min(math.max_int(hash_info), entry_table.len);
        var slot = math.wrap(keyer.hasher(self.key), math.min_int(hash_info), slot_count);
        var probes: usize = 0;

        while (true) : (probes += 1) {
            // Every slot has been visited once `probes` reaches the table length.
            debug.assert(probes < entry_table.len);

            if (entry_table[slot]) |occupant| {
                if (keyer.comparer(occupant.key, self.key) == 0) {
                    return false;
                }
            } else {
                entry_table[slot] = self;

                return true;
            }

            slot = (slot +% 1) % slot_count;
        }
    }
};
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Table type.
|
||||||
|
///
|
||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
pub fn assign(self: *Self, allocator: io.Allocator, key_element: key.Element, value_element: Element) io.AllocationError!void {
|
///
|
||||||
// TODO: Implement.
|
/// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation strategy,
|
||||||
_ = self;
|
/// and overwriting any value stored with a matching `key` and returning it if one existed.
|
||||||
_ = allocator;
|
///
|
||||||
_ = key_element;
|
/// The function returns [AllocationError] instead if `allocator` cannot commit the memory required to grow the
|
||||||
_ = value_element;
|
/// entry table of `self` when necessary.
|
||||||
|
///
|
||||||
|
/// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
|
||||||
|
/// `self`.
|
||||||
|
///
|
||||||
|
pub fn assign(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!?Entry {
    // A zero-length table has no meaningful load factor (0 / 0), so it is treated as needing growth too.
    if ((self.table.len == 0) or (self.calculate_load_factor() >= load_max)) {
        // `growth_factor` is the fraction of the current capacity to grow *by*; the capacity requested from
        // `rehash` is therefore the current capacity plus that fraction, never less than the current one.
        const grown_size = @intToFloat(f64, self.table.len) * (1.0 + growth_factor);

        if (grown_size > math.max_int(@typeInfo(usize).Int)) {
            return error.OutOfMemory;
        }

        // Truncation can leave very small tables at their old size, so always ask for at least one more slot.
        try self.rehash(allocator, math.max(@floatToInt(usize, grown_size), self.table.len + 1));
    }

    // At least one empty slot must exist or the probe loop below could never terminate for a new key.
    debug.assert(self.table.len > self.count);

    const hash_max = math.min(math.max_int(hash_info), self.table.len);
    var hashed_key = math.wrap(keyer.hasher(key), math.min_int(hash_info), hash_max);

    while (true) {
        // Capture by pointer so that an overwrite lands in the table itself rather than in a copy.
        if (self.table[hashed_key]) |*entry| {
            if (keyer.comparer(entry.key, key) == 0) {
                const original_entry = entry.*;

                entry.* = .{
                    .key = key,
                    .value = value,
                };

                return original_entry;
            }
        } else {
            self.table[hashed_key] = .{
                .key = key,
                .value = value,
            };

            // A previously empty slot was filled, so the table holds one more entry.
            self.count += 1;

            return null;
        }

        // Occupied by a different key: move on to the next slot, wrapping at the end of the table.
        hashed_key = (hashed_key +% 1) % hash_max;
    }
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Returns the calculated load factor of `self` at the moment.
|
||||||
|
///
|
||||||
|
pub fn calculate_load_factor(self: Self) f32 {
    // Ratio of stored entries to available slots; no guard is applied for a zero-length table.
    const occupied = @intToFloat(f32, self.count);
    const capacity = @intToFloat(f32, self.table.len);

    return occupied / capacity;
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Clears all entries from `self`, resetting the count to `0`.
|
||||||
|
///
|
||||||
|
/// To clean up memory allocations made by the table and deinitialize it, see [deinit] instead.
|
||||||
|
///
|
||||||
pub fn clear(self: *Self) void {
    // Empty every slot in place; the table keeps its current capacity.
    var index: usize = 0;

    while (index < self.table.len) : (index += 1) {
        self.table[index] = null;
    }

    self.count = 0;
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self, allocator: io.MemoryAllocator) void {
|
///
|
||||||
// TODO: Implement.
|
/// Deinitializes `self` and sets it to an invalid state, freeing all memory allocated by `allocator`.
|
||||||
_ = self;
|
///
|
||||||
_ = allocator;
|
/// To clear all items from the table while preserving the current capacity, see [clear] instead.
|
||||||
|
///
|
||||||
|
/// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
|
||||||
|
/// `self`.
|
||||||
|
///
|
||||||
|
pub fn deinit(self: *Self, allocator: io.Allocator) void {
    // Detach the table from `self` first, then hand the detached memory back to `allocator`.
    const released_table = self.table;

    self.count = 0;
    self.table = &.{};

    io.deallocate(allocator, released_table);
}
|
||||||
|
|
||||||
pub fn insert(self: *Self, key_element: key.Element, value_element: Element) io.AllocationError!bool {
|
///
|
||||||
// TODO: Implement.
|
/// Attempts to allocate and return an empty table with an implementation-defined initial capacity using
|
||||||
_ = self;
|
/// `allocator` as the memory allocation strategy.
|
||||||
_ = key_element;
|
///
|
||||||
_ = value_element;
|
/// The function returns [AllocationError] instead if `allocator` cannot commit the memory required for the
|
||||||
|
/// table capacity size.
|
||||||
|
///
|
||||||
|
pub fn init(allocator: io.Allocator) io.AllocationError!Self {
    // Number of slots a freshly created table starts out with.
    const initial_capacity = 4;
    const table = try io.allocate_many(?Entry, initial_capacity, allocator);

    errdefer io.deallocate(allocator, table);

    // Every slot is explicitly marked empty before the table is handed out.
    var index: usize = 0;

    while (index < table.len) : (index += 1) {
        table[index] = null;
    }

    return Self{
        .count = 0,
        .table = table,
    };
}
|
||||||
|
|
||||||
pub fn lookup(self: Self, key_element: key.Element) ?Element {
|
///
|
||||||
// TODO: Implement.
|
/// Attempts to write the `key`-`value` pair into `self`, using `allocator` as the memory allocation strategy,
|
||||||
_ = self;
|
/// if no value already exists with a matching `key`, returning `true` if it was inserted, otherwise `false`.
|
||||||
_ = key_element;
|
///
|
||||||
|
/// The function returns [AllocationError] instead if `allocator` cannot commit the memory required to grow the
|
||||||
|
/// entry table of `self` when necessary.
|
||||||
|
///
|
||||||
|
/// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
|
||||||
|
/// `self`.
|
||||||
|
///
|
||||||
|
pub fn insert(self: *Self, allocator: io.Allocator, key: Key, value: Value) io.AllocationError!bool {
    // A zero-length table has no meaningful load factor (0 / 0), so it is treated as needing growth too.
    if ((self.table.len == 0) or (self.calculate_load_factor() >= load_max)) {
        // `growth_factor` is the fraction of the current capacity to grow *by*; the capacity requested from
        // `rehash` is therefore the current capacity plus that fraction, never less than the current one.
        const grown_size = @intToFloat(f64, self.table.len) * (1.0 + growth_factor);

        if (grown_size > math.max_int(@typeInfo(usize).Int)) {
            return error.OutOfMemory;
        }

        // Truncation can leave very small tables at their old size, so always ask for at least one more slot.
        try self.rehash(allocator, math.max(@floatToInt(usize, grown_size), self.table.len + 1));
    }

    // At least one empty slot must exist for `write_into` to be able to place a new key.
    debug.assert(self.table.len > self.count);

    const was_inserted = (Entry{
        .key = key,
        .value = value,
    }).write_into(self.table);

    // Only a write into a previously empty slot adds an entry; a rejected duplicate leaves the count alone.
    if (was_inserted) {
        self.count += 1;
    }

    return was_inserted;
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Attempts to find an entry in `self` matching `key`, returning its value or `null` if no matching entry was found.
|
||||||
|
///
|
||||||
|
pub fn lookup(self: Self, key: Key) ?Value {
    // Nothing can match in a table without entries (or without slots to probe).
    if ((self.count == 0) or (self.table.len == 0)) {
        return null;
    }

    const hash_max = math.min(math.max_int(hash_info), self.table.len);
    var hashed_key = math.wrap(keyer.hasher(key), math.min_int(hash_info), hash_max);
    var iterations = @as(usize, 0);

    // Visit each slot at most once so that a fully occupied table cannot make a failed lookup spin forever.
    while (iterations < self.table.len) : (iterations += 1) {
        // An empty slot ends the probe sequence: the key would have been stored here or earlier.
        const entry = self.table[hashed_key] orelse return null;

        if (keyer.comparer(entry.key, key) == 0) {
            return entry.value;
        }

        // Occupied by a different key: move on to the next slot, wrapping at the end of the table.
        hashed_key = (hashed_key +% 1) % hash_max;
    }

    return null;
}
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Attempts to reallocate and regenerate the table capacity in `self` using `allocator` to be equal to or
|
||||||
|
/// greater than `requested_range`, returning [io.AllocationError] if `allocator` cannot commit the memory
|
||||||
|
/// required for the table capacity size.
|
||||||
|
///
|
||||||
|
/// *Note* `allocator` must reference the same allocation strategy as the one originally used to initialize
|
||||||
|
/// `self`.
|
||||||
|
///
|
||||||
|
pub fn rehash(self: *Self, allocator: io.Allocator, requested_range: usize) io.AllocationError!void {
    // The new table always keeps at least one slot free beyond the stored entries, since the probing done
    // by the other table operations relies on reaching an empty slot to terminate.
    const new_table = try io.allocate_many(?Entry, math.max(requested_range, self.count + 1), allocator);

    for (new_table) |*entry| {
        entry.* = null;
    }

    // Re-insert every stored entry; slot positions depend on the table length, so they cannot be copied
    // across verbatim.
    for (self.table) |maybe_entry| {
        if (maybe_entry) |entry| {
            // The write is performed outside of the assertion so that it does not depend on how
            // `debug.assert` treats its argument.
            const was_written = entry.write_into(new_table);

            // Keys in the old table are unique, so each one must land in a fresh slot.
            debug.assert(was_written);
        }
    }

    // `self` is only touched once the replacement table is fully built.
    io.deallocate(allocator, self.table);

    self.table = new_table;
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const Key = struct {
|
///
|
||||||
Element: type,
|
/// Returns a function group for defining table keying operations performable on `Key`.
|
||||||
};
|
///
|
||||||
|
pub fn Keyer(comptime Key: type) type {
    return struct {
        /// Produces the [Hash] of `key` that tables use to pick a starting slot.
        hasher: fn (key: Key) Hash,

        /// Compares `key_a` against `key_b`; tables treat a result of `0` as "same key".
        comparer: fn (key_a: Key, key_b: Key) isize,
    };
}
|
||||||
|
|
||||||
pub const string_key = Key{
|
///
|
||||||
.Element = []const u8,
|
/// A standard [Keyer] for `[]const u8` types that provides general-purpose string keying.
|
||||||
|
///
|
||||||
|
pub const string_keyer: Keyer([]const u8) = .{
    // Byte-wise comparison supplied by the `io` module.
    .comparer = io.compare,

    // General-purpose string hash defined in this file.
    .hasher = hash_string,
};
|
||||||
|
|
||||||
|
///
|
||||||
|
/// Returns a general-purpose, non-cryptographic hash value for `string`.
|
||||||
|
///
|
||||||
|
pub fn hash_string(string: []const u8) Hash {
    // Seeded multiply-by-33 string hash: hash = hash * 33 + byte, starting from 5381.
    var hash_code = @as(Hash, 5381);

    for (string) |byte| {
        // `(hash << 5) + hash` is `hash * 33`. The additions must wrap explicitly: the running value
        // exceeds the range of `Hash` after roughly a dozen bytes, and plain `+` is an overflow error.
        hash_code = ((hash_code << 5) +% hash_code) +% byte;
    }

    return hash_code;
}
|
||||||
|
|
Loading…
Reference in New Issue