From 033227b24319f8fdac46ae67dd57e059be923cf3 Mon Sep 17 00:00:00 2001
From: kayomn
Date: Mon, 17 Oct 2022 15:48:56 +0100
Subject: [PATCH] Replace UTF-8 compares with standard memory compares

---
 src/ona/io.zig    | 37 +++++++++++++++++++++++++++++++++++++
 src/ona/table.zig | 35 +++++++----------------------------
 src/ona/utf8.zig  | 29 -----------------------------
 3 files changed, 44 insertions(+), 57 deletions(-)
 delete mode 100644 src/ona/utf8.zig

diff --git a/src/ona/io.zig b/src/ona/io.zig
index 7c8bee5..ca74716 100644
--- a/src/ona/io.zig
+++ b/src/ona/io.zig
@@ -286,6 +286,43 @@ pub const Writer = struct {
     }
 };
 
+///
+/// Returns `true` if `this_bytes` is the same length and contains the same data as `that_bytes`,
+/// otherwise `false`.
+///
+pub fn equalsBytes(this_bytes: []const u8, that_bytes: []const u8) bool {
+    return std.mem.eql(u8, this_bytes, that_bytes);
+}
+
+test "Equivalence of bytes" {
+    const bytes_sequence = &.{69, 42, 0};
+    const testing = std.testing;
+
+    try testing.expect(equalsBytes(bytes_sequence, bytes_sequence));
+    try testing.expect(!equalsBytes(bytes_sequence, &.{69, 42}));
+}
+
+///
+/// Returns a deterministic hash of `bytes`: seeded with 5381, then `hash * 33 + byte` per byte.
+///
+/// **Note** that this is `O(n)` and wraps on overflow (`+%`); plain `+` would trap on long input.
+///
+pub fn hashBytes(bytes: []const u8) usize {
+    var hash = @as(usize, 5381);
+
+    for (bytes) |byte| hash = ((hash << 5) +% hash) +% byte;
+
+    return hash;
+}
+
+test "Hashing bytes" {
+    const bytes_sequence = &.{69, 42, 0};
+    const testing = std.testing;
+
+    try testing.expect(hashBytes(bytes_sequence) == hashBytes(bytes_sequence));
+    try testing.expect(hashBytes(bytes_sequence) != hashBytes(&.{69, 42}));
+}
+
 ///
 /// Writer that silently throws consumed data away and never fails.
/// diff --git a/src/ona/table.zig b/src/ona/table.zig index 040a4f7..b844766 100644 --- a/src/ona/table.zig +++ b/src/ona/table.zig @@ -173,35 +173,14 @@ pub fn KeyContext(comptime Key: type) type { }; } -/// -/// Tests if the contents of `this_string` lexically equals the contents of `that_string`. -/// -fn equalsString(this_string: []const u8, that_string: []const u8) bool { - return std.mem.eql(u8, this_string, that_string); -} - -/// -/// Hashes `string` into a hash value of `usize`. -/// -fn hashString(string: []const u8) usize { - var hash = @as(usize, 5381); - - for (string) |byte| hash = ((hash << 5) + hash) + byte; - - return hash; -} - -/// -/// A [KeyContext] for handling `[]const u8` types. -/// -pub const string_context = KeyContext([]const u8){ - .hash = hashString, - .equals = equalsString, -}; - -test "Hashed table manipulation with string context" { +test "Hashed table manipulation with bytes context" { const testing = std.testing; - var table = try Hashed([]const u8, u32, string_context).init(testing.allocator); + const io = @import("./io.zig"); + + var table = try Hashed([]const u8, u32, .{ + .equals = io.equalsBytes, + .hash = io.hashBytes, + }).init(testing.allocator); defer table.deinit(); diff --git a/src/ona/utf8.zig b/src/ona/utf8.zig deleted file mode 100644 index 352a9d6..0000000 --- a/src/ona/utf8.zig +++ /dev/null @@ -1,29 +0,0 @@ -const std = @import("std"); -const table = @import("./table.zig"); - -/// -/// Tests if the contents of `this_utf8_sequence` lexically equals the contents of -/// `that_utf8_sequence`. -/// -pub fn equals(this_utf8_sequence: []const u8, that_utf8_sequence: []const u8) bool { - return std.mem.eql(u8, this_utf8_sequence, that_utf8_sequence); -} - -/// -/// Returns a deterministic hash for `utf8_sequence`. 
-/// -pub fn hash(utf8_sequence: []const u8) usize { - var utf8_hash = @as(usize, 5381); - - for (utf8_sequence) |utf8_code| utf8_hash = ((utf8_hash << 5) + utf8_hash) + utf8_code; - - return utf8_hash; -} - -/// -/// A [table.KeyContext] for handling UTF-8 character sequences. -/// -pub const key_context = table.KeyContext([]const u8){ - .hash = hash, - .equals = equals, -};