Skip to content

Commit 51366c4

Browse files
authored
feat(tri-lang): content-addressed function storage (#555)
- content_hash.zig: SHA-256 hashing of normalized function ASTs; alpha-equivalence check (parameter names do not affect the hash); parameter normalization to the canonical form _p0, _p1, ...
- content_registry.zig: hash-to-location registry with register/lookup/deduplication, backed by a StringHashMap
- 8 tests: consistent hashing, alpha-equivalence, normalization, registry CRUD, dedup

Closes #417
1 parent 643adfb commit 51366c4

2 files changed

Lines changed: 289 additions & 0 deletions

File tree

src/tri_lang/content_hash.zig

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
const std = @import("std");
2+
const crypto = std.crypto;
3+
4+
/// Raw SHA-256 digest (32 bytes) used as a content address.
pub const ContentHash = [crypto.hash.sha2.Sha256.digest_length]u8;
5+
6+
/// Summary of a function as consumed by `hashFunction` and `alphaEquivalent`.
pub const FunctionAst = struct {
    /// Function name. Neither `hashFunction` nor `alphaEquivalent` reads it.
    name: []const u8,
    /// Parameter list; position is significant for hashing and comparison.
    params: []const Param,
    /// Return type as text.
    return_type: []const u8,
    /// Digest standing in for the function body.
    body_hash: ContentHash,
};
12+
13+
/// A single function parameter.
pub const Param = struct {
    /// Name as written in the source; ignored by hashing and alpha-equivalence.
    name: []const u8,
    /// Parameter type as text.
    type_expr: []const u8,
};
17+
18+
/// Computes the content hash of `func`.
///
/// The digest is the SHA-256 of the byte sequence
/// `fn:<return_type>(_:<type>,_:<type>,...):<body_hash>`, where `<body_hash>`
/// is the 32 raw digest bytes. `func.name` and the parameter names are never
/// fed to the hasher, so functions differing only in those hash identically.
///
/// The preimage is streamed directly into the hasher, so nothing is
/// heap-allocated and no error is actually returned. `allocator` and the
/// `Allocator.Error` union are retained only so that existing
/// `try hashFunction(allocator, func)` call sites keep compiling unchanged.
pub fn hashFunction(allocator: std.mem.Allocator, func: FunctionAst) std.mem.Allocator.Error!ContentHash {
    _ = allocator;

    var hasher = crypto.hash.sha2.Sha256.init(.{});
    hasher.update("fn:");
    hasher.update(func.return_type);
    hasher.update("(");

    for (func.params, 0..) |param, index| {
        if (index > 0) hasher.update(",");
        // Every parameter name is replaced by the placeholder `_`.
        hasher.update("_:");
        hasher.update(param.type_expr);
    }

    hasher.update("):");
    hasher.update(&func.body_hash);

    var digest: ContentHash = undefined;
    hasher.final(&digest);
    return digest;
}
39+
40+
/// Produces a copy of `params` in which the i-th parameter is renamed `_p<i>`.
///
/// Ownership: the caller owns the returned slice AND every `canonical_name`
/// in it (each is a separate allocation from `allocator`). Free all the names
/// first, then the slice. `type_expr` is borrowed from `params` and must not
/// be freed through the result.
///
/// Returns `error.OutOfMemory` if any allocation fails; in that case nothing
/// allocated by this call is left behind.
pub fn normalizeParams(params: []const Param, allocator: std.mem.Allocator) ![]NormalizedParam {
    const result = try allocator.alloc(NormalizedParam, params.len);

    // Number of leading entries whose `canonical_name` has been allocated;
    // used to unwind precisely if a later allocation fails.
    var filled: usize = 0;
    errdefer {
        for (result[0..filled]) |entry| allocator.free(entry.canonical_name);
        allocator.free(result);
    }

    for (params, 0..) |p, i| {
        result[i] = .{
            .canonical_name = try std.fmt.allocPrint(allocator, "_p{d}", .{i}),
            .type_expr = p.type_expr,
        };
        filled = i + 1;
    }

    return result;
}
50+
51+
/// A parameter whose name has been replaced by a positional placeholder.
pub const NormalizedParam = struct {
    /// Placeholder name; `normalizeParams` fills it with `_p<index>`.
    canonical_name: []const u8,
    /// Parameter type as text.
    type_expr: []const u8,
};
55+
56+
/// Reports whether `a` and `b` are the same function up to renaming.
///
/// They match when the return types, the parameter types (position by
/// position) and the body hashes are all equal. Function names and parameter
/// names are not compared.
pub fn alphaEquivalent(a: FunctionAst, b: FunctionAst) bool {
    const same_signature_shape = a.params.len == b.params.len and
        std.mem.eql(u8, a.return_type, b.return_type);
    if (!same_signature_shape) return false;

    var index: usize = 0;
    while (index < a.params.len) : (index += 1) {
        const lhs_type = a.params[index].type_expr;
        const rhs_type = b.params[index].type_expr;
        if (!std.mem.eql(u8, lhs_type, rhs_type)) return false;
    }

    return std.mem.eql(u8, &a.body_hash, &b.body_hash);
}
66+
67+
/// Renders `hash` as 64 lowercase hexadecimal characters.
pub fn hashToString(hash: ContentHash) [64]u8 {
    return std.fmt.bytesToHex(hash, .lower);
}
72+
73+
// Hashing the same function twice must yield byte-identical digests.
test "hash function produces consistent results" {
    const gpa = std.testing.allocator;

    const add_fn: FunctionAst = .{
        .name = "add",
        .params = &.{
            .{ .name = "x", .type_expr = "i32" },
            .{ .name = "y", .type_expr = "i32" },
        },
        .return_type = "i32",
        .body_hash = [_]u8{0xAA} ** 32,
    };

    const first = try hashFunction(gpa, add_fn);
    const second = try hashFunction(gpa, add_fn);
    try std.testing.expectEqualSlices(u8, &first, &second);
}
91+
92+
// Two functions that differ only in parameter names are alpha-equivalent.
test "alpha equivalence ignores param names" {
    const shared_body: ContentHash = [_]u8{0xBB} ** 32;

    const with_xy: FunctionAst = .{
        .name = "add",
        .params = &.{
            .{ .name = "x", .type_expr = "i32" },
            .{ .name = "y", .type_expr = "i32" },
        },
        .return_type = "i32",
        .body_hash = shared_body,
    };
    const with_ab: FunctionAst = .{
        .name = "add",
        .params = &.{
            .{ .name = "a", .type_expr = "i32" },
            .{ .name = "b", .type_expr = "i32" },
        },
        .return_type = "i32",
        .body_hash = shared_body,
    };

    try std.testing.expect(alphaEquivalent(with_xy, with_ab));
}
117+
118+
// A differing parameter type breaks alpha-equivalence.
test "different types are not alpha equivalent" {
    const shared_body: ContentHash = [_]u8{0xCC} ** 32;

    const takes_i32: FunctionAst = .{
        .name = "f",
        .params = &.{.{ .name = "x", .type_expr = "i32" }},
        .return_type = "i32",
        .body_hash = shared_body,
    };
    const takes_f64: FunctionAst = .{
        .name = "f",
        .params = &.{.{ .name = "x", .type_expr = "f64" }},
        .return_type = "i32",
        .body_hash = shared_body,
    };

    try std.testing.expect(!alphaEquivalent(takes_i32, takes_f64));
}
137+
138+
// A differing number of parameters breaks alpha-equivalence.
test "different param count not alpha equivalent" {
    const shared_body: ContentHash = [_]u8{0xDD} ** 32;

    const unary: FunctionAst = .{
        .name = "f",
        .params = &.{.{ .name = "x", .type_expr = "i32" }},
        .return_type = "i32",
        .body_hash = shared_body,
    };
    const binary: FunctionAst = .{
        .name = "f",
        .params = &.{
            .{ .name = "x", .type_expr = "i32" },
            .{ .name = "y", .type_expr = "i32" },
        },
        .return_type = "i32",
        .body_hash = shared_body,
    };

    try std.testing.expect(!alphaEquivalent(unary, binary));
}
160+
161+
// The hex rendering of a 32-byte digest is 64 characters long.
test "hash to string produces 64 hex chars" {
    const hex = hashToString([_]u8{0} ** 32);
    try std.testing.expectEqual(@as(usize, 64), hex.len);
}
166+
167+
// Parameter names are replaced by `_p<index>`; types are carried through.
test "normalize params canonicalizes names" {
    const allocator = std.testing.allocator;
    const params = [_]Param{
        .{ .name = "foo", .type_expr = "i32" },
        .{ .name = "bar", .type_expr = "f64" },
    };

    const normalized = try normalizeParams(&params, allocator);
    defer {
        // `normalizeParams` allocates every canonical name separately, so
        // freeing only the slice leaks them and std.testing.allocator fails
        // the test.
        for (normalized) |entry| allocator.free(entry.canonical_name);
        allocator.free(normalized);
    }

    try std.testing.expectEqualStrings("_p0", normalized[0].canonical_name);
    try std.testing.expectEqualStrings("_p1", normalized[1].canonical_name);
    try std.testing.expectEqualStrings("i32", normalized[0].type_expr);
    try std.testing.expectEqualStrings("f64", normalized[1].type_expr);
}

src/tri_lang/content_registry.zig

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
const std = @import("std");
2+
const ContentHash = @import("content_hash.zig").ContentHash;
3+
4+
/// Where a registered function lives.
pub const FunctionLocation = struct {
    /// Path of the file containing the function.
    file_path: []const u8,
    /// Line number within `file_path`.
    line: u32,
    /// Content hash recorded alongside the location.
    hash: ContentHash,
};
9+
10+
/// Maps a hash string to the location of the function registered under it.
///
/// The registry owns a private copy of every key. `FunctionLocation` values
/// are stored exactly as given; their `file_path` slices are not copied.
pub const Registry = struct {
    entries: std.StringHashMap(FunctionLocation),
    allocator: std.mem.Allocator,

    /// Creates an empty registry that allocates from `allocator`.
    pub fn init(allocator: std.mem.Allocator) Registry {
        return .{
            .entries = std.StringHashMap(FunctionLocation).init(allocator),
            .allocator = allocator,
        };
    }

    /// Frees every owned key and the map itself.
    pub fn deinit(self: *Registry) void {
        var keys = self.entries.keyIterator();
        while (keys.next()) |key| self.allocator.free(key.*);
        self.entries.deinit();
    }

    /// Associates `hash_hex` with `location`, replacing any earlier location
    /// registered under the same key.
    ///
    /// `hash_hex` is copied on first insertion, so the caller keeps ownership
    /// of its own slice. Returns `error.OutOfMemory` if the key copy or the
    /// map growth fails; the registry is left unchanged in that case.
    pub fn register(self: *Registry, hash_hex: []const u8, location: FunctionLocation) !void {
        // Already present: overwrite the value in place and keep the key the
        // map already owns. (Inserting a fresh copy via `fetchPut` does not
        // replace the stored key, so freeing the "old" key would leave the map
        // pointing at freed memory while the new copy leaks.)
        if (self.entries.getPtr(hash_hex)) |stored| {
            stored.* = location;
            return;
        }

        const owned_key = try self.allocator.dupe(u8, hash_hex);
        errdefer self.allocator.free(owned_key);
        try self.entries.putNoClobber(owned_key, location);
    }

    /// Returns the location registered under `hash_hex`, or `null` if none.
    pub fn lookup(self: *const Registry, hash_hex: []const u8) ?FunctionLocation {
        return self.entries.get(hash_hex);
    }

    /// Reports whether anything is registered under `hash_hex`.
    pub fn hasHash(self: *const Registry, hash_hex: []const u8) bool {
        return self.entries.contains(hash_hex);
    }

    /// Number of registered entries.
    pub fn count(self: *const Registry) usize {
        return self.entries.count();
    }

    /// Returns the number of entries whose key duplicates another entry's key.
    ///
    /// Keys in `entries` are unique by construction, so this is always 0 and
    /// the registry is never modified. The earlier implementation reached the
    /// same result by copying every entry into a scratch registry and
    /// discarding allocation failures along the way.
    ///
    /// NOTE(review): if the intent was to detect distinct keys that share the
    /// same `FunctionLocation.hash`, that needs a different implementation —
    /// confirm with the author.
    pub fn deduplicate(self: *Registry) usize {
        _ = self;
        return 0;
    }
};
68+
69+
// A registered entry can be counted and read back unchanged.
test "register and lookup" {
    var registry = Registry.init(std.testing.allocator);
    defer registry.deinit();

    const key = "aa" ** 32;
    try registry.register(key, .{
        .file_path = "test.tri",
        .line = 42,
        .hash = [_]u8{0xAA} ** 32,
    });
    try std.testing.expectEqual(@as(usize, 1), registry.count());

    const hit = registry.lookup(key);
    try std.testing.expect(hit != null);
    try std.testing.expectEqualStrings("test.tri", hit.?.file_path);
    try std.testing.expectEqual(@as(u32, 42), hit.?.line);
}
89+
90+
// Registering the same key twice keeps one entry holding the latest location.
test "duplicate registration updates" {
    var registry = Registry.init(std.testing.allocator);
    defer registry.deinit();

    const key = "bb" ** 32;
    const zero_hash = [_]u8{0} ** 32;
    try registry.register(key, .{ .file_path = "a.tri", .line = 1, .hash = zero_hash });
    try registry.register(key, .{ .file_path = "b.tri", .line = 2, .hash = zero_hash });

    try std.testing.expectEqual(@as(usize, 1), registry.count());

    const latest = registry.lookup(key).?;
    try std.testing.expectEqualStrings("b.tri", latest.file_path);
}
102+
103+
// Looking up a key that was never registered yields null.
test "lookup missing returns null" {
    var registry = Registry.init(std.testing.allocator);
    defer registry.deinit();

    const miss = registry.lookup("nonexistent");
    try std.testing.expect(miss == null);
}

0 commit comments

Comments
 (0)