Skip to content

Commit b9e650e

Browse files
committed
feat(config): adopt Trinity config (hidden=243, vocab=729-padded, GF16)
- Add src/tri/math/parameter_golf_config.zig: full Parameter Golf 16MB model config
- All dims 3^k: vocab=729, embed=243, hidden=729, ctx=81, blocks=9, heads=9, head_dim=27, ffn=729
- Square Attention theorem: ctx%head_dim==0, isPowerOf3
- Size check: embedding+blocks+lm_head under 16MB with GF16
- LAMB optimizer: lr=3e-4, clamp=10.0, stable_ratio=0.02
- 8 tests: budget fit, 3^k dims, square attention, size breakdown

Closes #520
Refs: EXP-001, EXP-010, EXP-012, #64
1 parent 8b6619d commit b9e650e

1 file changed

Lines changed: 142 additions & 0 deletions

File tree

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
const std = @import("std");
2+
const sacred = @import("sacred_constants.zig");
3+
const vocab_audit = @import("vocab_audit.zig");
4+
5+
/// The Parameter Golf configuration instance exercised by the tests in this file.
pub const PG_CONFIG: TrinityPGConfig = .{
    // Model dimensions and counts.
    .vocab_size = 729, // 3^6
    .embed_dim = 243, // 3^5
    .hidden_dim = 729, // 3^6
    .context_len = 81, // 3^4
    .num_blocks = 9, // 3^2
    .num_heads = 9, // 3^2
    .head_dim = 27, // 3^3
    .ffn_hidden = 729, // 3^6

    // Weight storage width, in bits.
    // NOTE(review): the accompanying commit message mentions GF16 while this
    // is 2 — confirm the intended weight width.
    .bits_per_weight = 2,

    // Training settings (stored only; no method in this file interprets them).
    .batch_size = 66,
    .lr = 3e-4,
    .lr_min = 1e-5,
    .lamb_clamp = 10.0,
    .stable_ratio = 0.02,
    .grad_clip = 1.0,
};
22+
23+
/// Dimensions, weight width and optimizer settings for the Parameter Golf
/// model, plus helpers that estimate its packed size and check its
/// structural constraints.
pub const TrinityPGConfig = struct {
    vocab_size: usize,
    embed_dim: usize,
    hidden_dim: usize,
    context_len: usize,
    num_blocks: usize,
    num_heads: usize,
    head_dim: usize,
    ffn_hidden: usize,
    /// Width of one stored weight in bits; drives `modelSizeBytes`.
    bits_per_weight: usize,
    batch_size: usize,
    lr: f64,
    lr_min: f64,
    lamb_clamp: f64,
    stable_ratio: f64,
    grad_clip: f64,

    /// Size budget, in MiB, enforced by `validate`.
    pub const size_budget_mb: f64 = 16.0;
    /// The only vocabulary size `validate` accepts.
    pub const required_vocab_size: usize = 729;

    /// Number of weights counted for the model.
    ///
    /// Only three terms are counted: the embedding table
    /// (`vocab_size * embed_dim`), `num_blocks` blocks of
    /// `4 * embed_dim^2 + 3 * embed_dim * ffn_hidden` weights each, and an
    /// output head of `vocab_size * embed_dim`. `hidden_dim`, `num_heads`
    /// and `head_dim` do not enter the count.
    pub fn totalParams(self: TrinityPGConfig) usize {
        const embedding = self.vocab_size * self.embed_dim;
        const per_block_attn = 4 * self.embed_dim * self.embed_dim;
        const per_block_ffn = 3 * self.embed_dim * self.ffn_hidden;
        const per_block = per_block_attn + per_block_ffn;
        const all_blocks = self.num_blocks * per_block;
        const lm_head = self.vocab_size * self.embed_dim;
        return embedding + all_blocks + lm_head;
    }

    /// Bytes needed to store every weight at `bits_per_weight` bits each.
    ///
    /// The result is rounded UP to a whole byte: a trailing partial byte
    /// still occupies storage, so flooring would under-report the size.
    pub fn modelSizeBytes(self: TrinityPGConfig) usize {
        const total_bits = self.totalParams() * self.bits_per_weight;
        return (total_bits + 7) / 8;
    }

    /// `modelSizeBytes` expressed in MiB (1 MiB = 1024 * 1024 bytes).
    pub fn modelSizeMB(self: TrinityPGConfig) f64 {
        return @as(f64, @floatFromInt(self.modelSizeBytes())) / (1024.0 * 1024.0);
    }

    /// True when the model size in MiB is at most `budget_mb` (inclusive).
    pub fn fitsBudget(self: TrinityPGConfig, budget_mb: f64) bool {
        return self.modelSizeMB() <= budget_mb;
    }

    /// True when every dimension field is an exact power of 3 (3^0 = 1
    /// counts). `bits_per_weight` and `batch_size` are not dimensions and
    /// are not checked.
    pub fn allDimsPowerOf3(self: TrinityPGConfig) bool {
        return isPowerOf3(self.vocab_size) and
            isPowerOf3(self.embed_dim) and
            isPowerOf3(self.hidden_dim) and
            isPowerOf3(self.context_len) and
            isPowerOf3(self.num_blocks) and
            isPowerOf3(self.num_heads) and
            isPowerOf3(self.head_dim) and
            isPowerOf3(self.ffn_hidden);
    }

    /// True when `context_len` is a multiple of `head_dim` and the quotient
    /// is itself a power of 3. A zero `head_dim` yields false.
    pub fn squareAttentionHolds(self: TrinityPGConfig) bool {
        // Guard first: `%` and `/` by zero are illegal behavior in Zig.
        if (self.head_dim == 0) return false;
        if (self.context_len % self.head_dim != 0) return false;
        return isPowerOf3(self.context_len / self.head_dim);
    }

    /// Run every structural check, returning the first failure in this order:
    /// `error.NonPowerOf3Dimension`, `error.SquareAttentionViolation`,
    /// `error.ExceedsBudget`, `error.WrongVocabSize`.
    pub fn validate(self: TrinityPGConfig) !void {
        if (!self.allDimsPowerOf3()) return error.NonPowerOf3Dimension;
        if (!self.squareAttentionHolds()) return error.SquareAttentionViolation;
        if (!self.fitsBudget(size_budget_mb)) return error.ExceedsBudget;
        if (self.vocab_size != required_vocab_size) return error.WrongVocabSize;
    }
};
84+
85+
/// Report whether `n` equals 3^k for some k >= 0. Zero is not a power of 3.
fn isPowerOf3(n: usize) bool {
    var rest = n;
    // Strip factors of 3; the `!= 0` guard keeps zero from looping forever.
    while (rest != 0 and rest % 3 == 0) : (rest /= 3) {}
    // A pure power of 3 reduces to exactly 1; zero and anything with another
    // prime factor does not.
    return rest == 1;
}
91+
92+
test "PG config fits 16MB budget" {
    // The shipped configuration must stay within a 16.0 budget.
    const within_budget = PG_CONFIG.fitsBudget(16.0);
    try std.testing.expect(within_budget);
}
95+
96+
test "PG config all dims are 3^k" {
    // Every dimension checked by allDimsPowerOf3 must be an exact power of 3.
    const cfg = PG_CONFIG;
    const all_ternary = cfg.allDimsPowerOf3();
    try std.testing.expect(all_ternary);
}
99+
100+
test "PG config square attention holds" {
    const cfg = PG_CONFIG;
    const expect = std.testing.expect;

    try expect(cfg.squareAttentionHolds());
    // Independently confirm the divisibility half of the condition.
    try expect(cfg.context_len % cfg.head_dim == 0);
}
104+
105+
test "PG config validate passes" {
    // Any error returned by validate propagates and fails the test.
    const cfg = PG_CONFIG;
    try cfg.validate();
}
108+
109+
test "PG config model size under 16MB" {
    const expect = std.testing.expect;
    const mb = PG_CONFIG.modelSizeMB();

    // Non-empty model ...
    try expect(mb > 0);
    // ... that does not exceed the 16.0 limit.
    try expect(mb <= 16.0);
}
114+
115+
test "isPowerOf3" {
    // Exact powers of 3, from 3^0 up to 3^6.
    const powers = [_]usize{ 1, 3, 9, 27, 81, 243, 729 };
    for (powers) |value| {
        try std.testing.expect(isPowerOf3(value));
    }

    // Values that must be rejected, including zero.
    const non_powers = [_]usize{ 2, 4, 0, 10 };
    for (non_powers) |value| {
        try std.testing.expect(!isPowerOf3(value));
    }
}
128+
129+
test "PG config detailed size breakdown" {
    const expect = std.testing.expect;
    const cfg = PG_CONFIG;

    // Recompute each term of the parameter count independently of totalParams.
    const token_table = cfg.vocab_size * cfg.embed_dim;
    const attn_weights = 4 * cfg.embed_dim * cfg.embed_dim;
    const ffn_weights = 3 * cfg.embed_dim * cfg.ffn_hidden;
    const block_stack = cfg.num_blocks * (attn_weights + ffn_weights);
    const output_head = cfg.vocab_size * cfg.embed_dim;

    try expect(token_table > 0);
    try expect(block_stack > 0);
    try expect(output_head > 0);

    // The method must agree with the hand-computed sum.
    const expected_total = token_table + block_stack + output_head;
    try expect(cfg.totalParams() == expected_total);
}

0 commit comments

Comments
 (0)