|
| 1 | +//! Centralized Configuration Management for Trinity S³AI |
| 2 | +//! |
| 3 | +//! Provides unified configuration loading from JSON files, |
| 4 | +//! environment variables, and command-line arguments. |
| 5 | + |
| 6 | +const std = @import("std"); |
| 7 | + |
/// Golden-ratio (φ) constants that the configuration structs use as defaults.
pub const Sacred = struct {
    /// φ, the golden ratio.
    pub const PHI: f64 = 1.618033988749895;
    /// φ⁻¹.
    pub const PHI_INV: f64 = 0.618033988749895;
    /// φ⁻².
    pub const PHI_INV_SQ: f64 = 0.3819660112501051;
    /// φ⁻³.
    pub const PHI_INV_CUBED: f64 = 0.2360679774997897;

    /// FFN expansion ratio, φ² (~2.618).
    pub const FFN_EXPANSION: f64 = PHI * PHI;
    /// Fraction of non-zero entries (~0.382).
    pub const OPTIMAL_SPARSITY: f64 = PHI_INV_SQ;
    /// Fraction of zero entries (~0.618); the complement of `OPTIMAL_SPARSITY`.
    pub const TARGET_SPARSITY: f64 = 1.0 - PHI_INV_SQ;
};
| 19 | + |
/// Shape of the learning-rate curve applied after warmup.
pub const LRSchedule = enum {
    /// Keep the base learning rate unchanged.
    constant,
    /// Decay linearly with training progress.
    linear,
    /// Half-cosine decay.
    cosine,
    /// φ-based decay.
    sacred,

    /// Return the schedule's name; it is identical to the enum tag name.
    pub fn toString(self: LRSchedule) []const u8 {
        return @tagName(self);
    }
};
| 36 | + |
/// Weight-initialization scaling schemes.
pub const ScaleType = enum {
    /// Standard initialization.
    standard,
    /// φ-based scaling.
    sacred,
    /// Kaiming He initialization.
    kaiming,
    /// Xavier Glorot initialization.
    xavier,

    /// Return the scheme's name; it is identical to the enum tag name.
    pub fn toString(self: ScaleType) []const u8 {
        return @tagName(self);
    }
};
| 53 | + |
/// Architecture hyper-parameters of the model.
pub const ModelConfig = struct {
    vocab_size: usize = 31000,
    hidden_dim: usize = 512,
    num_layers: usize = 6,
    num_heads: usize = 8,
    /// Explicit feed-forward width; 0 means "derive it from `hidden_dim`".
    ffn_dim: usize = 0,
    max_seq_len: usize = 512,

    /// Return the feed-forward width: `ffn_dim` when it is non-zero, otherwise
    /// `hidden_dim * Sacred.FFN_EXPANSION` converted to an integer.
    pub fn getFFNDim(self: ModelConfig) usize {
        if (self.ffn_dim > 0) return self.ffn_dim;

        const hidden: f64 = @floatFromInt(self.hidden_dim);
        return @intFromFloat(hidden * Sacred.FFN_EXPANSION);
    }

    /// Check that the size fields are non-zero and that `hidden_dim` is a
    /// multiple of `num_heads`. The first failing check decides the error.
    pub fn validate(self: ModelConfig) !void {
        if (self.vocab_size == 0) return error.InvalidVocabSize;
        if (self.hidden_dim == 0) return error.InvalidHiddenDim;
        if (self.num_layers == 0) return error.InvalidNumLayers;
        if (self.num_heads == 0) return error.InvalidNumHeads;

        // num_heads is known to be non-zero here, so the remainder is safe.
        const leftover = self.hidden_dim % self.num_heads;
        if (leftover != 0) return error.HiddenDimNotDivisibleByHeads;
    }
};
| 78 | + |
/// Optimizer and learning-rate schedule settings.
pub const TrainingConfig = struct {
    /// Base (peak) learning rate reached at the end of warmup.
    learning_rate: f64 = 0.001,
    lr_schedule: LRSchedule = .sacred,
    /// Number of initial steps during which the rate ramps up linearly.
    warmup_steps: u32 = 1000,
    max_steps: u32 = 30000,
    batch_size: u32 = 64,
    weight_decay: f64 = 0.01,
    gradient_clip: f64 = 1.0,

    /// Reject non-positive (or NaN) learning rate and gradient clip, negative
    /// (or NaN) weight decay, and zero `max_steps` / `batch_size`.
    pub fn validate(self: TrainingConfig) !void {
        // The float checks are written as negated comparisons so that NaN,
        // which compares false against everything, is rejected as well.
        if (!(self.learning_rate > 0)) return error.InvalidLearningRate;
        if (self.max_steps == 0) return error.InvalidMaxSteps;
        if (self.batch_size == 0) return error.InvalidBatchSize;
        if (!(self.weight_decay >= 0)) return error.InvalidWeightDecay;
        if (!(self.gradient_clip > 0)) return error.InvalidGradientClip;
    }

    /// Calculate the learning rate at `step`.
    ///
    /// While `step < warmup_steps` the rate ramps linearly from 0 up to
    /// `learning_rate`. Afterwards the configured schedule is applied to the
    /// decay progress in [0, 1] (see `decayProgress`).
    pub fn getLR(self: TrainingConfig, step: u32) f64 {
        if (step < self.warmup_steps) {
            // Linear warmup. warmup_steps > step >= 0, so the divisor is non-zero.
            const warmup_frac: f64 = @as(f64, @floatFromInt(step)) /
                @as(f64, @floatFromInt(self.warmup_steps));
            return self.learning_rate * warmup_frac;
        }

        const progress = self.decayProgress(step);
        return switch (self.lr_schedule) {
            .constant => self.learning_rate,
            .linear => self.learning_rate * (1.0 - progress),
            .cosine => blk: {
                const cosine = 0.5 * (1.0 + @cos(std.math.pi * progress));
                break :blk self.learning_rate * cosine;
            },
            .sacred => blk: {
                // φ-based decay: LR * φ^(-progress/φ)
                const decay = std.math.pow(f64, Sacred.PHI, -progress / Sacred.PHI);
                break :blk self.learning_rate * decay;
            },
        };
    }

    /// Fraction of the post-warmup window completed at `step`, clamped to 1.0.
    /// The caller must ensure `step >= warmup_steps`.
    fn decayProgress(self: TrainingConfig, step: u32) f64 {
        // With no decay window (max_steps <= warmup_steps) any post-warmup
        // step is already at or beyond max_steps, so the decay is complete.
        // This also avoids unsigned underflow and division by zero below.
        if (self.max_steps <= self.warmup_steps) return 1.0;

        const elapsed: f64 = @floatFromInt(step - self.warmup_steps);
        const window: f64 = @floatFromInt(self.max_steps - self.warmup_steps);
        // Clamp so steps past max_steps do not produce a negative linear rate
        // or a cosine curve that starts rising again.
        return @min(elapsed / window, 1.0);
    }
};
| 128 | + |
/// Settings for the φ-based ("sacred") scaling and sparsity options.
pub const SacredConfig = struct {
    use_sacred_scaling: bool = true,
    /// Fraction of non-zero entries; must lie in [0, 1].
    sacred_sparsity: f64 = Sacred.OPTIMAL_SPARSITY,
    phi_expansion: bool = true,

    /// Return `error.InvalidSparsity` unless `sacred_sparsity` is in [0, 1].
    pub fn validate(self: SacredConfig) !void {
        // Positive-form range check so NaN (which fails every comparison)
        // is rejected too.
        const in_range = self.sacred_sparsity >= 0 and self.sacred_sparsity <= 1;
        if (!in_range) return error.InvalidSparsity;
    }

    /// Get the scale factor for parameter initialization of a layer of width `dim`.
    ///
    /// Returns `sqrt(2 / dim)` when sacred scaling is disabled, otherwise
    /// `dim^(-φ⁻³)`. Both formulas evaluate to infinity for `dim == 0`.
    pub fn getScaleFactor(self: SacredConfig, dim: usize) f64 {
        const width: f64 = @floatFromInt(dim);
        if (!self.use_sacred_scaling) return std.math.sqrt(2.0 / width);

        // Sacred scaling: σ = d^(-φ⁻³)
        return std.math.pow(f64, width, -Sacred.PHI_INV_CUBED);
    }

    /// Get the target sparsity (fraction of zeros), i.e. `1 - sacred_sparsity`.
    pub fn getTargetSparsity(self: SacredConfig) f64 {
        return 1.0 - self.sacred_sparsity;
    }
};
| 152 | + |
/// Quantization settings.
pub const QuantConfig = struct {
    ternary_weights: bool = true,
    /// Sparsity value; must lie in [0, 1].
    sparsity: f64 = Sacred.TARGET_SPARSITY,
    vsa_binding: bool = true,
    quantize_k: bool = true,
    quantize_v: bool = true,

    /// Return `error.InvalidSparsity` unless `sparsity` is in [0, 1].
    pub fn validate(self: QuantConfig) !void {
        // Positive-form range check so NaN (which fails every comparison)
        // is rejected too.
        const in_range = self.sparsity >= 0 and self.sparsity <= 1;
        if (!in_range) return error.InvalidSparsity;
    }
};
| 165 | + |
/// Top-level configuration bundling the model, training, sacred and
/// quantization sections.
pub const TrinityConfig = struct {
    model: ModelConfig = .{},
    training: TrainingConfig = .{},
    sacred: SacredConfig = .{},
    quantization: QuantConfig = .{},

    /// Validate every section in the order model, training, sacred,
    /// quantization; the first failure is returned.
    pub fn validate(self: TrinityConfig) !void {
        try ModelConfig.validate(self.model);
        try TrainingConfig.validate(self.training);
        try SacredConfig.validate(self.sacred);
        try QuantConfig.validate(self.quantization);
    }

    /// Feed-forward width, as computed by `ModelConfig.getFFNDim`.
    pub fn getFFNDim(self: TrinityConfig) usize {
        const model = self.model;
        return model.getFFNDim();
    }

    /// Parameter-initialization scale for width `dim`, as computed by
    /// `SacredConfig.getScaleFactor`.
    pub fn getInitScale(self: TrinityConfig, dim: usize) f64 {
        const sacred = self.sacred;
        return sacred.getScaleFactor(dim);
    }
};
| 191 | + |
/// On-disk (JSON) layout of a configuration: the four config sections plus a
/// `version` string.
pub const ConfigFile = struct {
    version: []const u8 = "1.0",
    model: ModelConfig = .{},
    training: TrainingConfig = .{},
    sacred: SacredConfig = .{},
    quantization: QuantConfig = .{},

    /// Copy the four sections into a `TrinityConfig`; `version` is not carried over.
    pub fn toTrinityConfig(self: ConfigFile) TrinityConfig {
        const result = TrinityConfig{
            .model = self.model,
            .training = self.training,
            .sacred = self.sacred,
            .quantization = self.quantization,
        };
        return result;
    }
};
| 210 | + |
/// Load and validate a configuration from the JSON file at `path`.
///
/// `path` is resolved relative to the current working directory and at most
/// 1 MiB is read. Unknown JSON fields are ignored. `allocator` is used only
/// for temporary buffers, all of which are released before returning.
/// Errors from opening, reading, parsing or validation are propagated.
pub fn loadConfig(allocator: std.mem.Allocator, path: []const u8) !TrinityConfig {
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    const max_size = 1024 * 1024; // 1MB max
    const source = try file.readToEndAlloc(allocator, max_size);
    defer allocator.free(source);

    // `allocate` is left at its default: it is an optional enum, so the
    // struct literal `.{}` previously passed here is not a valid value.
    const parsed = try std.json.parseFromSlice(ConfigFile, allocator, source, .{
        .ignore_unknown_fields = true,
    });
    defer parsed.deinit();

    // The result is copied out by value before `parsed` is freed; the
    // `version` slice, the only field pointing into parser memory, is not
    // part of TrinityConfig.
    const config = parsed.value.toTrinityConfig();
    try config.validate();

    return config;
}
| 231 | + |
/// Save `config` as indented JSON to the file at `path`.
///
/// `path` is resolved relative to the current working directory and the file
/// is created with default `createFile` flags. `allocator` is used for the
/// temporary JSON buffer, which is freed before returning. The written
/// document uses the `ConfigFile` layout with its default `version`.
pub fn saveConfig(allocator: std.mem.Allocator, path: []const u8, config: TrinityConfig) !void {
    const config_file: ConfigFile = .{
        .model = config.model,
        .training = config.training,
        .sacred = config.sacred,
        .quantization = config.quantization,
    };

    // The options literal needs an explicit type, and `whitespace` only has
    // fixed-width members (`.indent_2`, `.indent_4`, ...); there is no `.indent`.
    const options: std.json.StringifyOptions = .{ .whitespace = .indent_2 };
    const json_text = try std.json.stringifyAlloc(allocator, config_file, options);
    defer allocator.free(json_text);

    const file = try std.fs.cwd().createFile(path, .{});
    defer file.close();

    try file.writeAll(json_text);
}
| 250 | + |
/// Load configuration from environment variables, starting from the defaults.
///
/// Variables read:
/// - `HSLM_LEARNING_RATE`: parsed as f64.
/// - `HSLM_MAX_STEPS`, `HSLM_BATCH_SIZE`: parsed as base-10 u32.
/// - `HSLM_LR_SCHEDULE`: a schedule name (`constant`, `linear`, `cosine`,
///   `sacred`); any other value selects `.constant`.
/// - `HSLM_OPTIMIZER`: if it contains "sacred", sacred scaling is enabled.
///   That flag already defaults to true, so this variable cannot disable it.
///
/// Parse failures and validation errors are propagated.
pub fn loadFromEnv() !TrinityConfig {
    var config: TrinityConfig = .{};

    // Training parameters.
    // `getenv` lives in `std.posix` since the `std.os` reorganization in Zig 0.12.
    if (std.posix.getenv("HSLM_LEARNING_RATE")) |text| {
        config.training.learning_rate = try std.fmt.parseFloat(f64, text);
    }

    if (std.posix.getenv("HSLM_MAX_STEPS")) |text| {
        config.training.max_steps = try std.fmt.parseInt(u32, text, 10);
    }

    if (std.posix.getenv("HSLM_BATCH_SIZE")) |text| {
        config.training.batch_size = try std.fmt.parseInt(u32, text, 10);
    }

    if (std.posix.getenv("HSLM_LR_SCHEDULE")) |text| {
        // Tag names match the accepted strings; unrecognized names keep the
        // existing fallback to a constant schedule.
        config.training.lr_schedule = std.meta.stringToEnum(LRSchedule, text) orelse .constant;
    }

    // Sacred parameters
    if (std.posix.getenv("HSLM_OPTIMIZER")) |text| {
        if (std.mem.indexOf(u8, text, "sacred") != null) {
            config.sacred.use_sacred_scaling = true;
        }
    }

    try config.validate();
    return config;
}
| 292 | + |
/// Return the preset configuration named HSLM1_95M, with every field of every
/// section spelled out explicitly.
pub fn getDefaultHSLM1_95M() TrinityConfig {
    const model: ModelConfig = .{
        .vocab_size = 31000,
        .hidden_dim = 512,
        .num_layers = 6,
        .num_heads = 8,
        .ffn_dim = 0, // Auto-calculate
        .max_seq_len = 512,
    };

    const training: TrainingConfig = .{
        .learning_rate = 0.001,
        .lr_schedule = .sacred,
        .warmup_steps = 1000,
        .max_steps = 30000,
        .batch_size = 64,
        .weight_decay = 0.01,
        .gradient_clip = 1.0,
    };

    const sacred: SacredConfig = .{
        .use_sacred_scaling = true,
        .sacred_sparsity = Sacred.OPTIMAL_SPARSITY,
        .phi_expansion = true,
    };

    const quantization: QuantConfig = .{
        .ternary_weights = true,
        .sparsity = Sacred.TARGET_SPARSITY,
        .vsa_binding = true,
        .quantize_k = true,
        .quantize_v = true,
    };

    return TrinityConfig{
        .model = model,
        .training = training,
        .sacred = sacred,
        .quantization = quantization,
    };
}
| 327 | + |
| 328 | +// Tests |
test "ModelConfig validation" {
    // A zero hidden dimension is rejected.
    const zero_hidden = ModelConfig{ .hidden_dim = 0 };
    try std.testing.expectError(error.InvalidHiddenDim, zero_hidden.validate());

    // 512 % 9 != 0, so the head count does not divide the hidden dimension.
    const bad_heads = ModelConfig{ .hidden_dim = 512, .num_heads = 9 };
    try std.testing.expectError(error.HiddenDimNotDivisibleByHeads, bad_heads.validate());
}
| 337 | + |
test "TrainingConfig LR schedule" {
    const base_lr: f64 = 0.001;
    const config = TrainingConfig{
        .learning_rate = base_lr,
        .lr_schedule = .sacred,
        .warmup_steps = 100,
        .max_steps = 1000,
    };

    // Halfway through warmup the rate is strictly between 0 and the base rate.
    const during_warmup = config.getLR(50);
    try std.testing.expect(during_warmup > 0);
    try std.testing.expect(during_warmup < base_lr);

    // In the sacred decay phase the rate has dropped below the base rate.
    const during_decay = config.getLR(500);
    try std.testing.expect(during_decay > 0);
    try std.testing.expect(during_decay < base_lr);

    // At the last step the rate is still positive.
    const at_end = config.getLR(999);
    try std.testing.expect(at_end > 0);
}
| 358 | + |
test "SacredConfig scale factor" {
    const config = SacredConfig{ .use_sacred_scaling = true };
    const sacred_scale = config.getScaleFactor(512);

    // The sacred scale for width 512 lies strictly between 0 and 1.
    try std.testing.expect(sacred_scale > 0);
    try std.testing.expect(sacred_scale < 1.0);

    // It is also larger than the standard sqrt(2 / dim) scale for the same width.
    const standard_scale = std.math.sqrt(2.0 / 512.0);
    try std.testing.expect(sacred_scale > standard_scale);
}
| 369 | + |
test "Default HSLM config" {
    const testing = std.testing;
    const config = getDefaultHSLM1_95M();

    // The preset must pass its own validation.
    try config.validate();

    try testing.expectEqual(@as(usize, 512), config.model.hidden_dim);
    try testing.expectEqual(@as(usize, 6), config.model.num_layers);
    try testing.expect(config.sacred.use_sacred_scaling);
    try testing.expect(config.training.lr_schedule == .sacred);

    // FFN dimension should be ~512 * 2.618 ≈ 1340
    const ffn_dim = config.getFFNDim();
    try testing.expect(ffn_dim >= 1300);
    try testing.expect(ffn_dim <= 1400);
}
0 commit comments