-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllm_suite_style.rs
More file actions
330 lines (309 loc) · 10.6 KB
/
llm_suite_style.rs
File metadata and controls
330 lines (309 loc) · 10.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
// ─── llm_suite_style.rs ───────────────────────────────────────────────────────
// Artist and genre reference comprehension tests.
//
// These test whether the model maps cultural/artist references to the correct
// sonic parameter space. A consistent FAIL means the reference is not in the
// model's training data — update styles.json to use a plain sonic description
// instead of the artist name.
//
// Artist reference tiers (roughly by training-data prevalence across models):
//   ✅ Broadly known: Aphex Twin, Autechre, Daft Punk, Kraftwerk, Tangerine Dream
//   🟡 Niche but common: Phuture/DJ Pierre, Basic Channel, Venetian Snares, Plastikman
//   ❓ Uncertain: Neophyte, Drexciya, Mixmaster Morris, Gost, Enduser
//
// Run: ./scripts/run-llm-style.sh
//      cargo test --features llm-tests -- llm_suite_style
use crate::llm::prompt::build_system_prompt;
use crate::llm::{LlamaServerBackend, LlmBackend, SamplingParams};
use crate::state::AppState;
use serde_json::Value;
/// Number of inference attempts `assert_gate` makes for each prompt.
const RUNS: usize = 10;
/// Pass threshold (out of `RUNS`) that the tests visible in this file hand to
/// `assert_gate` as `required`.
const REQUIRED_LOOSE: usize = 7;
/// Walks `json` along a dot-separated `path` (e.g. `"fx.reverb_mix"`) and returns the
/// value found at the end, or `None` if any segment along the way is missing.
fn at<'a>(json: &'a Value, path: &str) -> Option<&'a Value> {
    let mut node = json;
    for key in path.split('.') {
        node = node.get(key)?;
    }
    Some(node)
}
fn num(json: &Value, path: &str) -> f64 {
at(json, path).and_then(|v| v.as_f64()).unwrap_or(f64::NAN)
}
/// Builds the backend and system prompt shared by the tests in this file.
///
/// Connects to the server named by `LLAMA_SERVER_URL` when that variable is set;
/// otherwise constructs a backend from the default state's model path. Returns `None`
/// after printing a SKIP notice when the backend is not live, so callers can return
/// early instead of failing.
fn setup() -> Option<(LlamaServerBackend, String)> {
    let state = AppState::default();
    let backend = match std::env::var("LLAMA_SERVER_URL") {
        Ok(url) => LlamaServerBackend::connect(&url),
        Err(_) => LlamaServerBackend::new(&state.llm.model_path, 65536, 18080),
    };
    if backend.is_live() {
        let system = build_system_prompt(&state, &[]);
        return Some((backend, system));
    }
    eprintln!(
        "\n[llm-style] SKIP — server not available (model: '{}')\n\
         Run ./scripts/run-llm-style.sh or set LLAMA_SERVER_URL.\n",
        state.llm.model_path
    );
    None
}
/// ANSI SGR escape (dim + italic) written before the model's echoed thinking text.
const THINK_ON: &str = "\x1b[2;3m";
/// ANSI SGR reset escape written after the model's echoed thinking text.
const THINK_OFF: &str = "\x1b[0m";
/// Shortens `s` to at most `max` bytes for log output, appending `…` when text was cut.
///
/// The cut point is moved back to the nearest UTF-8 character boundary, so slicing
/// never panics on multi-byte text. Setting `LLM_SUITE_VERBOSE=1` disables truncation
/// and returns the full string.
fn trunc(s: &str, max: usize) -> String {
    // Cheap length test first: strings that already fit never need the env lookup.
    if s.len() <= max || matches!(std::env::var("LLM_SUITE_VERBOSE").as_deref(), Ok("1")) {
        return s.to_string();
    }
    // Here `max < s.len()`. Search backwards with `is_char_boundary` rather than
    // `str::floor_char_boundary`, which only recent toolchains provide. Index 0 is
    // always a boundary, so the loop terminates.
    let mut cut = max;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}…", &s[..cut])
}
/// Runs a single inference at the given `heat` and returns the reply's parameter update.
///
/// Thinking text, when present, is echoed to stderr between the `THINK_ON` and
/// `THINK_OFF` escapes. Returns `None` — with a diagnostic on stderr — when the
/// backend reports an error or the reply carries no `param_update`.
fn infer_json(
    backend: &mut LlamaServerBackend,
    system: &str,
    prompt: &str,
    heat: f32,
) -> Option<Value> {
    let sampling = SamplingParams {
        heat,
        ..SamplingParams::default()
    };
    let reply = match backend.infer(system, prompt, &sampling) {
        Ok(reply) => reply,
        Err(e) => {
            eprintln!("[llm-style] infer error: {}", trunc(&e.to_string(), 300));
            return None;
        }
    };
    if let Some(think) = &reply.thinking {
        eprintln!(
            "{THINK_ON} <think> {} </think>{THINK_OFF}",
            trunc(think, 300)
        );
    }
    if reply.param_update.is_none() {
        eprintln!(
            "[llm-style] infer OK but param_update=None (text: {})",
            trunc(&reply.text, 200)
        );
    }
    reply.param_update
}
/// Gates a prompt on repeated inference: runs it `RUNS` times and requires `check` to
/// accept at least `required` of the returned parameter updates.
///
/// A run that yields no update counts as a miss. A one-line summary (pass count and
/// mean latency per request) is written to stderr before the assertion.
///
/// # Panics
/// Panics when fewer than `required` runs pass.
fn assert_gate(
    backend: &mut LlamaServerBackend,
    system: &str,
    prompt: &str,
    heat: f32,
    required: usize,
    check: impl Fn(&Value) -> bool,
) {
    let mut elapsed_ms: u128 = 0;
    let passes = (0..RUNS)
        .filter(|_| {
            let started = std::time::Instant::now();
            let passed = infer_json(backend, system, prompt, heat).is_some_and(|v| check(&v));
            elapsed_ms += started.elapsed().as_millis();
            passed
        })
        .count();
    let avg_ms = elapsed_ms / RUNS as u128;
    let gate_label = if passes < required { "✗" } else { "✓" };
    eprint!(" {gate_label} {passes}/{RUNS} (need ≥{required}) ~{avg_ms}ms/req ");
    assert!(
        passes >= required,
        "[llm-style] '{}': {}/{} runs passed (need {})\n\
         → model lacks this reference; update styles.json to use plain sonic description",
        prompt,
        passes,
        RUNS,
        required
    );
}
// ── Artist / cultural reference tests ────────────────────────────────────────
/// Classic acid lives on its squelch, so the bass resonance should come back high.
/// Phuture and DJ Pierre are foundational acid-house names; a model that knows
/// "acid house" should recognise them.
#[test]
fn classic_acid_phuture_sets_high_resonance() {
    fn squelchy(reply: &Value) -> bool {
        num(reply, "bass.resonance") >= 0.65
    }

    let prompt =
        "classic Chicago acid house — think Phuture, DJ Pierre, Trax Records, pure 303 squelch";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, squelchy);
}
/// Classic acid is a dry sound: reverb and delay mixes must both stay low.
/// A mix the reply does not contain is read as 0.0, i.e. fully dry.
#[test]
fn classic_acid_phuture_stays_dry() {
    let prompt = "classic acid, Phuture style — raw and dry, no reverb, no delay";
    let is_dry = |reply: &Value| {
        let mix = |path: &str| at(reply, path).and_then(Value::as_f64).unwrap_or(0.0);
        mix("fx.reverb_mix") <= 0.2 && mix("fx.delay_mix") <= 0.15
    };
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, is_dry);
}
/// Autechre means IDM, so the kick must not be a strict four-on-the-floor pattern.
/// Autechre is one of the best-known IDM acts; a model with any IDM knowledge should
/// recognise the name. A reply without `sequencer.kick_a_steps` passes.
#[test]
fn autechre_idm_breaks_four_on_the_floor() {
    // Sixteen steps, one hit at the start of each group of four.
    let four_floor = serde_json::json!([
        true, false, false, false,
        true, false, false, false,
        true, false, false, false,
        true, false, false, false
    ]);
    let prompt =
        "go full Autechre IDM — irregular kick, subvert the grid, nothing four-on-the-floor";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    let off_grid = |reply: &Value| {
        at(reply, "sequencer.kick_a_steps").map_or(true, |steps| *steps != four_floor)
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, off_grid);
}
/// Aphex Twin's Selected Ambient Works Vol 2 is spacious: expect a substantial reverb mix.
#[test]
fn aphex_twin_ambient_uses_reverb() {
    fn spacious(reply: &Value) -> bool {
        num(reply, "fx.reverb_mix") >= 0.25
    }

    let prompt = "ambient Aphex Twin mood — Selected Ambient Works Vol 2, spacious and ethereal";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, spacious);
}
/// Basic Channel dub techno is FX-heavy — reverb and delay are the music — so at
/// least one of the two mixes must be high. Even if the model does not know Basic
/// Channel, "dub techno" alone should still trigger the FX.
#[test]
fn basic_channel_dub_techno_uses_heavy_fx() {
    fn wet(reply: &Value) -> bool {
        let reverb = num(reply, "fx.reverb_mix");
        let delay = num(reply, "fx.delay_mix");
        reverb >= 0.3 || delay >= 0.2
    }

    let prompt = "dub techno in the style of Basic Channel — maximum reverb, ghost delay echoes";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, wet);
}
/// Berlin techno means a dark filter and deep bass: expect a low bass cutoff.
/// The prompt name-drops Richie Hawtin and Berghain; the model should understand
/// "Berlin" even if it does not know the names.
#[test]
fn berlin_techno_sets_dark_filter() {
    fn filter_is_dark(reply: &Value) -> bool {
        num(reply, "bass.cutoff") <= 0.35
    }

    let prompt =
        "Berlin techno — Berghain floor, deep dark kick, filter nearly closed, Richie Hawtin";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, filter_is_dark);
}
/// Venetian Snares breakcore calls for a very high tempo: any emitted BPM must be at
/// least 160. A reply that leaves `sequencer.bpm` out passes.
#[test]
fn venetian_snares_sets_high_bpm() {
    fn fast_or_untouched(reply: &Value) -> bool {
        at(reply, "sequencer.bpm")
            .and_then(Value::as_f64)
            .map_or(true, |bpm| bpm >= 160.0)
    }

    let prompt = "breakcore chaos, Venetian Snares energy — shredded Amen, extreme BPM";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, fast_or_untouched);
}
// ── Baroque / Bach style ──────────────────────────────────────────────────────
//
// These test whether the model understands classical melodic structure:
// stepwise voice leading, correct tempo range, no drums, piano-like voice.
/// A Bach melody should move mostly by step (conjunct motion ≤5 semitones).
///
/// The first note list present among `an1x.an1x_notes`, `sequencer.bass_notes` and
/// `hoover.hoover_notes` is inspected. A run passes when that list holds at least
/// three unsigned-integer notes and at least 55% of consecutive intervals span five
/// semitones or fewer.
#[test]
fn bach_melody_is_mostly_stepwise() {
    let Some((mut b, sys)) = setup() else { return };
    assert_gate(
        &mut b,
        &sys,
        "compose a Bach-style Baroque melody in D minor — \
         dense stepwise piano melody, no drums, no bass",
        0.3,
        REQUIRED_LOOSE,
        |j| {
            let notes_arr = at(j, "an1x.an1x_notes")
                .or_else(|| at(j, "sequencer.bass_notes"))
                .or_else(|| at(j, "hoover.hoover_notes"))
                .and_then(|v| v.as_array());
            let Some(arr) = notes_arr else {
                return false;
            };
            // Keep notes at full width: narrowing with `as u8` would wrap values above
            // 255 and make an enormous leap look like a small step.
            let notes: Vec<u64> = arr.iter().filter_map(Value::as_u64).collect();
            if notes.len() < 3 {
                return false;
            }
            let stepwise = notes
                .windows(2)
                .filter(|w| w[0].abs_diff(w[1]) <= 5)
                .count();
            // `notes.len() - 1` is the number of intervals; it is ≥ 2 here.
            stepwise as f64 / (notes.len() - 1) as f64 >= 0.55
        },
    );
}
/// Baroque Bach should NOT come back at club tempo: any emitted BPM must be ≤ 140.
/// A reply that leaves `sequencer.bpm` out passes.
#[test]
fn bach_not_club_tempo() {
    fn below_club_tempo(reply: &Value) -> bool {
        at(reply, "sequencer.bpm")
            .and_then(Value::as_f64)
            .map_or(true, |bpm| bpm <= 140.0)
    }

    let prompt = "compose a Bach-style Baroque melody in D minor";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, below_club_tempo);
}
/// Bach piano needs the AN1X voice switched on, with the bass silent (volume ≤ 0.1)
/// or absent from the reply altogether.
#[test]
fn bach_enables_an1x_not_bass() {
    fn piano_only(reply: &Value) -> bool {
        let an1x_on = matches!(
            at(reply, "an1x.enabled").and_then(Value::as_bool),
            Some(true)
        );
        if !an1x_on {
            return false;
        }
        match at(reply, "bass.volume").and_then(Value::as_f64) {
            Some(volume) => volume <= 0.1,
            None => true,
        }
    }

    let prompt = "FULL RESET to Baroque Bach piano — dense D minor melody, no drums, no bass";
    let Some((mut backend, system)) = setup() else {
        return;
    };
    assert_gate(&mut backend, &system, prompt, 0.3, REQUIRED_LOOSE, piano_only);
}