Skip to content

Commit 97bd1f3

Browse files
test: add CPU golden path E2E validation tests (#837)
## Summary

Expands `crates/bitnet-inference/tests/e2e_cpu_golden_path.rs` with four new always-on integration tests that validate the inference pipeline end-to-end without any model download.

## New Tests

| Test | What it proves |
|------|---------------|
| `test_e2e_stop_token_id_halts_generation_early` | A configured stop token ID terminates generation before `max_tokens` — wires stop-token logic into the E2E path |
| `test_e2e_receipt_kernel_ids_schema_constraints` | Every recorded kernel ID satisfies receipt schema rules: non-empty, ≤ 128 chars, count ≤ 10 000 |
| `test_e2e_receipt_schema_version_is_1_0_0` | Receipt `schema_version` is pinned to `"1.0.0"` and matches `RECEIPT_SCHEMA_VERSION` |
| `test_e2e_max_tokens_boundary` | Generation terminates at exactly `max_tokens` for values 1–4 when no stop token fires |

## How to run

```sh
cargo test -p bitnet-inference --no-default-features --features cpu --test e2e_cpu_golden_path
```

All 8 tests pass (4 existing + 4 new). No model download required. No `BITNET_MODEL_PATH` needed.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 26aea4f commit 97bd1f3

1 file changed

Lines changed: 133 additions & 0 deletions

File tree

crates/bitnet-inference/tests/e2e_cpu_golden_path.rs

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@
99
//! * `test_e2e_golden_path_pinned_output` — pins specific token IDs produced by greedy
1010
//! decoding on the synthetic model (seed=42) as a regression guard.
1111
//!
12+
//! * `test_e2e_stop_token_id_halts_generation_early` — verifies that a configured stop
13+
//! token ID terminates generation before `max_tokens` is reached.
14+
//!
15+
//! * `test_e2e_receipt_kernel_ids_schema_constraints` — verifies all recorded kernel IDs
16+
//! satisfy the receipt schema constraints (non-empty, ≤ 128 chars, count ≤ 10 000).
17+
//!
18+
//! * `test_e2e_receipt_schema_version_is_1_0_0` — verifies the receipt schema version
19+
//! is the pinned constant "1.0.0".
20+
//!
21+
//! * `test_e2e_max_tokens_boundary` — verifies `max_tokens` is respected exactly when no
22+
//! stop token is encountered, across several small values (1–4).
23+
//!
1224
//! * `test_e2e_real_model_golden_path` — skipped in PR CI; run locally with a real model:
1325
//! ```sh
1426
//! BITNET_MODEL_PATH=models/model.gguf \
@@ -169,6 +181,127 @@ async fn test_e2e_golden_path_pinned_output() -> Result<()> {
169181
Ok(())
170182
}
171183

184+
// ---------------------------------------------------------------------------
185+
// Stop token ID: early termination
186+
// ---------------------------------------------------------------------------
187+
188+
/// A configured stop token ID must terminate generation before `max_tokens` is reached,
189+
/// proving the stop-token logic is correctly wired into the E2E pipeline.
190+
///
191+
/// The pinned golden sequence with seed=42 is `[140, 459, 459, 459]`.
192+
/// Setting stop_token_id=459 must cause the engine to stop after emitting 140,
193+
/// because 459 is checked *before* it is appended to the output.
194+
#[tokio::test]
195+
async fn test_e2e_stop_token_id_halts_generation_early() -> Result<()> {
196+
let model = synthetic_model()?;
197+
let tokenizer = Arc::new(MockTokenizer::new());
198+
let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?;
199+
let config = GenerationConfig::greedy()
200+
.with_seed(42)
201+
.with_max_tokens(10) // generous budget; stop token should fire first
202+
.with_stop_token_id(459); // second generated token in the pinned golden sequence
203+
204+
let prompt_ids = tokenizer.encode("2+2=", false, false)?;
205+
let tokens = engine.generate_tokens(&prompt_ids, &config).await?;
206+
207+
assert_eq!(
208+
tokens,
209+
vec![140],
210+
"stop_token_id=459 must halt generation before emitting 459; got {tokens:?}"
211+
);
212+
Ok(())
213+
}
214+
215+
// ---------------------------------------------------------------------------
216+
// Receipt kernel ID schema constraints
217+
// ---------------------------------------------------------------------------
218+
219+
/// All kernel IDs recorded during a real inference pass must satisfy the schema
220+
/// constraints required for honest-compute receipts: non-empty strings, at most
221+
/// 128 characters each, and a total count ≤ 10 000.
222+
#[tokio::test]
223+
async fn test_e2e_receipt_kernel_ids_schema_constraints() -> Result<()> {
224+
let model = synthetic_model()?;
225+
let tokenizer = Arc::new(MockTokenizer::new());
226+
let recorder = KernelRecorder::new();
227+
let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?
228+
.with_recorder(recorder.clone());
229+
230+
let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(4);
231+
let prompt_ids = tokenizer.encode("2+2=", false, false)?;
232+
engine.generate_tokens(&prompt_ids, &config).await?;
233+
234+
let kernel_ids = recorder.snapshot();
235+
assert!(!kernel_ids.is_empty(), "at least one kernel ID must be recorded");
236+
assert!(
237+
kernel_ids.len() <= 10_000,
238+
"kernel count {} exceeds schema limit of 10 000",
239+
kernel_ids.len()
240+
);
241+
for id in &kernel_ids {
242+
assert!(!id.is_empty(), "kernel ID must not be an empty string");
243+
assert!(
244+
id.len() <= 128,
245+
"kernel ID '{id}' length {} exceeds schema limit of 128",
246+
id.len()
247+
);
248+
}
249+
Ok(())
250+
}
251+
252+
// ---------------------------------------------------------------------------
253+
// Receipt schema version
254+
// ---------------------------------------------------------------------------
255+
256+
/// The receipt schema version must always be the pinned literal "1.0.0" and
257+
/// must match the `RECEIPT_SCHEMA_VERSION` constant exported by `bitnet_receipts`.
258+
#[tokio::test]
259+
async fn test_e2e_receipt_schema_version_is_1_0_0() -> Result<()> {
260+
let model = synthetic_model()?;
261+
let tokenizer = Arc::new(MockTokenizer::new());
262+
let recorder = KernelRecorder::new();
263+
let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?
264+
.with_recorder(recorder.clone());
265+
266+
let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(2);
267+
let prompt_ids = tokenizer.encode("2+2=", false, false)?;
268+
engine.generate_tokens(&prompt_ids, &config).await?;
269+
270+
let receipt = InferenceReceipt::generate("cpu-rust", recorder.snapshot(), None)?;
271+
assert_eq!(receipt.schema_version, "1.0.0", "receipt schema version must be fixed at '1.0.0'");
272+
assert_eq!(
273+
receipt.schema_version,
274+
bitnet_receipts::RECEIPT_SCHEMA_VERSION,
275+
"receipt schema version must match the RECEIPT_SCHEMA_VERSION constant"
276+
);
277+
Ok(())
278+
}
279+
280+
// ---------------------------------------------------------------------------
281+
// Max-tokens boundary: exact token count
282+
// ---------------------------------------------------------------------------
283+
284+
/// `max_tokens` must be respected exactly across small values when no stop token
285+
/// is encountered, validating the generation loop termination condition.
286+
#[tokio::test]
287+
async fn test_e2e_max_tokens_boundary() -> Result<()> {
288+
for &n in &[1u32, 2, 3, 4] {
289+
let model = synthetic_model()?;
290+
let tokenizer = Arc::new(MockTokenizer::new());
291+
let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?;
292+
let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(n);
293+
let prompt_ids = tokenizer.encode("2+2=", false, false)?;
294+
let tokens = engine.generate_tokens(&prompt_ids, &config).await?;
295+
assert_eq!(
296+
tokens.len(),
297+
n as usize,
298+
"max_tokens={n}: expected {n} tokens but got {}",
299+
tokens.len()
300+
);
301+
}
302+
Ok(())
303+
}
304+
172305
// ---------------------------------------------------------------------------
173306
// Real-model E2E test (skipped in PR CI)
174307
// ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)