|
9 | 9 | //! * `test_e2e_golden_path_pinned_output` — pins specific token IDs produced by greedy |
10 | 10 | //! decoding on the synthetic model (seed=42) as a regression guard. |
11 | 11 | //! |
| 12 | +//! * `test_e2e_stop_token_id_halts_generation_early` — verifies that a configured stop |
| 13 | +//! token ID terminates generation before `max_tokens` is reached. |
| 14 | +//! |
| 15 | +//! * `test_e2e_receipt_kernel_ids_schema_constraints` — verifies all recorded kernel IDs |
| 16 | +//! satisfy the receipt schema constraints (non-empty, ≤ 128 chars, count ≤ 10 000). |
| 17 | +//! |
| 18 | +//! * `test_e2e_receipt_schema_version_is_1_0_0` — verifies the receipt schema version |
| 19 | +//! is the pinned constant "1.0.0". |
| 20 | +//! |
| 21 | +//! * `test_e2e_max_tokens_boundary` — verifies `max_tokens` is respected exactly when no |
| 22 | +//! stop token is encountered, across several small values (1–4). |
| 23 | +//! |
12 | 24 | //! * `test_e2e_real_model_golden_path` — skipped in PR CI; run locally with a real model: |
13 | 25 | //! ```sh |
14 | 26 | //! BITNET_MODEL_PATH=models/model.gguf \ |
@@ -169,6 +181,127 @@ async fn test_e2e_golden_path_pinned_output() -> Result<()> { |
169 | 181 | Ok(()) |
170 | 182 | } |
171 | 183 |
|
| 184 | +// --------------------------------------------------------------------------- |
| 185 | +// Stop token ID: early termination |
| 186 | +// --------------------------------------------------------------------------- |
| 187 | + |
| 188 | +/// Checks that a configured stop token ID ends generation before `max_tokens` is reached, |
| 189 | +/// showing that the stop-token logic is wired into the E2E pipeline. |
| 190 | +/// |
| 191 | +/// The pinned golden sequence with seed=42 is `[140, 459, 459, 459]`, so with |
| 192 | +/// stop_token_id=459 and a budget of 10 tokens the asserted output is exactly `[140]`: |
| 193 | +/// the stop token is expected to be checked *before* it is appended to the output. |
| 194 | +#[tokio::test] |
| 195 | +async fn test_e2e_stop_token_id_halts_generation_early() -> Result<()> { |
| 196 | + let model = synthetic_model()?; |
| 197 | + let tokenizer = Arc::new(MockTokenizer::new()); |
| 198 | + let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?; |
| 199 | + let config = GenerationConfig::greedy() |
| 200 | + .with_seed(42) |
| 201 | + .with_max_tokens(10) // generous budget; stop token should fire first |
| 202 | + .with_stop_token_id(459); // second generated token in the pinned golden sequence |
| 203 | + |
| 204 | + let prompt_ids = tokenizer.encode("2+2=", false, false)?; |
| 205 | + let tokens = engine.generate_tokens(&prompt_ids, &config).await?; |
| 206 | + |
| 207 | + assert_eq!( |
| 208 | + tokens, |
| 209 | + vec![140], |
| 210 | + "stop_token_id=459 must halt generation before emitting 459; got {tokens:?}" |
| 211 | + ); |
| 212 | + Ok(()) |
| 213 | +} |
| 214 | + |
| 215 | +// --------------------------------------------------------------------------- |
| 216 | +// Receipt kernel ID schema constraints |
| 217 | +// --------------------------------------------------------------------------- |
| 218 | + |
| 219 | +/// Kernel IDs captured by a `KernelRecorder` during a 4-token run must satisfy the receipt |
| 220 | +/// schema limits: at least one ID, at most 10 000 IDs, each non-empty with `len()` <= 128. |
| 221 | +/// NOTE(review): `len()` is a byte count if IDs are `String`s, not characters — confirm. |
| 222 | +#[tokio::test] |
| 223 | +async fn test_e2e_receipt_kernel_ids_schema_constraints() -> Result<()> { |
| 224 | + let model = synthetic_model()?; |
| 225 | + let tokenizer = Arc::new(MockTokenizer::new()); |
| 226 | + let recorder = KernelRecorder::new(); |
| 227 | + let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)? |
| 228 | + .with_recorder(recorder.clone()); |
| 229 | + |
| 230 | + let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(4); |
| 231 | + let prompt_ids = tokenizer.encode("2+2=", false, false)?; |
| 232 | + engine.generate_tokens(&prompt_ids, &config).await?; |
| 233 | + |
| 234 | + let kernel_ids = recorder.snapshot(); |
| 235 | + assert!(!kernel_ids.is_empty(), "at least one kernel ID must be recorded"); |
| 236 | + assert!( |
| 237 | + kernel_ids.len() <= 10_000, |
| 238 | + "kernel count {} exceeds schema limit of 10 000", |
| 239 | + kernel_ids.len() |
| 240 | + ); |
| 241 | + for id in &kernel_ids { |
| 242 | + assert!(!id.is_empty(), "kernel ID must not be an empty string"); |
| 243 | + assert!( |
| 244 | + id.len() <= 128, |
| 245 | + "kernel ID '{id}' length {} exceeds schema limit of 128", |
| 246 | + id.len() |
| 247 | + ); |
| 248 | + } |
| 249 | + Ok(()) |
| 250 | +} |
| 251 | + |
| 252 | +// --------------------------------------------------------------------------- |
| 253 | +// Receipt schema version |
| 254 | +// --------------------------------------------------------------------------- |
| 255 | + |
| 256 | +/// A receipt built with `InferenceReceipt::generate` after a recorded 2-token run must report |
| 257 | +/// schema version "1.0.0", which must also equal `bitnet_receipts::RECEIPT_SCHEMA_VERSION`. |
| 258 | +#[tokio::test] |
| 259 | +async fn test_e2e_receipt_schema_version_is_1_0_0() -> Result<()> { |
| 260 | + let model = synthetic_model()?; |
| 261 | + let tokenizer = Arc::new(MockTokenizer::new()); |
| 262 | + let recorder = KernelRecorder::new(); |
| 263 | + let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)? |
| 264 | + .with_recorder(recorder.clone()); |
| 265 | + |
| 266 | + let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(2); |
| 267 | + let prompt_ids = tokenizer.encode("2+2=", false, false)?; |
| 268 | + engine.generate_tokens(&prompt_ids, &config).await?; |
| 269 | + |
| 270 | + let receipt = InferenceReceipt::generate("cpu-rust", recorder.snapshot(), None)?; |
| 271 | + assert_eq!(receipt.schema_version, "1.0.0", "receipt schema version must be fixed at '1.0.0'"); |
| 272 | + assert_eq!( |
| 273 | + receipt.schema_version, |
| 274 | + bitnet_receipts::RECEIPT_SCHEMA_VERSION, |
| 275 | + "receipt schema version must match the RECEIPT_SCHEMA_VERSION constant" |
| 276 | + ); |
| 277 | + Ok(()) |
| 278 | +} |
| 279 | + |
| 280 | +// --------------------------------------------------------------------------- |
| 281 | +// Max-tokens boundary: exact token count |
| 282 | +// --------------------------------------------------------------------------- |
| 283 | + |
| 284 | +/// With no stop token configured, generation must return exactly `max_tokens` tokens for each |
| 285 | +/// budget in 1, 2, 3, 4; a fresh model, tokenizer, and engine are built on every iteration. |
| 286 | +#[tokio::test] |
| 287 | +async fn test_e2e_max_tokens_boundary() -> Result<()> { |
| 288 | + for &n in &[1u32, 2, 3, 4] { |
| 289 | + let model = synthetic_model()?; |
| 290 | + let tokenizer = Arc::new(MockTokenizer::new()); |
| 291 | + let engine = InferenceEngine::new(model, tokenizer.clone(), Device::Cpu)?; |
| 292 | + let config = GenerationConfig::greedy().with_seed(42).with_max_tokens(n); |
| 293 | + let prompt_ids = tokenizer.encode("2+2=", false, false)?; |
| 294 | + let tokens = engine.generate_tokens(&prompt_ids, &config).await?; |
| 295 | + assert_eq!( |
| 296 | + tokens.len(), |
| 297 | + n as usize, |
| 298 | + "max_tokens={n}: expected {n} tokens but got {}", |
| 299 | + tokens.len() |
| 300 | + ); |
| 301 | + } |
| 302 | + Ok(()) |
| 303 | +} |
| 304 | + |
172 | 305 | // --------------------------------------------------------------------------- |
173 | 306 | // Real-model E2E test (skipped in PR CI) |
174 | 307 | // --------------------------------------------------------------------------- |
|
0 commit comments