Address proof-bound delta review

BidhanRoy · BidhanRoy · commit 567c4fcfbf99 · 2026-05-31T16:25:04.000-04:00
diff --git a/readme.md b/readme.md
@@ -32,6 +32,8 @@ To solve this, we created **ZKLoRA** a zero-knowledge verification protocol that
 
 This implementation uses a native Halo2 backend for transcript-bound proof artifacts. The v2 proof contract verifies exact quantized LoRA delta correctness for the statement the base user actually sent and received, and binds the proof to a pre-inference adapter manifest. It does not claim an end-to-end proof that the base model computed those activations.
 
+Verifier trust boundary: `expected_adapters` must be obtained and pinned by the verifier out-of-band before inference starts, for example by recording the exact manifest file or digest. A contributor-generated adapter manifest is only a convenience handoff artifact; if it is first generated after inference or supplied only alongside proofs, it is not trusted verifier input.
+
 For detailed information about this research, please refer to [our paper](https://arxiv.org/abs/2501.13965).
 
 <h2 align="center">Quick Usage Instructions</h2>
@@ -45,7 +47,7 @@ pip install zklora
 
 Use `src/scripts/lora_contributor_sample_script.py` to:
 - Host LoRA submodules
-- Write a pre-inference adapter manifest
+- Write a pre-inference adapter manifest for the verifier to pin out-of-band
 - Handle inference requests
 - Generate proof artifacts
 
@@ -57,18 +59,36 @@ import time
 from zklora import LoRAServer, LoRAServerSocket
 
 def main():
-    parser = argparse.ArgumentParser()
+    parser = argparse.ArgumentParser(
+        description=(
+            "Run a sample LoRA contributor server and write the adapter manifest "
+            "that the verifier should pin out-of-band before inference."
+        )
+    )
     parser.add_argument("--host", default="127.0.0.1")
     parser.add_argument("--port_a", type=int, default=30000)
     parser.add_argument("--base_model", default="distilgpt2")
     parser.add_argument("--lora_model_id", default="ng0-k1/distilgpt2-finetuned-es")
     parser.add_argument("--out_dir", default="a-out")
-    parser.add_argument("--adapter_manifest", default="adapter-manifest.json")
+    parser.add_argument(
+        "--adapter_manifest",
+        default="adapter-manifest.json",
+        help=(
+            "Convenience manifest handoff path. The verifier must obtain and pin "
+            "this manifest out-of-band before inference; a post-inference manifest "
+            "is not trusted expected_adapters input."
+        ),
+    )
     args = parser.parse_args()
 
     stop_event = threading.Event()
     server_obj = LoRAServer(args.base_model, args.lora_model_id, args.out_dir)
     server_obj.write_adapter_manifest(args.adapter_manifest)
+    print(f"[A-Server] wrote adapter manifest => {args.adapter_manifest}")
+    print(
+        "[A-Server] verifier must pin this manifest out-of-band before inference; "
+        "post-inference manifests are not trusted expected_adapters."
+    )
     t = LoRAServerSocket(args.host, args.port_a, server_obj, stop_event)
     t.start()
 
@@ -141,6 +161,8 @@ if __name__=="__main__":
 
 Use `src/scripts/verify_proofs.py` to validate the proof artifacts:
 
+`--expected_adapters` must point to the verifier's pinned pre-inference adapter manifest. Do not accept a contributor manifest that was generated after inference, or first delivered with the proof bundle, as trusted verifier input; it is useful only as a handoff artifact to compare against the pinned expectation.
+
 ```python
 #!/usr/bin/env python3
 """
@@ -173,7 +195,10 @@ def main():
         "--expected_adapters",
         type=str,
         required=True,
-        help="Pre-inference adapter manifest JSON agreed by the verifier."
+        help=(
+            "Verifier-pinned pre-inference adapter manifest JSON. This must be "
+            "obtained out-of-band before inference, not first supplied with proofs."
+        )
     )
     parser.add_argument(
         "--verbose",
diff --git a/src/Cargo.toml b/src/Cargo.toml
@@ -15,7 +15,6 @@ extension-module = ["python", "pyo3/extension-module"]
 [dependencies]
 halo2_proofs = "0.3.2"
 halo2_gadgets = "0.4"
-halo2_poseidon = "0.1"
 ff = "0.13"
 num-bigint = "0.4"
 num-integer = "0.1"
diff --git a/src/README.md b/src/README.md
@@ -28,10 +28,12 @@ src/
 The zero-knowledge proof system in ZKLoRA is built on transcript-bound LoRA delta statements and native Halo2 proofs. The `zk_proof_generator.py` module orchestrates the proof generation process by:
 
 1. Capturing the base user's local transcript of activations and returned LoRA deltas
-2. Binding each proof to a pre-inference adapter manifest with a Poseidon adapter commitment
+2. Binding each proof to a verifier-pinned pre-inference adapter manifest with a Poseidon adapter commitment
 3. Generating native `.zklora.*` proof artifacts for contributor-side LoRA invocations
 4. Verifying proof artifacts against both the base user's transcript and expected adapter manifest before accepting a module
 
+The verifier must obtain and pin `expected_adapters` out-of-band before inference starts. Contributor-generated adapter manifests are convenience handoff artifacts only; if a manifest is generated after inference or first delivered alongside proofs, it is not trusted to define the expected adapter.
+
 ### Multi-Party Inference Protocol
 
 The MPI system enables interaction between the base model user (B) and LoRA provider (A) through:
@@ -113,4 +115,6 @@ verify_time, num_proofs = batch_verify_proofs(
 )
 ```
 
+In this example, `adapter-manifest.json` must be the verifier's pinned pre-inference copy (or a file whose digest matches the pinned digest), not a manifest first generated after inference.
+
 For detailed implementation information, please refer to the individual module documentation. 
diff --git a/src/scripts/lora_contributor_sample_script.py b/src/scripts/lora_contributor_sample_script.py
@@ -6,19 +6,49 @@
 
 
 def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--host", default="127.0.0.1")
-    parser.add_argument("--port_a", type=int, default=30000)
-    parser.add_argument("--base_model", default="distilgpt2")
-    parser.add_argument("--lora_model_id", default="ng0-k1/distilgpt2-finetuned-es")
-    parser.add_argument("--out_dir", default="proof_artifacts")
-    parser.add_argument("--adapter_manifest", default="adapter-manifest.json")
+    parser = argparse.ArgumentParser(
+        description=(
+            "Run a sample LoRA contributor server and write the adapter manifest "
+            "that the verifier should pin out-of-band before inference."
+        ),
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--host", default="127.0.0.1", help="Contributor bind host.")
+    parser.add_argument("--port_a", type=int, default=30000, help="Contributor port.")
+    parser.add_argument(
+        "--base_model",
+        default="distilgpt2",
+        help="Base model name expected for the LoRA adapter.",
+    )
+    parser.add_argument(
+        "--lora_model_id",
+        default="ng0-k1/distilgpt2-finetuned-es",
+        help="LoRA model ID or local path served by this contributor.",
+    )
+    parser.add_argument(
+        "--out_dir",
+        default="proof_artifacts",
+        help="Directory where native .zklora proof artifacts are written.",
+    )
+    parser.add_argument(
+        "--adapter_manifest",
+        default="adapter-manifest.json",
+        help=(
+            "Convenience manifest handoff path. The verifier must obtain and pin "
+            "this manifest out-of-band before inference; a post-inference manifest "
+            "is not trusted expected_adapters input."
+        ),
+    )
     args = parser.parse_args()
 
     stop_event = threading.Event()
     server_obj = LoRAServer(args.base_model, args.lora_model_id, args.out_dir)
     server_obj.write_adapter_manifest(args.adapter_manifest)
     print(f"[A-Server] wrote adapter manifest => {args.adapter_manifest}")
+    print(
+        "[A-Server] verifier must pin this manifest out-of-band before inference; "
+        "post-inference manifests are not trusted expected_adapters."
+    )
     t = LoRAServerSocket(
         args.host, args.port_a, server_obj, stop_event, stop_timeout=1.0
     )
diff --git a/src/src/lib.rs b/src/src/lib.rs
@@ -26,6 +26,8 @@ use std::convert::TryInto;
 
 const ADAPTER_COMMITMENT_DOMAIN: u64 = 0x5a4b4c4f5241; // "ZKLORA"
 const ADAPTER_COMMITMENT_VERSION: u64 = 1;
+// Must match proof_contract.SCHEMA_VERSION; it is hashed into adapter commitments.
+const ARTIFACT_SCHEMA_VERSION: u64 = 2;
 const FIELD_SAFE_BITS: usize = 250;
 const POSEIDON_PAIR_ROWS: usize = 96;
 
@@ -282,7 +284,7 @@ impl Circuit<Fp> for LoraCircuit {
                     &config,
                     &mut adapter_words,
                     &mut offset,
-                    BigInt::from(2u64),
+                    BigInt::from(ARTIFACT_SCHEMA_VERSION),
                     "adapter schema version",
                 )?;
                 for (label, value) in [
@@ -1371,6 +1373,35 @@ mod tests {
         }
     }
 
+    fn minimal_circuit() -> LoraCircuit { // test fixture used by real_proof_verifies_for_tiny_relation
+        let input = AdapterCommitmentInput { // adapter description the commitment is computed from
+            schema_version: ARTIFACT_SCHEMA_VERSION,
+            in_dim: 1, // in_dim, rank and out_dim are all 1, matching the 1x1 `a` and `b` below
+            rank: 1,
+            out_dim: 1,
+            fixed_point: FixedPointConfig {
+                scale_bits: 0,
+                value_bits: 3,
+                intermediate_bits: 4,
+            },
+            scaling_num: 1,
+            scaling_den: 1,
+            a: vec![vec![1]],
+            b: vec![vec![1]],
+        };
+        LoraCircuit { // reuses the matrices, fixed-point config and scaling from `input`
+            a: input.a.clone(),
+            b: input.b.clone(),
+            x: vec![1],
+            delta: vec![1],
+            fixed_point: input.fixed_point.clone(),
+            scaling_num: input.scaling_num,
+            scaling_den: input.scaling_den,
+            adapter_commitment: adapter_commitment_for_input(&input).unwrap(), // panics if the commitment cannot be computed
+            statement_digest: "22".repeat(32), // 64-character placeholder: "22" repeated 32 times
+        }
+    }
+
     #[test]
     fn poseidon_adapter_commitment_is_deterministic() {
         let input = adapter_input();
@@ -1409,7 +1440,7 @@ mod tests {
     #[test]
     #[ignore = "IPA proof generation for the Poseidon/range-check circuit is intentionally slow"]
     fn real_proof_verifies_for_tiny_relation() {
-        let circuit = valid_circuit();
+        let circuit = minimal_circuit();
         let statement = NativeStatement {
             x: circuit.x.clone(),
             delta: circuit.delta.clone(),
diff --git a/src/zklora/base_model_user_mpi/__init__.py b/src/zklora/base_model_user_mpi/__init__.py
@@ -1,4 +1,5 @@
 import json
+import math
 import socket
 import uuid
 from typing import Any
@@ -168,14 +169,33 @@ def forward(self, x: torch.Tensor):
             scaling_den = (
                 self.transcript_recorder.scaling_den if self.transcript_recorder else 1
             )
-        if remote_out is None:
-            raise RuntimeError(f"[B] submodule '{self.sub_name}' => no output from A.")
-        out_t = torch.tensor(remote_out, dtype=torch.float32)
+        if q_delta is not None and self.transcript_recorder is not None:
+            out_t = _dequantize_q_delta(
+                q_delta,
+                self.transcript_recorder.fixed_point,
+                tuple(base_out.shape),
+                base_out.device,
+                base_out.dtype if torch.is_floating_point(base_out) else torch.float32,
+            )
+            remote_out_for_record = out_t.detach().cpu().numpy()
+        elif self.transcript_recorder is not None:
+            raise RuntimeError(
+                f"[B] submodule '{self.sub_name}' => proof-bound response missing q_delta."
+            )
+        else:
+            if remote_out is None:
+                raise RuntimeError(
+                    f"[B] submodule '{self.sub_name}' => no output from A."
+                )
+            out_t = torch.tensor(
+                remote_out, dtype=torch.float32, device=base_out.device
+            )
+            remote_out_for_record = remote_out
         if self.transcript_recorder is not None:
             self.transcript_recorder.record(
                 self.sub_name,
                 arr,
-                remote_out,
+                remote_out_for_record,
                 scaling_num=scaling_num,
                 scaling_den=scaling_den,
                 q_delta_values=q_delta,
@@ -275,7 +295,9 @@ def _canonical_rows(values):
 
 
 def _canonical_int_rows(values):
-    tensor = torch.as_tensor(_to_list(values), dtype=torch.int64)
+    values = _to_list(values)
+    _assert_exact_int_values(values)
+    tensor = torch.as_tensor(values, dtype=torch.int64)
     if tensor.numel() == 0:
         return []
     if tensor.ndim == 0:
@@ -288,6 +310,49 @@ def _canonical_int_rows(values):
     ]
 
 
+def _assert_exact_int_values(values):
+    """Raise ValueError unless values is an int or nested list/tuple of ints."""
+    if isinstance(values, (list, tuple)):
+        for item in values:
+            _assert_exact_int_values(item)
+    elif isinstance(values, bool):  # bool subclasses int, so reject it first
+        raise ValueError("q_delta values must be integers, not booleans")
+    elif not isinstance(values, int):
+        kind = type(values).__name__
+        raise ValueError(f"q_delta values must be integers, got {kind}")
+
+
+def _dequantize_q_delta(
+    q_delta_values,
+    fixed_point: FixedPointConfig,
+    target_shape: tuple[int, ...],
+    device,
+    dtype,
+):
+    """Convert integer q_delta rows to a float tensor shaped like target_shape.
+
+    Values are divided by fixed_point.scale. Raises RuntimeError if q_delta is
+    empty or is not laid out as [prod(target_shape[:-1]), target_shape[-1]].
+    """
+    q_delta_rows = _canonical_int_rows(q_delta_values)
+    if not q_delta_rows:
+        raise RuntimeError("Received empty q_delta for proof-bound LoRA response.")
+    q_delta = torch.tensor(q_delta_rows, dtype=torch.float64)
+    expected_rows = math.prod(target_shape[:-1]) if target_shape else 1
+    expected_cols = target_shape[-1] if target_shape else 1
+    # Matching [rows, cols] already implies numel == prod(target_shape).
+    if list(q_delta.shape) != [expected_rows, expected_cols]:
+        raise RuntimeError(
+            "q_delta shape does not match local module output rows: "
+            f"{list(q_delta.shape)} != {[expected_rows, expected_cols]}"
+        )
+    return (
+        (q_delta / float(fixed_point.scale))
+        .reshape(target_shape)
+        .to(device=device, dtype=dtype)
+    )
+
+
 class BaseModelClient:
     def __init__(
         self,
diff --git a/src/zklora/proof_contract.py b/src/zklora/proof_contract.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import hashlib
+import importlib
 import json
 import os
 import re
@@ -424,11 +425,19 @@ def _native_witness_json(witness: InvocationWitness) -> str:
 
 def _native_module():
     try:
-        from zklora import _native_prover  # type: ignore
-
-        return _native_prover
-    except Exception:
-        return None
+        return importlib.import_module("zklora._native_prover")
+    except ModuleNotFoundError as exc:
+        # Only the extension module itself being absent means "no native
+        # prover"; any other missing module is re-raised unchanged.
+        if exc.name == "zklora._native_prover":
+            return None
+        raise
+    except Exception as exc:
+        # Every other import failure (ImportError included) is wrapped in
+        # ProofContractError rather than reported as an absent prover.
+        raise ProofContractError(
+            f"failed to import native Halo2 prover: {exc}"
+        ) from exc
 
 
 def write_invocation_artifacts(
diff --git a/tests/test_multi_contributor.py b/tests/test_multi_contributor.py
diff --git a/tests/test_native_extension.py b/tests/test_native_extension.py