CBMC/CI: Bump disk space to 64G

hanno-becker · hanno-becker · commit 992d012e5d6c · 2026-05-19T09:02:19.000+01:00
Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
diff --git a/.github/actions/cbmc/report.py b/.github/actions/cbmc/report.py
@@ -99,6 +99,24 @@ def classify_proof(r, baseline_runtimes, cfg):
     base = baseline_runtimes.get((name, solver), {})
     base_val, base_failed = base.get("value"), base.get("status") == "failed"
     base_omitted = base.get("status") == "omitted"
+    base_inconclusive = base.get("status") == "inconclusive"
+
+    # Solver could not decide -- not a real failure, not a regression.
+    if r.get("status") == "inconclusive":
+        prev = (
+            f"{base_val}s" if base_val
+            else "failed" if base_failed
+            else "inconclusive" if base_inconclusive
+            else "omitted" if base_omitted
+            else "-"
+        )
+        # Was passing in the baseline, now inconclusive: surface as a warning.
+        if base_val is not None and not base_failed:
+            return (
+                ProofResult(name, solver, WARN, "?", prev, "inconclusive"),
+                True,
+            )
+        return ProofResult(name, solver, OK, "?", prev, "inconclusive"), False
 
     # Pair was intentionally not run.
     if r.get("status") == "omitted":
@@ -141,13 +159,14 @@ def classify_proof(r, baseline_runtimes, cfg):
 
 
 def compute_total_runtime(data):
-    """Compute total runtime from proof results, ignoring failed/omitted."""
+    """Compute total runtime from proof results, ignoring failed/omitted/inconclusive."""
     if not data:
         return None
     return sum(
         r["value"]
         for r in data.get("runtimes", [])
-        if r.get("status") not in ("failed", "omitted") and "value" in r
+        if r.get("status") not in ("failed", "omitted", "inconclusive")
+        and "value" in r
     )
 
 
diff --git a/.github/workflows/cbmc.yml b/.github/workflows/cbmc.yml
@@ -24,9 +24,9 @@ jobs:
     uses: ./.github/workflows/ci_ec2_reusable.yml
     with:
       name: CBMC (ML-DSA-${{ matrix.parameter_set }}${{ matrix.reduce_ram && ', REDUCE_RAM' || '' }})
-      ec2_instance_type: c7g.4xlarge
+      ec2_instance_type: r7g.4xlarge
       ec2_ami: ubuntu-latest (aarch64)
-      ec2_volume_size: 20
+      ec2_volume_size: 64
       compile_mode: native
       opt: no_opt
       lint: false
diff --git a/nix/cbmc/default.nix b/nix/cbmc/default.nix
@@ -37,8 +37,8 @@ buildEnv {
       });
 
       inherit
-        bitwuzla # 0.8.2
-        cvc5 # 1.3.2
+        bitwuzla# 0.8.2
+        cvc5# 1.3.2
         ninja; # 1.13.2
     };
 }
diff --git a/proofs/cbmc/lib/cvc5_arrays_exp b/proofs/cbmc/lib/cvc5_arrays_exp
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Copyright (c) The mldsa-native project authors
+# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
+
+# Enable the experimental array theory in cvc5. CBMC encodes parts of
+# the CPROVER library using STORE_ALL, which cvc5 only accepts under
+# --arrays-exp.
+#
+# `exec` is load-bearing: without it bash sits between cbmc and cvc5,
+# holds the stdout FD open after cvc5 closes it, and cbmc's reader
+# stops mid-stream and treats every property verdict as ERROR even
+# though cvc5 returned `unsat`.
+exec cvc5 --arrays-exp "$@"
diff --git a/proofs/cbmc/lib/summarize.py b/proofs/cbmc/lib/summarize.py
@@ -84,19 +84,55 @@ def _split_pipeline_name(pipeline_name):
     return proof_uid, solver
 
 
+# Marker emitted by cbmc when the SMT backend returned `unknown` on the
+# verification query. cbmc still exits non-zero (cprover-status: ERROR)
+# in this case, so the pipeline shows up as `fail` in litani; but no
+# property was actually refuted -- the solver simply could not decide.
+# We surface this as a distinct "Inconclusive" outcome.
+_SOLVER_UNKNOWN_MARKER = 'SMT2 solver returned "unknown"'
+
+
+def _is_solver_inconclusive(stdout_file):
+    """Return True iff the cbmc safety-check job's stdout-file (result.xml)
+    contains the cbmc message indicating the SMT backend returned `unknown`.
+    """
+    if not stdout_file:
+        return False
+    try:
+        with open(stdout_file, encoding="utf-8", errors="replace") as f:
+            return _SOLVER_UNKNOWN_MARKER in f.read()
+    except OSError:
+        return False
+
+
 def _parse_proof_pipeline(proof_pipeline):
     """Parse a single proof pipeline, returning
     (name, solver, status, duration, has_timeout)."""
     duration = 0
     has_timeout = False
+    inconclusive = False
     for stage in proof_pipeline["ci_stages"]:
         for job in stage["jobs"]:
             if job.get("timeout_reached", False):
                 has_timeout = True
             if "duration" in job:
                 duration += int(job["duration"])
-
-    status = "Timeout" if has_timeout else proof_pipeline["status"].title()
+            # Identify the safety-check job by its description suffix.
+            # Litani stores both description and stdout_file under
+            # wrapper_arguments (the args passed to `litani add-job`).
+            wa = job.get("wrapper_arguments") or {}
+            desc = wa.get("description") or ""
+            if desc.endswith(": checking safety properties") and _is_solver_inconclusive(
+                wa.get("stdout_file")
+            ):
+                inconclusive = True
+
+    if has_timeout:
+        status = "Timeout"
+    elif inconclusive:
+        status = "Inconclusive"
+    else:
+        status = proof_pipeline["status"].title()
     name, solver = _split_pipeline_name(proof_pipeline["name"])
     return name, solver, status, duration, has_timeout
 
@@ -172,11 +208,23 @@ def export_result_json(output_path, run_file, omitted_pairs=None):
     for name, solver, status, duration_str in proof_table[1:]:  # skip header
         is_success = status == "Success"
         is_omitted = status == "-"
+        is_inconclusive = status == "Inconclusive"
 
         if is_omitted:
             runtimes.append({"name": name, "solver": solver, "status": "omitted"})
             continue
 
+        if is_inconclusive:
+            runtimes.append(
+                {
+                    "name": name,
+                    "solver": solver,
+                    "status": "inconclusive",
+                    "duration": duration_str,
+                }
+            )
+            continue
+
         if not is_success:
             failures.append(
                 {
@@ -198,15 +246,17 @@ def export_result_json(output_path, run_file, omitted_pairs=None):
     failed = sum(1 for f in failures if f["status"] != "Timeout")
     timeout = sum(1 for f in failures if f["status"] == "Timeout")
     omitted = sum(1 for r in runtimes if r.get("status") == "omitted")
+    inconclusive = sum(1 for r in runtimes if r.get("status") == "inconclusive")
 
     result = {
         "mldsa_parameter_set": os.getenv("MLD_CONFIG_PARAMETER_SET", "unknown"),
         "summary": {
             "total": total,
-            "success": total - failed - timeout - omitted,
+            "success": total - failed - timeout - omitted - inconclusive,
             "failed": failed,
             "timeout": timeout,
             "omitted": omitted,
+            "inconclusive": inconclusive,
         },
         "failures": failures,
         "runtimes": runtimes,
@@ -244,16 +294,23 @@ def print_proof_results(out_file, omitted_pairs=None):
         "summarizing all proof results"
     )
 
-    # Check for timeouts by examining status table
-    has_timeout = any(row[0] == "Timeout" for row in status_table[1:])
-    has_failure = run_dict["status"] != "success"
+    # Check for timeouts and real failures. "Inconclusive" rows count as
+    # neither: the solver could not decide, but no property was refuted.
+    proof_statuses = [row[2] for row in proof_table[1:]]  # status column
+    has_timeout = any(s == "Timeout" for s in proof_statuses)
+    has_real_failure = any(s == "Fail" for s in proof_statuses)
+    has_inconclusive = any(s == "Inconclusive" for s in proof_statuses)
 
-    if has_timeout or has_failure:
+    if has_timeout or has_real_failure:
         logging.error("Not all proofs passed.")
         if has_timeout:
             logging.error("Some proofs timed out.")
         logging.error(msg)
         sys.exit(1)
+    if has_inconclusive:
+        logging.warning(
+            "Some (proof, solver) pairs were inconclusive (solver returned 'unknown')."
+        )
     logging.info(msg)
 
 
diff --git a/proofs/cbmc/run-cbmc-proofs.py b/proofs/cbmc/run-cbmc-proofs.py
@@ -199,6 +199,14 @@ def get_args():
                 "solvers in the canonical list (Z3, BITWUZLA, CVC5)"
             ),
         },
+        {
+            "flags": ["--default-solver-only"],
+            "action": "store_true",
+            "help": (
+                "for each harness, run only the solver named by its "
+                "CBMC_DEFAULT_SOLVER. Cannot be combined with --solver."
+            ),
+        },
     ]:
         flags = arg.pop("flags")
         pars.add_argument(*flags, **arg)
@@ -361,6 +369,25 @@ def read_solver_matrix(proof_dir):
     return out
 
 
+def read_default_solver(proof_dir):
+    """Return CBMC_DEFAULT_SOLVER for a per-harness Makefile."""
+    cmd = ["make", "--no-print-directory", "echo-default-solver"]
+    proc = subprocess.run(
+        cmd,
+        cwd=proof_dir,
+        universal_newlines=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        check=False,
+    )
+    if proc.returncode:
+        logging.critical(
+            "Could not read default solver from %s: %s", proof_dir, proc.stderr
+        )
+        sys.exit(1)
+    return proc.stdout.strip()
+
+
 def read_proof_uid(proof_dir):
     """Read PROOF_UID from a per-harness Makefile."""
     with (pathlib.Path(proof_dir) / "Makefile").open() as handle:
@@ -542,6 +569,10 @@ async def main():  # pylint: disable=too-many-locals
         logging.critical("No proof directories found")
         sys.exit(1)
 
+    if args.default_solver_only and args.solver:
+        logging.critical("--default-solver-only and --solver are mutually exclusive")
+        sys.exit(1)
+
     selected_solvers = args.solver if args.solver else list(ALL_SOLVERS)
     for s in selected_solvers:
         if s not in ALL_SOLVERS:
@@ -552,14 +583,21 @@ async def main():  # pylint: disable=too-many-locals
 
     # Enforce PROOF_UID uniqueness up-front, then expand each proof
     # directory into the Cartesian product with its solver matrix.
+    # When --default-solver-only is given, the per-harness solver list
+    # collapses to {CBMC_DEFAULT_SOLVER}; otherwise every solver in
+    # selected_solvers that the matrix declares enabled is used.
     proof_uids = {}
     pairs_to_run = []  # (proof_dir, solver)
     omitted_pairs = []  # (proof_uid, solver)
     for proof_dir in proof_dirs:
         check_uid_uniqueness(proof_dir, proof_uids)
         proof_uid = read_proof_uid(proof_dir)
         matrix = read_solver_matrix(proof_dir)
-        for solver in selected_solvers:
+        if args.default_solver_only:
+            per_harness_solvers = [read_default_solver(proof_dir)]
+        else:
+            per_harness_solvers = selected_solvers
+        for solver in per_harness_solvers:
             if matrix.get(solver):
                 pairs_to_run.append((proof_dir, solver))
             else:
diff --git a/scripts/tests b/scripts/tests
@@ -918,6 +918,15 @@ class Tests:
                 print(p)
             exit(0)
 
+        def solver_args():
+            args = []
+            if self.args.default_solver_only:
+                args.append("--default-solver-only")
+            if self.args.solver:
+                for s in self.args.solver:
+                    args += ["--solver", s]
+            return args
+
         def run_cbmc_single_step(mldsa_parameter_set, proofs):
             envvars = {"MLD_CONFIG_PARAMETER_SET": mldsa_parameter_set}
             if self.args.reduce_ram:
@@ -940,6 +949,7 @@ class Tests:
                             "-p",
                             func,
                         ]
+                        + solver_args()
                         + self.make_j(),
                         cwd="proofs/cbmc",
                         env=os.environ.copy() | envvars,
@@ -1004,6 +1014,7 @@ class Tests:
                     "-p",
                 ]
                 + proofs
+                + solver_args()
                 + self.make_j()
             )
             if self.args.output_result_json:
@@ -1478,6 +1489,28 @@ def cli():
         default=False,
     )
 
+    cbmc_parser.add_argument(
+        "--solver",
+        action="append",
+        help=(
+            "Restrict CBMC run to the given solver (repeatable). Forwarded "
+            "to run-cbmc-proofs.py --solver. Default: all solvers in the "
+            "canonical list (Z3, BITWUZLA, CVC5)."
+        ),
+        default=None,
+    )
+
+    cbmc_parser.add_argument(
+        "--default-solver-only",
+        help=(
+            "For each harness, run only the solver named by its "
+            "CBMC_DEFAULT_SOLVER. Forwarded to run-cbmc-proofs.py. "
+            "Cannot be combined with --solver."
+        ),
+        action="store_true",
+        default=False,
+    )
+
     # hol_light arguments
     hol_light_parser = cmd_subparsers.add_parser(
         "hol_light",