Skip to content

Commit 992d012

Browse files
committed
CBMC/CI: Bump disk space to 64G
Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
1 parent 7539831 commit 992d012

7 files changed

Lines changed: 174 additions & 14 deletions

File tree

.github/actions/cbmc/report.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,24 @@ def classify_proof(r, baseline_runtimes, cfg):
9999
base = baseline_runtimes.get((name, solver), {})
100100
base_val, base_failed = base.get("value"), base.get("status") == "failed"
101101
base_omitted = base.get("status") == "omitted"
102+
base_inconclusive = base.get("status") == "inconclusive"
103+
104+
# Solver could not decide -- not a real failure, not a regression.
105+
if r.get("status") == "inconclusive":
106+
prev = (
107+
f"{base_val}s" if base_val
108+
else "failed" if base_failed
109+
else "inconclusive" if base_inconclusive
110+
else "omitted" if base_omitted
111+
else "-"
112+
)
113+
# Was passing in the baseline, now inconclusive: surface as a warning.
114+
if base_val is not None and not base_failed:
115+
return (
116+
ProofResult(name, solver, WARN, "?", prev, "inconclusive"),
117+
True,
118+
)
119+
return ProofResult(name, solver, OK, "?", prev, "inconclusive"), False
102120

103121
# Pair was intentionally not run.
104122
if r.get("status") == "omitted":
@@ -141,13 +159,14 @@ def classify_proof(r, baseline_runtimes, cfg):
141159

142160

143161
def compute_total_runtime(data):
144-
"""Compute total runtime from proof results, ignoring failed/omitted."""
162+
"""Compute total runtime from proof results, ignoring failed/omitted/inconclusive."""
145163
if not data:
146164
return None
147165
return sum(
148166
r["value"]
149167
for r in data.get("runtimes", [])
150-
if r.get("status") not in ("failed", "omitted") and "value" in r
168+
if r.get("status") not in ("failed", "omitted", "inconclusive")
169+
and "value" in r
151170
)
152171

153172

.github/workflows/cbmc.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ jobs:
2424
uses: ./.github/workflows/ci_ec2_reusable.yml
2525
with:
2626
name: CBMC (ML-DSA-${{ matrix.parameter_set }}${{ matrix.reduce_ram && ', REDUCE_RAM' || '' }})
27-
ec2_instance_type: c7g.4xlarge
27+
ec2_instance_type: r7g.4xlarge
2828
ec2_ami: ubuntu-latest (aarch64)
29-
ec2_volume_size: 20
29+
ec2_volume_size: 64
3030
compile_mode: native
3131
opt: no_opt
3232
lint: false

nix/cbmc/default.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ buildEnv {
3737
});
3838

3939
inherit
40-
bitwuzla # 0.8.2
41-
cvc5 # 1.3.2
40+
bitwuzla# 0.8.2
41+
cvc5# 1.3.2
4242
ninja; # 1.13.2
4343
};
4444
}

proofs/cbmc/lib/cvc5_arrays_exp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!/usr/bin/env bash
2+
# Copyright (c) The mldsa-native project authors
3+
# SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT
4+
5+
# Enable the experimental array theory in cvc5. CBMC encodes parts of
6+
# the CPROVER library using STORE_ALL, which cvc5 only accepts under
7+
# --arrays-exp.
8+
#
9+
# `exec` is load-bearing: without it bash sits between cbmc and cvc5,
10+
# holds the stdout FD open after cvc5 closes it, and cbmc's reader
11+
# stops mid-stream and treats every property verdict as ERROR even
12+
# though cvc5 returned `unsat`.
13+
exec cvc5 --arrays-exp "$@"

proofs/cbmc/lib/summarize.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -84,19 +84,55 @@ def _split_pipeline_name(pipeline_name):
8484
return proof_uid, solver
8585

8686

87+
# Marker emitted by cbmc when the SMT backend returned `unknown` on the
88+
# verification query. cbmc still exits non-zero (cprover-status: ERROR)
89+
# in this case, so the pipeline shows up as `fail` in litani; but no
90+
# property was actually refuted -- the solver simply could not decide.
91+
# We surface this as a distinct "Inconclusive" outcome.
92+
_SOLVER_UNKNOWN_MARKER = 'SMT2 solver returned "unknown"'
93+
94+
95+
def _is_solver_inconclusive(stdout_file):
96+
"""Return True iff the cbmc safety-check job's stdout-file (result.xml)
97+
contains the cbmc message indicating the SMT backend returned `unknown`.
98+
"""
99+
if not stdout_file:
100+
return False
101+
try:
102+
with open(stdout_file, encoding="utf-8", errors="replace") as f:
103+
return _SOLVER_UNKNOWN_MARKER in f.read()
104+
except OSError:
105+
return False
106+
107+
87108
def _parse_proof_pipeline(proof_pipeline):
88109
"""Parse a single proof pipeline, returning
89110
(name, solver, status, duration, has_timeout)."""
90111
duration = 0
91112
has_timeout = False
113+
inconclusive = False
92114
for stage in proof_pipeline["ci_stages"]:
93115
for job in stage["jobs"]:
94116
if job.get("timeout_reached", False):
95117
has_timeout = True
96118
if "duration" in job:
97119
duration += int(job["duration"])
98-
99-
status = "Timeout" if has_timeout else proof_pipeline["status"].title()
120+
# Identify the safety-check job by its description suffix.
121+
# Litani stores both description and stdout_file under
122+
# wrapper_arguments (the args passed to `litani add-job`).
123+
wa = job.get("wrapper_arguments") or {}
124+
desc = wa.get("description") or ""
125+
if desc.endswith(": checking safety properties") and _is_solver_inconclusive(
126+
wa.get("stdout_file")
127+
):
128+
inconclusive = True
129+
130+
if has_timeout:
131+
status = "Timeout"
132+
elif inconclusive:
133+
status = "Inconclusive"
134+
else:
135+
status = proof_pipeline["status"].title()
100136
name, solver = _split_pipeline_name(proof_pipeline["name"])
101137
return name, solver, status, duration, has_timeout
102138

@@ -172,11 +208,23 @@ def export_result_json(output_path, run_file, omitted_pairs=None):
172208
for name, solver, status, duration_str in proof_table[1:]: # skip header
173209
is_success = status == "Success"
174210
is_omitted = status == "-"
211+
is_inconclusive = status == "Inconclusive"
175212

176213
if is_omitted:
177214
runtimes.append({"name": name, "solver": solver, "status": "omitted"})
178215
continue
179216

217+
if is_inconclusive:
218+
runtimes.append(
219+
{
220+
"name": name,
221+
"solver": solver,
222+
"status": "inconclusive",
223+
"duration": duration_str,
224+
}
225+
)
226+
continue
227+
180228
if not is_success:
181229
failures.append(
182230
{
@@ -198,15 +246,17 @@ def export_result_json(output_path, run_file, omitted_pairs=None):
198246
failed = sum(1 for f in failures if f["status"] != "Timeout")
199247
timeout = sum(1 for f in failures if f["status"] == "Timeout")
200248
omitted = sum(1 for r in runtimes if r.get("status") == "omitted")
249+
inconclusive = sum(1 for r in runtimes if r.get("status") == "inconclusive")
201250

202251
result = {
203252
"mldsa_parameter_set": os.getenv("MLD_CONFIG_PARAMETER_SET", "unknown"),
204253
"summary": {
205254
"total": total,
206-
"success": total - failed - timeout - omitted,
255+
"success": total - failed - timeout - omitted - inconclusive,
207256
"failed": failed,
208257
"timeout": timeout,
209258
"omitted": omitted,
259+
"inconclusive": inconclusive,
210260
},
211261
"failures": failures,
212262
"runtimes": runtimes,
@@ -244,16 +294,23 @@ def print_proof_results(out_file, omitted_pairs=None):
244294
"summarizing all proof results"
245295
)
246296

247-
# Check for timeouts by examining status table
248-
has_timeout = any(row[0] == "Timeout" for row in status_table[1:])
249-
has_failure = run_dict["status"] != "success"
297+
# Check for timeouts and real failures. "Inconclusive" rows count as
298+
# neither: the solver could not decide, but no property was refuted.
299+
proof_statuses = [row[2] for row in proof_table[1:]] # status column
300+
has_timeout = any(s == "Timeout" for s in proof_statuses)
301+
has_real_failure = any(s == "Fail" for s in proof_statuses)
302+
has_inconclusive = any(s == "Inconclusive" for s in proof_statuses)
250303

251-
if has_timeout or has_failure:
304+
if has_timeout or has_real_failure:
252305
logging.error("Not all proofs passed.")
253306
if has_timeout:
254307
logging.error("Some proofs timed out.")
255308
logging.error(msg)
256309
sys.exit(1)
310+
if has_inconclusive:
311+
logging.warning(
312+
"Some (proof, solver) pairs were inconclusive (solver returned 'unknown')."
313+
)
257314
logging.info(msg)
258315

259316

proofs/cbmc/run-cbmc-proofs.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,14 @@ def get_args():
199199
"solvers in the canonical list (Z3, BITWUZLA, CVC5)"
200200
),
201201
},
202+
{
203+
"flags": ["--default-solver-only"],
204+
"action": "store_true",
205+
"help": (
206+
"for each harness, run only the solver named by its "
207+
"CBMC_DEFAULT_SOLVER. Cannot be combined with --solver."
208+
),
209+
},
202210
]:
203211
flags = arg.pop("flags")
204212
pars.add_argument(*flags, **arg)
@@ -361,6 +369,25 @@ def read_solver_matrix(proof_dir):
361369
return out
362370

363371

372+
def read_default_solver(proof_dir):
373+
"""Return CBMC_DEFAULT_SOLVER for a per-harness Makefile."""
374+
cmd = ["make", "--no-print-directory", "echo-default-solver"]
375+
proc = subprocess.run(
376+
cmd,
377+
cwd=proof_dir,
378+
universal_newlines=True,
379+
stdout=subprocess.PIPE,
380+
stderr=subprocess.PIPE,
381+
check=False,
382+
)
383+
if proc.returncode:
384+
logging.critical(
385+
"Could not read default solver from %s: %s", proof_dir, proc.stderr
386+
)
387+
sys.exit(1)
388+
return proc.stdout.strip()
389+
390+
364391
def read_proof_uid(proof_dir):
365392
"""Read PROOF_UID from a per-harness Makefile."""
366393
with (pathlib.Path(proof_dir) / "Makefile").open() as handle:
@@ -542,6 +569,10 @@ async def main(): # pylint: disable=too-many-locals
542569
logging.critical("No proof directories found")
543570
sys.exit(1)
544571

572+
if args.default_solver_only and args.solver:
573+
logging.critical("--default-solver-only and --solver are mutually exclusive")
574+
sys.exit(1)
575+
545576
selected_solvers = args.solver if args.solver else list(ALL_SOLVERS)
546577
for s in selected_solvers:
547578
if s not in ALL_SOLVERS:
@@ -552,14 +583,21 @@ async def main(): # pylint: disable=too-many-locals
552583

553584
# Enforce PROOF_UID uniqueness up-front, then expand each proof
554585
# directory into the Cartesian product with its solver matrix.
586+
# When --default-solver-only is given, the per-harness solver list
587+
# collapses to {CBMC_DEFAULT_SOLVER}; otherwise every solver in
588+
# selected_solvers that the matrix declares enabled is used.
555589
proof_uids = {}
556590
pairs_to_run = [] # (proof_dir, solver)
557591
omitted_pairs = [] # (proof_uid, solver)
558592
for proof_dir in proof_dirs:
559593
check_uid_uniqueness(proof_dir, proof_uids)
560594
proof_uid = read_proof_uid(proof_dir)
561595
matrix = read_solver_matrix(proof_dir)
562-
for solver in selected_solvers:
596+
if args.default_solver_only:
597+
per_harness_solvers = [read_default_solver(proof_dir)]
598+
else:
599+
per_harness_solvers = selected_solvers
600+
for solver in per_harness_solvers:
563601
if matrix.get(solver):
564602
pairs_to_run.append((proof_dir, solver))
565603
else:

scripts/tests

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -918,6 +918,15 @@ class Tests:
918918
print(p)
919919
exit(0)
920920

921+
def solver_args():
922+
args = []
923+
if self.args.default_solver_only:
924+
args.append("--default-solver-only")
925+
if self.args.solver:
926+
for s in self.args.solver:
927+
args += ["--solver", s]
928+
return args
929+
921930
def run_cbmc_single_step(mldsa_parameter_set, proofs):
922931
envvars = {"MLD_CONFIG_PARAMETER_SET": mldsa_parameter_set}
923932
if self.args.reduce_ram:
@@ -940,6 +949,7 @@ class Tests:
940949
"-p",
941950
func,
942951
]
952+
+ solver_args()
943953
+ self.make_j(),
944954
cwd="proofs/cbmc",
945955
env=os.environ.copy() | envvars,
@@ -1004,6 +1014,7 @@ class Tests:
10041014
"-p",
10051015
]
10061016
+ proofs
1017+
+ solver_args()
10071018
+ self.make_j()
10081019
)
10091020
if self.args.output_result_json:
@@ -1478,6 +1489,28 @@ def cli():
14781489
default=False,
14791490
)
14801491

1492+
cbmc_parser.add_argument(
1493+
"--solver",
1494+
action="append",
1495+
help=(
1496+
"Restrict CBMC run to the given solver (repeatable). Forwarded "
1497+
"to run-cbmc-proofs.py --solver. Default: all solvers in the "
1498+
"canonical list (Z3, BITWUZLA, CVC5)."
1499+
),
1500+
default=None,
1501+
)
1502+
1503+
cbmc_parser.add_argument(
1504+
"--default-solver-only",
1505+
help=(
1506+
"For each harness, run only the solver named by its "
1507+
"CBMC_DEFAULT_SOLVER. Forwarded to run-cbmc-proofs.py. "
1508+
"Cannot be combined with --solver."
1509+
),
1510+
action="store_true",
1511+
default=False,
1512+
)
1513+
14811514
# hol_light arguments
14821515
hol_light_parser = cmd_subparsers.add_parser(
14831516
"hol_light",

0 commit comments

Comments
 (0)