diff --git a/doc/man/cbmc.1 b/doc/man/cbmc.1 index 0e454caf4c1..ee00086194d 100644 --- a/doc/man/cbmc.1 +++ b/doc/man/cbmc.1 @@ -560,6 +560,9 @@ output smt incremental formula to the given file \fB\-\-write\-solver\-stats\-to\fR json\-file collect the solver query complexity .TP +\fB\-\-pointer\-encoding\-via\-maps\fR +use map-based pointer encoding instead of bit-packing +.TP \fB\-\-refine\-strings\fR use string refinement (experimental) .TP diff --git a/doc/man/goto-synthesizer.1 b/doc/man/goto-synthesizer.1 index 1b2b44ed343..dfb6132e2bc 100644 --- a/doc/man/goto-synthesizer.1 +++ b/doc/man/goto-synthesizer.1 @@ -126,6 +126,9 @@ output smt incremental formula to the given file \fB\-\-write\-solver\-stats\-to\fR json\-file collect the solver query complexity .TP +\fB\-\-pointer\-encoding\-via\-maps\fR +use map-based pointer encoding instead of bit-packing +.TP \fB\-\-arrays\-uf\-never\fR never turn arrays into uninterpreted functions .TP diff --git a/doc/man/jbmc.1 b/doc/man/jbmc.1 index 05f70b7805b..89823902488 100644 --- a/doc/man/jbmc.1 +++ b/doc/man/jbmc.1 @@ -464,6 +464,9 @@ output smt incremental formula to the given file \fB\-\-write\-solver\-stats\-to\fR \fIjson\-file\fR collect the solver query complexity .TP +\fB\-\-pointer\-encoding\-via\-maps\fR +use map-based pointer encoding instead of bit-packing +.TP \fB\-\-no\-refine\-strings\fR turn off string refinement .TP diff --git a/regression/cbmc/Array_UF23/main.c b/regression/cbmc/Array_UF23/main.c new file mode 100644 index 00000000000..77b15f695f7 --- /dev/null +++ b/regression/cbmc/Array_UF23/main.c @@ -0,0 +1,21 @@ +/// \file +/// Test that Ackermann constraints are reduced by the weak equivalence +/// optimisation: derived arrays (with, if, etc.) do not need Ackermann +/// constraints because they are implied by the with/if constraints plus +/// Ackermann on base arrays. +#include +int main() +{ + size_t array_size; + int a[array_size]; + int i0, i1, i2, i3, i4; + + a[i0] = 0; + a[i1] = 1; + a[i2] = 2; + a[i3] = 3; + a[i4] = 4; + + __CPROVER_assert(a[i0] >= 0, "a[i0] >= 0"); + return 0; +} diff --git a/regression/cbmc/Array_UF23/test.desc b/regression/cbmc/Array_UF23/test.desc new file mode 100644 index 00000000000..37d9e984f8d --- /dev/null +++ b/regression/cbmc/Array_UF23/test.desc @@ -0,0 +1,13 @@ +KNOWNBUG broken-refine-arrays broken-smt-backend no-new-smt +main.c +--arrays-uf-always --unwind 1 --show-array-constraints --json-ui +"arrayAckermann": 60 +^EXIT=10$ +^SIGNAL=0$ +-- +"arrayAckermann": 110 +-- +Checks that the weak equivalence optimisation reduces Ackermann constraints. +With 5 stores to the same unbounded array, the old code produced 110 Ackermann +constraints (one per pair of indices per array term). The optimised code skips +derived arrays (with, if, etc.) and produces only 60. diff --git a/regression/cbmc/Array_operations2/test-arrays-uf-always.desc b/regression/cbmc/Array_operations2/test-arrays-uf-always.desc new file mode 100644 index 00000000000..71ebea4f13c --- /dev/null +++ b/regression/cbmc/Array_operations2/test-arrays-uf-always.desc @@ -0,0 +1,36 @@ +KNOWNBUG broken-cprover-smt-backend no-new-smt +main.c +--arrays-uf-always +^\[main.assertion.12\] line 34 assertion arr\[1\].c\[2\] == 0: FAILURE$ +^\[main.assertion.13\] line 35 assertion arr\[1\].c\[3\] == 0: FAILURE$ +^\[main.assertion.14\] line 36 assertion arr\[1\].c\[4\] == 0: FAILURE$ +^\*\* 3 of 21 failed +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +With --arrays-uf-always the array theory's union-find incorrectly conflates +structurally different arrays that happen to have equal initial values. + +The SSA equations for the zero-initialised struct array produce: + arr#2[0].c = {0,0,0,0,0} + arr#2[1].c = {0,0,0,0,0} + arr#2[2].c = {0,0,0,0,0} + +Because all three equalities share the same RHS literal {0,0,0,0,0}, +record_array_equality transitively unifies arr#2[0].c, arr#2[1].c, and +arr#2[2].c into a single equivalence class. After __CPROVER_array_replace +modifies arr (producing arr#3 via byte_update), the array theory cannot +distinguish the three .c sub-arrays: constraints meant for arr#3[1].c +(the replaced region) bleed into arr#3[0].c and arr#3[2].c, causing +spurious failures on assertions 1-11 and 15-21. + +Without --arrays-uf-always the char[5] sub-arrays are small enough to be +flattened to bitvectors, bypassing the array decision procedure entirely, +so the test passes. + +Fixing this requires the union-find to distinguish "structurally same +array" from "arrays with equal values", which is a fundamental change to +the Nelson-Oppen style array decision procedure as used in CBMC. diff --git a/regression/cbmc/Array_operations2/test.desc b/regression/cbmc/Array_operations2/test.desc index 559e087cce9..652d05a691c 100644 --- a/regression/cbmc/Array_operations2/test.desc +++ b/regression/cbmc/Array_operations2/test.desc @@ -1,4 +1,4 @@ -CORE broken-cprover-smt-backend no-new-smt +CORE broken-cprover-smt-backend no-new-smt broken-arrays-uf-always main.c ^\[main.assertion.12\] line 34 assertion arr\[1\].c\[2\] == 0: FAILURE$ diff --git a/regression/cbmc/CMakeLists.txt b/regression/cbmc/CMakeLists.txt index 82bb6a06236..cdcb9a42f99 100644 --- a/regression/cbmc/CMakeLists.txt +++ b/regression/cbmc/CMakeLists.txt @@ -32,6 +32,20 @@ add_test_pl_profile( "CORE" ) +add_test_pl_profile( + "cbmc-arrays-uf-always" + "$ --arrays-uf-always" + "-C;-X;broken-arrays-uf-always;-X;thorough-arrays-uf-always;-X;smt-backend;${gcc_only_string}-s;arrays-uf-always" + "CORE" +) + +add_test_pl_profile( + "cbmc-arrays-uf-always-refine-arrays" + "$ --arrays-uf-always --refine-arrays" + "-C;-X;broken-arrays-uf-always;-X;thorough-arrays-uf-always;-X;broken-refine-arrays;-X;smt-backend;${gcc_only_string}-s;arrays-uf-always-refine-arrays" + "CORE" +) + # solver appears on the PATH in Windows already if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows") set_property( diff --git a/regression/cbmc/Pointer_array6/test-pointer-encoding-via-maps.desc b/regression/cbmc/Pointer_array6/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..bddcecb568a --- /dev/null +++ b/regression/cbmc/Pointer_array6/test-pointer-encoding-via-maps.desc @@ -0,0 +1,13 @@ +KNOWNBUG pointer-encoding-via-maps +main.c +--no-malloc-may-fail +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Map-based pointer encoding does not correctly handle byte-level +round-trips through pointer arrays. The misaligned byte_extract +and byte_update operations produce incorrect values because the +map encoding stores pointer indices rather than flat addresses. diff --git a/regression/cbmc/Pointer_array6/test.desc b/regression/cbmc/Pointer_array6/test.desc index 915afae768a..e40c5a5c781 100644 --- a/regression/cbmc/Pointer_array6/test.desc +++ b/regression/cbmc/Pointer_array6/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-pointer-encoding-via-maps main.c --no-malloc-may-fail ^EXIT=0$ diff --git a/regression/cbmc/Pointer_difference2/test-pointer-encoding-via-maps.desc b/regression/cbmc/Pointer_difference2/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..f9380beb04f --- /dev/null +++ b/regression/cbmc/Pointer_difference2/test-pointer-encoding-via-maps.desc @@ -0,0 +1,19 @@ +CORE broken-smt-backend no-new-smt pointer-encoding-via-maps +main.c + +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.1\] line 6 correct: SUCCESS +^\[main.pointer.1\] line 8 same object violation in array - other_array: FAILURE$ +^\[main.assertion.3\] line 13 undefined behavior: FAILURE$ +^\[main.assertion.8\] line 28 end plus 2 is nondet: (UNKNOWN|FAILURE)$ +^\[main.pointer_arithmetic.\d+\] line 28 pointer relation: pointer outside object bounds in p: FAILURE$ +^\*\* 5 of \d+ failed +^VERIFICATION FAILED$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +Map-based pointer encoding variant: fewer overflow failures because +base addresses are constrained to the positive 32-bit range, so +pointer-to-integer arithmetic doesn't overflow. diff --git a/regression/cbmc/Pointer_difference2/test.desc b/regression/cbmc/Pointer_difference2/test.desc index 91119db20b3..263d3fbba0e 100644 --- a/regression/cbmc/Pointer_difference2/test.desc +++ b/regression/cbmc/Pointer_difference2/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend no-new-smt +CORE broken-smt-backend no-new-smt no-pointer-encoding-via-maps main.c ^\[main.assertion.1\] line 6 correct: SUCCESS diff --git a/regression/cbmc/String_Abstraction7/test-pointer-encoding-via-maps.desc b/regression/cbmc/String_Abstraction7/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..fc4aa3b0255 --- /dev/null +++ b/regression/cbmc/String_Abstraction7/test-pointer-encoding-via-maps.desc @@ -0,0 +1,12 @@ +KNOWNBUG no-new-smt pointer-encoding-via-maps +main.c +--string-abstraction +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Map-based pointer encoding produces VERIFICATION ERROR with +string abstraction. The string abstraction pass creates pointer +expressions that the map encoding does not handle correctly. diff --git a/regression/cbmc/String_Abstraction7/test.desc b/regression/cbmc/String_Abstraction7/test.desc index 500d141fdec..fb4f7457090 100644 --- a/regression/cbmc/String_Abstraction7/test.desc +++ b/regression/cbmc/String_Abstraction7/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE no-new-smt no-pointer-encoding-via-maps main.c --string-abstraction ^EXIT=0$ diff --git a/regression/cbmc/address_space_size_limit1/test-pointer-encoding-via-maps.desc b/regression/cbmc/address_space_size_limit1/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..1a0606a86ed --- /dev/null +++ b/regression/cbmc/address_space_size_limit1/test-pointer-encoding-via-maps.desc @@ -0,0 +1,11 @@ +KNOWNBUG thorough-paths no-new-smt pointer-encoding-via-maps +test.c +--no-simplify --unwind 300 --object-bits 8 +^EXIT=6$ +^SIGNAL=0$ +too many addressed objects +-- +-- +Map-based pointer encoding does not enforce the object-bits limit +in the same way as the standard encoding, so the "too many +addressed objects" error is not triggered. diff --git a/regression/cbmc/address_space_size_limit1/test.desc b/regression/cbmc/address_space_size_limit1/test.desc index 0794e0fe6a1..635add4f287 100644 --- a/regression/cbmc/address_space_size_limit1/test.desc +++ b/regression/cbmc/address_space_size_limit1/test.desc @@ -1,4 +1,4 @@ -CORE thorough-paths no-new-smt +CORE thorough-paths no-new-smt no-pointer-encoding-via-maps test.c --no-simplify --unwind 300 --object-bits 8 too many addressed objects diff --git a/regression/cbmc/argc-and-argv/argv1.desc b/regression/cbmc/argc-and-argv/argv1.desc index 47303fff42a..c680cf15876 100644 --- a/regression/cbmc/argc-and-argv/argv1.desc +++ b/regression/cbmc/argc-and-argv/argv1.desc @@ -1,4 +1,4 @@ -CORE +CORE no-pointer-encoding-via-maps argv1.c ^EXIT=0$ diff --git a/regression/cbmc/argc-and-argv/test-pointer-encoding-via-maps.desc b/regression/cbmc/argc-and-argv/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..47b318d80f5 --- /dev/null +++ b/regression/cbmc/argc-and-argv/test-pointer-encoding-via-maps.desc @@ -0,0 +1,13 @@ +KNOWNBUG pointer-encoding-via-maps +argv1.c + +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Map-based pointer encoding does not correctly propagate +argv[argc]==NULL through the array theory. The with-constraints +fix for SSA-renamed indices is present but insufficient for the +map encoding's pointer equality path. diff --git a/regression/cbmc/array_of_bool_as_bitvec/test-smt2-outfile.desc b/regression/cbmc/array_of_bool_as_bitvec/test-smt2-outfile.desc index d757d3e38af..67ab82dbe4d 100644 --- a/regression/cbmc/array_of_bool_as_bitvec/test-smt2-outfile.desc +++ b/regression/cbmc/array_of_bool_as_bitvec/test-smt2-outfile.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend no-new-smt +CORE broken-smt-backend no-new-smt broken-refine-arrays main.c --no-malloc-may-fail --smt2 --outfile - \(= \(select array_of\.0 i\) \(ite false #b1 #b0\)\) diff --git a/regression/cbmc/arrays-uf-always-large-struct-soundness/main.c b/regression/cbmc/arrays-uf-always-large-struct-soundness/main.c new file mode 100644 index 00000000000..0ba8d547bbf --- /dev/null +++ b/regression/cbmc/arrays-uf-always-large-struct-soundness/main.c @@ -0,0 +1,16 @@ +struct S +{ + int d[65]; +}; + +unsigned nondet_unsigned(void); + +int main() +{ + struct S a[2]; + a[0].d[0] = 1; + a[1].d[0] = 1; + unsigned i = nondet_unsigned(); + __CPROVER_assume(i < 2); + __CPROVER_assert(a[i].d[0] == 1, ""); +} diff --git a/regression/cbmc/arrays-uf-always-large-struct-soundness/test.desc b/regression/cbmc/arrays-uf-always-large-struct-soundness/test.desc new file mode 100644 index 00000000000..6fa36be2289 --- /dev/null +++ b/regression/cbmc/arrays-uf-always-large-struct-soundness/test.desc @@ -0,0 +1,14 @@ +CORE +main.c +--arrays-uf-always --no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +With --arrays-uf-always, indexing an array of structs containing array members +with 65+ elements through a nondeterministic index must produce correct results. +The bitvector solver connects the array symbol's map literals to the array +theory's element-wise constraints so that struct-level equalities properly +constrain the array elements. diff --git a/regression/cbmc/arrays-uf-always-member-crash/main.c b/regression/cbmc/arrays-uf-always-member-crash/main.c new file mode 100644 index 00000000000..533c2858f52 --- /dev/null +++ b/regression/cbmc/arrays-uf-always-member-crash/main.c @@ -0,0 +1,12 @@ +struct S +{ + int a[1]; +}; + +int main() +{ + struct S x[2]; + int i; + __CPROVER_assume(i >= 0 && i < 2); + __CPROVER_assert(x[i].a[0] == 0, ""); +} diff --git a/regression/cbmc/arrays-uf-always-member-crash/test.desc b/regression/cbmc/arrays-uf-always-member-crash/test.desc new file mode 100644 index 00000000000..c941d5374d1 --- /dev/null +++ b/regression/cbmc/arrays-uf-always-member-crash/test.desc @@ -0,0 +1,12 @@ +CORE +main.c +--arrays-uf-always --no-standard-checks +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Verify that --arrays-uf-always handles array-of-structs containing array +members accessed through a nondet index. The arrays theory must accept member +expressions where the struct operand is an index expression. diff --git a/regression/cbmc/arrays-uf-always-member-soundness/main.c b/regression/cbmc/arrays-uf-always-member-soundness/main.c new file mode 100644 index 00000000000..3e1913441d3 --- /dev/null +++ b/regression/cbmc/arrays-uf-always-member-soundness/main.c @@ -0,0 +1,16 @@ +struct S +{ + int d[1]; +}; + +int nondet_int(void); + +int main() +{ + struct S a[2]; + a[0].d[0] = 1; + a[1].d[0] = 1; + int i = nondet_int(); + __CPROVER_assume(i == 0 || i == 1); + __CPROVER_assert(a[i].d[0] == 1, ""); +} diff --git a/regression/cbmc/arrays-uf-always-member-soundness/test.desc b/regression/cbmc/arrays-uf-always-member-soundness/test.desc new file mode 100644 index 00000000000..6d7452abfd6 --- /dev/null +++ b/regression/cbmc/arrays-uf-always-member-soundness/test.desc @@ -0,0 +1,13 @@ +CORE +main.c +--arrays-uf-always --no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +With --arrays-uf-always, indexing an array of structs that contain array members +through a nondeterministic index must produce correct results. The bitvector +solver handles member expressions with non-symbol struct operands (e.g., +member(index(outer_array, i), field)) directly, bypassing the array theory. diff --git a/regression/cbmc/bounds_check1/refine-arrays.desc b/regression/cbmc/bounds_check1/refine-arrays.desc new file mode 100644 index 00000000000..8e8f1c5175d --- /dev/null +++ b/regression/cbmc/bounds_check1/refine-arrays.desc @@ -0,0 +1,18 @@ +CORE thorough-smt-backend no-new-smt +main.c +--no-malloc-may-fail --arrays-uf-always --refine-arrays +^EXIT=10$ +^SIGNAL=0$ +\[\(.*\)i2\]: FAILURE +dest\[\(.*\)j2\]: FAILURE +payload\[\(.*\)[kl]2\]: FAILURE +\*\* 7 of [0-9]+ failed +-- +^warning: ignoring +\[\(.*\)i\]: FAILURE +dest\[\(.*\)j\]: FAILURE +payload\[\(.*\)[kl]\]: FAILURE +-- +Tests that --refine-arrays with --arrays-uf-always produces correct results +and completes quickly. Without --refine-arrays this test takes ~85 seconds; +with --refine-arrays the lazy with/if/Ackermann constraints reduce it to ~2s. diff --git a/regression/cbmc/bounds_check1/test.desc b/regression/cbmc/bounds_check1/test.desc index ce90426706c..79ccfbd95cb 100644 --- a/regression/cbmc/bounds_check1/test.desc +++ b/regression/cbmc/bounds_check1/test.desc @@ -1,4 +1,4 @@ -CORE thorough-smt-backend no-new-smt +CORE thorough-smt-backend no-new-smt thorough-arrays-uf-always main.c --no-malloc-may-fail ^EXIT=10$ diff --git a/regression/cbmc/memory_allocation1/test-pointer-encoding-via-maps.desc b/regression/cbmc/memory_allocation1/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..b3adf2172fb --- /dev/null +++ b/regression/cbmc/memory_allocation1/test-pointer-encoding-via-maps.desc @@ -0,0 +1,13 @@ +KNOWNBUG pointer-encoding-via-maps +main.c + +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Map-based pointer encoding does not correctly model +__CPROVER_allocated_memory or integer-address pointer +dereferences, causing different failures than the standard +encoding. diff --git a/regression/cbmc/memory_allocation1/test.desc b/regression/cbmc/memory_allocation1/test.desc index ffa7f87988a..e7de3087bd5 100644 --- a/regression/cbmc/memory_allocation1/test.desc +++ b/regression/cbmc/memory_allocation1/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE broken-smt-backend no-pointer-encoding-via-maps main.c ^EXIT=10$ diff --git a/regression/cbmc/mm_io1/test-pointer-encoding-via-maps.desc b/regression/cbmc/mm_io1/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..58be4d6d663 --- /dev/null +++ b/regression/cbmc/mm_io1/test-pointer-encoding-via-maps.desc @@ -0,0 +1,11 @@ +KNOWNBUG pointer-encoding-via-maps +main.c +--no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +Map-based pointer encoding does not correctly handle memory-mapped +I/O pointer dispatch, causing assertion failures for reads from +fixed addresses. diff --git a/regression/cbmc/mm_io1/test.desc b/regression/cbmc/mm_io1/test.desc index bfb5c9e422c..ee4226c38bd 100644 --- a/regression/cbmc/mm_io1/test.desc +++ b/regression/cbmc/mm_io1/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-pointer-encoding-via-maps main.c --no-standard-checks ^Replaced MMIO operations: 3 read\(s\), 1 write\(s\) diff --git a/regression/cbmc/pointer-encoding-via-maps/main.c b/regression/cbmc/pointer-encoding-via-maps/main.c new file mode 100644 index 00000000000..1a513c9bc90 --- /dev/null +++ b/regression/cbmc/pointer-encoding-via-maps/main.c @@ -0,0 +1,23 @@ +#include + +int main() +{ + int x = 1; + int y = 2; + int *p = &x; + int *q = &y; + + // address-of and pointer comparison + assert(p == &x); + assert(q == &y); + assert(p != q); + + // pointer arithmetic + int arr[3] = {10, 20, 30}; + int *r = arr; + assert(*r == 10); + assert(*(r + 1) == 20); + assert(*(r + 2) == 30); + + return 0; +} diff --git a/regression/cbmc/pointer-encoding-via-maps/test.desc b/regression/cbmc/pointer-encoding-via-maps/test.desc new file mode 100644 index 00000000000..ce13dc97954 --- /dev/null +++ b/regression/cbmc/pointer-encoding-via-maps/test.desc @@ -0,0 +1,6 @@ +CORE +main.c +--pointer-encoding-via-maps +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ diff --git a/regression/cbmc/r_w_ok10/test-pointer-encoding-via-maps.desc b/regression/cbmc/r_w_ok10/test-pointer-encoding-via-maps.desc new file mode 100644 index 00000000000..fc22a613549 --- /dev/null +++ b/regression/cbmc/r_w_ok10/test-pointer-encoding-via-maps.desc @@ -0,0 +1,19 @@ +CORE pointer-encoding-via-maps +main.c +--no-malloc-may-fail +^EXIT=10$ +^SIGNAL=0$ +^\[main.pointer_primitives.\d+\] line 7 pointer invalid in R_OK\(p1, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 7 pointer outside object bounds in R_OK\(p1, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 11 pointer outside object bounds in R_OK\(p2, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 15 pointer outside object bounds in R_OK\(p3, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 20 pointer outside object bounds in R_OK\(p4, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 34 dead object in R_OK\(p6, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 40 deallocated dynamic object in R_OK\(p7, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\*\* [89] of \d+ failed +-- +^warning: ignoring +-- +Map-based pointer encoding variant: the invalid pointer constant +(1 << 56) is treated as a valid integer-address object, so the +"pointer invalid" check for p2 passes instead of failing. diff --git a/regression/cbmc/r_w_ok10/test.desc b/regression/cbmc/r_w_ok10/test.desc index 7fd4ebb3aa8..45174660c49 100644 --- a/regression/cbmc/r_w_ok10/test.desc +++ b/regression/cbmc/r_w_ok10/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-pointer-encoding-via-maps main.c --no-malloc-may-fail ^EXIT=10$ diff --git a/regression/cbmc/trace-values/trace-values.desc b/regression/cbmc/trace-values/trace-values.desc index 98d0d6e9ce4..5b1d316e798 100644 --- a/regression/cbmc/trace-values/trace-values.desc +++ b/regression/cbmc/trace-values/trace-values.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE no-new-smt broken-arrays-uf-always trace-values.c --no-malloc-may-fail --trace ^EXIT=10$ diff --git a/src/goto-checker/solver_factory.cpp b/src/goto-checker/solver_factory.cpp index 18dac69c636..4a8ec9202bc 100644 --- a/src/goto-checker/solver_factory.cpp +++ b/src/goto-checker/solver_factory.cpp @@ -21,6 +21,7 @@ Author: Daniel Kroening, Peter Schrammel #include #include +#include #include #include #include @@ -215,7 +216,8 @@ get_sat_solver(message_handlert &message_handler, const optionst &options) { const bool no_simplifier = options.get_bool_option("beautify") || !options.get_bool_option("sat-preprocessor") || - options.get_bool_option("refine-strings"); + options.get_bool_option("refine-strings") || + options.get_bool_option("refine-arrays"); if(options.is_set("sat-solver")) { @@ -362,18 +364,28 @@ std::unique_ptr solver_factoryt::get_default() bool get_array_constraints = options.get_bool_option("show-array-constraints"); - auto bv_pointers = std::make_unique( - ns, *sat_solver, message_handler, get_array_constraints); + + std::unique_ptr bv_pointers; + if(options.get_bool_option("pointer-encoding-via-maps")) + { + bv_pointers = std::make_unique( + ns, *sat_solver, message_handler, get_array_constraints); + } + else + { + bv_pointers = std::make_unique( + ns, *sat_solver, message_handler, get_array_constraints); + } if(options.get_option("arrays-uf") == "never") - bv_pointers->unbounded_array = bv_pointerst::unbounded_arrayt::U_NONE; + bv_pointers->unbounded_array = boolbvt::unbounded_arrayt::U_NONE; else if(options.get_option("arrays-uf") == "always") - bv_pointers->unbounded_array = bv_pointerst::unbounded_arrayt::U_ALL; + bv_pointers->unbounded_array = boolbvt::unbounded_arrayt::U_ALL; set_decision_procedure_time_limit(*bv_pointers); - std::unique_ptr boolbv = std::move(bv_pointers); - return std::make_unique(std::move(boolbv), std::move(sat_solver)); + return std::make_unique( + std::move(bv_pointers), std::move(sat_solver)); } std::unique_ptr solver_factoryt::get_dimacs() @@ -411,8 +423,16 @@ std::unique_ptr solver_factoryt::get_external_sat() auto prop = std::make_unique(message_handler, external_sat_solver); - std::unique_ptr bv_pointers = - std::make_unique(ns, *prop, message_handler); + std::unique_ptr bv_pointers; + if(options.get_bool_option("pointer-encoding-via-maps")) + { + bv_pointers = + std::make_unique(ns, *prop, message_handler); + } + else + { + bv_pointers = std::make_unique(ns, *prop, message_handler); + } return std::make_unique(std::move(bv_pointers), std::move(prop)); } @@ -421,7 +441,7 @@ std::unique_ptr solver_factoryt::get_bv_refinement() { std::unique_ptr prop = get_sat_solver(message_handler, options); - bv_refinementt::infot info; + bv_refinement_infot info; info.ns = &ns; info.prop = prop.get(); info.output_xml = output_xml_in_refinement; @@ -435,8 +455,16 @@ std::unique_ptr solver_factoryt::get_bv_refinement() info.refine_arithmetic = options.get_bool_option("refine-arithmetic"); info.message_handler = &message_handler; - std::unique_ptr decision_procedure = - std::make_unique(info); + std::unique_ptr decision_procedure; + if(options.get_bool_option("pointer-encoding-via-maps")) + { + decision_procedure = + std::make_unique>(info); + } + else + { + decision_procedure = std::make_unique>(info); + } set_decision_procedure_time_limit(*decision_procedure); return std::make_unique( std::move(decision_procedure), std::move(prop)); @@ -815,4 +843,7 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options) options.set_option( "max-node-refinement", cmdline.get_value("max-node-refinement")); } + + if(cmdline.isset("pointer-encoding-via-maps")) + options.set_option("pointer-encoding-via-maps", true); } diff --git a/src/goto-checker/solver_factory.h b/src/goto-checker/solver_factory.h index 2ba2f4e0242..c0855d9ffa7 100644 --- a/src/goto-checker/solver_factory.h +++ b/src/goto-checker/solver_factory.h @@ -120,7 +120,8 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options); "(refine-arithmetic)" \ "(outfile):" \ "(dump-smt-formula):" \ - "(write-solver-stats-to):" + "(write-solver-stats-to):" \ + "(pointer-encoding-via-maps)" #define HELP_SOLVER \ " {y--sat-solver} {usolver} \t use specified SAT solver\n" \ @@ -154,6 +155,8 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options); " {y--dump-smt-formula} {ufilename} \t " \ "output smt incremental formula to the given file\n" \ " {y--write-solver-stats-to} {ujson-file} \t " \ - "collect the solver query complexity\n" + "collect the solver query complexity\n" \ + " {y--pointer-encoding-via-maps} \t " \ + "use map-based pointer encoding instead of bit-packing\n" #endif // CPROVER_GOTO_CHECKER_SOLVER_FACTORY_H diff --git a/src/solvers/Makefile b/src/solvers/Makefile index 9f4bf192744..9ba509481f3 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -137,6 +137,7 @@ SRC = $(BOOLEFORCE_SRC) \ flattening/bv_dimacs.cpp \ flattening/bv_minimize.cpp \ flattening/bv_pointers.cpp \ + flattening/bv_pointers_wide.cpp \ flattening/bv_utils.cpp \ flattening/c_bit_field_replacement_type.cpp \ flattening/equality.cpp \ diff --git a/src/solvers/flattening/arrays.cpp b/src/solvers/flattening/arrays.cpp index 96cb3c8045f..84704136b92 100644 --- a/src/solvers/flattening/arrays.cpp +++ b/src/solvers/flattening/arrays.cpp @@ -193,12 +193,6 @@ void arrayst::collect_arrays(const exprt &a) } else if(a.id()==ID_member) { - const auto &struct_op = to_member_expr(a).struct_op(); - - DATA_INVARIANT( - struct_op.id() == ID_symbol || struct_op.id() == ID_nondet_symbol, - "unexpected array expression: member with '" + struct_op.id_string() + - "'"); } else if(a.is_constant() || a.id() == ID_array || a.id() == ID_string_constant) { @@ -222,7 +216,15 @@ void arrayst::collect_arrays(const exprt &a) typecast_op.type().id() == ID_array, "unexpected array type cast from " + typecast_op.type().id_string()); - arrays.make_union(a, typecast_op); + // Only unify when element types match; casts between different + // element sizes (e.g., SIMD reinterpretation) are handled at the + // bitvector level. + if( + to_array_type(a.type()).element_type() == + to_array_type(typecast_op.type()).element_type()) + { + arrays.make_union(a, typecast_op); + } collect_arrays(typecast_op); } else if(a.id()==ID_index) @@ -335,6 +337,25 @@ void arrayst::add_array_Ackermann_constraints() // iterate over arrays for(std::size_t i=0; i(arr)) + continue; + const index_sett &index_set=index_map[arrays.find_number(i)]; #ifdef DEBUG @@ -494,10 +515,7 @@ void arrayst::add_array_constraints( expr.id() == ID_string_constant) { } - else if( - expr.id() == ID_member && - (to_member_expr(expr).struct_op().id() == ID_symbol || - to_member_expr(expr).struct_op().id() == ID_nondet_symbol)) + else if(expr.id() == ID_member) { } else if(expr.id()==ID_byte_update_little_endian || @@ -510,22 +528,32 @@ void arrayst::add_array_constraints( // we got a=(type[])b const auto &expr_typecast_op = to_typecast_expr(expr).op(); - // add a[i]=b[i] - for(const auto &index : index_set) + const typet &dest_element_type = to_array_type(expr.type()).element_type(); + const typet &src_element_type = + to_array_type(expr_typecast_op.type()).element_type(); + + // When element types differ in size (e.g., SIMD vector reinterpretation + // casts like int32[4] <-> int64[2]), the element-wise constraint + // a[i]=b[i] is incorrect. The bitvector-level conversion handles + // these as bitwise copies, so skip the array-level constraint. + if(dest_element_type == src_element_type) { - const typet &element_type = to_array_type(expr.type()).element_type(); - index_exprt index_expr1(expr, index, element_type); - index_exprt index_expr2(expr_typecast_op, index, element_type); + // add a[i]=b[i] + for(const auto &index : index_set) + { + index_exprt index_expr1(expr, index, dest_element_type); + index_exprt index_expr2(expr_typecast_op, index, dest_element_type); - DATA_INVARIANT( - index_expr1.type()==index_expr2.type(), - "array elements should all have same type"); + DATA_INVARIANT( + index_expr1.type() == index_expr2.type(), + "array elements should all have same type"); - // add constraint - lazy_constraintt lazy(lazy_typet::ARRAY_TYPECAST, - equal_exprt(index_expr1, index_expr2)); - add_array_constraint(lazy, false); // added immediately - array_constraint_count[constraint_typet::ARRAY_TYPECAST]++; + // add constraint + lazy_constraintt lazy( + lazy_typet::ARRAY_TYPECAST, equal_exprt(index_expr1, index_expr2)); + add_array_constraint(lazy, false); // added immediately + array_constraint_count[constraint_typet::ARRAY_TYPECAST]++; + } } } else if(expr.id()==ID_index) @@ -585,11 +613,37 @@ void arrayst::add_array_constraints_with( lazy_constraintt lazy( lazy_typet::ARRAY_WITH, equal_exprt(index_expr, expr.new_value())); - add_array_constraint(lazy, false); // added immediately + add_array_constraint(lazy, false); // always eager array_constraint_count[constraint_typet::ARRAY_WITH]++; updated_indices.insert(expr.where()); + // Also add x[I]=v for other indices I that may equal the + // write index. This helps propagation when the write index + // and read index are different SSA symbols connected by + // equality constraints (e.g., argc'#0 and main_argc). + for(const auto &other_index : index_set) + { + if(other_index == expr.where()) + continue; + + const literalt idx_eq = convert(equal_exprt( + other_index, + typecast_exprt::conditional_cast(expr.where(), other_index.type()))); + + if(idx_eq.is_false()) + continue; + + index_exprt other_read( + expr, other_index, to_array_type(expr.type()).element_type()); + lazy_constraintt lazy2( + lazy_typet::ARRAY_WITH, + implies_exprt( + literal_exprt(idx_eq), equal_exprt(other_read, expr.new_value()))); + add_array_constraint(lazy2, false); + array_constraint_count[constraint_typet::ARRAY_WITH]++; + } + // For all other indices use the existing value, i.e., add constraints // x[I]=y[I] for I!=i,j,... @@ -620,7 +674,7 @@ void arrayst::add_array_constraints_with( lazy_constraintt lazy(lazy_typet::ARRAY_WITH, or_exprt(equality_expr, literal_exprt(guard_lit))); - add_array_constraint(lazy, false); // added immediately + add_array_constraint(lazy, false); // always eager array_constraint_count[constraint_typet::ARRAY_WITH]++; #if 0 // old code for adding, not significantly faster @@ -849,7 +903,7 @@ void arrayst::add_array_constraints_if( lazy_constraintt lazy(lazy_typet::ARRAY_IF, or_exprt(literal_exprt(!cond_lit), equal_exprt(index_expr1, index_expr2))); - add_array_constraint(lazy, false); // added immediately + add_array_constraint(lazy, false); // always eager array_constraint_count[constraint_typet::ARRAY_IF]++; #if 0 // old code for adding, not significantly faster @@ -869,7 +923,7 @@ void arrayst::add_array_constraints_if( lazy_typet::ARRAY_IF, or_exprt(literal_exprt(cond_lit), equal_exprt(index_expr1, index_expr2))); - add_array_constraint(lazy, false); // added immediately + add_array_constraint(lazy, false); // always eager array_constraint_count[constraint_typet::ARRAY_IF]++; #if 0 // old code for adding, not significantly faster diff --git a/src/solvers/flattening/boolbv.cpp b/src/solvers/flattening/boolbv.cpp index 68194f9d98f..dbe99785d07 100644 --- a/src/solvers/flattening/boolbv.cpp +++ b/src/solvers/flattening/boolbv.cpp @@ -512,7 +512,47 @@ bool boolbvt::boolbv_set_equality_to_true(const equal_exprt &expr) { // see if it is an unbounded array if(is_unbounded_array(type)) + { + // For arrays with a known finite size that fit within the + // flattening limit, connect the symbol's map literals to the + // element-wise bitvectors. This is needed when the array appears + // inside a struct that is an element of another array: the + // struct's bitvector uses the map literals, but the array theory + // constrains element-wise free variables. Without this + // connection, the map literals remain unconstrained. + const auto &array_type = to_array_type(type); + const auto size = numeric_cast(array_type.size()); + const auto elem_width_opt = + bv_width.get_width_opt(array_type.element_type()); + if( + size.has_value() && *size > 0 && *size <= MAX_FLATTENED_ARRAY_SIZE && + elem_width_opt.has_value() && *elem_width_opt > 0) + { + const irep_idt &identifier = + to_symbol_expr(expr.lhs()).get_identifier(); + const std::size_t elem_width = *elem_width_opt; + + bvt bv; + bv.reserve( + numeric_cast_v(*size) * elem_width); + + for(mp_integer i = 0; i < *size; ++i) + { + index_exprt idx( + expr.lhs(), from_integer(i, array_type.index_type())); + const bvt &elem_bv = convert_bv(idx, elem_width); + bv.insert(bv.end(), elem_bv.begin(), elem_bv.end()); + } + + map.set_literals(identifier, type, bv); + + if(freeze_all) + set_frozen(bv); + } + + // still let the array theory handle the equality return true; + } const bvt &bv1=convert_bv(expr.rhs()); diff --git a/src/solvers/flattening/boolbv_equality.cpp b/src/solvers/flattening/boolbv_equality.cpp index 8e604fe9ac1..61a0d58cf56 100644 --- a/src/solvers/flattening/boolbv_equality.cpp +++ b/src/solvers/flattening/boolbv_equality.cpp @@ -8,6 +8,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include "boolbv.h" @@ -28,8 +29,10 @@ literalt boolbvt::convert_equality(const equal_exprt &expr) if(has_byte_operator(expr)) { - return record_array_equality( - to_equal_expr(lower_byte_operators(expr, ns))); + exprt simplified = simplify_expr(lower_byte_operators(expr, ns), ns); + if(simplified.id() != ID_equal) + return convert_bool(simplified); + return record_array_equality(to_equal_expr(simplified)); } return record_array_equality(expr); diff --git a/src/solvers/flattening/boolbv_index.cpp b/src/solvers/flattening/boolbv_index.cpp index 2e400a57345..0ea1594fcae 100644 --- a/src/solvers/flattening/boolbv_index.cpp +++ b/src/solvers/flattening/boolbv_index.cpp @@ -6,18 +6,19 @@ Author: Daniel Kroening, kroening@kroening.com \*******************************************************************/ -#include "boolbv.h" - -#include - #include #include +#include #include #include #include #include #include +#include "boolbv.h" + +#include + bvt boolbvt::convert_index(const index_exprt &expr) { const exprt &array=expr.array(); @@ -33,32 +34,78 @@ bvt boolbvt::convert_index(const index_exprt &expr) to_array_type(array_op_type); // see if the array size is constant - - if(is_unbounded_array(array_type)) + // Member expressions with non-symbol struct operands (e.g., + // member(index(outer_array, i), field)) cannot be properly + // constrained by the array theory, which treats them as opaque + // base arrays. Fall through to the bounded-array encoding when + // the array has a known finite size so that the bitvector solver + // directly connects the element to the struct field bits. + const bool member_with_non_symbol_struct = + array.id() == ID_member && + to_member_expr(array).compound().id() != ID_symbol && + to_member_expr(array).compound().id() != ID_nondet_symbol && + array_type.size().is_constant(); + + if(is_unbounded_array(array_type) && !member_with_non_symbol_struct) { // use array decision procedure + // Typecast between array types with different element sizes + // (e.g., SIMD reinterpretation int32[4] <-> int64[2]) cannot be + // handled by the array theory's element-wise constraints. + // Lower to byte_extract which the bitvector solver handles. + if( + array.id() == ID_typecast && + to_typecast_expr(array).op().type().id() == ID_array && + to_array_type(array.type()).element_type() != + to_array_type(to_typecast_expr(array).op().type()).element_type()) + { + const auto &src = to_typecast_expr(array).op(); + const auto elem_size = boolbv_width(array_type.element_type()) / 8; + return convert_bv(lower_byte_operators( + byte_extract_exprt( + ID_byte_extract_little_endian, + src, + mult_exprt( + typecast_exprt::conditional_cast( + index, signedbv_typet(config.ansi_c.pointer_width)), + from_integer( + elem_size, signedbv_typet(config.ansi_c.pointer_width))), + config.ansi_c.char_width, + array_type.element_type()), + ns)); + } + if(has_byte_operator(expr)) { - const index_exprt final_expr = - to_index_expr(lower_byte_operators(expr, ns)); - CHECK_RETURN(final_expr != expr); - bv = convert_bv(final_expr); + exprt lowered = simplify_expr(lower_byte_operators(expr, ns), ns); + CHECK_RETURN(lowered != expr); - // record type if array is a symbol - const exprt &final_array = final_expr.array(); - if( - final_array.id() == ID_symbol || final_array.id() == ID_nondet_symbol) + if(lowered.id() == ID_index) { - const auto &array_width_opt = bv_width.get_width_opt(array_type); - (void)map.get_literals( - final_array.get(ID_identifier), - array_type, - array_width_opt.value_or(0)); + const index_exprt &final_expr = to_index_expr(lowered); + bv = convert_bv(final_expr); + + // record type if array is a symbol + const exprt &final_array = final_expr.array(); + if( + final_array.id() == ID_symbol || + final_array.id() == ID_nondet_symbol) + { + const auto &array_width_opt = bv_width.get_width_opt(array_type); + (void)map.get_literals( + final_array.get(ID_identifier), + array_type, + array_width_opt.value_or(0)); + } + + // make sure we have the index in the cache + convert_bv(final_expr.index()); + } + else + { + bv = convert_bv(lowered); } - - // make sure we have the index in the cache - convert_bv(final_expr.index()); } else { diff --git a/src/solvers/flattening/bv_pointers_wide.cpp b/src/solvers/flattening/bv_pointers_wide.cpp new file mode 100644 index 00000000000..40da17feb35 --- /dev/null +++ b/src/solvers/flattening/bv_pointers_wide.cpp @@ -0,0 +1,1841 @@ +/*******************************************************************\ + +Module: + +Author: CBMC Contributors + +\*******************************************************************/ + +/// \file +/// Pointer encoding using solver-level maps (arrays) + +#include "bv_pointers_wide.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/// Map bytes according to the configured endianness. The key difference to +/// endianness_mapt is that bv_endianness_mapt is aware of the bit-level +/// encoding of types, which need not co-incide with the bit layout at +/// source-code level. +class bv_endianness_map_widet : public endianness_mapt +{ +public: + bv_endianness_map_widet( + const typet &type, + bool little_endian, + const namespacet &_ns, + const boolbv_widtht &_boolbv_width) + : endianness_mapt(_ns), boolbv_width(_boolbv_width) + { + build(type, little_endian); + } + +protected: + const boolbv_widtht &boolbv_width; + + void build_little_endian(const typet &type) override; + void build_big_endian(const typet &type) override; +}; + +void bv_endianness_map_widet::build_little_endian(const typet &src) +{ + const auto &width_opt = boolbv_width.get_width_opt(src); + if(!width_opt.has_value()) + return; + + const std::size_t new_size = map.size() + *width_opt; + map.reserve(new_size); + + for(std::size_t i = map.size(); i < new_size; ++i) + map.push_back(i); +} + +void bv_endianness_map_widet::build_big_endian(const typet &src) +{ + if(src.id() == ID_pointer) + build_little_endian(src); + else + endianness_mapt::build_big_endian(src); +} + +endianness_mapt +bv_pointers_widet::endianness_map(const typet &type, bool little_endian) const +{ + return bv_endianness_map_widet{type, little_endian, ns, bv_width}; +} + +// Width helpers -- in the map-based encoding every component uses +// the full pointer width. + +std::size_t bv_pointers_widet::get_object_width(const pointer_typet &type) const +{ + return type.get_width(); +} + +std::size_t bv_pointers_widet::get_offset_width(const pointer_typet &type) const +{ + return type.get_width(); +} + +std::size_t +bv_pointers_widet::get_address_width(const pointer_typet &type) const +{ + return type.get_width(); +} + +// Constructor + +bv_pointers_widet::bv_pointers_widet( + const namespacet &_ns, + propt &_prop, + message_handlert &message_handler, + bool get_array_constraints) + : boolbvt(_ns, _prop, message_handler, get_array_constraints), + pointer_logic(_ns), + object_map( + "bv_pointers_wide::object_map", + array_typet( + unsignedbv_typet(config.ansi_c.pointer_width), + infinity_exprt(unsignedbv_typet(config.ansi_c.pointer_width)))), + offset_map( + "bv_pointers_wide::offset_map", + array_typet( + unsignedbv_typet(config.ansi_c.pointer_width), + infinity_exprt(unsignedbv_typet(config.ansi_c.pointer_width)))), + base_address_map( + "bv_pointers_wide::base_address_map", + array_typet( + unsignedbv_typet(config.ansi_c.pointer_width), + infinity_exprt(unsignedbv_typet(config.ansi_c.pointer_width)))), + next_bv_pointer_index(0) +{ +} + +// Helper: build a constant expression from a pointer index. + +exprt bv_pointers_widet::index_to_expr( + const mp_integer &index, + const pointer_typet &type) const +{ + return from_integer(index, unsignedbv_typet(type.get_width())); +} + +// Read helpers: look up object/offset via solver-level arrays. + +bvt bv_pointers_widet::read_object(const bvt &bv, const pointer_typet &type) +{ + // Try direct lookup from the index to avoid array reads + mp_integer idx_val = 0; + bool is_const = true; + for(std::size_t i = 0; i < bv.size(); ++i) + { + if(bv[i].is_true()) + idx_val += power(2, i); + else if(!bv[i].is_false()) + { + is_const = false; + break; + } + } + if(is_const) + { + auto it = index_to_bv_object_offset.find(idx_val); + if(it != index_to_bv_object_offset.end()) + return it->second.first; + } + + const std::size_t width = type.get_width(); + const unsignedbv_typet bv_type(width); + // Create a fresh symbol for the index value + symbol_exprt idx_sym( + "bv_pointers_wide::ro_idx::" + std::to_string(scope_counter++), bv_type); + const bvt &idx_bv = convert_bv(idx_sym); + for(std::size_t i = 0; i < width; ++i) + prop.set_equal(idx_bv[i], bv[i]); + return convert_bv(index_exprt(object_map, idx_sym)); +} + +bvt bv_pointers_widet::read_offset(const bvt &bv, const pointer_typet &type) +{ + // Try direct lookup from the index to avoid array reads + mp_integer idx_val = 0; + bool is_const = true; + for(std::size_t i = 0; i < bv.size(); ++i) + { + if(bv[i].is_true()) + idx_val += power(2, i); + else if(!bv[i].is_false()) + { + is_const = false; + break; + } + } + if(is_const) + { + auto it = index_to_bv_object_offset.find(idx_val); + if(it != index_to_bv_object_offset.end()) + return it->second.second; + } + + const std::size_t width = type.get_width(); + const unsignedbv_typet bv_type(width); + symbol_exprt idx_sym( + "bv_pointers_wide::roff_idx::" + std::to_string(scope_counter++), bv_type); + const bvt &idx_bv = convert_bv(idx_sym); + for(std::size_t i = 0; i < width; ++i) + prop.set_equal(idx_bv[i], bv[i]); + return convert_bv(index_exprt(offset_map, idx_sym)); +} + +// Get or create a symbolic base address for an object. + +bvt bv_pointers_widet::get_object_base_address( + const mp_integer &object, + std::size_t width) +{ + auto it = object_base_address.find(object); + if(it != object_base_address.end()) + return it->second; + + bvt base = prop.new_variables(width); + object_base_address[object] = base; + return base; +} + +// Encode: allocate a fresh index, constrain the maps. + +bvt bv_pointers_widet::encode( + const mp_integer &object, + const pointer_typet &type) +{ + // Return cached encoding if available + auto cache_it = encode_cache.find(object); + if(cache_it != encode_cache.end()) + return cache_it->second; + + const std::size_t width = type.get_width(); + const unsignedbv_typet bv_type(width); + + mp_integer idx = next_bv_pointer_index; + ++next_bv_pointer_index; + exprt idx_expr = from_integer(idx, bv_type); + + // object_map[idx] == object + set_to( + equal_exprt( + index_exprt(object_map, idx_expr), from_integer(object, bv_type)), + true); + + // offset_map[idx] == 0 + set_to( + equal_exprt(index_exprt(offset_map, idx_expr), from_integer(0, bv_type)), + true); + + index_to_object_offset[idx] = {object, mp_integer{0}}; + + // Store bitvector-level object/offset for direct lookup + // in offset_arithmetic (avoids array reads). + index_to_bv_object_offset[idx] = { + bv_utils.build_constant(object, width), bv_utils.build_constant(0, width)}; + + bvt result = convert_bv(idx_expr); + encode_cache[object] = result; + return result; +} + +// encode_fresh: like encode but with symbolic object/offset bvs. + +bvt bv_pointers_widet::encode_fresh( + const bvt &object_bv, + const bvt &offset_bv, + const pointer_typet &type) +{ + const std::size_t width = type.get_width(); + const unsignedbv_typet bv_type(width); + + mp_integer idx = next_bv_pointer_index; + ++next_bv_pointer_index; + exprt idx_expr = from_integer(idx, bv_type); + bvt index_bv = convert_bv(idx_expr); + + // Constrain object_map[idx] == object_bv + bvt obj_read = convert_bv(index_exprt(object_map, idx_expr)); + for(std::size_t i = 0; i < width; ++i) + prop.set_equal(obj_read[i], object_bv[i]); + + // Constrain offset_map[idx] == offset_bv + bvt off_read = convert_bv(index_exprt(offset_map, idx_expr)); + for(std::size_t i = 0; i < width; ++i) + prop.set_equal(off_read[i], offset_bv[i]); + + index_to_bv_object_offset[idx] = {object_bv, offset_bv}; + + return index_bv; +} + +// add_addr: register an object and encode it. + +bvt bv_pointers_widet::add_addr(const exprt &expr) +{ + const auto a = pointer_logic.add_object(expr); + const pointer_typet type = pointer_type(expr.type()); + return encode(a, type); +} + +// offset_arithmetic overloads + +bvt bv_pointers_widet::offset_arithmetic( + const pointer_typet &type, + const bvt &bv, + const mp_integer &x) +{ + const std::size_t offset_bits = get_offset_width(type); + return offset_arithmetic( + type, bv, 1, bv_utils.build_constant(x, offset_bits)); +} + +bvt bv_pointers_widet::offset_arithmetic( + const pointer_typet &type, + const bvt &bv, + const mp_integer &factor, + const exprt &index) +{ + bvt bv_index = convert_bv(index); + + bv_utilst::representationt rep = index.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED; + + const std::size_t offset_bits = get_offset_width(type); + bv_index = bv_utils.extension(bv_index, offset_bits, rep); + + return offset_arithmetic(type, bv, factor, bv_index); +} + +bvt bv_pointers_widet::offset_arithmetic( + const pointer_typet &type, + const bvt &bv, + const exprt &factor, + const exprt &index) +{ + bvt bv_factor = convert_bv(factor); + bvt bv_index = + convert_bv(typecast_exprt::conditional_cast(index, factor.type())); + + bv_utilst::representationt rep = factor.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED; + + bv_index = bv_utils.multiplier(bv_index, bv_factor, rep); + + const std::size_t offset_bits = get_offset_width(type); + bv_index = bv_utils.extension(bv_index, offset_bits, rep); + + return offset_arithmetic(type, bv, 1, bv_index); +} + +bvt bv_pointers_widet::offset_arithmetic( + const pointer_typet &type, + const bvt &bv, + const mp_integer &factor, + const bvt &index) +{ + bvt bv_index; + + if(factor == 1) + bv_index = index; + else + { + bvt bv_factor = bv_utils.build_constant(factor, index.size()); + bv_index = bv_utils.signed_multiplier(index, bv_factor); + } + + const std::size_t offset_bits = get_offset_width(type); + bv_index = bv_utils.zero_extension(bv_index, offset_bits); + + bvt obj = read_object(bv, type); + bvt old_offset = read_offset(bv, type); + bvt new_offset = bv_utils.add(old_offset, bv_index); + return encode_fresh(obj, new_offset, type); +} + +// convert_address_of_rec + +std::optional bv_pointers_widet::convert_address_of_rec(const exprt &expr) +{ + if(expr.id() == ID_symbol || expr.id() == ID_label) + { + return add_addr(expr); + } + else if(expr.id() == ID_null_object) + { + pointer_typet pt = pointer_type(expr.type()); + return encode(pointer_logic.get_null_object(), pt); + } + else if(expr.id() == ID_index) + { + const index_exprt &index_expr = to_index_expr(expr); + const exprt &array = index_expr.array(); + const exprt &index = index_expr.index(); + const auto &array_type = to_array_type(array.type()); + + pointer_typet type = pointer_type(expr.type()); + const std::size_t bits = boolbv_width(type); + + bvt bv; + + if(array_type.id() == ID_pointer) + { + bv = convert_pointer_type(array); + CHECK_RETURN(bv.size() == bits); + } + else if( + array_type.id() == ID_array || array_type.id() == ID_string_constant) + { + auto bv_opt = convert_address_of_rec(array); + if(!bv_opt.has_value()) + return {}; + bv = std::move(*bv_opt); + CHECK_RETURN(bv.size() == bits); + } + else + UNREACHABLE; + + auto size = pointer_offset_size(array_type.element_type(), ns); + CHECK_RETURN(size.has_value() && *size >= 0); + + bv = offset_arithmetic(type, bv, *size, index); + CHECK_RETURN(bv.size() == bits); + + return std::move(bv); + } + else if( + expr.id() == ID_byte_extract_little_endian || + expr.id() == ID_byte_extract_big_endian) + { + const auto &byte_extract_expr = to_byte_extract_expr(expr); + + auto bv_opt = convert_address_of_rec(byte_extract_expr.op()); + if(!bv_opt.has_value()) + return {}; + + pointer_typet type = pointer_type(expr.type()); + const std::size_t bits = boolbv_width(type); + CHECK_RETURN(bv_opt->size() == bits); + + bvt bv = offset_arithmetic(type, *bv_opt, 1, byte_extract_expr.offset()); + CHECK_RETURN(bv.size() == bits); + return std::move(bv); + } + else if(expr.id() == ID_member) + { + const member_exprt &member_expr = to_member_expr(expr); + const exprt &struct_op = member_expr.compound(); + + auto bv_opt = convert_address_of_rec(struct_op); + if(!bv_opt.has_value()) + return {}; + + bvt bv = std::move(*bv_opt); + if( + struct_op.type().id() == ID_struct || + struct_op.type().id() == ID_struct_tag) + { + const struct_typet &struct_op_type = + struct_op.type().id() == ID_struct_tag + ? ns.follow_tag(to_struct_tag_type(struct_op.type())) + : to_struct_type(struct_op.type()); + auto offset = + member_offset(struct_op_type, member_expr.get_component_name(), ns); + CHECK_RETURN(offset.has_value()); + + pointer_typet type = pointer_type(expr.type()); + bv = offset_arithmetic(type, bv, *offset); + } + else + { + INVARIANT( + struct_op.type().id() == ID_union || + struct_op.type().id() == ID_union_tag, + "member expression should operate on " + "struct or union"); + } + + return std::move(bv); + } + else if( + expr.is_constant() || expr.id() == ID_string_constant || + expr.id() == ID_array) + { + return add_addr(expr); + } + else if(expr.id() == ID_if) + { + const if_exprt &ifex = to_if_expr(expr); + + literalt cond = convert(ifex.cond()); + + auto bv1_opt = convert_address_of_rec(ifex.true_case()); + if(!bv1_opt.has_value()) + return {}; + + auto bv2_opt = convert_address_of_rec(ifex.false_case()); + if(!bv2_opt.has_value()) + return {}; + + return bv_utils.select(cond, *bv1_opt, *bv2_opt); + } + + return {}; +} + +// convert_pointer_type + +bvt bv_pointers_widet::convert_pointer_type(const exprt &expr) +{ + const pointer_typet &type = to_pointer_type(expr.type()); + const std::size_t bits = boolbv_width(expr.type()); + + if(expr.id() == ID_symbol) + { + const irep_idt &identifier = to_symbol_expr(expr).get_identifier(); + return map.get_literals(identifier, type, bits); + } + else if(expr.id() == ID_nondet_symbol) + { + return prop.new_variables(bits); + } + else if(expr.id() == ID_typecast) + { + const typecast_exprt &tc = to_typecast_expr(expr); + const exprt &op = tc.op(); + const typet &op_type = op.type(); + + if(op_type.id() == ID_pointer) + return convert_bv(op); + else if( + can_cast_type(op_type) || op_type.id() == ID_bool || + op_type.id() == ID_c_enum || op_type.id() == ID_c_enum_tag) + { + // Integer-to-pointer cast. + bvt int_bv = convert_bv(op); + const std::size_t ptr_width = type.get_width(); + + // Check if the integer value is a constant + mp_integer int_val = 0; + bool is_const = true; + for(std::size_t i = 0; i < int_bv.size(); ++i) + { + if(int_bv[i].is_true()) + int_val += power(2, i); + else if(!int_bv[i].is_false()) + { + is_const = false; + break; + } + } + + if(is_const && int_val == 0) + { + // (T*)0 is NULL + return encode(pointer_logic.get_null_object(), type); + } + else if(is_const) + { + // For constant non-zero integer addresses, create a + // dedicated "integer address" object with the constant + // as its base address and offset 0. + const auto int_addr_obj = pointer_logic.add_object(constant_exprt( + integer2bvrep(int_val, ptr_width), unsignedbv_typet(ptr_width))); + bvt result = encode(int_addr_obj, type); + integer_address_objects.insert(int_addr_obj); + // Set the base address to the constant value + bvt base = get_object_base_address(int_addr_obj, ptr_width); + bvt val_bv = bv_utils.build_constant(int_val, ptr_width); + for(std::size_t i = 0; i < ptr_width; ++i) + prop.set_equal(base[i], val_bv[i]); + return result; + } + else + { + // Symbolic integer-to-pointer: create a fresh pointer + // constrained so base[object] + offset == integer value. + bvt obj_bv = prop.new_variables(ptr_width); + bvt off_bv = prop.new_variables(ptr_width); + bvt int_ext = bv_utils.zero_extension(int_bv, ptr_width); + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + std::vector valid_obj_lits; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + bvt obj_const = bv_utils.build_constant(number, ptr_width); + literalt is_this_obj = bv_utils.equal(obj_bv, obj_const); + valid_obj_lits.push_back(is_this_obj); + if(is_this_obj.is_false()) + continue; + + bvt base = get_object_base_address(number, ptr_width); + bvt flat = bv_utils.add(base, off_bv); + // Forward: obj == i => base[i] + offset == int_value + for(std::size_t i = 0; i < ptr_width; ++i) + { + prop.lcnf({!is_this_obj, !flat[i], int_ext[i]}); + prop.lcnf({!is_this_obj, flat[i], !int_ext[i]}); + } + + // Backward: if int_value is in [base[i], base[i]+size), + // then obj must be i. This helps the solver determine + // which object the I2P result points to. + auto size_opt = pointer_offset_size(it->type(), ns); + if(size_opt.has_value() && *size_opt > 0) + { + // base[i] <= int_value + literalt ge_base = bv_utils.rel( + int_ext, ID_ge, base, bv_utilst::representationt::UNSIGNED); + // int_value < base[i] + size + bvt end = + bv_utils.add(base, bv_utils.build_constant(*size_opt, ptr_width)); + literalt lt_end = bv_utils.rel( + int_ext, ID_lt, end, bv_utilst::representationt::UNSIGNED); + // in_range => obj == i + literalt in_range = prop.land(ge_base, lt_end); + prop.l_set_to_true(prop.limplies(in_range, is_this_obj)); + } + } + // Object must be one of the known objects + prop.lcnf(valid_obj_lits); + + return encode_fresh(obj_bv, off_bv, type); + } + } + } + else if(expr.id() == ID_if) + { + return SUB::convert_if(to_if_expr(expr)); + } + else if(expr.id() == ID_index) + { + return SUB::convert_index(to_index_expr(expr)); + } + else if(expr.id() == ID_member) + { + return SUB::convert_member(to_member_expr(expr)); + } + else if(expr.id() == ID_address_of) + { + const address_of_exprt &address_of_expr = to_address_of_expr(expr); + auto bv_opt = convert_address_of_rec(address_of_expr.op()); + if(!bv_opt.has_value()) + return conversion_failed(address_of_expr); + + CHECK_RETURN(bv_opt->size() == bits); + return *bv_opt; + } + else if(expr.id() == ID_object_address) + { + const auto &object_address_expr = to_object_address_expr(expr); + return add_addr(object_address_expr.object_expr()); + } + else if(expr.is_constant()) + { + const constant_exprt &c = to_constant_expr(expr); + if(c.is_null_pointer()) + return encode(pointer_logic.get_null_object(), type); + else + { + mp_integer i = bvrep2integer(c.get_value(), bits, false); + return bv_utils.build_constant(i, bits); + } + } + else if(expr.id() == ID_plus) + { + const plus_exprt &plus_expr = to_plus_expr(expr); + + bvt bv; + mp_integer size = 0; + std::size_t count = 0; + + for(const auto &op : plus_expr.operands()) + { + if(op.type().id() == ID_pointer) + { + count++; + bv = convert_bv(op); + CHECK_RETURN(bv.size() == bits); + + typet base_type = to_pointer_type(op.type()).base_type(); + DATA_INVARIANT( + base_type.id() != ID_empty, + "no pointer arithmetic over void pointers"); + auto size_opt = pointer_offset_size(base_type, ns); + CHECK_RETURN(size_opt.has_value() && *size_opt >= 0); + size = *size_opt; + } + } + + INVARIANT(count == 1, "exactly one pointer operand"); + + const std::size_t offset_bits = get_offset_width(type); + bvt sum = bv_utils.build_constant(0, offset_bits); + + for(const auto &operand : plus_expr.operands()) + { + if(operand.type().id() == ID_pointer) + continue; + + if( + operand.type().id() != ID_unsignedbv && + operand.type().id() != ID_signedbv) + { + return conversion_failed(plus_expr); + } + + bv_utilst::representationt rep = operand.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED; + + bvt op = convert_bv(operand); + CHECK_RETURN(!op.empty()); + op = bv_utils.extension(op, offset_bits, rep); + sum = bv_utils.add(sum, op); + } + + return offset_arithmetic(type, bv, size, sum); + } + else if(expr.id() == ID_minus) + { + const minus_exprt &minus_expr = to_minus_expr(expr); + + INVARIANT( + minus_expr.lhs().type().id() == ID_pointer, + "first operand should be of pointer type"); + + if( + minus_expr.rhs().type().id() != ID_unsignedbv && + minus_expr.rhs().type().id() != ID_signedbv) + { + return conversion_failed(minus_expr); + } + + const unary_minus_exprt neg_op1(minus_expr.rhs()); + const bvt &bv = convert_bv(minus_expr.lhs()); + + typet base_type = to_pointer_type(minus_expr.lhs().type()).base_type(); + DATA_INVARIANT( + base_type.id() != ID_empty, "no pointer arithmetic over void pointers"); + auto element_size_opt = pointer_offset_size(base_type, ns); + CHECK_RETURN(element_size_opt.has_value() && *element_size_opt > 0); + return offset_arithmetic(type, bv, *element_size_opt, neg_op1); + } + else if( + expr.id() == ID_byte_extract_little_endian || + expr.id() == ID_byte_extract_big_endian) + { + return SUB::convert_byte_extract(to_byte_extract_expr(expr)); + } + else if( + expr.id() == ID_byte_update_little_endian || + expr.id() == ID_byte_update_big_endian) + { + return SUB::convert_byte_update(to_byte_update_expr(expr)); + } + else if(expr.id() == ID_field_address) + { + const auto &fa = to_field_address_expr(expr); + const typet &compound_type = fa.compound_type(); + + auto bv = convert_bitvector(fa.base()); + + if(compound_type.id() == ID_struct || compound_type.id() == ID_struct_tag) + { + const struct_typet &st = + compound_type.id() == ID_struct_tag + ? ns.follow_tag(to_struct_tag_type(compound_type)) + : to_struct_type(compound_type); + auto offset = member_offset(st, fa.component_name(), ns); + CHECK_RETURN(offset.has_value()); + + bv = offset_arithmetic(fa.type(), bv, *offset); + } + else if( + compound_type.id() == ID_union || compound_type.id() == ID_union_tag) + { + // nothing to do + } + else + { + INVARIANT(false, "field address on struct or union"); + } + + return bv; + } + else if(expr.id() == ID_element_address) + { + const auto &ea = to_element_address_expr(expr); + + auto bv = convert_bitvector(ea.base()); + + auto size = pointer_offset_size(ea.element_type(), ns); + CHECK_RETURN(size.has_value() && *size >= 0); + + bv = offset_arithmetic(ea.type(), bv, *size, ea.index()); + + return bv; + } + + return conversion_failed(expr); +} + +// is_pointer_subtraction helper + +static bool is_pointer_subtraction(const exprt &expr) +{ + if(expr.id() != ID_minus) + return false; + const auto &minus_expr = to_minus_expr(expr); + return minus_expr.lhs().type().id() == ID_pointer && + minus_expr.rhs().type().id() == ID_pointer; +} + +// boolbv_set_equality_to_true: for pointer equalities used as +// constraints (via set_to), force bitvector identity. This ensures +// that the solver assigns the same index to both sides, which is +// needed for model extraction (bv_get_rec looks up the index). + +bool bv_pointers_widet::boolbv_set_equality_to_true(const equal_exprt &expr) +{ + if( + expr.lhs().type().id() == ID_pointer && + expr.rhs().type().id() == ID_pointer) + { + const bvt &lhs_bv = convert_bv(expr.lhs()); + const bvt &rhs_bv = convert_bv(expr.rhs()); + + // Force bitvector identity (index equality) + for(std::size_t i = 0; i < lhs_bv.size() && i < rhs_bv.size(); ++i) + prop.set_equal(lhs_bv[i], rhs_bv[i]); + + return false; // handled + } + + return SUB::boolbv_set_equality_to_true(expr); +} + +// convert_byte_extract: lower when pointers are involved, +// since the abstract pointer index is not meaningful as bytes. + +bvt bv_pointers_widet::convert_byte_extract(const byte_extract_exprt &expr) +{ + if( + has_subtype(expr.type(), ID_pointer, ns) || + has_subtype(expr.op().type(), ID_pointer, ns)) + { + return convert_bv(lower_byte_extract(expr, ns)); + } + return SUB::convert_byte_extract(expr); +} + +// convert_byte_update: lower when pointers are involved. + +bvt bv_pointers_widet::convert_byte_update(const byte_update_exprt &expr) +{ + if( + has_subtype(expr.value().type(), ID_pointer, ns) || + has_subtype(expr.op0().type(), ID_pointer, ns)) + { + return convert_bv(lower_byte_update(expr, ns)); + } + return SUB::convert_byte_update(expr); +} + +// convert_bitvector + +bvt bv_pointers_widet::convert_bitvector(const exprt &expr) +{ + if(expr.type().id() == ID_pointer) + return convert_pointer_type(expr); + + if(is_pointer_subtraction(expr)) + { + std::size_t width = boolbv_width(expr.type()); + + const auto &minus_expr = to_minus_expr(expr); + + const exprt same_obj = ::same_object(minus_expr.lhs(), minus_expr.rhs()); + const literalt same_object_lit = convert(same_obj); + + const pointer_typet &lhs_pt = to_pointer_type(minus_expr.lhs().type()); + const bvt &lhs = convert_bv(minus_expr.lhs()); + const pointer_typet &rhs_pt = to_pointer_type(minus_expr.rhs().type()); + const bvt &rhs = convert_bv(minus_expr.rhs()); + + bvt lhs_offset = bv_utils.zero_extension(read_offset(lhs, lhs_pt), width); + bvt rhs_offset = bv_utils.zero_extension(read_offset(rhs, rhs_pt), width); + + DATA_INVARIANT( + lhs_pt.base_type().id() != ID_empty, + "no pointer arithmetic over void pointers"); + auto element_size_opt = pointer_offset_size(lhs_pt.base_type(), ns); + CHECK_RETURN(element_size_opt.has_value() && *element_size_opt > 0); + + bvt bv = prop.new_variables(width); + + // Same-object case: result = (lhs_offset - rhs_offset) / element_size + if(!same_object_lit.is_false()) + { + bvt difference = bv_utils.sub(lhs_offset, rhs_offset); + if(*element_size_opt != 1) + { + bvt element_size_bv = bv_utils.build_constant(*element_size_opt, width); + difference = bv_utils.divider( + difference, element_size_bv, bv_utilst::representationt::SIGNED); + } + prop.l_set_to_true( + prop.limplies(same_object_lit, bv_utils.equal(difference, bv))); + } + + // Different-object case: use flat address difference. + // This handles integer-address objects like (char*)20-(char*)10. + if(!same_object_lit.is_true()) + { + bvt lhs_obj = read_object(lhs, lhs_pt); + bvt rhs_obj = read_object(rhs, rhs_pt); + const std::size_t ptr_width = config.ansi_c.pointer_width; + + bvt lhs_flat = lhs_offset; + bvt rhs_flat = rhs_offset; + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + auto base_it = object_base_address.find(mp_integer(number)); + if(base_it == object_base_address.end()) + continue; + bvt obj_const = bv_utils.build_constant(number, ptr_width); + bvt base_ext = bv_utils.zero_extension(base_it->second, width); + + literalt is_l = bv_utils.equal(lhs_obj, obj_const); + lhs_flat = + bv_utils.select(is_l, bv_utils.add(base_ext, lhs_offset), lhs_flat); + + literalt is_r = bv_utils.equal(rhs_obj, obj_const); + rhs_flat = + bv_utils.select(is_r, bv_utils.add(base_ext, rhs_offset), rhs_flat); + } + + bvt flat_diff = bv_utils.sub(lhs_flat, rhs_flat); + if(*element_size_opt != 1) + { + bvt element_size_bv = bv_utils.build_constant(*element_size_opt, width); + flat_diff = bv_utils.divider( + flat_diff, element_size_bv, bv_utilst::representationt::SIGNED); + } + prop.l_set_to_true( + prop.limplies(!same_object_lit, bv_utils.equal(flat_diff, bv))); + } + + return bv; + } + else if( + expr.id() == ID_pointer_offset && + to_pointer_offset_expr(expr).pointer().type().id() == ID_pointer) + { + std::size_t width = boolbv_width(expr.type()); + + const exprt &pointer = to_pointer_offset_expr(expr).pointer(); + const bvt &pointer_bv = convert_bv(pointer); + + bvt offset_bv = read_offset(pointer_bv, to_pointer_type(pointer.type())); + + return bv_utils.zero_extension(offset_bv, width); + } + else if( + const auto object_size = expr_try_dynamic_cast(expr)) + { + std::size_t width = boolbv_width(object_size->type()); + + postponed_list.emplace_back( + prop.new_variables(width), + convert_bv(object_size->pointer()), + *object_size); + + return postponed_list.back().bv; + } + else if( + expr.id() == ID_pointer_object && + to_pointer_object_expr(expr).pointer().type().id() == ID_pointer) + { + std::size_t width = boolbv_width(expr.type()); + + const exprt &pointer = to_pointer_object_expr(expr).pointer(); + const bvt &pointer_bv = convert_bv(pointer); + + bvt object_bv = read_object(pointer_bv, to_pointer_type(pointer.type())); + + return bv_utils.zero_extension(object_bv, width); + } + else if( + expr.id() == ID_typecast && + to_typecast_expr(expr).op().type().id() == ID_pointer) + { + // Pointer-to-integer cast: compute base[object] + offset. + // For pointers with known constant indices (from encode()), + // look up the object directly. For symbolic pointers, + // postpone to finish_eager_conversion. + const exprt &ptr_op = to_typecast_expr(expr).op(); + const bvt &ptr_bv = convert_bv(ptr_op); + const pointer_typet &ptr_type = to_pointer_type(ptr_op.type()); + const std::size_t ptr_width = ptr_type.get_width(); + std::size_t width = boolbv_width(expr.type()); + + // Try to extract a constant index from the pointer bitvector + mp_integer idx_val = 0; + bool is_constant = true; + for(std::size_t i = 0; i < ptr_bv.size(); ++i) + { + if(ptr_bv[i].is_true()) + idx_val += power(2, i); + else if(!ptr_bv[i].is_false()) + { + is_constant = false; + break; + } + } + + if(is_constant) + { + auto it = index_to_object_offset.find(idx_val); + if(it != index_to_object_offset.end()) + { + bvt base = get_object_base_address(it->second.first, ptr_width); + bvt off_const = bv_utils.build_constant(it->second.second, ptr_width); + bvt flat = bv_utils.add(base, off_const); + return bv_utils.zero_extension(flat, width); + } + + // Also check encode_fresh's bitvector-level map + auto it2 = index_to_bv_object_offset.find(idx_val); + if(it2 != index_to_bv_object_offset.end()) + { + const bvt &obj_bv = it2->second.first; + const bvt &off_bv = it2->second.second; + + // Try to extract constant object number for base address + mp_integer obj_val = 0; + bool obj_is_const = true; + for(std::size_t i = 0; i < obj_bv.size(); ++i) + { + if(obj_bv[i].is_true()) + obj_val += power(2, i); + else if(!obj_bv[i].is_false()) + { + obj_is_const = false; + break; + } + } + + if(obj_is_const) + { + bvt base = get_object_base_address(obj_val, ptr_width); + bvt flat = bv_utils.add(base, off_bv); + return bv_utils.zero_extension(flat, width); + } + } + } + + // For symbolic pointers, postpone + bvt result = prop.new_variables(width); + postponed_list.emplace_back(result, ptr_bv, expr); + return result; + } + + return SUB::convert_bitvector(expr); +} + +// convert_rest + +literalt bv_pointers_widet::convert_rest(const exprt &expr) +{ + PRECONDITION(expr.is_boolean()); + + // Handle pointer equality/inequality FIRST, before the + // else-if chain below, to ensure it's always reached. + if(expr.id() == ID_equal || expr.id() == ID_notequal) + { + const auto &rel = to_binary_relation_expr(expr); + if( + rel.lhs().type().id() == ID_pointer && + rel.rhs().type().id() == ID_pointer) + { + const pointer_typet &lhs_type = to_pointer_type(rel.lhs().type()); + const pointer_typet &rhs_type = to_pointer_type(rel.rhs().type()); + + const bvt &lhs_bv = convert_bv(rel.lhs()); + const bvt &rhs_bv = convert_bv(rel.rhs()); + + literalt indices_equal = bv_utils.equal(lhs_bv, rhs_bv); + + if(indices_equal.is_false()) + { + // Indices definitely different — semantic comparison + bvt lhs_obj = read_object(lhs_bv, lhs_type); + bvt rhs_obj = read_object(rhs_bv, rhs_type); + bvt lhs_off = read_offset(lhs_bv, lhs_type); + bvt rhs_off = read_offset(rhs_bv, rhs_type); + + literalt obj_eq = bv_utils.equal(lhs_obj, rhs_obj); + literalt off_eq = bv_utils.equal(lhs_off, rhs_off); + literalt result = prop.land(obj_eq, off_eq); + + if(expr.id() == ID_notequal) + return !result; + return result; + } + + // Add semantic comparison for different indices + bvt lhs_obj = read_object(lhs_bv, lhs_type); + bvt rhs_obj = read_object(rhs_bv, rhs_type); + bvt lhs_off = read_offset(lhs_bv, lhs_type); + bvt rhs_off = read_offset(rhs_bv, rhs_type); + + literalt obj_eq = bv_utils.equal(lhs_obj, rhs_obj); + literalt off_eq = bv_utils.equal(lhs_off, rhs_off); + literalt semantic_eq = prop.land(obj_eq, off_eq); + + literalt result = prop.lor(indices_equal, semantic_eq); + + // Bidirectional: index equality implies semantic equality + prop.l_set_to_true(prop.limplies(indices_equal, obj_eq)); + prop.l_set_to_true(prop.limplies(indices_equal, off_eq)); + + if(expr.id() == ID_notequal) + return !result; + return result; + } + } + + const exprt::operandst &operands = expr.operands(); + + if(expr.id() == ID_is_invalid_pointer) + { + if(operands.size() == 1 && operands[0].type().id() == ID_pointer) + { + const bvt &bv = convert_bv(operands[0]); + + if(!bv.empty()) + { + const pointer_typet &type = to_pointer_type(operands[0].type()); + bvt object_bv = read_object(bv, type); + + bvt invalid_bv = bv_utils.build_constant( + pointer_logic.get_invalid_object(), get_object_width(type)); + + const std::size_t object_bits = get_object_width(type); + + bvt equal_bv; + equal_bv.reserve(object_bits); + + for(std::size_t i = 0; i < object_bits; i++) + { + equal_bv.push_back(prop.lequal(object_bv[i], invalid_bv[i])); + } + + return prop.land(equal_bv); + } + } + } + else if(expr.id() == ID_is_dynamic_object) + { + if(operands.size() == 1 && operands[0].type().id() == ID_pointer) + { + literalt l = prop.new_variable(); + postponed_list.emplace_back(bvt{1, l}, convert_bv(operands[0]), expr); + return l; + } + } + else if( + expr.id() == ID_lt || expr.id() == ID_le || expr.id() == ID_gt || + expr.id() == ID_ge) + { + if( + operands.size() == 2 && operands[0].type().id() == ID_pointer && + operands[1].type().id() == ID_pointer) + { + const bvt &bv0 = convert_bv(operands[0]); + const bvt &bv1 = convert_bv(operands[1]); + + const pointer_typet &type0 = to_pointer_type(operands[0].type()); + bvt offset_bv0 = read_offset(bv0, type0); + + const pointer_typet &type1 = to_pointer_type(operands[1].type()); + bvt offset_bv1 = read_offset(bv1, type1); + + const exprt same_obj = ::same_object(operands[0], operands[1]); + const literalt same_object_lit = convert(same_obj); + if(same_object_lit.is_false()) + return same_object_lit; + + return prop.land( + same_object_lit, + bv_utils.rel( + offset_bv0, + expr.id(), + offset_bv1, + bv_utilst::representationt::UNSIGNED)); + } + } + else if( + auto prophecy_r_or_w_ok = + expr_try_dynamic_cast(expr)) + { + return convert(simplify_expr(prophecy_r_or_w_ok->lower(ns), ns)); + } + else if( + auto prophecy_pointer_in_range = + expr_try_dynamic_cast(expr)) + { + return convert(simplify_expr(prophecy_pointer_in_range->lower(ns), ns)); + } + + else if( + const auto minus_overflow = + expr_try_dynamic_cast(expr)) + { + if( + minus_overflow->lhs().type().id() == ID_pointer && + minus_overflow->rhs().type().id() == ID_pointer) + { + // Pointer subtraction overflow: use the offsets from the + // maps instead of the raw indices. When both pointers + // are in the same object, the offsets are bounded by the + // object size and the difference cannot overflow. + const pointer_typet &lhs_pt = + to_pointer_type(minus_overflow->lhs().type()); + const pointer_typet &rhs_pt = + to_pointer_type(minus_overflow->rhs().type()); + + const bvt &lhs_bv = convert_bv(minus_overflow->lhs()); + const bvt &rhs_bv = convert_bv(minus_overflow->rhs()); + + // For same-object pointers, check offset overflow. + // For different-object pointers, the subtraction is + // undefined — use the flat address difference. + bvt lhs_obj = read_object(lhs_bv, lhs_pt); + bvt rhs_obj = read_object(rhs_bv, rhs_pt); + literalt same_obj = bv_utils.equal(lhs_obj, rhs_obj); + + bvt lhs_off = read_offset(lhs_bv, lhs_pt); + bvt rhs_off = read_offset(rhs_bv, rhs_pt); + + // Same object: overflow iff offset difference overflows + literalt off_overflow = bv_utils.overflow_sub( + lhs_off, rhs_off, bv_utilst::representationt::SIGNED); + + // Different objects: use flat addresses for overflow check + const std::size_t width = lhs_off.size(); + bvt lhs_flat = lhs_off; // placeholder + bvt rhs_flat = rhs_off; + + // Try to get flat addresses from base address map + mp_integer lhs_idx = 0, rhs_idx = 0; + bool lhs_const = true, rhs_const = true; + for(std::size_t i = 0; i < lhs_bv.size(); ++i) + { + if(lhs_bv[i].is_true()) + lhs_idx += power(2, i); + else if(!lhs_bv[i].is_false()) + lhs_const = false; + } + for(std::size_t i = 0; i < rhs_bv.size(); ++i) + { + if(rhs_bv[i].is_true()) + rhs_idx += power(2, i); + else if(!rhs_bv[i].is_false()) + rhs_const = false; + } + + if(lhs_const && rhs_const) + { + auto lit = index_to_bv_object_offset.find(lhs_idx); + auto rit = index_to_bv_object_offset.find(rhs_idx); + if( + lit != index_to_bv_object_offset.end() && + rit != index_to_bv_object_offset.end()) + { + // Extract object numbers + mp_integer lobj = 0, robj = 0; + bool lok = true, rok = true; + for(std::size_t i = 0; i < lit->second.first.size(); ++i) + { + if(lit->second.first[i].is_true()) + lobj += power(2, i); + else if(!lit->second.first[i].is_false()) + lok = false; + } + for(std::size_t i = 0; i < rit->second.first.size(); ++i) + { + if(rit->second.first[i].is_true()) + robj += power(2, i); + else if(!rit->second.first[i].is_false()) + rok = false; + } + if(lok && rok) + { + bvt lbase = get_object_base_address(lobj, width); + bvt rbase = get_object_base_address(robj, width); + lhs_flat = bv_utils.add(lbase, lhs_off); + rhs_flat = bv_utils.add(rbase, rhs_off); + } + } + } + + literalt flat_overflow = bv_utils.overflow_sub( + lhs_flat, rhs_flat, bv_utilst::representationt::SIGNED); + + // Overflow if same object and offset overflows, or + // different objects and flat address overflows + return prop.lor( + prop.land(same_obj, off_overflow), prop.land(!same_obj, flat_overflow)); + } + } + + return SUB::convert_rest(expr); +} + +// bits_to_string helper + +static std::string bits_to_string(const propt &prop, const bvt &bv) +{ + std::string result; + + for(const auto &literal : bv) + { + char ch = 0; + + // clang-format off + switch(prop.l_get(literal).get_value()) + { + case tvt::tv_enumt::TV_FALSE: ch='0'; break; + case tvt::tv_enumt::TV_TRUE: ch='1'; break; + case tvt::tv_enumt::TV_UNKNOWN: ch='0'; break; + } + // clang-format on + + result = ch + result; + } + + return result; +} + +// bv_get_rec + +exprt bv_pointers_widet::bv_get_rec( + const exprt &expr, + const bvt &bv, + std::size_t offset) const +{ + const typet &type = expr.type(); + + if(type.id() != ID_pointer) + return SUB::bv_get_rec(expr, bv, offset); + + const pointer_typet &pt = to_pointer_type(type); + const std::size_t bits = boolbv_width(pt); + bvt value_bv(bv.begin() + offset, bv.begin() + offset + bits); + + std::string value = bits_to_string(prop, value_bv); + + const irep_idt bvrep = make_bvrep( + bits, + [&value](std::size_t i) { return value[value.size() - 1 - i] == '1'; }); + + // The bitvector holds the abstract pointer index. + // Look up the (object, offset) pair recorded at + // encode time. + mp_integer idx_val = binary2integer(value, false); + + auto it = index_to_object_offset.find(idx_val); + if(it != index_to_object_offset.end()) + { + pointer_logict::pointert pointer{it->second.first, it->second.second}; + // Try to compute flat address for the hex display + irep_idt display_bvrep = bvrep; + auto base_it = object_base_address.find(it->second.first); + if(base_it != object_base_address.end()) + { + std::string base_str = bits_to_string(prop, base_it->second); + mp_integer base_val = binary2integer(base_str, false); + mp_integer flat_addr = base_val + it->second.second; + display_bvrep = integer2bvrep(flat_addr, bits); + } + return annotated_pointer_constant_exprt{ + display_bvrep, pointer_logic.pointer_expr(pointer, pt)}; + } + + // For indices not tracked by encode(), try encode_fresh's + // bitvector map and read the model values. + auto it2 = index_to_bv_object_offset.find(idx_val); + if(it2 != index_to_bv_object_offset.end()) + { + std::string obj_str = bits_to_string(prop, it2->second.first); + std::string off_str = bits_to_string(prop, it2->second.second); + mp_integer obj_val = binary2integer(obj_str, false); + mp_integer off_val = binary2integer(off_str, false); + pointer_logict::pointert pointer{obj_val, off_val}; + // Try to compute flat address + irep_idt display_bvrep = bvrep; + auto base_it = object_base_address.find(obj_val); + if(base_it != object_base_address.end()) + { + std::string base_str = bits_to_string(prop, base_it->second); + mp_integer base_val = binary2integer(base_str, false); + mp_integer flat_addr = base_val + off_val; + display_bvrep = integer2bvrep(flat_addr, bits); + } + return annotated_pointer_constant_exprt{ + display_bvrep, pointer_logic.pointer_expr(pointer, pt)}; + } + + // Truly unknown index — return raw constant. + return constant_exprt(bvrep, type); +} + +// prepare_postponed_is_dynamic_object + +std::pair bv_pointers_widet::prepare_postponed_is_dynamic_object( + std::vector &placeholders) const +{ + PRECONDITION(placeholders.empty()); + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + + exprt::operandst dynamic_objects_ops; + exprt::operandst not_dynamic_objects_ops; + dynamic_objects_ops.reserve(objects.size()); + not_dynamic_objects_ops.reserve(objects.size()); + + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + const exprt &expr = *it; + + // In the wide encoding the object identity is the + // object number itself as a constant bitvector. + pointer_typet pt = pointer_type(expr.type()); + bvt bv = bv_utils.build_constant(number, get_object_width(pt)); + + exprt::operandst conjuncts; + conjuncts.reserve(bv.size()); + placeholders.reserve(bv.size()); + for(std::size_t i = 0; i < bv.size(); ++i) + { + if(placeholders.size() <= i) + { + placeholders.push_back(symbol_exprt{std::to_string(i), bool_typet{}}); + } + + POSTCONDITION(bv[i].is_constant()); + if(bv[i].is_true()) + conjuncts.emplace_back(placeholders[i]); + else + conjuncts.emplace_back(not_exprt{placeholders[i]}); + } + + if(pointer_logic.is_dynamic_object(expr)) + dynamic_objects_ops.push_back(conjunction(conjuncts)); + else + { + not_dynamic_objects_ops.push_back(conjunction(conjuncts)); + } + } + + exprt dynamic_objects = disjunction(dynamic_objects_ops); + exprt not_dynamic_objects = disjunction(not_dynamic_objects_ops); + + bdd_exprt bdd_converter; + bddt dyn_bdd = bdd_converter.from_expr(dynamic_objects); + bddt not_dyn_bdd = bdd_converter.from_expr(not_dynamic_objects); + + return {bdd_converter.as_expr(dyn_bdd), bdd_converter.as_expr(not_dyn_bdd)}; +} + +// prepare_postponed_object_size + +std::unordered_map +bv_pointers_widet::prepare_postponed_object_size( + std::vector &placeholders) const +{ + PRECONDITION(placeholders.empty()); + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + + std::unordered_map per_size_object_ops; + + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + const exprt &expr = *it; + + if(expr.id() != ID_symbol && expr.id() != ID_string_constant) + { + continue; + } + + const auto size_expr = size_of_expr(expr.type(), ns); + if(!size_expr.has_value()) + continue; + + pointer_typet pt = pointer_type(expr.type()); + bvt bv = bv_utils.build_constant(number, get_object_width(pt)); + + exprt::operandst conjuncts; + conjuncts.reserve(bv.size()); + placeholders.reserve(bv.size()); + for(std::size_t i = 0; i < bv.size(); ++i) + { + if(placeholders.size() <= i) + { + placeholders.push_back(symbol_exprt{std::to_string(i), bool_typet{}}); + } + + POSTCONDITION(bv[i].is_constant()); + if(bv[i].is_true()) + conjuncts.emplace_back(placeholders[i]); + else + conjuncts.emplace_back(not_exprt{placeholders[i]}); + } + + per_size_object_ops[size_expr.value()].push_back(conjunction(conjuncts)); + } + + std::unordered_map result; + for(const auto &size_entry : per_size_object_ops) + { + exprt all_objects_this_size = disjunction(size_entry.second); + bdd_exprt bdd_converter; + bddt bdd = bdd_converter.from_expr(all_objects_this_size); + + result.emplace(size_entry.first, bdd_converter.as_expr(bdd)); + } + + return result; +} + +// finish_eager_conversion + +void bv_pointers_widet::finish_eager_conversion() +{ + // Pre-read object bitvectors for all postponed entries + // BEFORE array theory finalization. read_object() creates + // solver-level array reads that must be registered before + // arrayst processes consistency constraints. + std::vector preread_obj; + std::vector preread_off; + preread_obj.reserve(postponed_list.size()); + preread_off.reserve(postponed_list.size()); + for(const postponedt &postponed : postponed_list) + { + if(postponed.expr.id() == ID_is_dynamic_object) + { + const auto &type = + to_pointer_type(to_unary_expr(postponed.expr).op().type()); + preread_obj.push_back(read_object(postponed.op, type)); + preread_off.push_back(bvt{}); + } + else if(expr_try_dynamic_cast(postponed.expr)) + { + const auto &type = + to_pointer_type(expr_try_dynamic_cast(postponed.expr) + ->pointer() + .type()); + preread_obj.push_back(read_object(postponed.op, type)); + preread_off.push_back(bvt{}); + } + else if( + postponed.expr.id() == ID_typecast && + to_typecast_expr(postponed.expr).op().type().id() == ID_pointer) + { + // Pointer-to-integer cast: pre-read object and offset + const auto &ptr_type = + to_pointer_type(to_typecast_expr(postponed.expr).op().type()); + preread_obj.push_back(read_object(postponed.op, ptr_type)); + preread_off.push_back(read_offset(postponed.op, ptr_type)); + } + else + UNREACHABLE; + } + + // Now finalize arrays (and everything else). + SUB::finish_eager_conversion(); + + // Build BDD-optimized Boolean formulas lazily. + std::pair is_dynamic_expr = {nil_exprt{}, nil_exprt{}}; + std::vector is_dynamic_placeholders; + + std::unordered_map object_sizes; + std::vector object_size_placeholders; + + std::size_t postponed_idx = 0; + + for(const postponedt &postponed : postponed_list) + { + const bvt &saved_obj_bv = preread_obj[postponed_idx]; + ++postponed_idx; + + if(postponed.expr.id() == ID_is_dynamic_object) + { + if(is_dynamic_expr.first.is_nil()) + { + is_dynamic_expr = + prepare_postponed_is_dynamic_object(is_dynamic_placeholders); + } + + POSTCONDITION(saved_obj_bv.size() == is_dynamic_placeholders.size()); + replace_mapt replacements; + for(std::size_t i = 0; i < saved_obj_bv.size(); ++i) + { + replacements.emplace( + is_dynamic_placeholders[i], literal_exprt{saved_obj_bv[i]}); + } + exprt is_dyn = is_dynamic_expr.first; + replace_expr(replacements, is_dyn); + exprt is_not_dyn = is_dynamic_expr.second; + replace_expr(replacements, is_not_dyn); + + PRECONDITION(postponed.bv.size() == 1); + prop.l_set_to_true( + prop.limplies(convert_bv(is_dyn)[0], postponed.bv.front())); + prop.l_set_to_true( + prop.limplies(convert_bv(is_not_dyn)[0], !postponed.bv.front())); + } + else if( + const auto postponed_object_size = + expr_try_dynamic_cast(postponed.expr)) + { + if(object_sizes.empty()) + { + object_sizes = prepare_postponed_object_size(object_size_placeholders); + } + + // we might not have any usable objects + if(object_size_placeholders.empty()) + continue; + + POSTCONDITION(saved_obj_bv.size() == object_size_placeholders.size()); + replace_mapt replacements; + for(std::size_t i = 0; i < saved_obj_bv.size(); ++i) + { + replacements.emplace( + object_size_placeholders[i], literal_exprt{saved_obj_bv[i]}); + } + + for(const auto &object_size_entry : object_sizes) + { + const exprt object_size = typecast_exprt::conditional_cast( + object_size_entry.first, postponed_object_size->type()); + bvt size_bv = convert_bv(object_size); + POSTCONDITION(size_bv.size() == postponed.bv.size()); + + exprt all_objects_this_size = object_size_entry.second; + replace_expr(replacements, all_objects_this_size); + + literalt l1 = convert_bv(all_objects_this_size)[0]; + if(l1.is_true()) + { + for(std::size_t i = 0; i < postponed.bv.size(); ++i) + { + prop.set_equal(postponed.bv[i], size_bv[i]); + } + break; + } + else if(l1.is_false()) + continue; + + for(std::size_t i = 0; i < postponed.bv.size(); ++i) + { + prop.lcnf({!l1, !postponed.bv[i], size_bv[i]}); + prop.lcnf({!l1, postponed.bv[i], !size_bv[i]}); + } + } + } + else if( + postponed.expr.id() == ID_typecast && + to_typecast_expr(postponed.expr).op().type().id() == ID_pointer) + { + // Pointer-to-integer cast: compute base[object] + offset + // using a MUX chain over all known objects. + const bvt &obj_bv = saved_obj_bv; + const bvt &off_bv = preread_off[postponed_idx - 1]; + const std::size_t ptr_width = config.ansi_c.pointer_width; + const std::size_t result_width = postponed.bv.size(); + + bvt result = bv_utils.build_constant(0, result_width); + + const auto &objects = pointer_logic.objects; + std::size_t obj_number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++obj_number) + { + bvt obj_const = bv_utils.build_constant(obj_number, ptr_width); + literalt is_this_obj = bv_utils.equal(obj_bv, obj_const); + + if(is_this_obj.is_false()) + continue; + + bvt base = get_object_base_address(obj_number, ptr_width); + bvt flat = bv_utils.add(base, off_bv); + bvt flat_ext = bv_utils.zero_extension(flat, result_width); + + result = bv_utils.select(is_this_obj, flat_ext, result); + } + + // Constrain postponed.bv == result + for(std::size_t i = 0; i < result_width; ++i) + prop.set_equal(postponed.bv[i], result[i]); + } + else + UNREACHABLE; + } + + // Add non-overlapping constraints for base addresses AFTER + // all P2I casts have been processed (which creates the base + // address variables). + { + const auto &objects = pointer_logic.objects; + const std::size_t ptr_width = config.ansi_c.pointer_width; + + // Constrain NULL object to have base address 0 + // Always create the NULL base address so it participates + // in non-overlapping constraints. + bvt null_base = + get_object_base_address(pointer_logic.get_null_object(), ptr_width); + bvt zero_bv = bv_utils.build_constant(0, ptr_width); + for(std::size_t i = 0; i < ptr_width; ++i) + prop.set_equal(null_base[i], zero_bv[i]); + + // Collect all objects with base addresses + std::vector> obj_sizes; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + // Skip integer-address objects — they may overlap with + // regular objects (the integer address might point into + // an existing object). + if(integer_address_objects.count(number)) + continue; + const exprt &expr = *it; + auto size_opt = pointer_offset_size(expr.type(), ns); + mp_integer size = + (size_opt.has_value() && *size_opt > 0) ? *size_opt : mp_integer{1}; + obj_sizes.push_back({mp_integer(number), size}); + + // Constrain base address to avoid unsigned overflow: + // base + size <= 2^width (i.e., base <= MAX - size + 1) + bvt base = get_object_base_address(mp_integer(number), ptr_width); + mp_integer max_base = power(2, ptr_width) - size; + bvt max_base_bv = bv_utils.build_constant(max_base, ptr_width); + prop.l_set_to_true(bv_utils.rel( + base, ID_le, max_base_bv, bv_utilst::representationt::UNSIGNED)); + + // Also constrain base + size to fit in the positive range + // of a signed integer of pointer width. This ensures that + // pointer-to-integer casts and subsequent arithmetic don't + // overflow signed types. On real hardware, user-space + // addresses are in the lower half of the address space. + // Use 31 bits (not ptr_width-1) to also handle casts to + // 32-bit int on 64-bit platforms. + std::size_t addr_bits = std::min(ptr_width - 1, std::size_t{31}); + mp_integer signed_max = power(2, addr_bits) - 1 - size; + if(signed_max > 0) + { + bvt signed_max_bv = bv_utils.build_constant(signed_max, ptr_width); + prop.l_set_to_true(bv_utils.rel( + base, ID_le, signed_max_bv, bv_utilst::representationt::UNSIGNED)); + } + + // Constrain alignment: base addresses are aligned to the + // natural alignment of the object type. For most types + // this is min(size, pointer_width/8). + mp_integer alignment = std::min(size, mp_integer(ptr_width / 8)); + // Round down to power of 2 + mp_integer align_pow2 = 1; + while(align_pow2 * 2 <= alignment) + align_pow2 *= 2; + if(align_pow2 > 1) + { + // base % align_pow2 == 0, i.e., low bits are zero + std::size_t align_bits = 0; + mp_integer tmp = align_pow2; + while(tmp > 1) + { + align_bits++; + tmp /= 2; + } + for(std::size_t i = 0; i < align_bits && i < ptr_width; ++i) + prop.l_set_to_true(!base[i]); + } + } + + for(std::size_t i = 0; i < obj_sizes.size(); ++i) + { + bvt base_i = get_object_base_address(obj_sizes[i].first, ptr_width); + + for(std::size_t j = i + 1; j < obj_sizes.size(); ++j) + { + bvt base_j = get_object_base_address(obj_sizes[j].first, ptr_width); + + bvt end_i = bv_utils.add( + base_i, bv_utils.build_constant(obj_sizes[i].second, ptr_width)); + literalt i_before_j = bv_utils.rel( + end_i, ID_le, base_j, bv_utilst::representationt::UNSIGNED); + + bvt end_j = bv_utils.add( + base_j, bv_utils.build_constant(obj_sizes[j].second, ptr_width)); + literalt j_before_i = bv_utils.rel( + end_j, ID_le, base_i, bv_utilst::representationt::UNSIGNED); + + // Non-overlapping ranges + literalt range_lit = prop.lor(i_before_j, j_before_i); + prop.l_set_to_true(range_lit); + // Redundant but helps the solver: distinct base addresses + literalt neq_lit = !bv_utils.equal(base_i, base_j); + prop.l_set_to_true(neq_lit); + + // Explicit pairwise inequality for all offsets within + // the smaller object. The range constraint is logically + // sufficient but the SAT solver cannot derive these + // inequalities from it. + mp_integer max_off = std::max(obj_sizes[i].second, obj_sizes[j].second); + for(mp_integer k = 0; k < max_off; ++k) + { + bvt shifted_i = + bv_utils.add(base_i, bv_utils.build_constant(k, ptr_width)); + prop.l_set_to_true(!bv_utils.equal(shifted_i, base_j)); + bvt shifted_j = + bv_utils.add(base_j, bv_utils.build_constant(k, ptr_width)); + prop.l_set_to_true(!bv_utils.equal(shifted_j, base_i)); + } + } + } + } + + postponed_list.clear(); +} diff --git a/src/solvers/flattening/bv_pointers_wide.h b/src/solvers/flattening/bv_pointers_wide.h new file mode 100644 index 00000000000..3d4cb9bf4a8 --- /dev/null +++ b/src/solvers/flattening/bv_pointers_wide.h @@ -0,0 +1,169 @@ +/*******************************************************************\ + +Module: + +Author: CBMC Contributors + +\*******************************************************************/ + +/// \file +/// Pointer encoding using solver-level maps + +#ifndef CPROVER_SOLVERS_FLATTENING_BV_POINTERS_WIDE_H +#define CPROVER_SOLVERS_FLATTENING_BV_POINTERS_WIDE_H + +#include "boolbv.h" +#include "pointer_logic.h" + +#include +#include + +/// Encodes pointer expressions using solver-level arrays (maps) instead +/// of bit-packing. Each pointer-typed expression is represented as an +/// integer index (using the full pointer width). Two global array +/// symbols -- one for the object id and one for the byte offset -- +/// map that index to the object number and offset respectively. +class bv_pointers_widet : public boolbvt +{ +public: + bv_pointers_widet( + const namespacet &, + propt &, + message_handlert &, + bool get_array_constraints = false); + + void finish_eager_conversion() override; + + endianness_mapt + endianness_map(const typet &, bool little_endian) const override; + +protected: + pointer_logict pointer_logic; + + // NOLINTNEXTLINE(readability/identifiers) + typedef boolbvt SUB; + + /// Width helpers -- all widths equal the pointer width. + std::size_t get_object_width(const pointer_typet &) const; + std::size_t get_offset_width(const pointer_typet &) const; + std::size_t get_address_width(const pointer_typet &) const; + + // Solver-level array symbols for the object and offset maps. + symbol_exprt object_map; + symbol_exprt offset_map; + + /// Solver-level array mapping object numbers to symbolic base addresses. + /// Used for pointer-to-integer casts: the integer value is + /// base_address_map[object] + offset. + symbol_exprt base_address_map; + + /// Cache of base address bitvectors per object number. + /// Populated lazily when pointer-to-integer casts are encountered. + std::map object_base_address; + + /// Objects created from integer-to-pointer casts of constants. + /// These are excluded from non-overlapping constraints because + /// the integer address might point into an existing object. + std::set integer_address_objects; + + /// Counter for allocating fresh pointer indices. + mp_integer next_bv_pointer_index; + + /// Cache of encode() results per object number, so that the same + /// object always gets the same pointer index (e.g., NULL). + std::map encode_cache; + + /// Map from pointer index to (object, offset) for model + /// extraction in bv_get_rec. Populated by encode(). + std::map> + index_to_object_offset; + + /// Map from pointer index to (object_bv, offset_bv) for model + /// extraction of encode_fresh pointers. + std::map> index_to_bv_object_offset; + + /// Allocate a fresh index bitvector, constrain the maps, and return + /// the index as a bvt. + [[nodiscard]] bvt encode(const mp_integer &object, const pointer_typet &); + + /// Like encode but for a fresh symbolic pointer index. + [[nodiscard]] bvt encode_fresh( + const bvt &object_bv, + const bvt &offset_bv, + const pointer_typet &type); + + virtual bvt convert_pointer_type(const exprt &); + + [[nodiscard]] virtual bvt add_addr(const exprt &); + + // overloading + literalt convert_rest(const exprt &) override; + bvt convert_bitvector(const exprt &) override; + bool boolbv_set_equality_to_true(const equal_exprt &expr) override; + bvt convert_byte_extract(const byte_extract_exprt &expr) override; + bvt convert_byte_update(const byte_update_exprt &expr) override; + + exprt + bv_get_rec(const exprt &, const bvt &, std::size_t offset) const override; + + [[nodiscard]] std::optional convert_address_of_rec(const exprt &); + + [[nodiscard]] bvt + offset_arithmetic(const pointer_typet &, const bvt &, const mp_integer &); + [[nodiscard]] bvt offset_arithmetic( + const pointer_typet &, + const bvt &, + const mp_integer &factor, + const exprt &index); + [[nodiscard]] bvt offset_arithmetic( + const pointer_typet &type, + const bvt &bv, + const exprt &factor, + const exprt &index); + [[nodiscard]] bvt offset_arithmetic( + const pointer_typet &, + const bvt &, + const mp_integer &factor, + const bvt &index_bv); + + struct postponedt + { + bvt bv, op; + exprt expr; + + postponedt(bvt _bv, bvt _op, exprt _expr) + : bv(std::move(_bv)), op(std::move(_op)), expr(std::move(_expr)) + { + } + }; + + typedef std::list postponed_listt; + postponed_listt postponed_list; + + /// Build a constant expression from a pointer index. + exprt index_to_expr(const mp_integer &index, const pointer_typet &type) const; + + /// Read the object number for a pointer whose bitvector is \p bv. + bvt read_object(const bvt &bv, const pointer_typet &type); + + /// Read the offset for a pointer whose bitvector is \p bv. + bvt read_offset(const bvt &bv, const pointer_typet &type); + + /// Get or create a symbolic base address bitvector for the given + /// object number. Used for pointer-to-integer casts. + bvt get_object_base_address(const mp_integer &object, std::size_t width); + + /// Create Boolean functions describing all dynamic and all + /// not-dynamic object encodings over \p placeholders as input + /// Boolean variables representing object bits. + std::pair prepare_postponed_is_dynamic_object( + std::vector &placeholders) const; + + /// Create Boolean functions describing all objects of each + /// known object size over \p placeholders as input Boolean + /// variables representing object bits. + std::unordered_map + prepare_postponed_object_size(std::vector &placeholders) const; +}; + +#endif // CPROVER_SOLVERS_FLATTENING_BV_POINTERS_WIDE_H diff --git a/src/solvers/refinement/bv_refinement.h b/src/solvers/refinement/bv_refinement.h index c26bb556f41..279f05a79d8 100644 --- a/src/solvers/refinement/bv_refinement.h +++ b/src/solvers/refinement/bv_refinement.h @@ -16,38 +16,47 @@ Author: Daniel Kroening, kroening@kroening.com #define MAX_STATE 10000 -class bv_refinementt:public bv_pointerst +/// Configuration and info for the refinement solver. +/// Separated from the class template so callers don't need +/// to know the template parameter. +struct bv_refinement_configt +{ + bool output_xml = false; + unsigned max_node_refinement = 5; + bool refine_arrays = true; + bool refine_arithmetic = true; +}; + +struct bv_refinement_infot : public bv_refinement_configt +{ + const namespacet *ns = nullptr; + propt *prop = nullptr; + message_handlert *message_handler = nullptr; +}; + +/// Abstraction refinement loop, parameterized on the pointer +/// encoding base class. The default is bv_pointerst (standard +/// bit-packed encoding). Use bv_pointers_widet for the map-based +/// encoding. +template +class bv_refinementt : public bv_pointers_baset { -private: - struct configt - { - bool output_xml = false; - /// Max number of times we refine a formula node - unsigned max_node_refinement=5; - /// Enable array refinement - bool refine_arrays=true; - /// Enable arithmetic refinement - bool refine_arithmetic=true; - }; public: - struct infot:public configt - { - const namespacet *ns=nullptr; - propt *prop=nullptr; - message_handlert *message_handler = nullptr; - }; + using resultt = decision_proceduret::resultt; + + // Keep the old infot name for backward compatibility + using infot = bv_refinement_infot; explicit bv_refinementt(const infot &info); - decision_proceduret::resultt dec_solve(const exprt &) override; + resultt dec_solve(const exprt &) override; std::string decision_procedure_text() const override { - return "refinement loop with "+prop.solver_text(); + return "refinement loop with " + bv_pointers_baset::prop.solver_text(); } protected: - // Refine array void finish_eager_conversion_arrays() override; @@ -62,11 +71,8 @@ class bv_refinementt:public bv_pointerst struct approximationt final { public: - explicit approximationt(std::size_t _id_nr): - no_operands(0), - under_state(0), - over_state(0), - id_nr(_id_nr) + explicit approximationt(std::size_t _id_nr) + : no_operands(0), under_state(0), over_state(0), id_nr(_id_nr) { } @@ -79,7 +85,6 @@ class bv_refinementt:public bv_pointerst std::vector under_assumptions; std::vector over_assumptions; - // the kind of under- or over-approximation unsigned under_state, over_state; std::string as_string() const; @@ -102,14 +107,11 @@ class bv_refinementt:public bv_pointerst void arrays_overapproximated(); void freeze_lazy_constraints(); - // MEMBERS - bool progress; std::list approximations; protected: - // use gui format - configt config_; + bv_refinement_configt config_; }; #endif // CPROVER_SOLVERS_REFINEMENT_BV_REFINEMENT_H diff --git a/src/solvers/refinement/bv_refinement_loop.cpp b/src/solvers/refinement/bv_refinement_loop.cpp index f55b56097be..45e3ccfeda4 100644 --- a/src/solvers/refinement/bv_refinement_loop.cpp +++ b/src/solvers/refinement/bv_refinement_loop.cpp @@ -10,24 +10,28 @@ Author: Daniel Kroening, kroening@kroening.com #include -bv_refinementt::bv_refinementt(const infot &info) - : bv_pointerst(*info.ns, *info.prop, *info.message_handler), +template +bv_refinementt::bv_refinementt(const infot &info) + : bv_pointers_baset(*info.ns, *info.prop, *info.message_handler), progress(false), config_(info) { // check features we need - PRECONDITION(prop.has_assumptions()); - PRECONDITION(prop.has_set_to()); - PRECONDITION(prop.has_is_in_conflict()); + PRECONDITION(this->prop.has_assumptions()); + PRECONDITION(this->prop.has_set_to()); + PRECONDITION(this->prop.has_is_in_conflict()); } -decision_proceduret::resultt bv_refinementt::dec_solve(const exprt &assumption) +template +decision_proceduret::resultt +bv_refinementt::dec_solve(const exprt &assumption) { // do the usual post-processing - log.progress() << "BV-Refinement: post-processing" << messaget::eom; - finish_eager_conversion(); + this->log.progress() << "BV-Refinement: post-processing" << messaget::eom; + this->finish_eager_conversion(); - log.debug() << "Solving with " << prop.solver_text() << messaget::eom; + this->log.debug() << "Solving with " << this->prop.solver_text() + << messaget::eom; unsigned iteration=0; @@ -36,14 +40,15 @@ decision_proceduret::resultt bv_refinementt::dec_solve(const exprt &assumption) { iteration++; - log.progress() << "BV-Refinement: iteration " << iteration << messaget::eom; + this->log.progress() << "BV-Refinement: iteration " << iteration + << messaget::eom; // output the very same information in a structured fashion if(config_.output_xml) { xmlt xml("refinement-iteration"); xml.data=std::to_string(iteration); - log.status() << xml << '\n'; + this->log.status() << xml << '\n'; } switch(prop_solve()) @@ -52,28 +57,31 @@ decision_proceduret::resultt bv_refinementt::dec_solve(const exprt &assumption) check_SAT(); if(!progress) { - log.status() << "BV-Refinement: got SAT, and it simulates => SAT" - << messaget::eom; - log.statistics() << "Total iterations: " << iteration << messaget::eom; + this->log.status() << "BV-Refinement: got SAT, and it simulates => SAT" + << messaget::eom; + this->log.statistics() + << "Total iterations: " << iteration << messaget::eom; return resultt::D_SATISFIABLE; } else - log.progress() << "BV-Refinement: got SAT, and it is spurious, refining" - << messaget::eom; + this->log.progress() + << "BV-Refinement: got SAT, and it is spurious, refining" + << messaget::eom; break; case resultt::D_UNSATISFIABLE: check_UNSAT(); if(!progress) { - log.status() + this->log.status() << "BV-Refinement: got UNSAT, and the proof passes => UNSAT" << messaget::eom; - log.statistics() << "Total iterations: " << iteration << messaget::eom; + this->log.statistics() + << "Total iterations: " << iteration << messaget::eom; return resultt::D_UNSATISFIABLE; } else - log.progress() + this->log.progress() << "BV-Refinement: got UNSAT, and the proof fails, refining" << messaget::eom; break; @@ -84,7 +92,8 @@ decision_proceduret::resultt bv_refinementt::dec_solve(const exprt &assumption) } } -decision_proceduret::resultt bv_refinementt::prop_solve() +template +decision_proceduret::resultt bv_refinementt::prop_solve() { // this puts the underapproximations into effect std::vector assumptions; @@ -101,9 +110,9 @@ decision_proceduret::resultt bv_refinementt::prop_solve() approximation.under_assumptions.end()); } - push(assumptions); - propt::resultt result = prop.prop_solve(assumption_stack); - pop(); + this->push(assumptions); + propt::resultt result = this->prop.prop_solve(this->assumption_stack); + this->pop(); // clang-format off switch(result) @@ -117,7 +126,8 @@ decision_proceduret::resultt bv_refinementt::prop_solve() UNREACHABLE; } -void bv_refinementt::check_SAT() +template +void bv_refinementt::check_SAT() { progress=false; @@ -131,10 +141,17 @@ void bv_refinementt::check_SAT() check_SAT(approximation); } -void bv_refinementt::check_UNSAT() +template +void bv_refinementt::check_UNSAT() { progress=false; for(approximationt &approximation : this->approximations) check_UNSAT(approximation); } + +// Explicit instantiations +#include + +template class bv_refinementt; +template class bv_refinementt; diff --git a/src/solvers/refinement/refine_arithmetic.cpp b/src/solvers/refinement/refine_arithmetic.cpp index 9ffb093e269..d5ef588a3be 100644 --- a/src/solvers/refinement/refine_arithmetic.cpp +++ b/src/solvers/refinement/refine_arithmetic.cpp @@ -22,37 +22,44 @@ Author: Daniel Kroening, kroening@kroening.com #define MAX_INTEGER_UNDERAPPROX 3 #define MAX_FLOAT_UNDERAPPROX 10 -void bv_refinementt::approximationt::add_over_assumption(literalt l) +template +void bv_refinementt::approximationt::add_over_assumption( + literalt l) { // if it's a constant already, give up if(!l.is_constant()) over_assumptions.push_back(literal_exprt(l)); } -void bv_refinementt::approximationt::add_under_assumption(literalt l) +template +void bv_refinementt::approximationt::add_under_assumption( + literalt l) { // if it's a constant already, give up if(!l.is_constant()) under_assumptions.push_back(literal_exprt(l)); } -bvt bv_refinementt::convert_floatbv_op(const ieee_float_op_exprt &expr) +template +bvt bv_refinementt::convert_floatbv_op( + const ieee_float_op_exprt &expr) { if(!config_.refine_arithmetic) - return SUB::convert_floatbv_op(expr); + return bv_pointers_baset::convert_floatbv_op(expr); if(expr.type().id() != ID_floatbv) - return SUB::convert_floatbv_op(expr); + return bv_pointers_baset::convert_floatbv_op(expr); bvt bv; add_approximation(expr, bv); return bv; } -bvt bv_refinementt::convert_mult(const mult_exprt &expr) +template +bvt bv_refinementt::convert_mult(const mult_exprt &expr) { if(!config_.refine_arithmetic || expr.type().id()==ID_fixedbv) - return SUB::convert_mult(expr); + return bv_pointers_baset::convert_mult(expr); // we catch any multiplication // unless it involves a constant @@ -69,7 +76,7 @@ bvt bv_refinementt::convert_mult(const mult_exprt &expr) // we keep multiplication by a constant for integers if(type.id()!=ID_floatbv) if(operands[0].is_constant() || operands[1].is_constant()) - return SUB::convert_mult(expr); + return bv_pointers_baset::convert_mult(expr); bvt bv; approximationt &a=add_approximation(expr, bv); @@ -79,28 +86,29 @@ bvt bv_refinementt::convert_mult(const mult_exprt &expr) type.id()==ID_unsignedbv) { // x*0==0 and 0*x==0 - literalt op0_zero=bv_utils.is_zero(a.op0_bv); - literalt op1_zero=bv_utils.is_zero(a.op1_bv); - literalt res_zero=bv_utils.is_zero(a.result_bv); - prop.l_set_to_true( - prop.limplies(prop.lor(op0_zero, op1_zero), res_zero)); + literalt op0_zero = this->bv_utils.is_zero(a.op0_bv); + literalt op1_zero = this->bv_utils.is_zero(a.op1_bv); + literalt res_zero = this->bv_utils.is_zero(a.result_bv); + this->prop.l_set_to_true( + this->prop.limplies(this->prop.lor(op0_zero, op1_zero), res_zero)); // x*1==x and 1*x==x - literalt op0_one=bv_utils.is_one(a.op0_bv); - literalt op1_one=bv_utils.is_one(a.op1_bv); - literalt res_op0=bv_utils.equal(a.op0_bv, a.result_bv); - literalt res_op1=bv_utils.equal(a.op1_bv, a.result_bv); - prop.l_set_to_true(prop.limplies(op0_one, res_op1)); - prop.l_set_to_true(prop.limplies(op1_one, res_op0)); + literalt op0_one = this->bv_utils.is_one(a.op0_bv); + literalt op1_one = this->bv_utils.is_one(a.op1_bv); + literalt res_op0 = this->bv_utils.equal(a.op0_bv, a.result_bv); + literalt res_op1 = this->bv_utils.equal(a.op1_bv, a.result_bv); + this->prop.l_set_to_true(this->prop.limplies(op0_one, res_op1)); + this->prop.l_set_to_true(this->prop.limplies(op1_one, res_op0)); } return bv; } -bvt bv_refinementt::convert_div(const div_exprt &expr) +template +bvt bv_refinementt::convert_div(const div_exprt &expr) { if(!config_.refine_arithmetic || expr.type().id()==ID_fixedbv) - return SUB::convert_div(expr); + return bv_pointers_baset::convert_div(expr); // we catch any division // unless it's integer division by a constant @@ -108,17 +116,18 @@ bvt bv_refinementt::convert_div(const div_exprt &expr) PRECONDITION(expr.operands().size()==2); if(expr.op1().is_constant()) - return SUB::convert_div(expr); + return bv_pointers_baset::convert_div(expr); bvt bv; add_approximation(expr, bv); return bv; } -bvt bv_refinementt::convert_mod(const mod_exprt &expr) +template +bvt bv_refinementt::convert_mod(const mod_exprt &expr) { if(!config_.refine_arithmetic || expr.type().id()==ID_fixedbv) - return SUB::convert_mod(expr); + return bv_pointers_baset::convert_mod(expr); // we catch any mod // unless it's integer + constant @@ -126,39 +135,41 @@ bvt bv_refinementt::convert_mod(const mod_exprt &expr) PRECONDITION(expr.operands().size()==2); if(expr.op1().is_constant()) - return SUB::convert_mod(expr); + return bv_pointers_baset::convert_mod(expr); bvt bv; add_approximation(expr, bv); return bv; } -void bv_refinementt::get_values(approximationt &a) +template +void bv_refinementt::get_values(approximationt &a) { std::size_t o=a.expr.operands().size(); if(o==1) - a.op0_value=get_value(a.op0_bv); + a.op0_value = this->get_value(a.op0_bv); else if(o==2) { - a.op0_value=get_value(a.op0_bv); - a.op1_value=get_value(a.op1_bv); + a.op0_value = this->get_value(a.op0_bv); + a.op1_value = this->get_value(a.op1_bv); } else if(o==3) { - a.op0_value=get_value(a.op0_bv); - a.op1_value=get_value(a.op1_bv); - a.op2_value=get_value(a.op2_bv); + a.op0_value = this->get_value(a.op0_bv); + a.op1_value = this->get_value(a.op1_bv); + a.op2_value = this->get_value(a.op2_bv); } else UNREACHABLE; - a.result_value=get_value(a.result_bv); + a.result_value = this->get_value(a.result_bv); } /// inspect if satisfying assignment extends to original formula, otherwise /// refine overapproximation -void bv_refinementt::check_SAT(approximationt &a) +template +void bv_refinementt::check_SAT(approximationt &a) { // see if the satisfying assignment is spurious in any way @@ -173,7 +184,7 @@ void bv_refinementt::check_SAT(approximationt &a) // get actual rounding mode constant_exprt rounding_mode_expr = - to_constant_expr(get(float_op.rounding_mode())); + to_constant_expr(this->get(float_op.rounding_mode())); const std::size_t rounding_mode_int = numeric_cast_v(rounding_mode_expr); ieee_floatt::rounding_modet rounding_mode = @@ -205,41 +216,40 @@ void bv_refinementt::check_SAT(approximationt &a) ieee_floatt rr(spec); rr.unpack(a.result_value); - log.debug() << "S1: " << o0 << " " << a.expr.id() << " " << o1 - << " != " << rr << messaget::eom; - log.debug() << "S2: " << integer2binary(a.op0_value, spec.width()) << " " - << a.expr.id() << " " - << integer2binary(a.op1_value, spec.width()) - << "!=" << integer2binary(a.result_value, spec.width()) - << messaget::eom; - log.debug() << "S3: " << integer2binary(a.op0_value, spec.width()) << " " - << a.expr.id() << " " - << integer2binary(a.op1_value, spec.width()) - << "==" << integer2binary(result.pack(), spec.width()) - << messaget::eom; + this->log.debug() << "S1: " << o0 << " " << a.expr.id() << " " << o1 + << " != " << rr << messaget::eom; + this->log.debug() << "S2: " << integer2binary(a.op0_value, spec.width()) + << " " << a.expr.id() << " " + << integer2binary(a.op1_value, spec.width()) + << "!=" << integer2binary(a.result_value, spec.width()) + << messaget::eom; + this->log.debug() << "S3: " << integer2binary(a.op0_value, spec.width()) + << " " << a.expr.id() << " " + << integer2binary(a.op1_value, spec.width()) + << "==" << integer2binary(result.pack(), spec.width()) + << messaget::eom; #endif if(a.over_stateprop); float_utils.spec=spec; float_utils.rounding_mode_bits.set(rounding_mode); - literalt op0_equal= - bv_utils.equal(a.op0_bv, float_utils.build_constant(o0)); + literalt op0_equal = + this->bv_utils.equal(a.op0_bv, float_utils.build_constant(o0)); - literalt op1_equal= - bv_utils.equal(a.op1_bv, float_utils.build_constant(o1)); + literalt op1_equal = + this->bv_utils.equal(a.op1_bv, float_utils.build_constant(o1)); - literalt result_equal= - bv_utils.equal(a.result_bv, float_utils.build_constant(result)); + literalt result_equal = + this->bv_utils.equal(a.result_bv, float_utils.build_constant(result)); - literalt op0_and_op1_equal= - prop.land(op0_equal, op1_equal); + literalt op0_and_op1_equal = this->prop.land(op0_equal, op1_equal); - prop.l_set_to_true( - prop.limplies(op0_and_op1_equal, result_equal)); + this->prop.l_set_to_true( + this->prop.limplies(op0_and_op1_equal, result_equal)); } else { @@ -249,7 +259,7 @@ void bv_refinementt::check_SAT(approximationt &a) a.over_state=MAX_STATE; bvt r; - float_utilst float_utils(prop); + float_utilst float_utils(this->prop); float_utils.spec=spec; float_utils.rounding_mode_bits.set(rounding_mode); @@ -267,7 +277,7 @@ void bv_refinementt::check_SAT(approximationt &a) UNREACHABLE; CHECK_RETURN(r.size()==res.size()); - bv_utils.set_equal(r, res); + this->bv_utils.set_equal(r, res); } } else if(type.id()==ID_signedbv || @@ -310,32 +320,35 @@ void bv_refinementt::check_SAT(approximationt &a) bvt r; if(a.expr.id()==ID_mult) { - r=bv_utils.multiplier( - a.op0_bv, a.op1_bv, - a.expr.type().id()==ID_signedbv? - bv_utilst::representationt::SIGNED: - bv_utilst::representationt::UNSIGNED); + r = this->bv_utils.multiplier( + a.op0_bv, + a.op1_bv, + a.expr.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED); } else if(a.expr.id()==ID_div) { - r=bv_utils.divider( - a.op0_bv, a.op1_bv, - a.expr.type().id()==ID_signedbv? - bv_utilst::representationt::SIGNED: - bv_utilst::representationt::UNSIGNED); + r = this->bv_utils.divider( + a.op0_bv, + a.op1_bv, + a.expr.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED); } else if(a.expr.id()==ID_mod) { - r=bv_utils.remainder( - a.op0_bv, a.op1_bv, - a.expr.type().id()==ID_signedbv? - bv_utilst::representationt::SIGNED: - bv_utilst::representationt::UNSIGNED); + r = this->bv_utils.remainder( + a.op0_bv, + a.op1_bv, + a.expr.type().id() == ID_signedbv + ? bv_utilst::representationt::SIGNED + : bv_utilst::representationt::UNSIGNED); } else UNREACHABLE; - bv_utils.set_equal(r, a.result_bv); + this->bv_utils.set_equal(r, a.result_bv); } else UNREACHABLE; @@ -350,8 +363,8 @@ void bv_refinementt::check_SAT(approximationt &a) UNREACHABLE; } - log.status() << "Found spurious '" << a.as_string() << "' (state " - << a.over_state << ")" << messaget::eom; + this->log.status() << "Found spurious '" << a.as_string() << "' (state " + << a.over_state << ")" << messaget::eom; progress=true; if(a.over_state +void bv_refinementt::check_UNSAT(approximationt &a) { // part of the conflict? if(!this->conflicts_with(a)) return; - log.status() << "Found assumption for '" << a.as_string() - << "' in proof (state " << a.under_state << ")" << messaget::eom; + this->log.status() << "Found assumption for '" << a.as_string() + << "' in proof (state " << a.under_state << ")" + << messaget::eom; PRECONDITION(!a.under_assumptions.empty()); @@ -380,7 +395,7 @@ void bv_refinementt::check_UNSAT(approximationt &a) a.under_assumptions.reserve(a.op0_bv.size()+a.op1_bv.size()); - float_utilst float_utils(prop); + float_utilst float_utils(this->prop); float_utils.spec=spec; // the fraction without hidden bit @@ -448,11 +463,12 @@ void bv_refinementt::check_UNSAT(approximationt &a) } /// check if an under-approximation is part of the conflict -bool bv_refinementt::conflicts_with(approximationt &a) +template +bool bv_refinementt::conflicts_with(approximationt &a) { for(std::size_t i=0; iprop.is_in_conflict( to_literal_expr(a.under_assumptions[i]).get_literal())) { return true; @@ -462,7 +478,8 @@ bool bv_refinementt::conflicts_with(approximationt &a) return false; } -void bv_refinementt::initialize(approximationt &a) +template +void bv_refinementt::initialize(approximationt &a) { a.over_state=a.under_state=0; @@ -477,41 +494,41 @@ void bv_refinementt::initialize(approximationt &a) a.add_under_assumption(!a.op1_bv[i]); } -bv_refinementt::approximationt & -bv_refinementt::add_approximation( - const exprt &expr, bvt &bv) +template +typename bv_refinementt::approximationt & +bv_refinementt::add_approximation(const exprt &expr, bvt &bv) { - approximations.push_back(approximationt(approximations.size())); + this->approximations.push_back(approximationt(approximations.size())); approximationt &a=approximations.back(); - std::size_t width=boolbv_width(expr.type()); + std::size_t width = this->boolbv_width(expr.type()); PRECONDITION(width!=0); a.expr=expr; - a.result_bv=prop.new_variables(width); + a.result_bv = this->prop.new_variables(width); a.no_operands=expr.operands().size(); - set_frozen(a.result_bv); + this->set_frozen(a.result_bv); if(a.no_operands==1) { - a.op0_bv = convert_bv(to_unary_expr(expr).op()); - set_frozen(a.op0_bv); + a.op0_bv = this->convert_bv(to_unary_expr(expr).op()); + this->set_frozen(a.op0_bv); } else if(a.no_operands==2) { - a.op0_bv = convert_bv(to_binary_expr(expr).op0()); - a.op1_bv = convert_bv(to_binary_expr(expr).op1()); - set_frozen(a.op0_bv); - set_frozen(a.op1_bv); + a.op0_bv = this->convert_bv(to_binary_expr(expr).op0()); + a.op1_bv = this->convert_bv(to_binary_expr(expr).op1()); + this->set_frozen(a.op0_bv); + this->set_frozen(a.op1_bv); } else if(a.no_operands==3) { - a.op0_bv = convert_bv(to_multi_ary_expr(expr).op0()); - a.op1_bv = convert_bv(to_multi_ary_expr(expr).op1()); - a.op2_bv = convert_bv(to_multi_ary_expr(expr).op2()); - set_frozen(a.op0_bv); - set_frozen(a.op1_bv); - set_frozen(a.op2_bv); + a.op0_bv = this->convert_bv(to_multi_ary_expr(expr).op0()); + a.op1_bv = this->convert_bv(to_multi_ary_expr(expr).op1()); + a.op2_bv = this->convert_bv(to_multi_ary_expr(expr).op2()); + this->set_frozen(a.op0_bv); + this->set_frozen(a.op1_bv); + this->set_frozen(a.op2_bv); } else UNREACHABLE; @@ -523,7 +540,14 @@ bv_refinementt::add_approximation( return a; } -std::string bv_refinementt::approximationt::as_string() const +template +std::string bv_refinementt::approximationt::as_string() const { return std::to_string(id_nr)+"/"+id2string(expr.id()); } + +// Explicit instantiations +#include + +template class bv_refinementt; +template class bv_refinementt; diff --git a/src/solvers/refinement/refine_arrays.cpp b/src/solvers/refinement/refine_arrays.cpp index 1456410dc7f..8d94c610135 100644 --- a/src/solvers/refinement/refine_arrays.cpp +++ b/src/solvers/refinement/refine_arrays.cpp @@ -18,22 +18,24 @@ Author: Daniel Kroening, kroening@kroening.com #include /// generate array constraints -void bv_refinementt::finish_eager_conversion_arrays() +template +void bv_refinementt::finish_eager_conversion_arrays() { - collect_indices(); + this->collect_indices(); // at this point all indices should in the index set // just build the data structure - update_index_map(true); + this->update_index_map(true); // we don't actually add any constraints - lazy_arrays=config_.refine_arrays; - add_array_constraints(); + this->lazy_arrays = config_.refine_arrays; + this->add_array_constraints(); freeze_lazy_constraints(); } /// check whether counterexample is spurious -void bv_refinementt::arrays_overapproximated() +template +void bv_refinementt::arrays_overapproximated() { if(!config_.refine_arrays) return; @@ -41,21 +43,23 @@ void bv_refinementt::arrays_overapproximated() unsigned nb_active=0; // Evaluate all lazy constraints while the solver is still in SAT state. - // We must not interleave get_value() calls with modifications to the + // We must not interleave this->get_value() calls with modifications to the // main solver (prop) because some SAT solvers (e.g., CaDiCaL) only // permit reading model values while in the satisfied state, and adding // clauses invalidates that state. + // Collect constraints to check with their iterators + using list_iterator_t = decltype(this->lazy_array_constraints.begin()); struct evaluated_constraintt { exprt constraint; exprt simplified; - std::list::iterator list_it; + list_iterator_t list_it; }; std::vector to_check; - to_check.reserve(lazy_array_constraints.size()); + to_check.reserve(this->lazy_array_constraints.size()); - for(auto it = lazy_array_constraints.begin(); - it != lazy_array_constraints.end(); + for(auto it = this->lazy_array_constraints.begin(); + it != this->lazy_array_constraints.end(); ++it) { const exprt ¤t = it->lazy; @@ -65,7 +69,7 @@ void bv_refinementt::arrays_overapproximated() if(current.id()==ID_implies) { implies_exprt imp=to_implies_expr(current); - exprt implies_simplified = get_value(imp.op0()); + exprt implies_simplified = this->get_value(imp.op0()); if(implies_simplified==false_exprt()) { continue; @@ -77,23 +81,23 @@ void bv_refinementt::arrays_overapproximated() or_exprt orexp=to_or_expr(current); INVARIANT( orexp.operands().size() == 2, "only treats the case of a binary or"); - exprt o1 = get_value(orexp.op0()); - exprt o2 = get_value(orexp.op1()); + exprt o1 = this->get_value(orexp.op0()); + exprt o2 = this->get_value(orexp.op1()); if(o1==true_exprt() || o2 == true_exprt()) { continue; } } - to_check.push_back({current, get_value(current), it}); + to_check.push_back({current, this->get_value(current), it}); } // Now check each evaluated constraint using a local solver and activate // violated ones. This phase may modify the main solver (prop). for(auto &entry : to_check) { - satcheck_no_simplifiert sat_check{log.get_message_handler()}; - bv_pointerst solver{ns, sat_check, log.get_message_handler()}; + satcheck_no_simplifiert sat_check{this->log.get_message_handler()}; + bv_pointerst solver{this->ns, sat_check, this->log.get_message_handler()}; solver.unbounded_array = bv_pointerst::unbounded_arrayt::U_ALL; solver << entry.simplified; @@ -103,48 +107,55 @@ void bv_refinementt::arrays_overapproximated() case decision_proceduret::resultt::D_SATISFIABLE: break; case decision_proceduret::resultt::D_UNSATISFIABLE: - prop.l_set_to_true(convert(entry.constraint)); + this->prop.l_set_to_true(this->convert(entry.constraint)); nb_active++; - lazy_array_constraints.erase(entry.list_it); + this->lazy_array_constraints.erase(entry.list_it); break; case decision_proceduret::resultt::D_ERROR: INVARIANT(false, "error in array over approximation check"); } } - log.debug() << "BV-Refinement: " << nb_active - << " array expressions become active" << messaget::eom; - log.debug() << "BV-Refinement: " << lazy_array_constraints.size() - << " inactive array expressions" << messaget::eom; + this->log.debug() << "BV-Refinement: " << nb_active + << " array expressions become active" << messaget::eom; + this->log.debug() << "BV-Refinement: " << this->lazy_array_constraints.size() + << " inactive array expressions" << messaget::eom; if(nb_active > 0) progress=true; } /// freeze symbols for incremental solving -void bv_refinementt::freeze_lazy_constraints() +template +void bv_refinementt::freeze_lazy_constraints() { - if(!lazy_arrays) + if(!this->lazy_arrays) return; - for(const auto &constraint : lazy_array_constraints) + for(const auto &constraint : this->lazy_array_constraints) { // Freeze all symbols in the constraint for(const auto &symbol : find_symbols(constraint.lazy)) { - if(!bv_width.get_width_opt(symbol.type()).has_value()) + if(!this->bv_width.get_width_opt(symbol.type()).has_value()) continue; - const bvt bv=convert_bv(symbol); + const bvt bv = this->convert_bv(symbol); for(const auto &literal : bv) if(!literal.is_constant()) - prop.set_frozen(literal); + this->prop.set_frozen(literal); } // Also freeze the full constraint literal and its sub-expressions - // so that convert() during refinement does not hit eliminated + // so that this->convert() during refinement does not hit eliminated // variables. - literalt constraint_lit = convert(constraint.lazy); + literalt constraint_lit = this->convert(constraint.lazy); if(!constraint_lit.is_constant()) - prop.set_frozen(constraint_lit); + this->prop.set_frozen(constraint_lit); } } + +// Explicit instantiations +#include + +template class bv_refinementt; +template class bv_refinementt; diff --git a/src/solvers/strings/string_refinement.h b/src/solvers/strings/string_refinement.h index 1af64802ea3..82371bddfa6 100644 --- a/src/solvers/strings/string_refinement.h +++ b/src/solvers/strings/string_refinement.h @@ -60,7 +60,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #define DEFAULT_MAX_NB_REFINEMENT std::numeric_limits::max() -class string_refinementt final : public bv_refinementt +class string_refinementt final : public bv_refinementt<> { private: struct configt @@ -71,7 +71,7 @@ class string_refinementt final : public bv_refinementt public: /// string_refinementt constructor arguments - struct infot : public bv_refinementt::infot, public configt + struct infot : public bv_refinement_infot, public configt { }; @@ -90,7 +90,7 @@ class string_refinementt final : public bv_refinementt private: // Base class - typedef bv_refinementt supert; + typedef bv_refinementt<> supert; string_refinementt(const infot &, bool); diff --git a/src/util/lower_byte_operators.cpp b/src/util/lower_byte_operators.cpp index 2ba9ecdb6b2..3c7b4af9fd6 100644 --- a/src/util/lower_byte_operators.cpp +++ b/src/util/lower_byte_operators.cpp @@ -12,6 +12,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "c_types.h" #include "endianness_map.h" #include "expr_util.h" +#include "magic.h" #include "namespace.h" #include "narrow.h" #include "pointer_offset_size.h" @@ -1112,7 +1113,13 @@ static exprt lower_byte_extract_array_vector( else num_elements = numeric_cast(to_vector_type(src.type()).size()); - if(num_elements.has_value()) + // For large arrays, element-by-element expansion creates N expressions + // that are each recursively lowered and simplified, resulting in O(N^2) + // behaviour. Use array_comprehension_exprt (below) instead, which + // represents the same semantics with a single symbolic expression. + if( + num_elements.has_value() && + !(src.type().id() == ID_array && *num_elements > MAX_FLATTENED_ARRAY_SIZE)) { exprt::operandst operands; operands.reserve(*num_elements); diff --git a/unit/Makefile b/unit/Makefile index b34692bdb72..73d16977c44 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -101,6 +101,7 @@ SRC += analyses/ai/ai.cpp \ pointer-analysis/value_set.cpp \ solvers/bdd/miniBDD/miniBDD.cpp \ solvers/flattening/boolbv.cpp \ + solvers/flattening/bv_pointers_wide.cpp \ solvers/flattening/boolbv_enumeration.cpp \ solvers/flattening/boolbv_onehot.cpp \ solvers/flattening/boolbv_update_bit.cpp \ diff --git a/unit/solvers/flattening/bv_pointers_wide.cpp b/unit/solvers/flattening/bv_pointers_wide.cpp new file mode 100644 index 00000000000..9e6d0c214dd --- /dev/null +++ b/unit/solvers/flattening/bv_pointers_wide.cpp @@ -0,0 +1,57 @@ +/*******************************************************************\ + +Module: Unit tests for bv_pointers_widet + +Author: CBMC Contributors + +\*******************************************************************/ + +/// \file +/// Unit tests for bv_pointers_widet + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +SCENARIO("bv_pointers_widet", "[core][solvers][flattening][bv_pointers_widet]") +{ + // Ensure config is set up for pointer width + config.ansi_c.set_ILP32(); + + console_message_handlert message_handler; + message_handler.set_verbosity(0); + + GIVEN("Two pointer symbols to distinct objects") + { + satcheckt satcheck(message_handler); + symbol_tablet symbol_table; + namespacet ns(symbol_table); + bv_pointers_widet bvp(ns, satcheck, message_handler); + + const signedbv_typet int_type(32); + const pointer_typet ptr_type = pointer_type(int_type); + + // &x != &y should be satisfiable + const symbol_exprt x("x", int_type); + const symbol_exprt y("y", int_type); + + const address_of_exprt addr_x(x); + const address_of_exprt addr_y(y); + + THEN("&x == &x is satisfiable") + { + bvp << equal_exprt(addr_x, addr_x); + REQUIRE(bvp() == decision_proceduret::resultt::D_SATISFIABLE); + } + } +}