diff --git a/doc/architectural/wide-pointer-encoding.md b/doc/architectural/wide-pointer-encoding.md new file mode 100644 index 00000000000..955f22e18a6 --- /dev/null +++ b/doc/architectural/wide-pointer-encoding.md @@ -0,0 +1,149 @@ +# Wide Pointer Encoding + +## Overview + +The `--wide-pointer-encoding` option extends CBMC's pointer bitvector +representation to include a flat integer address alongside the standard +object/offset encoding. This enables precise modeling of pointer-to-integer +casts, integer-to-pointer casts, pointer comparison, and address reuse +after free. + +## Pointer Layout + +Standard encoding (64-bit platform): +``` +[offset(56) | object(8)] = 64 bits +``` + +Wide encoding (64-bit platform): +``` +[object(64) | offset(64) | address(64)] = 192 bits +``` + +Each component has the full platform pointer width, removing the +`--object-bits` limitation entirely. + +## What It Fixes + +### Issue #881: Object-bits limitation + +The standard encoding splits the pointer width between object bits and +offset bits. With the default 8 object bits, only 256 objects can be +tracked, and offsets are limited to 56 bits. The wide encoding gives +each component the full 64 bits. + +### Issue #8200: Cast-to-pointer misses error + +```c +char *x = ""; +char *ptr = (char *)0x55a8a2e6b007; +assert(ptr != x); // Standard: SUCCESS (wrong). Wide: FAILURE (correct). +``` + +The wide encoding compares pointers by their flat address, not by +object identity. Two pointers from different objects can have the +same address. + +### Issue #8161: Inconsistent pointer comparison + +```c +int *n = &m; +if((unsigned long)n >= (unsigned long)(-4095)) + assert(...); // Standard: inconsistent. Wide: consistent. +``` + +Pointer-to-integer casts return the flat address, making arithmetic +on pointer-derived integers consistent with plain integer arithmetic. + +### Issue #8103: Integer-to-pointer casting + +```c +uint64_t addr = constrained_to_be_within(buffer); +uint8_t *p = (void *)addr; +assert(*p == expected); // Standard: FAILURE (wrong). Wide: SUCCESS (correct). +``` + +The wide encoding adds address-based dereference dispatch: when a +pointer comes from an integer-to-pointer cast, the dereference reads +from the object whose address range contains the pointer's flat address. + +### Issue #2117: Address reuse after free + +```c +int *x = malloc(sizeof(int)); +free(x); +int *y = malloc(sizeof(int)); +if(x == y) assert(0); // Standard: unreachable. Wide: reachable. +``` + +The wide encoding allows `malloc` to return the same address after +`free` by relaxing the non-overlapping constraint for dynamic objects +and preventing the simplifier from assuming different dynamic objects +have different addresses. + +## Additional Features + +### Stack Layout Modeling (`--model-stack-layout`) + +When combined with `--wide-pointer-encoding`, this option places stack +variables adjacently with architecture-appropriate growth direction +(downward for x86/ARM, upward for HPPA): + +```c +int a, b, c; +assert(&a > &b); // Stack growth direction +assert(&a - &b == sizeof(b)); // Adjacent placement +char *p = (char *)&a; +p[sizeof(a)] = 42; +assert(*(char *)&b == 42); // Buffer overflow modeling +``` + +## Technical Details + +### Pointer Equality and Comparison + +With the wide encoding, `p == q` compares flat addresses (not object +identity). Relational comparisons (`<`, `>`, `<=`, `>=`) also use +flat addresses, making cross-object comparisons well-defined. + +### Pointer-to-Integer Cast (P2I) + +Returns the flat address component of the pointer. This is the +byte-level address that the program would see on a real machine. + +### Integer-to-Pointer Cast (I2P) + +Creates a pointer with fresh object/offset variables. Forward +constraints link the object to the address: `obj == i => base[i] + off == addr`. +Backward constraints are added lazily via a refinement loop in +`dec_solve`: if the SAT model assigns an object inconsistent with +the address range, backward constraints are added and the formula +is re-solved. + +### Byte Operations + +Byte-extract and byte-update on pointer types operate on the 64-bit +address component only. The object/offset bits are internal encoding +and not byte-visible. Pointer arrays store 64-bit addresses per +element, matching symex's byte-level element size. + +### Non-Overlapping Constraints + +Objects are ordered by index with a chain constraint: +`base[0] + size[0] <= base[1] <= ...` (O(n) instead of O(n²)). +For consecutive dynamic objects of the same size, address reuse is +allowed: `base[next] == base[prev] OR base[next] >= end[prev]`. + +### Performance + +The wide encoding creates 3× wider pointer bitvectors, increasing +the SAT formula size. Typical overhead is 1.5× on the regression +suite. Programs with many integer-to-pointer casts may be slower +due to the backward constraint refinement. + +## Command-Line Options + +- `--wide-pointer-encoding`: Enable the wide pointer encoding. +- `--model-stack-layout`: Place stack variables adjacently with + architecture-appropriate growth direction (requires + `--wide-pointer-encoding`). diff --git a/regression/cbmc/Malloc10/test-wide-pointer-encoding.desc b/regression/cbmc/Malloc10/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..f47ae566fb9 --- /dev/null +++ b/regression/cbmc/Malloc10/test-wide-pointer-encoding.desc @@ -0,0 +1,11 @@ +CORE no-new-smt wide-pointer-encoding +main.c + +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Wide pointer encoding: address reuse allows a malloc'd pointer to +equal a previously seen pointer (elem), triggering the assertion. diff --git a/regression/cbmc/Malloc10/test.desc b/regression/cbmc/Malloc10/test.desc index 6e8c19959c9..09997d598e3 100644 --- a/regression/cbmc/Malloc10/test.desc +++ b/regression/cbmc/Malloc10/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE no-new-smt no-wide-pointer-encoding main.c ^EXIT=0$ diff --git a/regression/cbmc/Malloc14/test-wide-pointer-encoding.desc b/regression/cbmc/Malloc14/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..a73f044c9bc --- /dev/null +++ b/regression/cbmc/Malloc14/test-wide-pointer-encoding.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding +main.c + +^EXIT=10$ +^SIGNAL=0$ +^\[main\.assertion\.1\] .* same memory allocated twice: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Wide pointer encoding: two mallocs can return the same address +(address reuse after free). The assertion that they differ is +unsound. diff --git a/regression/cbmc/Malloc14/test.desc b/regression/cbmc/Malloc14/test.desc index 9c96469df12..6ea7b7e125c 100644 --- a/regression/cbmc/Malloc14/test.desc +++ b/regression/cbmc/Malloc14/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.c ^EXIT=0$ diff --git a/regression/cbmc/Pointer_Arithmetic12/test-wide-pointer-encoding.desc b/regression/cbmc/Pointer_Arithmetic12/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..8f8e1ed2a8e --- /dev/null +++ b/regression/cbmc/Pointer_Arithmetic12/test-wide-pointer-encoding.desc @@ -0,0 +1,8 @@ +CORE wide-pointer-encoding +main.i +--32 --little-endian +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc/Pointer_array4/test-wide-pointer-encoding.desc b/regression/cbmc/Pointer_array4/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..26db336850b --- /dev/null +++ b/regression/cbmc/Pointer_array4/test-wide-pointer-encoding.desc @@ -0,0 +1,15 @@ +CORE wide-pointer-encoding +main.i +--32 +^EXIT=10$ +^SIGNAL=0$ +^\[main.overflow.1\] line 8 arithmetic overflow on signed - in iThird - iFirst: FAILURE$ +^\*\* 1 of 2 failed +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Wide pointer encoding: addresses can be in the upper half of the +32-bit address space, so (int)pointer can be negative and the +signed subtraction iThird - iFirst can overflow. This is correct +behavior — casting pointers to signed int is implementation-defined. diff --git a/regression/cbmc/Pointer_array4/test.desc b/regression/cbmc/Pointer_array4/test.desc index 35cc9d3cd74..b368254898f 100644 --- a/regression/cbmc/Pointer_array4/test.desc +++ b/regression/cbmc/Pointer_array4/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.i --32 ^EXIT=0$ diff --git a/regression/cbmc/Pointer_array6/test-wide-pointer-encoding.desc b/regression/cbmc/Pointer_array6/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..b44f160854d --- /dev/null +++ b/regression/cbmc/Pointer_array6/test-wide-pointer-encoding.desc @@ -0,0 +1,11 @@ +KNOWNBUG wide-pointer-encoding +main.c +--no-malloc-may-fail +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +-- +Wide pointer encoding: byte-level operations on pointer arrays +see the 192-bit encoding instead of the 64-bit flat address. +Requires endianness map fix for pointer byte representation. diff --git a/regression/cbmc/Pointer_comparison3/test-wide-pointer-encoding.desc b/regression/cbmc/Pointer_comparison3/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..db88d00258d --- /dev/null +++ b/regression/cbmc/Pointer_comparison3/test-wide-pointer-encoding.desc @@ -0,0 +1,14 @@ +CORE no-new-smt wide-pointer-encoding +main.c +--no-malloc-may-fail +^\[main.assertion.5\] line 28 assertion __CPROVER_POINTER_OBJECT\(p\) == __CPROVER_POINTER_OBJECT\(p1\): FAILURE$ +^\*\* 13 of 59 failed +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Wide pointer encoding: relational pointer comparisons use flat +addresses, so p < p1+1 does not imply same_object(p, p1). +Address reuse for dynamic objects adds further failures. diff --git a/regression/cbmc/Pointer_comparison3/test.desc b/regression/cbmc/Pointer_comparison3/test.desc index 16c19fbfe88..2d708484d6e 100644 --- a/regression/cbmc/Pointer_comparison3/test.desc +++ b/regression/cbmc/Pointer_comparison3/test.desc @@ -1,4 +1,4 @@ -CORE no-new-smt +CORE no-new-smt no-wide-pointer-encoding main.c --no-malloc-may-fail ^\[main.assertion.3\] line 21 always false for different objects: FAILURE$ diff --git a/regression/cbmc/Pointer_difference2/test-wide-pointer-encoding.desc b/regression/cbmc/Pointer_difference2/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..92d20ff45c3 --- /dev/null +++ b/regression/cbmc/Pointer_difference2/test-wide-pointer-encoding.desc @@ -0,0 +1,19 @@ +CORE broken-smt-backend no-new-smt wide-pointer-encoding +main.c + +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.1\] line 6 correct: SUCCESS +^\[main.pointer.1\] line 8 same object violation in array - other_array: FAILURE$ +^\[main.assertion.3\] line 13 undefined behavior: FAILURE$ +^\[main.assertion.8\] line 28 end plus 2 is nondet: (UNKNOWN|FAILURE)$ +^\[main.pointer_arithmetic.\d+\] line 28 pointer relation: pointer outside object bounds in p: FAILURE$ +^\*\* [56] of \d+ failed +^VERIFICATION FAILED$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +Wide pointer encoding variant: fewer overflow failures because +pointer subtraction overflow is checked on the 56-bit offset +component, not the full 64-bit packed representation. diff --git a/regression/cbmc/Pointer_difference2/test.desc b/regression/cbmc/Pointer_difference2/test.desc index 91119db20b3..4bcf81723f0 100644 --- a/regression/cbmc/Pointer_difference2/test.desc +++ b/regression/cbmc/Pointer_difference2/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend no-new-smt +CORE broken-smt-backend no-new-smt no-wide-pointer-encoding main.c ^\[main.assertion.1\] line 6 correct: SUCCESS diff --git a/regression/cbmc/address_space_size_limit1/test-wide-pointer-encoding.desc b/regression/cbmc/address_space_size_limit1/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..3bc73903977 --- /dev/null +++ b/regression/cbmc/address_space_size_limit1/test-wide-pointer-encoding.desc @@ -0,0 +1,11 @@ +KNOWNBUG thorough-paths no-new-smt wide-pointer-encoding +test.c +--no-simplify --unwind 300 --object-bits 8 +^EXIT=6$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +Wide pointer encoding: the object-bits limitation is removed +(full 64-bit object width), so the "too many addressed objects" +error is never triggered. This test is not applicable. diff --git a/regression/cbmc/address_space_size_limit1/test.desc b/regression/cbmc/address_space_size_limit1/test.desc index 0794e0fe6a1..009f2076103 100644 --- a/regression/cbmc/address_space_size_limit1/test.desc +++ b/regression/cbmc/address_space_size_limit1/test.desc @@ -1,4 +1,4 @@ -CORE thorough-paths no-new-smt +CORE thorough-paths no-new-smt no-wide-pointer-encoding test.c --no-simplify --unwind 300 --object-bits 8 too many addressed objects diff --git a/regression/cbmc/address_space_size_limit3/test-wide-pointer-encoding.desc b/regression/cbmc/address_space_size_limit3/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..491bb23d6da --- /dev/null +++ b/regression/cbmc/address_space_size_limit3/test-wide-pointer-encoding.desc @@ -0,0 +1,13 @@ +CORE thorough-smt-backend no-new-smt wide-pointer-encoding +main.i +--32 --little-endian --object-bits 26 +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^.* dereference failure: pointer outside object bounds in .*: FAILURE$ +-- +^warning: ignoring +-- +Wide pointer encoding: the object-bits limitation is removed but +the bounds violation is still detected. Slower than standard +encoding due to 3x pointer width (requires thorough tag). diff --git a/regression/cbmc/havoc_slice/test_struct_raw_bytes-wide-pointer-encoding.desc b/regression/cbmc/havoc_slice/test_struct_raw_bytes-wide-pointer-encoding.desc new file mode 100644 index 00000000000..ae2ef218386 --- /dev/null +++ b/regression/cbmc/havoc_slice/test_struct_raw_bytes-wide-pointer-encoding.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding broken-cprover-smt-backend no-new-smt +test_struct_raw_bytes.c + +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\*\* 11 of +-- +^.*expecting SUCCESS: FAILURE$ +^.*dereference .*: FAILURE$ +-- +Wide pointer encoding: same results as standard encoding for +struct byte operations. diff --git a/regression/cbmc/memory_allocation1/test.desc b/regression/cbmc/memory_allocation1/test.desc index ffa7f87988a..f13df80c0f3 100644 --- a/regression/cbmc/memory_allocation1/test.desc +++ b/regression/cbmc/memory_allocation1/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE broken-smt-backend no-wide-pointer-encoding main.c ^EXIT=10$ @@ -12,3 +12,4 @@ main.c ^warning: ignoring -- This test only fails when using SMT solvers as back-end on Windows. +Broken under --wide-pointer-encoding: __CPROVER_allocated_memory has no body. Needs further work. diff --git a/regression/cbmc/mm_io1/test.desc b/regression/cbmc/mm_io1/test.desc index bfb5c9e422c..f182a321a07 100644 --- a/regression/cbmc/mm_io1/test.desc +++ b/regression/cbmc/mm_io1/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.c --no-standard-checks ^Replaced MMIO operations: 3 read\(s\), 1 write\(s\) @@ -7,3 +7,5 @@ main.c ^VERIFICATION SUCCESSFUL$ -- ^warning: ignoring +-- +Broken under --wide-pointer-encoding: memory-mapped I/O dispatch not yet supported. Needs further work. diff --git a/regression/cbmc/null8/test-wide-pointer-encoding.desc b/regression/cbmc/null8/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..dcac3cbd0d9 --- /dev/null +++ b/regression/cbmc/null8/test-wide-pointer-encoding.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding +main.c +--verbosity 8 +Generated 1 VCC\(s\), [01] remaining after simplification +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Wide pointer encoding: the VCC may not be simplified away because +forward propagation of dynamic object addresses is blocked (needed +for address reuse modeling). The assertion is still proved correct. diff --git a/regression/cbmc/null8/test.desc b/regression/cbmc/null8/test.desc index 793bc205451..089300b711c 100644 --- a/regression/cbmc/null8/test.desc +++ b/regression/cbmc/null8/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.c --verbosity 8 Generated 1 VCC\(s\), 0 remaining after simplification diff --git a/regression/cbmc/points-to-sets/test_json-wide-pointer-encoding.desc b/regression/cbmc/points-to-sets/test_json-wide-pointer-encoding.desc new file mode 100644 index 00000000000..7735a9f4eac --- /dev/null +++ b/regression/cbmc/points-to-sets/test_json-wide-pointer-encoding.desc @@ -0,0 +1,11 @@ +KNOWNBUG wide-pointer-encoding +main.c +--show-points-to-sets --json-ui --no-standard-checks +activate-multi-line-match +^EXIT=0$ +^SIGNAL=0$ +-- +-- +Wide pointer encoding: the address-based dereference dispatch adds +extra entries to the Value field in the JSON output, changing the +expected format. diff --git a/regression/cbmc/points-to-sets/test_json.desc b/regression/cbmc/points-to-sets/test_json.desc index a7dec447a70..e66a7c6699c 100644 --- a/regression/cbmc/points-to-sets/test_json.desc +++ b/regression/cbmc/points-to-sets/test_json.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.c --show-points-to-sets --json-ui --no-standard-checks activate-multi-line-match diff --git a/regression/cbmc/r_w_ok10/test-wide-pointer-encoding.desc b/regression/cbmc/r_w_ok10/test-wide-pointer-encoding.desc new file mode 100644 index 00000000000..90e3fec3ec7 --- /dev/null +++ b/regression/cbmc/r_w_ok10/test-wide-pointer-encoding.desc @@ -0,0 +1,21 @@ +CORE broken-smt-backend no-new-smt wide-pointer-encoding +main.c +--no-malloc-may-fail +^EXIT=10$ +^SIGNAL=0$ +^\[main.pointer_primitives.\d+\] line 7 pointer invalid in R_OK\(p1, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 7 pointer outside object bounds in R_OK\(p1, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 11 pointer outside object bounds in R_OK\(p2, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 15 pointer outside object bounds in R_OK\(p3, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 20 pointer outside object bounds in R_OK\(p4, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 34 dead object in R_OK\(p6, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\[main.pointer_primitives.\d+\] line 40 deallocated dynamic object in R_OK\(p7, \(unsigned (long (long )?)?int\)1\): FAILURE$ +^\*\* 8 of \d+ failed +^VERIFICATION FAILED$ +-- +^warning: ignoring +^CONVERSION ERROR$ +-- +Wide pointer encoding: integer-address objects are valid objects, +so the "pointer invalid" check for p2 passes (8 failures instead +of 9). diff --git a/regression/cbmc/r_w_ok10/test.desc b/regression/cbmc/r_w_ok10/test.desc index 7fd4ebb3aa8..145ce363cf6 100644 --- a/regression/cbmc/r_w_ok10/test.desc +++ b/regression/cbmc/r_w_ok10/test.desc @@ -1,4 +1,4 @@ -CORE +CORE no-wide-pointer-encoding main.c --no-malloc-may-fail ^EXIT=10$ diff --git a/regression/cbmc/wide-pointer-encoding1/main.c b/regression/cbmc/wide-pointer-encoding1/main.c new file mode 100644 index 00000000000..97fa83f23e9 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding1/main.c @@ -0,0 +1,9 @@ +// Test for issue #8200: cast-to-pointer should not miss error +// (char*)0x55a8a2e6b007 could equal x's address +#include +int main() +{ + char *x = ""; + char *ptr = (char *)0x55a8a2e6b007; + assert(ptr != x); +} diff --git a/regression/cbmc/wide-pointer-encoding1/test.desc b/regression/cbmc/wide-pointer-encoding1/test.desc new file mode 100644 index 00000000000..b96138802eb --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding1/test.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding +main.c +--wide-pointer-encoding +^EXIT=10$ +^SIGNAL=0$ +^\[main\.assertion\.1\] .* assertion ptr != x: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Issue #8200: with wide pointer encoding, a constant integer cast to +a pointer can equal a string literal's address. The address-based +pointer equality detects this. diff --git a/regression/cbmc/wide-pointer-encoding2/main.c b/regression/cbmc/wide-pointer-encoding2/main.c new file mode 100644 index 00000000000..bed3a085f83 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding2/main.c @@ -0,0 +1,16 @@ +// Test for issue #8161: consistent pointer comparison +// Both assertions should fail (or both succeed) +#include +extern int nondet_int(); +int main() +{ + int m = nondet_int(); + int *n = &m; + + if((unsigned long)n >= (unsigned long)(-4095)) + assert((unsigned int)(-1 * (long)n) < 6); + + int a = -2048; + if((unsigned long)a >= (unsigned long)(-4095)) + assert((unsigned int)(-1 * (long)a) < 6); +} diff --git a/regression/cbmc/wide-pointer-encoding2/test.desc b/regression/cbmc/wide-pointer-encoding2/test.desc new file mode 100644 index 00000000000..41a1c7616b7 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding2/test.desc @@ -0,0 +1,14 @@ +CORE wide-pointer-encoding +main.c +--wide-pointer-encoding +^EXIT=10$ +^SIGNAL=0$ +^\[main\.assertion\.1\] .* assertion \(unsigned int\)\(-1 \* \(long\)n\) < 6: FAILURE$ +^\[main\.assertion\.2\] .* assertion \(unsigned int\)\(-1 \* \(long\)a\) < 6: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Issue #8161: with wide pointer encoding, pointer-derived integers +have meaningful flat addresses, making arithmetic on them consistent +with plain integer arithmetic. Both assertions fail. diff --git a/regression/cbmc/wide-pointer-encoding3/main.c b/regression/cbmc/wide-pointer-encoding3/main.c new file mode 100644 index 00000000000..9856f45ce31 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding3/main.c @@ -0,0 +1,22 @@ +// Test for issue #8103: integer-to-pointer casting +// (void*)addr where addr is constrained to be within a known object +// should read from that object +#include +#include + +unsigned char buf[16] __attribute__((__aligned__(8))); +size_t nondet_size_t(void); +uint64_t nondet_uint64_t(void); + +void main() +{ + size_t index = nondet_size_t(); + __CPROVER_assume(index < 2); + buf[index * 8] = 42; + + uint64_t rd = nondet_uint64_t(); + __CPROVER_assume(rd == (uint64_t)buf + index * 8); + + uint8_t *p = (uint8_t *)(void *)rd; + __CPROVER_assert(*p == 42, "I2P dereference reads correct value"); +} diff --git a/regression/cbmc/wide-pointer-encoding3/test.desc b/regression/cbmc/wide-pointer-encoding3/test.desc new file mode 100644 index 00000000000..d1d2c0c7ed4 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding3/test.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding +main.c +--wide-pointer-encoding --no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^\[main\.assertion\.1\] .* I2P dereference reads correct value: SUCCESS$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Issue #8103: with wide pointer encoding, (void*)addr where addr is +constrained to be within a known object correctly reads from that +object via address-based dereference dispatch. diff --git a/regression/cbmc/wide-pointer-encoding4/main.c b/regression/cbmc/wide-pointer-encoding4/main.c new file mode 100644 index 00000000000..549ad7b3f6b --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding4/main.c @@ -0,0 +1,13 @@ +// Test for issue #2117: address reuse after free +#include +void main() +{ + int *x = malloc(sizeof(int)); + free(x); + int *y = malloc(sizeof(int)); + if(x == y) + { + __CPROVER_assert(0, "reachable: malloc returned same address after free"); + } + free(y); +} diff --git a/regression/cbmc/wide-pointer-encoding4/test.desc b/regression/cbmc/wide-pointer-encoding4/test.desc new file mode 100644 index 00000000000..6d5845febf8 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding4/test.desc @@ -0,0 +1,13 @@ +CORE wide-pointer-encoding +main.c +--wide-pointer-encoding --no-standard-checks +^EXIT=10$ +^SIGNAL=0$ +^\[main\.assertion\.1\] .* reachable: malloc returned same address after free: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring +-- +Issue #2117: with wide pointer encoding, malloc can return the same +address after free. The address-based pointer equality and the +relaxed non-overlapping constraint for dynamic objects enable this. diff --git a/regression/cbmc/wide-pointer-encoding5/main.c b/regression/cbmc/wide-pointer-encoding5/main.c new file mode 100644 index 00000000000..5d0773ccd71 --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding5/main.c @@ -0,0 +1,27 @@ +// Test for --model-stack-layout: stack growth direction, +// adjacent placement, and buffer overflow modeling +#include +void main() +{ + int a = 1; + int b = 2; + int c = 3; + + // Stack growth direction + __CPROVER_assert( + (uint64_t)&a > (uint64_t)&b, + "a has higher address than b"); + __CPROVER_assert( + (uint64_t)&b > (uint64_t)&c, + "b has higher address than c"); + + // Adjacent placement + __CPROVER_assert( + (uint64_t)&a - (uint64_t)&b == sizeof(int), + "a and b are adjacent"); + + // Buffer overflow: writing past a hits b + char *p = (char *)&a; + p[sizeof(int)] = 42; + __CPROVER_assert(*(char *)&b == 42, "overflow from a lands on b"); +} diff --git a/regression/cbmc/wide-pointer-encoding5/test.desc b/regression/cbmc/wide-pointer-encoding5/test.desc new file mode 100644 index 00000000000..75f7fe40a1a --- /dev/null +++ b/regression/cbmc/wide-pointer-encoding5/test.desc @@ -0,0 +1,12 @@ +CORE wide-pointer-encoding +main.c +--wide-pointer-encoding --model-stack-layout --no-standard-checks +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring +-- +Tests --model-stack-layout: downward stack growth direction, +adjacent variable placement, and buffer overflow into adjacent +stack variable. diff --git a/src/goto-checker/bmc_util.h b/src/goto-checker/bmc_util.h index c604856d7ba..ad0be851af4 100644 --- a/src/goto-checker/bmc_util.h +++ b/src/goto-checker/bmc_util.h @@ -171,6 +171,8 @@ void run_property_decider( "(show-goto-symex-steps)" \ "(show-points-to-sets)" \ "(slice-formula)" \ + "(wide-pointer-encoding)" \ + "(model-stack-layout)" \ "(unwinding-assertions)" \ "(no-unwinding-assertions)" \ "(no-self-loops-to-assumptions)" \ @@ -233,6 +235,11 @@ void run_property_decider( " {y--graphml-witness} {ufilename} \t write the witness in GraphML format " \ "to {ufilename}\n" \ " {y--symex-cache-dereferences} \t enable caching of repeated " \ - "dereferences\n" + "dereferences\n" \ + " {y--wide-pointer-encoding} \t use flat addresses for pointer " \ + "comparison and integer-to-pointer casts\n" \ + " {y--model-stack-layout} \t place stack variables adjacently with " \ + "architecture-appropriate growth direction (requires " \ + "{y--wide-pointer-encoding})\n" #endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H diff --git a/src/goto-checker/solver_factory.cpp b/src/goto-checker/solver_factory.cpp index 18dac69c636..ba78551d6c0 100644 --- a/src/goto-checker/solver_factory.cpp +++ b/src/goto-checker/solver_factory.cpp @@ -12,6 +12,7 @@ Author: Daniel Kroening, Peter Schrammel #include "solver_factory.h" #include +#include #include #include #include @@ -365,10 +366,33 @@ std::unique_ptr solver_factoryt::get_default() auto bv_pointers = std::make_unique( ns, *sat_solver, message_handler, get_array_constraints); + if(options.get_bool_option("wide-pointer-encoding")) + { + bv_pointers->wide_pointer_encoding = true; + bv_pointers->set_pointer_width_multiplier(3); + config.bv_encoding.malloc_may_alias = true; + } + + if(options.get_bool_option("model-stack-layout")) + bv_pointers->model_stack_layout = true; + if(options.get_option("arrays-uf") == "never") bv_pointers->unbounded_array = bv_pointerst::unbounded_arrayt::U_NONE; else if(options.get_option("arrays-uf") == "always") + { bv_pointers->unbounded_array = bv_pointerst::unbounded_arrayt::U_ALL; +#ifdef SATCHECK_MINISAT2 + // Ackermann-style array handling generates many incremental SAT + // calls. MiniSat's simplifier can eliminate variables between + // these calls, making later UNSAT proofs extremely expensive. + if( + auto *minisat_simp = + dynamic_cast(sat_solver.get())) + { + minisat_simp->set_limit_incremental_simplification(); + } +#endif + } set_decision_procedure_time_limit(*bv_pointers); @@ -815,4 +839,9 @@ void parse_solver_options(const cmdlinet &cmdline, optionst &options) options.set_option( "max-node-refinement", cmdline.get_value("max-node-refinement")); } + + if(cmdline.isset("wide-pointer-encoding")) + options.set_option("wide-pointer-encoding", true); + if(cmdline.isset("model-stack-layout")) + options.set_option("model-stack-layout", true); } diff --git a/src/goto-symex/goto_symex_can_forward_propagate.h b/src/goto-symex/goto_symex_can_forward_propagate.h index dcd6eb63c71..dcc8717a76b 100644 --- a/src/goto-symex/goto_symex_can_forward_propagate.h +++ b/src/goto-symex/goto_symex_can_forward_propagate.h @@ -12,8 +12,10 @@ Author: Michael Tautschig, tautschn@amazon.com #ifndef CPROVER_GOTO_SYMEX_GOTO_SYMEX_CAN_FORWARD_PROPAGATE_H #define CPROVER_GOTO_SYMEX_GOTO_SYMEX_CAN_FORWARD_PROPAGATE_H +#include #include #include +#include class goto_symex_can_forward_propagatet : public can_forward_propagatet { @@ -26,6 +28,27 @@ class goto_symex_can_forward_propagatet : public can_forward_propagatet protected: bool is_constant(const exprt &expr) const override { + if(config.bv_encoding.malloc_may_alias && expr.id() == ID_address_of) + { + const exprt &obj = to_address_of_expr(expr).object(); + if(obj.id() == ID_dynamic_object) + return false; + if(obj.id() == ID_index) + { + const auto &arr = to_index_expr(obj).array(); + if( + arr.id() == ID_symbol && + id2string(to_symbol_expr(arr).get_identifier()) + .find("dynamic_object") != std::string::npos) + return false; + } + if( + obj.id() == ID_symbol && + id2string(to_symbol_expr(obj).get_identifier()) + .find("dynamic_object") != std::string::npos) + return false; + } + if(expr.id() == ID_mult) { bool found_non_constant = false; diff --git a/src/goto-symex/simplify_expr_with_value_set.cpp b/src/goto-symex/simplify_expr_with_value_set.cpp index 24a519f6364..8511c165afb 100644 --- a/src/goto-symex/simplify_expr_with_value_set.cpp +++ b/src/goto-symex/simplify_expr_with_value_set.cpp @@ -8,8 +8,10 @@ Author: Michael Tautschnig #include "simplify_expr_with_value_set.h" +#include #include #include +#include #include #include @@ -109,6 +111,39 @@ static std::optional try_evaluate_pointer_comparison( { // The symbol cannot possibly have the value \p other_operand because it // isn't in the symbol's value-set + if(config.bv_encoding.malloc_may_alias) + { + // Dynamic objects may share addresses after free/realloc. + // Don't conclude inequality if both sides involve dynamic objects. + bool other_is_dynamic = false; + const exprt &other_root = skip_typecast(other_operand); + if( + other_root.id() == ID_address_of && + to_address_of_expr(other_root).object().id() == ID_dynamic_object) + other_is_dynamic = true; + if( + constant_expr && !constant_expr->is_null_pointer() && + other_operand.type().id() == ID_pointer) + other_is_dynamic = true; // constant pointer, likely from malloc + + if(other_is_dynamic) + { + for(const auto &vs_elem : value_set_elements) + { + if(vs_elem.id() != ID_object_descriptor) + continue; + const exprt &root = to_object_descriptor_expr(vs_elem).root_object(); + if( + root.id() == ID_dynamic_object || + (root.id() == ID_symbol && + id2string(to_symbol_expr(root).get_identifier()) + .find(SYMEX_DYNAMIC_PREFIX) != std::string::npos)) + { + return {}; + } + } + } + } return operation == ID_equal ? static_cast(false_exprt{}) : static_cast(true_exprt{}); } diff --git a/src/goto-symex/symex_clean_expr.cpp b/src/goto-symex/symex_clean_expr.cpp index f065dbda360..ccb32991b7f 100644 --- a/src/goto-symex/symex_clean_expr.cpp +++ b/src/goto-symex/symex_clean_expr.cpp @@ -129,7 +129,8 @@ void goto_symext::process_array_expr(statet &state, exprt &expr) symex_dereference_state, language_mode, false, - log.get_message_handler()); + log.get_message_handler(), + symex_config.wide_pointer_encoding); expr = dereference.dereference(expr, symex_config.show_points_to_sets); lift_lets(state, expr); diff --git a/src/goto-symex/symex_config.h b/src/goto-symex/symex_config.h index d9f65c46568..ede2747d874 100644 --- a/src/goto-symex/symex_config.h +++ b/src/goto-symex/symex_config.h @@ -61,6 +61,11 @@ struct symex_configt final /// Used in goto_symext::dereference_rec bool cache_dereferences; + /// \brief Whether the wide pointer encoding is enabled. + /// When true, integer-to-pointer dereferences use address-based + /// dispatch over known objects instead of __CPROVER_memory. + bool wide_pointer_encoding; + /// \brief Construct a symex_configt using options specified in an /// \ref optionst explicit symex_configt(const optionst &options); diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 6948502b67e..50415e4383a 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -330,7 +330,8 @@ void goto_symext::dereference_rec( symex_dereference_state, language_mode, expr_is_not_null, - log.get_message_handler()); + log.get_message_handler(), + symex_config.wide_pointer_encoding); // std::cout << "**** " << format(tmp1) << '\n'; exprt tmp2 = diff --git a/src/goto-symex/symex_dereference_state.cpp b/src/goto-symex/symex_dereference_state.cpp index 835256d91f2..3db19e0af52 100644 --- a/src/goto-symex/symex_dereference_state.cpp +++ b/src/goto-symex/symex_dereference_state.cpp @@ -80,3 +80,11 @@ symex_dereference_statet::get_value_set(const exprt &expr) const { return state.value_set.get_value_set(expr, ns); } + +exprt symex_dereference_statet::get_renamed_symbol(const exprt &expr) const +{ + // Use L1 renaming to get the SSA symbol without constant propagation. + // L2 renaming would substitute the symbol with its current value, + // which causes width mismatches in byte_extract. + return state.rename(expr, ns).get(); +} diff --git a/src/goto-symex/symex_dereference_state.h b/src/goto-symex/symex_dereference_state.h index 3790da6989e..45f9ddc2c55 100644 --- a/src/goto-symex/symex_dereference_state.h +++ b/src/goto-symex/symex_dereference_state.h @@ -37,6 +37,8 @@ class symex_dereference_statet: std::vector get_value_set(const exprt &expr) const override; const symbolt *get_or_create_failed_symbol(const exprt &expr) override; + + exprt get_renamed_symbol(const exprt &expr) const override; }; #endif // CPROVER_GOTO_SYMEX_SYMEX_DEREFERENCE_STATE_H diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index a6ca641f16e..c6f0c52f9f0 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -42,15 +42,14 @@ symex_configt::symex_configt(const optionst &options) show_symex_steps(options.get_bool_option("show-goto-symex-steps")), show_points_to_sets(options.get_bool_option("show-points-to-sets")), max_field_sensitivity_array_size( - options.is_set("no-array-field-sensitivity") - ? 0 - : options.is_set("max-field-sensitivity-array-size") - ? options.get_unsigned_int_option( - "max-field-sensitivity-array-size") - : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE), + options.is_set("no-array-field-sensitivity") ? 0 + : options.is_set("max-field-sensitivity-array-size") + ? options.get_unsigned_int_option("max-field-sensitivity-array-size") + : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE), complexity_limits_active( options.get_signed_int_option("symex-complexity-limit") > 0), - cache_dereferences{options.get_bool_option("symex-cache-dereferences")} + cache_dereferences{options.get_bool_option("symex-cache-dereferences")}, + wide_pointer_encoding{options.get_bool_option("wide-pointer-encoding")} { } diff --git a/src/pointer-analysis/dereference_callback.h b/src/pointer-analysis/dereference_callback.h index d010612fcb0..86eaa1d8bb4 100644 --- a/src/pointer-analysis/dereference_callback.h +++ b/src/pointer-analysis/dereference_callback.h @@ -32,6 +32,15 @@ class dereference_callbackt virtual std::vector get_value_set(const exprt &expr) const = 0; virtual const symbolt *get_or_create_failed_symbol(const exprt &expr) = 0; + + /// Get the current (L2-renamed) version of a symbol expression. + /// Used by the wide pointer encoding's address-based dereference + /// dispatch to read from the correct SSA version of an object. + /// Default implementation returns the expression unchanged. + virtual exprt get_renamed_symbol(const exprt &expr) const + { + return expr; + } }; #endif // CPROVER_POINTER_ANALYSIS_DEREFERENCE_CALLBACK_H diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index f9a99530a6c..536eb220297 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -33,6 +33,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include "dereference_callback.h" @@ -251,6 +252,74 @@ exprt value_set_dereferencet::handle_dereference_base_case( }) .collect>(); + // Wide pointer encoding: when the value set contains integer_address, + // add address-based dispatch entries for all addressable objects. + if(wide_pointer_encoding) + { + bool has_integer_address = std::any_of( + retained_values.begin(), + retained_values.end(), + [](const exprt &v) + { + return v.id() == ID_object_descriptor && + to_object_descriptor_expr(v).object().id() == ID_integer_address; + }); + + // Only add dispatch entries when integer_address is the ONLY + // target. If the value set has concrete targets, the regular + // dereference path handles them correctly. + bool has_concrete_target = std::any_of( + retained_values.begin(), + retained_values.end(), + [](const exprt &v) + { + return v.id() == ID_object_descriptor && + to_object_descriptor_expr(v).object().id() != ID_integer_address; + }); + + if(has_integer_address && !has_concrete_target) + { + const auto &pointer_type = to_pointer_type(pointer.type()); + const auto addr_type = unsignedbv_typet{pointer_type.get_width()}; + const exprt flat_addr = + typecast_exprt::conditional_cast(compare_against_pointer, addr_type); + + for(const auto &sym_pair : ns.get_symbol_table().symbols) + { + const symbolt &sym = sym_pair.second; + if(sym.is_type || !sym.is_lvalue || sym.type.id() == ID_code) + continue; + // Skip types that contain pointers — the byte_extract from + // pointer-containing types causes width mismatches in the + // solver. The backward constraint refinement handles these + // types correctly without the dispatch. + if(has_subtype( + sym.type, [](const typet &t) { return t.id() == ID_pointer; }, ns)) + continue; + auto size_opt = pointer_offset_size(sym.type, ns); + if(!size_opt.has_value() || *size_opt <= 0) + continue; + + const exprt base_addr = + object_base_address_exprt{address_of_exprt{sym.symbol_expr()}}; + const exprt ge = binary_relation_exprt{flat_addr, ID_ge, base_addr}; + const exprt end_addr = + plus_exprt{base_addr, from_integer(*size_opt, addr_type)}; + const exprt lt = binary_relation_exprt{flat_addr, ID_lt, end_addr}; + + // Use the SSA-renamed symbol as the byte_extract source + const exprt renamed = + dereference_callback.get_renamed_symbol(sym.symbol_expr()); + + valuet entry; + entry.pointer_guard = and_exprt{ge, lt}; + entry.value = + make_byte_extract(renamed, minus_exprt{flat_addr, base_addr}, type); + values.push_back(std::move(entry)); + } + } + } + const bool may_fail = values.empty() || std::any_of(values.begin(), values.end(), [](const valuet &value) { diff --git a/src/pointer-analysis/value_set_dereference.h b/src/pointer-analysis/value_set_dereference.h index 96552e66af9..a3a244e9d30 100644 --- a/src/pointer-analysis/value_set_dereference.h +++ b/src/pointer-analysis/value_set_dereference.h @@ -37,13 +37,15 @@ class value_set_dereferencet final dereference_callbackt &_dereference_callback, const irep_idt _language_mode, bool _exclude_null_derefs, - message_handlert &_message_handler) + message_handlert &_message_handler, + bool _wide_pointer_encoding = false) : ns(_ns), new_symbol_table(_new_symbol_table), dereference_callback(_dereference_callback), language_mode(_language_mode), exclude_null_derefs(_exclude_null_derefs), - message_handler(_message_handler) + message_handler(_message_handler), + wide_pointer_encoding(_wide_pointer_encoding) { } virtual ~value_set_dereferencet() { } @@ -106,6 +108,7 @@ class value_set_dereferencet final /// disregard an apparent attempt to dereference NULL const bool exclude_null_derefs; message_handlert &message_handler; + bool wide_pointer_encoding; valuet get_failure_value(const exprt &pointer, const typet &type); exprt handle_dereference_base_case( const exprt &pointer, diff --git a/src/solvers/flattening/boolbv.cpp b/src/solvers/flattening/boolbv.cpp index 68194f9d98f..d543e68d2f5 100644 --- a/src/solvers/flattening/boolbv.cpp +++ b/src/solvers/flattening/boolbv.cpp @@ -63,7 +63,10 @@ const bvt &boolbvt::convert_bv( !expected_width || cache_entry.size() == *expected_width, "bitvector width shall match the indicated expected width", expr.find_source_location(), - irep_pretty_diagnosticst(expr)); + irep_pretty_diagnosticst(expr), + "expected " + (expected_width ? std::to_string(*expected_width) : "none") + + " got " + std::to_string(cache_entry.size()) + " type " + + expr.type().id_string()); // check for(const auto &literal : cache_entry) diff --git a/src/solvers/flattening/boolbv_width.cpp b/src/solvers/flattening/boolbv_width.cpp index 998312fd37e..c075baaf960 100644 --- a/src/solvers/flattening/boolbv_width.cpp +++ b/src/solvers/flattening/boolbv_width.cpp @@ -179,8 +179,9 @@ const boolbv_widtht::entryt &boolbv_widtht::get_entry(const typet &type) const } else if(type_id==ID_pointer) { - cache_entry = - defined_entryt{type_checked_cast(type).get_width()}; + cache_entry = defined_entryt{ + type_checked_cast(type).get_width() * + pointer_width_multiplier}; } else if(type_id==ID_struct_tag) { diff --git a/src/solvers/flattening/boolbv_width.h b/src/solvers/flattening/boolbv_width.h index d6e66f75c09..c36429c4708 100644 --- a/src/solvers/flattening/boolbv_width.h +++ b/src/solvers/flattening/boolbv_width.h @@ -21,6 +21,19 @@ class boolbv_widtht explicit boolbv_widtht(const namespacet &_ns); virtual ~boolbv_widtht() = default; + /// Set a multiplier for pointer widths (e.g., 3 for wide encoding). + /// Must be called before any width queries. Clears the cache. + void set_pointer_width_multiplier(std::size_t m) + { + pointer_width_multiplier = m; + cache.clear(); + } + + std::size_t get_pointer_width_multiplier() const + { + return pointer_width_multiplier; + } + virtual std::size_t operator()(const typet &type) const { const auto &entry_opt = get_entry(type); @@ -46,6 +59,7 @@ class boolbv_widtht protected: const namespacet &ns; + std::size_t pointer_width_multiplier = 1; struct defined_entryt { diff --git a/src/solvers/flattening/bv_pointers.cpp b/src/solvers/flattening/bv_pointers.cpp index b52c0da27f9..66777365e1a 100644 --- a/src/solvers/flattening/bv_pointers.cpp +++ b/src/solvers/flattening/bv_pointers.cpp @@ -9,10 +9,12 @@ Author: Daniel Kroening, kroening@kroening.com #include "bv_pointers.h" #include +#include #include #include #include #include +#include #include #include #include @@ -53,6 +55,30 @@ void bv_endianness_mapt::build_little_endian(const typet &src) if(!width_opt.has_value()) return; + if(src.id() == ID_pointer && boolbv_width.get_pointer_width_multiplier() > 1) + { + // Wide pointer encoding: the byte-visible representation is + // the address component only. Rearrange so that the first + // platform_width bits come from the address component (which + // is the last component in [object|offset|address]). + const std::size_t total = *width_opt; + const std::size_t platform_width = + total / boolbv_width.get_pointer_width_multiplier(); + const std::size_t addr_start = total - platform_width; + + const std::size_t base = map.size(); + map.reserve(base + total); + + // First: address bits (byte-visible) + for(std::size_t i = 0; i < platform_width; ++i) + map.push_back(base + addr_start + i); + // Then: object and offset bits (not byte-visible, but must + // be present to match the bitvector size) + for(std::size_t i = 0; i < addr_start; ++i) + map.push_back(base + i); + return; + } + const std::size_t new_size = map.size() + *width_opt; map.reserve(new_size); @@ -68,20 +94,81 @@ void bv_endianness_mapt::build_big_endian(const typet &src) endianness_mapt::build_big_endian(src); } +/// Check if a type is an array that (recursively) contains pointer elements. + endianness_mapt bv_pointerst::endianness_map(const typet &type, bool little_endian) const { + if(wide_pointer_encoding) + { + const std::size_t bbw = boolbv_width(type); + const std::size_t bw = bv_width.get_width_opt(type).value_or(0); + if(bbw != bw) + { + endianness_mapt m(ns); + m.build(unsignedbv_typet{bbw}, little_endian); + return m; + } + } return bv_endianness_mapt{type, little_endian, ns, bv_width}; } -std::size_t bv_pointerst::get_object_width(const pointer_typet &) const +std::size_t bv_pointerst::boolbv_width(const typet &type) const +{ + if(!wide_pointer_encoding) + return bv_width(type); + + // For structs containing pointer arrays, compute recursively + if(type.id() == ID_struct || type.id() == ID_struct_tag) + { + const auto &st = type.id() == ID_struct_tag + ? ns.follow_tag(to_struct_tag_type(type)) + : to_struct_type(type); + std::size_t total = 0; + bool differs = false; + for(const auto &comp : st.components()) + { + const std::size_t w = boolbv_width(comp.type()); + total += w; + if(w != bv_width.get_width_opt(comp.type()).value_or(0)) + differs = true; + } + if(differs) + return total; + } + + if(type.id() == ID_union || type.id() == ID_union_tag) + { + const auto &ut = type.id() == ID_union_tag + ? ns.follow_tag(to_union_tag_type(type)) + : to_union_type(type); + std::size_t max_w = 0; + bool differs = false; + for(const auto &comp : ut.components()) + { + const std::size_t w = boolbv_width(comp.type()); + max_w = std::max(max_w, w); + if(w != bv_width.get_width_opt(comp.type()).value_or(0)) + differs = true; + } + if(differs) + return max_w; + } + + return bv_width(type); +} + +std::size_t bv_pointerst::get_object_width(const pointer_typet &type) const { - // not actually type-dependent for now + if(wide_pointer_encoding) + return type.get_width(); return config.bv_encoding.object_bits; } std::size_t bv_pointerst::get_offset_width(const pointer_typet &type) const { + if(wide_pointer_encoding) + return type.get_width(); const std::size_t pointer_width = type.get_width(); const std::size_t object_width = get_object_width(type); PRECONDITION(pointer_width >= object_width); @@ -90,7 +177,9 @@ std::size_t bv_pointerst::get_offset_width(const pointer_typet &type) const std::size_t bv_pointerst::get_address_width(const pointer_typet &type) const { - return type.get_width(); + if(wide_pointer_encoding) + return type.get_width(); + return 0; } bvt bv_pointerst::object_literals(const bvt &bv, const pointer_typet &type) @@ -113,6 +202,37 @@ bvt bv_pointerst::offset_literals(const bvt &bv, const pointer_typet &type) return bvt(bv.begin(), bv.begin() + offset_width); } +bvt bv_pointerst::address_literals(const bvt &bv, const pointer_typet &type) + const +{ + const std::size_t addr_width = get_address_width(type); + if(addr_width == 0) + return {}; + const std::size_t offset_width = get_offset_width(type); + const std::size_t object_width = get_object_width(type); + const std::size_t start = offset_width + object_width; + PRECONDITION(bv.size() >= start + addr_width); + return bvt(bv.begin() + start, bv.begin() + start + addr_width); +} + +bvt bv_pointerst::get_object_base_address( + const mp_integer &object, + std::size_t width) const +{ + auto it = object_base_address.find(object); + if(it != object_base_address.end()) + return it->second; + bvt base = prop.new_variables(width); + object_base_address[object] = base; + if(wide_pointer_encoding) + for(const auto &l : base) + if(!l.is_constant()) + { + prop.set_frozen(l); + } + return base; +} + bvt bv_pointerst::object_offset_encoding(const bvt &object, const bvt &offset) { bvt result; @@ -123,6 +243,37 @@ bvt bv_pointerst::object_offset_encoding(const bvt &object, const bvt &offset) return result; } +bvt bv_pointerst::object_offset_encoding( + const bvt &object, + const bvt &offset, + const bvt &address) +{ + bvt result; + result.reserve(offset.size() + object.size() + address.size()); + result.insert(result.end(), offset.begin(), offset.end()); + result.insert(result.end(), object.begin(), object.end()); + result.insert(result.end(), address.begin(), address.end()); + + return result; +} + +literalt bv_pointerst::convert_equality(const equal_exprt &expr) +{ + if(wide_pointer_encoding && expr.lhs().type().id() == ID_pointer) + { + // Compare pointers by their flat address, not by the full + // bitvector (which includes object/offset encoding bits). + // Two pointers are equal iff they point to the same address. + const bvt &lhs = convert_bv(expr.lhs()); + const bvt &rhs = convert_bv(expr.rhs()); + const auto &type = to_pointer_type(expr.lhs().type()); + bvt lhs_addr = address_literals(lhs, type); + bvt rhs_addr = address_literals(rhs, type); + return bv_utils.equal(lhs_addr, rhs_addr); + } + return SUB::convert_equality(expr); +} + literalt bv_pointerst::convert_rest(const exprt &expr) { PRECONDITION(expr.is_boolean()); @@ -181,6 +332,18 @@ literalt bv_pointerst::convert_rest(const exprt &expr) const bvt &bv0=convert_bv(operands[0]); const bvt &bv1=convert_bv(operands[1]); + if(wide_pointer_encoding) + { + // Compare by flat address — this is well-defined even + // for pointers to distinct objects. + const pointer_typet &type0 = to_pointer_type(operands[0].type()); + const pointer_typet &type1 = to_pointer_type(operands[1].type()); + bvt addr0 = address_literals(bv0, type0); + bvt addr1 = address_literals(bv1, type1); + return bv_utils.rel( + addr0, expr.id(), addr1, bv_utilst::representationt::UNSIGNED); + } + const pointer_typet &type0 = to_pointer_type(operands[0].type()); bvt offset_bv0 = offset_literals(bv0, type0); @@ -221,9 +384,46 @@ literalt bv_pointerst::convert_rest(const exprt &expr) return convert(simplify_expr(prophecy_pointer_in_range->lower(ns), ns)); } + if(wide_pointer_encoding && expr_try_dynamic_cast(expr)) + { + const auto &minus_ov = to_binary_overflow_expr(expr); + if( + minus_ov.lhs().type().id() == ID_pointer && + minus_ov.rhs().type().id() == ID_pointer) + { + // For wide pointers, check overflow on the offset bits only, + // not the full 128-bit bitvector. + const pointer_typet &pt = to_pointer_type(minus_ov.lhs().type()); + bvt lhs_off = offset_literals(convert_bv(minus_ov.lhs()), pt); + bvt rhs_off = offset_literals(convert_bv(minus_ov.rhs()), pt); + return bv_utils.overflow_sub( + lhs_off, rhs_off, bv_utilst::representationt::SIGNED); + } + } + return SUB::convert_rest(expr); } +bool bv_pointerst::boolbv_set_equality_to_true(const equal_exprt &expr) +{ + if( + wide_pointer_encoding && expr.lhs().type().id() == ID_pointer && + expr.rhs().type().id() == ID_pointer) + { + // Force bitvector identity for pointer equalities used as + // constraints. This ensures the solver assigns the same + // pointer encoding to both sides, which is needed for model + // extraction and prevents nondeterministic I2P objects from + // creating spurious counterexamples. + const bvt &lhs_bv = convert_bv(expr.lhs()); + const bvt &rhs_bv = convert_bv(expr.rhs()); + for(std::size_t i = 0; i < lhs_bv.size() && i < rhs_bv.size(); ++i) + prop.set_equal(lhs_bv[i], rhs_bv[i]); + return false; + } + return SUB::boolbv_set_equality_to_true(expr); +} + bv_pointerst::bv_pointerst( const namespacet &_ns, propt &_prop, @@ -403,10 +603,92 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr) op_type.id() == ID_c_enum || op_type.id() == ID_c_enum_tag) { // Cast from a bitvector type to pointer. - // We just do a zero extension. - const bvt &op_bv=convert_bv(op); + if(wide_pointer_encoding) + { + // The integer value is the flat address. + // Reconstruct object and offset using backward constraints. + const std::size_t object_bits = get_object_width(type); + const std::size_t offset_bits = get_offset_width(type); + const std::size_t addr_bits = get_address_width(type); + bvt addr_bv = bv_utils.zero_extension(op_bv, addr_bits); + + // Constant 0 => NULL + if(bv_utils.is_zero(addr_bv).is_true()) + return encode(pointer_logic.get_null_object(), type); + + // Non-zero constant => dedicated integer-address object + mp_integer int_val = 0; + bool is_const = true; + for(std::size_t i = 0; i < addr_bv.size(); ++i) + { + if(addr_bv[i].is_true()) + int_val += power(2, i); + else if(!addr_bv[i].is_false()) + { + is_const = false; + break; + } + } + if(is_const) + { + const auto int_addr_obj = pointer_logic.add_object(constant_exprt( + integer2bvrep(int_val, addr_bits), unsignedbv_typet(addr_bits))); + bvt result = encode(int_addr_obj, type); + integer_address_objects.insert(int_addr_obj); + // Set the base address to the constant value + bvt base = get_object_base_address(int_addr_obj, addr_bits); + bvt val_bv = bv_utils.build_constant(int_val, addr_bits); + for(std::size_t i = 0; i < addr_bits; ++i) + prop.set_equal(base[i], val_bv[i]); + return result; + } + + bvt obj_bv = prop.new_variables(object_bits); + bvt off_bv = prop.new_variables(offset_bits); + + // Non-zero address => not null + bvt null_obj = + bv_utils.build_constant(pointer_logic.get_null_object(), object_bits); + prop.l_set_to_true(prop.lor( + bv_utils.is_zero(addr_bv), !bv_utils.equal(obj_bv, null_obj))); + + // Add forward constraints for currently known objects. + // Backward constraints and forward constraints for objects + // added later are deferred to finish_eager_conversion. + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + bvt obj_const = bv_utils.build_constant(number, object_bits); + literalt is_this = bv_utils.equal(obj_bv, obj_const); + + // Forward: obj==i => base[i]+offset==address + bvt base = get_object_base_address(number, addr_bits); + bvt off_ext = bv_utils.zero_extension(off_bv, addr_bits); + bvt flat = bv_utils.add(base, off_ext); + for(std::size_t k = 0; k < addr_bits; ++k) + { + prop.lcnf({!is_this, !flat[k], addr_bv[k]}); + prop.lcnf({!is_this, flat[k], !addr_bv[k]}); + } + } + + pending_i2p.push_back({obj_bv, off_bv, addr_bv, number, true}); + for(const auto &l : obj_bv) + if(!l.is_constant()) + prop.set_frozen(l); + for(const auto &l : off_bv) + if(!l.is_constant()) + prop.set_frozen(l); + for(const auto &l : addr_bv) + if(!l.is_constant()) + prop.set_frozen(l); + + return object_offset_encoding(obj_bv, off_bv, addr_bv); + } + return bv_utils.zero_extension(op_bv, bits); } } @@ -416,11 +698,11 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr) } else if(expr.id()==ID_index) { - return SUB::convert_index(to_index_expr(expr)); + return convert_index(to_index_expr(expr)); } else if(expr.id()==ID_member) { - return SUB::convert_member(to_member_expr(expr)); + return convert_member(to_member_expr(expr)); } else if(expr.id()==ID_address_of) { @@ -545,13 +827,13 @@ bvt bv_pointerst::convert_pointer_type(const exprt &expr) else if(expr.id()==ID_byte_extract_little_endian || expr.id()==ID_byte_extract_big_endian) { - return SUB::convert_byte_extract(to_byte_extract_expr(expr)); + return convert_byte_extract(to_byte_extract_expr(expr)); } else if( expr.id() == ID_byte_update_little_endian || expr.id() == ID_byte_update_big_endian) { - return SUB::convert_byte_update(to_byte_update_expr(expr)); + return convert_byte_update(to_byte_update_expr(expr)); } else if(expr.id() == ID_field_address) { @@ -618,6 +900,253 @@ static bool is_pointer_subtraction(const exprt &expr) minus_expr.rhs().type().id() == ID_pointer; } +bvt bv_pointerst::convert_array(const exprt &expr) +{ + return SUB::convert_array(expr); +} + +bvt bv_pointerst::convert_index(const index_exprt &expr) +{ + return SUB::convert_index(expr); +} + +bvt bv_pointerst::convert_with(const with_exprt &expr) +{ + return SUB::convert_with(expr); +} + +bvt bv_pointerst::convert_byte_extract(const byte_extract_exprt &expr) +{ + if(!wide_pointer_encoding) + return SUB::convert_byte_extract(expr); + + if(expr.op().type().id() == ID_pointer) + { + const auto &ptr_type = to_pointer_type(expr.op().type()); + const std::size_t pw = ptr_type.get_width(); + byte_extract_exprt addr_extract( + expr.id(), + typecast_exprt(expr.op(), unsignedbv_typet{pw}), + expr.offset(), + expr.get_bits_per_byte(), + expr.type()); + return SUB::convert_byte_extract(addr_extract); + } + + // Source is a pointer array: extract addresses, byte_extract from those + if( + expr.op().type().id() == ID_array && + to_array_type(expr.op().type()).element_type().id() == ID_pointer && + expr.type().id() == ID_pointer) + { + const auto &arr_type = to_array_type(expr.op().type()); + const auto &ptr_type = to_pointer_type(arr_type.element_type()); + const std::size_t pw = ptr_type.get_width(); + const auto sz = numeric_cast(arr_type.size()); + + if(sz.has_value() && *sz > 0) + { + const bvt &arr_bv = convert_bv(expr.op()); + const std::size_t enc_w = boolbv_width(ptr_type); + + // Extract address (last pw bits) from each element + bvt addr_arr; + for(mp_integer i = 0; i < *sz; ++i) + { + std::size_t base = numeric_cast_v(i * enc_w); + std::size_t addr_start = base + enc_w - pw; + for(std::size_t j = 0; j < pw; ++j) + addr_arr.push_back(arr_bv[addr_start + j]); + } + + // Byte_extract from the address array + const std::size_t bpb = expr.get_bits_per_byte(); + bvt addr_result(pw, const_literal(false)); + + const auto off_opt = numeric_cast(expr.offset()); + if(off_opt.has_value()) + { + std::size_t bit_off = numeric_cast_v(*off_opt * bpb); + if(bit_off + pw <= addr_arr.size()) + addr_result = + bvt(addr_arr.begin() + bit_off, addr_arr.begin() + bit_off + pw); + } + else + { + for(std::size_t off = 0; off + pw <= addr_arr.size(); off += bpb) + { + literalt is_off = convert(equal_exprt( + expr.offset(), from_integer(off / bpb, expr.offset().type()))); + for(std::size_t j = 0; j < pw; ++j) + addr_result[j] = + prop.lselect(is_off, addr_arr[off + j], addr_result[j]); + } + } + + // Reconstruct pointer from address + const std::size_t object_bits = get_object_width(ptr_type); + const std::size_t offset_bits = get_offset_width(ptr_type); + const std::size_t addr_bits = get_address_width(ptr_type); + + if(bv_utils.is_zero(addr_result).is_true()) + return encode(pointer_logic.get_null_object(), ptr_type); + + bvt obj_bv = prop.new_variables(object_bits); + bvt off_bv = prop.new_variables(offset_bits); + + bvt null_obj = + bv_utils.build_constant(pointer_logic.get_null_object(), object_bits); + prop.l_set_to_true(prop.lor( + bv_utils.is_zero(addr_result), !bv_utils.equal(obj_bv, null_obj))); + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + bvt obj_const = bv_utils.build_constant(number, object_bits); + literalt is_this = bv_utils.equal(obj_bv, obj_const); + bvt base = get_object_base_address(number, addr_bits); + bvt off_ext = bv_utils.zero_extension(off_bv, addr_bits); + bvt flat = bv_utils.add(base, off_ext); + for(std::size_t k = 0; k < addr_bits; ++k) + { + prop.lcnf({!is_this, !flat[k], addr_result[k]}); + prop.lcnf({!is_this, flat[k], !addr_result[k]}); + } + } + + pending_i2p.push_back({obj_bv, off_bv, addr_result, number, true}); + for(const auto &l : obj_bv) + if(!l.is_constant()) + prop.set_frozen(l); + for(const auto &l : off_bv) + if(!l.is_constant()) + prop.set_frozen(l); + for(const auto &l : addr_result) + if(!l.is_constant()) + prop.set_frozen(l); + return object_offset_encoding(obj_bv, off_bv, addr_result); + } + } + + if(expr.type().id() != ID_pointer) + return SUB::convert_byte_extract(expr); + + // Extract only the platform-width address from the byte array, + // then reconstruct the full wide pointer encoding. + const auto &ptr_type = to_pointer_type(expr.type()); + const std::size_t platform_width = ptr_type.get_width(); + + // Create a byte_extract that returns an unsigned integer of + // platform width (the flat address) + byte_extract_exprt addr_extract( + expr.id(), + expr.op(), + expr.offset(), + expr.get_bits_per_byte(), + unsignedbv_typet{platform_width}); + bvt addr_bv = SUB::convert_byte_extract(addr_extract); + + // Reconstruct the full pointer from the flat address using + // the same logic as I2P: create fresh object/offset variables + // constrained by forward/backward constraints. + const std::size_t object_bits = get_object_width(ptr_type); + const std::size_t offset_bits = get_offset_width(ptr_type); + const std::size_t addr_bits = get_address_width(ptr_type); + + // Constant 0 => NULL + if(bv_utils.is_zero(addr_bv).is_true()) + return encode(pointer_logic.get_null_object(), ptr_type); + + bvt obj_bv = prop.new_variables(object_bits); + bvt off_bv = prop.new_variables(offset_bits); + + // Non-zero address => not null + bvt null_obj = + bv_utils.build_constant(pointer_logic.get_null_object(), object_bits); + prop.l_set_to_true( + prop.lor(bv_utils.is_zero(addr_bv), !bv_utils.equal(obj_bv, null_obj))); + + // Forward constraints for known objects + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + bvt obj_const = bv_utils.build_constant(number, object_bits); + literalt is_this = bv_utils.equal(obj_bv, obj_const); + bvt base = get_object_base_address(number, addr_bits); + bvt off_ext = bv_utils.zero_extension(off_bv, addr_bits); + bvt flat = bv_utils.add(base, off_ext); + for(std::size_t k = 0; k < addr_bits; ++k) + { + prop.lcnf({!is_this, !flat[k], addr_bv[k]}); + prop.lcnf({!is_this, flat[k], !addr_bv[k]}); + } + } + + // Defer backward constraints + pending_i2p.push_back({obj_bv, off_bv, addr_bv, number, true}); + + return object_offset_encoding(obj_bv, off_bv, addr_bv); +} + +bvt bv_pointerst::convert_byte_update(const byte_update_exprt &expr) +{ + if(!wide_pointer_encoding) + return SUB::convert_byte_update(expr); + + // Target is a pointer: update only the address component + if(expr.op().type().id() == ID_pointer) + { + const auto &ptr_type = to_pointer_type(expr.op().type()); + const std::size_t pw = ptr_type.get_width(); + bvt ptr_bv = convert_bv(expr.op()); + byte_update_exprt addr_update( + expr.id(), + typecast_exprt(expr.op(), unsignedbv_typet{pw}), + expr.offset(), + expr.value(), + expr.get_bits_per_byte()); + bvt new_addr = SUB::convert_byte_update(addr_update); + bvt obj = object_literals(ptr_bv, ptr_type); + bvt off = offset_literals(ptr_bv, ptr_type); + return object_offset_encoding(obj, off, new_addr); + } + + // Value is a pointer: write only the address + if(expr.value().type().id() == ID_pointer) + { + const auto &ptr_type = to_pointer_type(expr.value().type()); + const std::size_t platform_width = ptr_type.get_width(); + + // Extract the address component from the pointer value + const bvt &value_bv = convert_bv(expr.value()); + bvt addr_bv = address_literals(value_bv, ptr_type); + + // Create a byte_update that writes the address as an unsigned integer + // We need to create a fresh symbol for the address value + // and use it in a byte_update with the integer type. + // Simpler: lower to byte_update with the address as a typecast. + const typecast_exprt addr_as_int( + expr.value(), unsignedbv_typet{platform_width}); + byte_update_exprt int_update( + expr.id(), + expr.op(), + expr.offset(), + addr_as_int, + expr.get_bits_per_byte()); + return SUB::convert_byte_update(int_update); + } + + // For compound types containing pointers: lower to individual + // byte operations (let the default lowering handle it) + return convert_bv(lower_byte_update(expr, ns)); +} + bvt bv_pointerst::convert_bitvector(const exprt &expr) { if(expr.type().id()==ID_pointer) @@ -669,6 +1198,34 @@ bvt bv_pointerst::convert_bitvector(const exprt &expr) prop.limplies(same_object_lit, bv_utils.equal(difference, bv))); } + // Wide encoding: different objects => use address difference + if(wide_pointer_encoding && !same_object_lit.is_true()) + { + const pointer_typet &lhs_pt = to_pointer_type(minus_expr.lhs().type()); + const bvt &lhs = convert_bv(minus_expr.lhs()); + bvt lhs_addr = + bv_utils.zero_extension(address_literals(lhs, lhs_pt), width); + + const pointer_typet &rhs_pt = to_pointer_type(minus_expr.rhs().type()); + const bvt &rhs = convert_bv(minus_expr.rhs()); + bvt rhs_addr = + bv_utils.zero_extension(address_literals(rhs, rhs_pt), width); + + bvt addr_diff = bv_utils.sub(lhs_addr, rhs_addr); + + auto element_size_opt = pointer_offset_size(lhs_pt.base_type(), ns); + CHECK_RETURN(element_size_opt.has_value() && *element_size_opt > 0); + if(*element_size_opt != 1) + { + bvt element_size_bv = bv_utils.build_constant(*element_size_opt, width); + addr_diff = bv_utils.divider( + addr_diff, element_size_bv, bv_utilst::representationt::SIGNED); + } + + prop.l_set_to_true( + prop.limplies(!same_object_lit, bv_utils.equal(addr_diff, bv))); + } + return bv; } else if( @@ -698,6 +1255,17 @@ bvt bv_pointerst::convert_bitvector(const exprt &expr) return postponed_list.back().bv; } + else if(wide_pointer_encoding && expr.id() == ID_object_base_address) + { + // Postpone until all objects are known. + std::size_t width = boolbv_width(expr.type()); + + const auto &ptr = to_object_base_address_expr(expr).pointer(); + postponed_list.emplace_back( + prop.new_variables(width), convert_bv(ptr), expr); + + return postponed_list.back().bv; + } else if( expr.id() == ID_pointer_object && to_pointer_object_expr(expr).pointer().type().id() == ID_pointer) @@ -719,9 +1287,18 @@ bvt bv_pointerst::convert_bitvector(const exprt &expr) // pointer to int bvt op0 = convert_bv(to_typecast_expr(expr).op()); - // squeeze it in! std::size_t width=boolbv_width(expr.type()); + if(wide_pointer_encoding) + { + // Return the flat integer address. + const auto &ptr_type = + to_pointer_type(to_typecast_expr(expr).op().type()); + bvt addr = address_literals(op0, ptr_type); + return bv_utils.zero_extension(addr, width); + } + + // squeeze it in! return bv_utils.zero_extension(op0, width); } @@ -773,9 +1350,23 @@ exprt bv_pointerst::bv_get_rec( // we treat these like bit-vector constants, but with // some additional annotation - const irep_idt bvrep = make_bvrep(bits, [&value](std::size_t i) { - return value[value.size() - 1 - i] == '1'; - }); + const irep_idt bvrep = [&]() + { + if(wide_pointer_encoding) + { + // For trace output, use the address component (platform width) + bvt addr_bv = address_literals(value_bv, pt); + std::string addr_str = bits_to_string(prop, addr_bv); + const std::size_t pw = pt.get_width(); + return make_bvrep( + pw, + [&addr_str](std::size_t i) + { return addr_str[addr_str.size() - 1 - i] == '1'; }); + } + return make_bvrep( + bits, + [&value](std::size_t i) { return value[value.size() - 1 - i] == '1'; }); + }(); constant_exprt result(bvrep, type); @@ -796,6 +1387,13 @@ bvt bv_pointerst::encode(const mp_integer &addr, const pointer_typet &type) bvt zero_offset(offset_bits, const_literal(false)); bvt object = bv_utils.build_constant(addr, object_bits); + if(wide_pointer_encoding) + { + const std::size_t addr_bits = get_address_width(type); + bvt base = get_object_base_address(addr, addr_bits); + return object_offset_encoding(object, zero_offset, base); + } + return object_offset_encoding(object, zero_offset); } @@ -873,6 +1471,15 @@ bvt bv_pointerst::offset_arithmetic( bvt bv_tmp = bv_utils.add(offset_bv, bv_index); + if(wide_pointer_encoding) + { + // Also update the address component + bvt addr = address_literals(bv, type); + bvt addr_index = bv_utils.sign_extension(bv_index, addr.size()); + bvt new_addr = bv_utils.add(addr, addr_index); + return object_offset_encoding(object_literals(bv, type), bv_tmp, new_addr); + } + return object_offset_encoding(object_literals(bv, type), bv_tmp); } @@ -1099,10 +1706,345 @@ void bv_pointerst::finish_eager_conversion() #endif } } + else if(postponed.expr.id() == ID_object_base_address) + { + // MUX: for each object i, if pointer_object(ptr)==i then base[i] + const auto &ptr_type = to_pointer_type( + to_object_base_address_expr(postponed.expr).pointer().type()); + bvt obj_bv = object_literals(postponed.op, ptr_type); + const std::size_t addr_width = postponed.bv.size(); + + const auto &objects = pointer_logic.objects; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + bvt obj_const = bv_utils.build_constant(number, obj_bv.size()); + literalt is_this = bv_utils.equal(obj_bv, obj_const); + bvt base = get_object_base_address(number, addr_width); + for(std::size_t k = 0; k < addr_width; ++k) + { + prop.lcnf({!is_this, !base[k], postponed.bv[k]}); + prop.lcnf({!is_this, base[k], !postponed.bv[k]}); + } + } + } else UNREACHABLE; } // Clear the list to avoid re-doing in case of incremental usage. postponed_list.clear(); + + // Record variable count before adding wide encoding constraints + if(wide_pointer_encoding) + finish_eager_var_start = prop.no_variables(); + + // Add deferred I2P constraints now that all objects are known + if(wide_pointer_encoding) + { + const auto &objects = pointer_logic.objects; + for(const auto &p : pending_i2p) + { + const std::size_t object_bits = p.obj_bv.size(); + const std::size_t addr_bits = p.addr_bv.size(); + + std::vector valid_obj_lits; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + bvt obj_const = bv_utils.build_constant(number, object_bits); + literalt is_this = bv_utils.equal(p.obj_bv, obj_const); + valid_obj_lits.push_back(is_this); + + // Forward constraints for objects added after the I2P + if(number >= p.objects_at_creation) + { + bvt base = get_object_base_address(number, addr_bits); + bvt off_ext = bv_utils.zero_extension(p.off_bv, addr_bits); + bvt flat = bv_utils.add(base, off_ext); + for(std::size_t k = 0; k < addr_bits; ++k) + { + prop.lcnf({!is_this, !flat[k], p.addr_bv[k]}); + prop.lcnf({!is_this, flat[k], !p.addr_bv[k]}); + } + } + } + if(!valid_obj_lits.empty()) + prop.lcnf(valid_obj_lits); + } + } + + // Add non-overlapping constraints for wide pointer base addresses + if(wide_pointer_encoding && !object_base_address.empty()) + { + const std::size_t addr_width = config.ansi_c.pointer_width; + + // NULL base address = 0 + bvt null_base = get_object_base_address( + pointer_logic.get_null_object(), addr_width); + bvt zero_bv = bv_utils.build_constant(0, addr_width); + for(std::size_t i = 0; i < addr_width; ++i) + prop.set_equal(null_base[i], zero_bv[i]); + + // Collect objects with base addresses and known sizes + const auto &objects = pointer_logic.objects; + struct obj_info_t + { + mp_integer number; + mp_integer size; + bool is_dynamic; + }; + std::vector obj_infos; + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + // Skip integer-address objects — their addresses may + // overlap with regular objects. + if(integer_address_objects.count(number)) + continue; + auto size_opt = pointer_offset_size(it->type(), ns); + mp_integer size = + (size_opt.has_value() && *size_opt > 0) ? *size_opt : mp_integer{1}; + obj_infos.push_back( + {mp_integer(number), size, pointer_logic.is_dynamic_object(*it)}); + + // Overflow guard: base + size must not overflow + bvt base = get_object_base_address(mp_integer(number), addr_width); + bvt end = bv_utils.add(base, bv_utils.build_constant(size, addr_width)); + // end >= base (no wrap-around) + prop.l_set_to_true( + bv_utils.rel(end, ID_ge, base, bv_utilst::representationt::UNSIGNED)); + // Alignment + mp_integer alignment = std::min(size, mp_integer(addr_width / 8)); + mp_integer align_pow2 = 1; + while(align_pow2 * 2 <= alignment) + align_pow2 *= 2; + if(align_pow2 > 1) + { + std::size_t align_bits = 0; + mp_integer tmp = align_pow2; + while(tmp > 1) { align_bits++; tmp /= 2; } + for(std::size_t i = 0; i < align_bits && i < addr_width; ++i) + prop.l_set_to_true(!base[i]); + } + } + + // Non-overlapping: chain constraint (O(n) instead of O(n²)) + // Objects are ordered by index: base[0]+size[0] <= base[1], etc. + for(std::size_t i = 0; i + 1 < obj_infos.size(); ++i) + { + bvt base_i = + get_object_base_address(obj_infos[i].number, addr_width); + bvt end_i = bv_utils.add( + base_i, bv_utils.build_constant(obj_infos[i].size, addr_width)); + bvt base_next = + get_object_base_address(obj_infos[i + 1].number, addr_width); + + if( + config.bv_encoding.malloc_may_alias && obj_infos[i].is_dynamic && + obj_infos[i + 1].is_dynamic && + obj_infos[i].size == obj_infos[i + 1].size) + { + // Allow address reuse: same base OR non-overlapping + prop.l_set_to_true(prop.lor( + bv_utils.equal(base_next, base_i), + bv_utils.rel( + end_i, ID_le, base_next, bv_utilst::representationt::UNSIGNED))); + } + else + { + prop.l_set_to_true(bv_utils.rel( + end_i, ID_le, base_next, bv_utilst::representationt::UNSIGNED)); + } + } + + // Stack layout: place stack variables adjacently + if(model_stack_layout) + { + // Collect stack objects (function-scoped, non-dynamic) + struct stack_obj_t + { + mp_integer number; + mp_integer size; + irep_idt function; + std::size_t index; // declaration order + }; + std::vector stack_objs; + + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + if(integer_address_objects.count(number)) + continue; + const exprt &obj = *it; + if(obj.id() != ID_symbol) + continue; + const irep_idt &id = to_symbol_expr(obj).get_identifier(); + // Stack objects have "::" in their name (function-scoped) + const std::string id_str = id2string(id); + auto last_sep = id_str.rfind("::"); + if(last_sep == std::string::npos || last_sep == 0) + continue; + // Extract function name + auto func_end = id_str.rfind("::", last_sep - 1); + irep_idt func = func_end != std::string::npos + ? irep_idt(id_str.substr(0, func_end)) + : irep_idt(id_str.substr(0, last_sep)); + + auto sz_opt = pointer_offset_size(obj.type(), ns); + mp_integer sz = + (sz_opt.has_value() && *sz_opt > 0) ? *sz_opt : mp_integer{1}; + stack_objs.push_back({mp_integer(number), sz, func, stack_objs.size()}); + } + + // Group by function and add adjacency constraints + // (downward growth: later declarations get lower addresses) + std::map> by_function; + for(std::size_t i = 0; i < stack_objs.size(); ++i) + by_function[stack_objs[i].function].push_back(i); + + for(const auto &[func, indices] : by_function) + { + if(indices.size() < 2) + continue; + // Adjacent placement: base[i+1] = base[i] + size[i] + for(std::size_t j = 0; j + 1 < indices.size(); ++j) + { + const auto &cur = stack_objs[indices[j]]; + const auto &next = stack_objs[indices[j + 1]]; + bvt base_cur = get_object_base_address(cur.number, addr_width); + bvt base_next = get_object_base_address(next.number, addr_width); + + if(config.ansi_c.stack_grows_downward) + { + // Downward growth: base[cur] = base[next] + size[next] + // (earlier declarations get higher addresses) + bvt end_next = bv_utils.add( + base_next, bv_utils.build_constant(next.size, addr_width)); + for(std::size_t k = 0; k < addr_width; ++k) + prop.set_equal(base_cur[k], end_next[k]); + } + else + { + // Upward growth: base[next] = base[cur] + size[cur] + bvt end_cur = bv_utils.add( + base_cur, bv_utils.build_constant(cur.size, addr_width)); + for(std::size_t k = 0; k < addr_width; ++k) + prop.set_equal(base_next[k], end_cur[k]); + } + } + } + } + } + // Freeze variables created during finish_eager_conversion + // (non-overlapping constraints, deferred I2P forward constraints) + // to prevent MiniSat's simplifier from eliminating them. + // The backward constraint refinement in dec_solve creates + // bv_utils operations that may reuse these variables. + if(wide_pointer_encoding && finish_eager_var_start > 0) + for(unsigned i = finish_eager_var_start; i < prop.no_variables(); ++i) + prop.set_frozen(literalt(i, false)); +} + +bool bv_pointerst::check_SAT_backward_i2p() +{ + const auto &objects = pointer_logic.objects; + bool any_violation = false; + + for(const auto &p : pending_i2p) + { + if(!p.needs_backward_constraints) + continue; + if(any_violation) + break; + + const std::size_t object_bits = p.obj_bv.size(); + const std::size_t addr_bits = p.addr_bv.size(); + + mp_integer obj_val = 0; + for(std::size_t i = 0; i < object_bits; ++i) + if(prop.l_get(p.obj_bv[i]).is_true()) + obj_val += power(2, i); + + mp_integer addr_val = 0; + for(std::size_t i = 0; i < addr_bits; ++i) + if(prop.l_get(p.addr_bv[i]).is_true()) + addr_val += power(2, i); + + std::size_t number = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++number) + { + if(object_base_address.find(number) == object_base_address.end()) + continue; + auto sz = pointer_offset_size(it->type(), ns); + if(!sz.has_value() || *sz <= 0) + continue; + bvt base = get_object_base_address(number, addr_bits); + mp_integer base_val = 0; + for(std::size_t i = 0; i < addr_bits; ++i) + if(prop.l_get(base[i]).is_true()) + base_val += power(2, i); + if( + addr_val >= base_val && addr_val < base_val + *sz && + obj_val != mp_integer(number)) + { + any_violation = true; + break; + } + } + } + + if(!any_violation) + return false; + + // Clear bv_utils caches to prevent reuse of variables that + // MiniSat's simplifier may have eliminated during the first solve. + + for(const auto &p : pending_i2p) + { + if(!p.needs_backward_constraints) + continue; + const std::size_t obj_bits = p.obj_bv.size(); + const std::size_t a_bits = p.addr_bv.size(); + std::size_t num = 0; + for(auto it = objects.cbegin(); it != objects.cend(); ++it, ++num) + { + if(object_base_address.find(num) == object_base_address.end()) + continue; + auto sz = pointer_offset_size(it->type(), ns); + if(!sz.has_value() || *sz <= 0) + continue; + bvt base = get_object_base_address(num, a_bits); + bvt obj_const = bv_utils.build_constant(num, obj_bits); + literalt is_this = bv_utils.equal(p.obj_bv, obj_const); + literalt ge = bv_utils.rel( + p.addr_bv, ID_ge, base, bv_utilst::representationt::UNSIGNED); + bvt end_bv = bv_utils.add(base, bv_utils.build_constant(*sz, a_bits)); + literalt lt = bv_utils.rel( + p.addr_bv, ID_lt, end_bv, bv_utilst::representationt::UNSIGNED); + prop.l_set_to_true(prop.limplies(prop.land(ge, lt), is_this)); + } + } + for(auto &p : pending_i2p) + p.needs_backward_constraints = false; + return true; +} + +decision_proceduret::resultt bv_pointerst::dec_solve(const exprt &assumption) +{ + if(!wide_pointer_encoding) + return SUB::dec_solve(assumption); + + while(true) + { + auto result = SUB::dec_solve(assumption); + if(result != resultt::D_SATISFIABLE) + return result; + if(!check_SAT_backward_i2p()) + return result; + } } diff --git a/src/solvers/flattening/bv_pointers.h b/src/solvers/flattening/bv_pointers.h index 4486351fde0..83dc9c0b01b 100644 --- a/src/solvers/flattening/bv_pointers.h +++ b/src/solvers/flattening/bv_pointers.h @@ -13,6 +13,10 @@ Author: Daniel Kroening, kroening@kroening.com #include "boolbv.h" #include "pointer_logic.h" +class byte_extract_exprt; +class byte_update_exprt; +class index_exprt; + class bv_pointerst:public boolbvt { public: @@ -23,13 +27,40 @@ class bv_pointerst:public boolbvt bool get_array_constraints = false); void finish_eager_conversion() override; + bool boolbv_set_equality_to_true(const equal_exprt &expr) override; + decision_proceduret::resultt dec_solve(const exprt &) override; + + /// Check backward I2P constraints against the current SAT model. + /// Returns true if any constraints were added (progress made). + /// Used by both bv_pointerst::dec_solve and bv_refinementt::check_SAT. + bool check_SAT_backward_i2p(); + + std::size_t boolbv_width(const typet &type) const override; endianness_mapt endianness_map(const typet &, bool little_endian) const override; + /// Enable wide pointer encoding that includes a flat integer + /// address alongside the object/offset. This fixes pointer-to- + /// integer casts, integer-to-pointer casts, and byte-level + /// operations on pointer-containing types. + bool wide_pointer_encoding = false; + + /// Model stack layout: place stack variables adjacently in + /// declaration order with downward growth direction. + bool model_stack_layout = false; + + void set_pointer_width_multiplier(std::size_t m) + { + bv_width.set_pointer_width_multiplier(m); + } + protected: pointer_logict pointer_logic; + /// Layout: [object | offset | address] + /// When wide_pointer_encoding is false, address_width is 0 + /// and the layout is the traditional [offset | object]. std::size_t get_object_width(const pointer_typet &) const; std::size_t get_offset_width(const pointer_typet &) const; std::size_t get_address_width(const pointer_typet &) const; @@ -45,8 +76,14 @@ class bv_pointerst:public boolbvt [[nodiscard]] virtual bvt add_addr(const exprt &); // overloading + literalt convert_equality(const equal_exprt &) override; literalt convert_rest(const exprt &) override; bvt convert_bitvector(const exprt &) override; // no cache + bvt convert_byte_extract(const byte_extract_exprt &expr) override; + bvt convert_byte_update(const byte_update_exprt &expr) override; + bvt convert_index(const index_exprt &expr) override; + bvt convert_array(const exprt &expr) override; + bvt convert_with(const with_exprt &expr) override; exprt bv_get_rec(const exprt &, const bvt &, std::size_t offset) const override; @@ -82,6 +119,17 @@ class bv_pointerst:public boolbvt } }; + /// Pending integer-to-pointer casts whose backward constraints + /// are deferred to finish_eager_conversion (when all objects are known). + struct pending_i2p_t + { + bvt obj_bv, off_bv, addr_bv; + std::size_t objects_at_creation; + bool needs_backward_constraints; + }; + std::vector pending_i2p; + unsigned finish_eager_var_start = 0; + typedef std::list postponed_listt; postponed_listt postponed_list; @@ -110,12 +158,28 @@ class bv_pointerst:public boolbvt /// \return Vector of literals identifying the offset part of \p bv bvt offset_literals(const bvt &bv, const pointer_typet &type) const; + /// Given a pointer encoded in \p bv, extract the address literals. + /// Only meaningful when wide_pointer_encoding is true. + bvt address_literals(const bvt &bv, const pointer_typet &type) const; + + /// Symbolic base addresses per object number. + /// Used for wide pointer encoding to compute flat addresses. + mutable std::map object_base_address; + std::set integer_address_objects; + + /// Get or create a symbolic base address for an object. + bvt get_object_base_address(const mp_integer &object, std::size_t width) const; + /// Construct a pointer encoding from given encodings of \p object and \p /// offset. /// \param object: Encoded object /// \param offset: Encoded offset /// \return Pointer encoding static bvt object_offset_encoding(const bvt &object, const bvt &offset); + static bvt object_offset_encoding( + const bvt &object, + const bvt &offset, + const bvt &address); }; #endif // CPROVER_SOLVERS_FLATTENING_BV_POINTERS_H diff --git a/src/solvers/refinement/bv_refinement_loop.cpp b/src/solvers/refinement/bv_refinement_loop.cpp index f55b56097be..3a6d41bf2c4 100644 --- a/src/solvers/refinement/bv_refinement_loop.cpp +++ b/src/solvers/refinement/bv_refinement_loop.cpp @@ -129,6 +129,13 @@ void bv_refinementt::check_SAT() for(approximationt &approximation : this->approximations) check_SAT(approximation); + + // Check backward I2P constraints (from wide pointer encoding) + if(wide_pointer_encoding) + { + if(check_SAT_backward_i2p()) + progress = true; + } } void bv_refinementt::check_UNSAT() diff --git a/src/solvers/sat/satcheck_cadical.cpp b/src/solvers/sat/satcheck_cadical.cpp index 80b12756cd9..5fac788711c 100644 --- a/src/solvers/sat/satcheck_cadical.cpp +++ b/src/solvers/sat/satcheck_cadical.cpp @@ -22,6 +22,10 @@ tvt satcheck_cadical_baset::l_get(literalt a) const if(a.is_constant()) return tvt(a.sign()); + // CaDiCaL uses 1-based variables; var_no 0 is invalid + if(a.var_no() == 0) + return tvt(tvt::tv_enumt::TV_UNKNOWN); + tvt result; if(a.var_no() > narrow(solver->vars())) diff --git a/src/solvers/sat/satcheck_minisat2.cpp b/src/solvers/sat/satcheck_minisat2.cpp index 0cf42e6b1f2..7b1bb379a11 100644 --- a/src/solvers/sat/satcheck_minisat2.cpp +++ b/src/solvers/sat/satcheck_minisat2.cpp @@ -13,14 +13,15 @@ Author: Daniel Kroening, kroening@kroening.com # include #endif -#include - #include #include #include #include +#include +#include + #ifndef l_False # define l_False Minisat::l_False # define l_True Minisat::l_True @@ -254,7 +255,21 @@ propt::resultt satcheck_minisat2_baset::do_prop_solve(const bvt &assumptions) alarm(time_limit_seconds); } - lbool solver_result = solver->solveLimited(solver_assumptions); + lbool solver_result; + if constexpr(std::is_same_v) + { + // When limit_incremental_simplification is set, disable the + // simplifier after the first solve. MiniSat's simplifier can + // eliminate variables between incremental calls, which may make + // later UNSAT proofs extremely expensive for the CDCL search. + bool do_simp = !limit_incremental_simplification || !solver_was_called; + solver_result = solver->solveLimited(solver_assumptions, do_simp, false); + solver_was_called = true; + } + else + { + solver_result = solver->solveLimited(solver_assumptions); + } if(old_handler != SIG_ERR) { @@ -271,7 +286,18 @@ propt::resultt satcheck_minisat2_baset::do_prop_solve(const bvt &assumptions) << messaget::eom; } - lbool solver_result = solver->solve(solver_assumptions) ? l_True : l_False; + lbool solver_result; + if constexpr(std::is_same_v) + { + bool do_simp = !limit_incremental_simplification || !solver_was_called; + solver_result = + solver->solve(solver_assumptions, do_simp) ? l_True : l_False; + solver_was_called = true; + } + else + { + solver_result = solver->solve(solver_assumptions) ? l_True : l_False; + } #endif diff --git a/src/solvers/sat/satcheck_minisat2.h b/src/solvers/sat/satcheck_minisat2.h index 1aa2280feb8..f942edc1381 100644 --- a/src/solvers/sat/satcheck_minisat2.h +++ b/src/solvers/sat/satcheck_minisat2.h @@ -67,11 +67,21 @@ class satcheck_minisat2_baset : public cnf_solvert, public hardness_collectort time_limit_seconds=lim; } + /// When set, the MiniSat simplifier will only run on the first solve call. + /// This prevents variable elimination between incremental calls from + /// degrading CDCL performance on certain array-heavy problems. + void set_limit_incremental_simplification() + { + limit_incremental_simplification = true; + } + protected: resultt do_prop_solve(const bvt &) override; std::unique_ptr solver; uint32_t time_limit_seconds; + bool solver_was_called = false; + bool limit_incremental_simplification = false; void add_variables(); }; diff --git a/src/util/config.cpp b/src/util/config.cpp index da70a3b9888..c07ee5ab356 100644 --- a/src/util/config.cpp +++ b/src/util/config.cpp @@ -623,6 +623,7 @@ void configt::ansi_ct::set_arch_spec_hppa() endianness=endiannesst::IS_BIG_ENDIAN; char_is_unsigned=false; NULL_is_zero=true; + stack_grows_downward = false; // PA-RISC stack grows upward switch(mode) { diff --git a/src/util/config.h b/src/util/config.h index e1cdc3eb99c..225d6ecf67b 100644 --- a/src/util/config.h +++ b/src/util/config.h @@ -249,6 +249,9 @@ class configt // architecture-specific integer value of null pointer constant bool NULL_is_zero; + /// Stack growth direction. True for x86, ARM, and most architectures. + bool stack_grows_downward = true; + void set_arch_spec_i386(); void set_arch_spec_x86_64(); void set_arch_spec_power(const irep_idt &subarch); @@ -384,6 +387,11 @@ class configt // number of bits to encode heap object addresses std::size_t object_bits = 8; bool is_object_bits_default = true; + + /// When true, malloc may return the same address after free. + /// This prevents the simplifier from concluding that two + /// different dynamic objects have different addresses. + bool malloc_may_alias = false; } bv_encoding; // this is the function to start executing diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 8f1cfe001ab..d0c5757d8b3 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -470,6 +470,7 @@ IREP_ID_ONE(dynamic_object) IREP_ID_TWO(C_dynamic, #dynamic) IREP_ID_ONE(pointer_in_range) IREP_ID_ONE(object_size) +IREP_ID_ONE(object_base_address) IREP_ID_ONE(separate) IREP_ID_ONE(live_object) IREP_ID_ONE(writeable_object) diff --git a/src/util/pointer_expr.h b/src/util/pointer_expr.h index d3130b3110c..789a394385c 100644 --- a/src/util/pointer_expr.h +++ b/src/util/pointer_expr.h @@ -1451,6 +1451,44 @@ inline void validate_expr(const object_size_exprt &value) "Object size expression must have pointer typed operand."); } +/// \brief Expression to retrieve the base address of the object a pointer +/// points to. Returns an unsigned integer of pointer width. +/// Used by the wide pointer encoding for address-based dereference dispatch. +class object_base_address_exprt : public unary_exprt +{ +public: + explicit object_base_address_exprt(exprt pointer) + : unary_exprt( + ID_object_base_address, + pointer, + unsignedbv_typet{to_pointer_type(pointer.type()).get_width()}) + { + } + + exprt &pointer() + { + return op(); + } + + const exprt &pointer() const + { + return op(); + } +}; + +inline const object_base_address_exprt & +to_object_base_address_expr(const exprt &expr) +{ + PRECONDITION(expr.id() == ID_object_base_address); + return static_cast(expr); +} + +template <> +inline bool can_cast_expr(const exprt &base) +{ + return base.id() == ID_object_base_address; +} + /// A predicate that indicates that a zero-terminated string /// starts at the given address. /// This is an experimental feature for CHC encodings -- do not use. diff --git a/src/util/simplify_expr_int.cpp b/src/util/simplify_expr_int.cpp index f789bfdfb37..5227675ade3 100644 --- a/src/util/simplify_expr_int.cpp +++ b/src/util/simplify_expr_int.cpp @@ -1478,6 +1478,14 @@ simplify_exprt::resultt<> simplify_exprt::simplify_inequality_both_constant( // is and therefore cannot simplify return unchanged(expr); } + // malloc may return the same address after free — don't + // conclude inequality for non-null pointer constants + if( + config.bv_encoding.malloc_may_alias && !(tmp0_const == 0) && + !(tmp1_const == 0)) + { + return unchanged(expr); + } equal = tmp0_const == 0 && tmp1_const == 0; } return make_boolean_expr(expr.id() == ID_equal ? equal : !equal); diff --git a/src/util/simplify_expr_pointer.cpp b/src/util/simplify_expr_pointer.cpp index d1afed4af5a..71aebdef15e 100644 --- a/src/util/simplify_expr_pointer.cpp +++ b/src/util/simplify_expr_pointer.cpp @@ -10,6 +10,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "arith_tools.h" #include "c_types.h" +#include "config.h" #include "expr_util.h" #include "namespace.h" #include "pointer_expr.h" @@ -407,6 +408,19 @@ simplify_exprt::simplify_pointer_offset(const pointer_offset_exprt &expr) return unchanged(expr); } +static bool is_symex_dynamic_object(const exprt &obj) +{ + if(obj.id() == ID_dynamic_object) + return true; + const exprt &root = obj.id() == ID_index ? to_index_expr(obj).array() : obj; + if(root.id() == ID_symbol) + { + return id2string(to_symbol_expr(root).get_identifier()) + .find(SYMEX_DYNAMIC_PREFIX) != std::string::npos; + } + return false; +} + simplify_exprt::resultt<> simplify_exprt::simplify_inequality_address_of( const binary_relation_exprt &expr) { @@ -454,13 +468,16 @@ simplify_exprt::resultt<> simplify_exprt::simplify_inequality_address_of( return make_boolean_expr(expr.id() == ID_equal ? equal : !equal); } else if( - tmp0_object.id() == ID_dynamic_object && - tmp1_object.id() == ID_dynamic_object) + is_symex_dynamic_object(tmp0_object) && + is_symex_dynamic_object(tmp1_object)) { - bool equal = to_dynamic_object_expr(tmp0_object).get_instance() == - to_dynamic_object_expr(tmp1_object).get_instance(); + if(tmp0_object == tmp1_object) + return make_boolean_expr(expr.id() == ID_equal); - return make_boolean_expr(expr.id() == ID_equal ? equal : !equal); + if(config.bv_encoding.malloc_may_alias) + return unchanged(expr); + + return make_boolean_expr(expr.id() != ID_equal); } else if( (tmp0_object.id() == ID_symbol && tmp1_object.id() == ID_dynamic_object) ||