Skip to content

Commit 1b60243

Browse files
committed
fix: get_padding for larger costs and padding lengths
Cost::get_padding previously always assumed a 1-byte CompactSize encoding of the annex length when calculating the required padding size. For larger differences between cost and budget, this incorrectly resulted in 1 or 2 extra bytes of padding, depending on the difference. I found this when calculating padding for the SimplicityHL hash loop example, where rust-simplicity calculated a 7426-byte annex padding while libsimplicity required a 7424-byte padding, because the CompactSize length prefix takes 2 additional bytes (3 bytes instead of 1). See ElementsProject/elements#1539
1 parent 80f3821 commit 1b60243

1 file changed

Lines changed: 53 additions & 7 deletions

File tree

src/analysis.rs

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
use crate::jet::Jet;
44
use std::{cmp, fmt};
55

6+
use bitcoin::taproot::TAPROOT_ANNEX_PREFIX;
7+
68
use crate::value::Word;
79
#[cfg(feature = "elements")]
810
use elements::encode::Encodable;
@@ -152,15 +154,34 @@ impl Cost {
152154
return None;
153155
}
154156

155-
// Two bytes are automatically added to the encoded witness stack by adding the annex:
157+
// Adding the annex to the witness stack increases the serialized size by:
156158
//
157-
// 1. The encoded annex starts with the annex byte length
158-
// 2. The first annex byte is always 0x50
159+
// 1. CompactSize(annex_len): the length prefix of the annex item
160+
// 2. annex_len: the annex bytes themselves (0x50 tag + zero padding)
159161
//
160-
// The remaining padding is done by adding (zero) bytes to the annex.
161-
let required_padding = weight - budget - U32Weight(2);
162-
let padding_len = required_padding.0 as usize; // cast safety: 32-bit machine or higher
163-
let annex_bytes: Vec<u8> = std::iter::once(0x50)
162+
// CompactSize uses 1 byte for values <= 252, 3 bytes for <= 65535,
163+
// and 5 bytes for values up to 4_294_967_295 (u32::MAX). The overhead subtracted must account
164+
// for the actual CompactSize encoding length of the resulting annex.
165+
let deficit = (weight - budget).0 as usize; // cast safety: 32-bit machine or higher
166+
167+
// overhead = compact_size_len + 1 (for 0x50 tag)
168+
let padding_len = match deficit {
169+
// annex_len <= 252, compact_size uses 1 byte, overhead = 2
170+
0..=253 => deficit.saturating_sub(2),
171+
// Boundary region: a 252-byte annex (1-byte compact_size) adds only 253 bytes, which is
172+
// too few, so the annex must be 253 bytes (3-byte compact_size, 256 bytes added). Use the minimum padding for 3-byte encoding.
173+
254..=255 => 252,
174+
// annex_len in 253..=65535, compact_size uses 3 bytes, overhead = 4
175+
256..=65538 => deficit - 4,
176+
// Boundary region for 5-byte compact_size encoding: a 65535-byte annex adds only 65538 bytes, so use a 65536-byte annex (65541 bytes added).
177+
65539..=65540 => 65535,
178+
// annex_len >= 65536, compact_size uses 5 bytes, overhead = 6
179+
_ => deficit - 6,
180+
// Note: the 9-byte compact_size boundary (deficit > 4_294_967_300)
181+
// is unreachable because Cost uses u32 milliweight, limiting the
182+
// maximum deficit to ~4_294_968 weight units.
183+
};
184+
let annex_bytes: Vec<u8> = std::iter::once(TAPROOT_ANNEX_PREFIX)
164185
.chain(std::iter::repeat(0x00).take(padding_len))
165186
.collect();
166187

@@ -435,6 +456,31 @@ mod tests {
435456
(Cost::from_milliweight(empty + 4_000), vec![], Some(3)),
436457
(Cost::from_milliweight(empty + 4_001), vec![], Some(4)),
437458
(Cost::from_milliweight(empty + 50_000), vec![], Some(49)),
459+
// Test around CompactSize boundary (annex_len crossing 252 -> 253)
460+
// deficit = 253: annex_len = 252, compact_size = 1 byte, overhead = 2
461+
(Cost::from_milliweight(empty + 253_000), vec![], Some(252)),
462+
// deficit = 254: annex_len must be 253 (3-byte compact_size), overhead = 4
463+
(Cost::from_milliweight(empty + 254_000), vec![], Some(253)),
464+
// deficit = 255: same boundary case
465+
(Cost::from_milliweight(empty + 255_000), vec![], Some(253)),
466+
// deficit = 256: annex_len = 253, compact_size = 3, exact fit
467+
(Cost::from_milliweight(empty + 256_000), vec![], Some(253)),
468+
// deficit = 257: annex_len = 254
469+
(Cost::from_milliweight(empty + 257_000), vec![], Some(254)),
470+
// Large annex (exercises the 3-byte compact_size path)
471+
(
472+
Cost::from_milliweight(empty + 7_424_000),
473+
vec![],
474+
Some(7_421),
475+
),
476+
// Hash loop example
477+
(
478+
Cost::from_milliweight(8_045_103),
479+
vec![vec![], vec![0; 497], vec![0; 32], vec![0; 33]],
480+
Some(7_424),
481+
),
482+
// Consensus maximum cost (exercises the 5-byte compact_size path)
483+
(Cost::CONSENSUS_MAX, vec![], Some(3_999_994)),
438484
];
439485

440486
for (cost, mut witness, maybe_padding) in test_vectors {

0 commit comments

Comments
 (0)