Skip to content

Commit 0d8cbf0

Browse files
committed
fix: get_padding for larger costs and padding lengths
Cost::get_padding previously assumed that the CompactSize encoding of the annex length always takes 1 byte when calculating the required padding size. For larger differences between cost and budget, this resulted in 1 or 2 bytes of excess padding, depending on the difference. I found this when calculating the padding for the SimplicityHL hash loop example: rust-simplicity calculated a 7426-byte annex padding while libsimplicity required only a 7424-byte padding, because the CompactSize encoding takes 2 additional bytes. See ElementsProject/elements#1539
1 parent 80f3821 commit 0d8cbf0

File tree

1 file changed

+56
-7
lines changed

1 file changed

+56
-7
lines changed

src/analysis.rs

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
use crate::jet::Jet;
44
use std::{cmp, fmt};
55

6+
use bitcoin::taproot::TAPROOT_ANNEX_PREFIX;
7+
68
use crate::value::Word;
79
#[cfg(feature = "elements")]
810
use elements::encode::Encodable;
@@ -152,15 +154,33 @@ impl Cost {
152154
return None;
153155
}
154156

155-
// Two bytes are automatically added to the encoded witness stack by adding the annex:
157+
// Adding the annex to the witness stack increases the serialized size by:
156158
//
157-
// 1. The encoded annex starts with the annex byte length
158-
// 2. The first annex byte is always 0x50
159+
// 1. CompactSize(annex_len): the length prefix of the annex item
160+
// 2. annex_len: the annex bytes themselves (0x50 tag + zero padding)
159161
//
160-
// The remaining padding is done by adding (zero) bytes to the annex.
161-
let required_padding = weight - budget - U32Weight(2);
162-
let padding_len = required_padding.0 as usize; // cast safety: 32-bit machine or higher
163-
let annex_bytes: Vec<u8> = std::iter::once(0x50)
162+
// CompactSize uses 1 byte for values <= 252, 3 bytes for <= 65535,
163+
// and 5 bytes for values <= 4_294_967_295. The overhead subtracted must account
164+
// for the actual CompactSize encoding length of the resulting annex.
165+
let deficit = (weight - budget).0 as usize; // cast safety: 32-bit machine or higher
166+
// overhead = compact_size_len + 1 (for 0x50 tag)
167+
let padding_len = match deficit {
168+
// annex_len <= 252, compact_size uses 1 byte, overhead = 2
169+
0..=253 => deficit.saturating_sub(2),
170+
// Boundary region: annex must be >= 253 bytes (3-byte compact_size),
171+
// but deficit - 4 would give padding_len < 252 (annex_len < 253). Use the minimum padding for 3-byte encoding.
172+
254..=255 => 252,
173+
// annex_len in 253..=65535, compact_size uses 3 bytes, overhead = 4
174+
256..=65538 => deficit - 4,
175+
// Boundary region for 5-byte compact_size encoding.
176+
65539..=65540 => 65535,
177+
// annex_len >= 65536, compact_size uses 5 bytes, overhead = 6
178+
_ => deficit - 6,
179+
// Note: the 9-byte compact_size boundary (deficit > 4_294_967_300)
180+
// is unreachable because Cost uses u32 milliweight, limiting the
181+
// maximum deficit to ~4_294_968 weight units.
182+
};
183+
let annex_bytes: Vec<u8> = std::iter::once(TAPROOT_ANNEX_PREFIX)
164184
.chain(std::iter::repeat(0x00).take(padding_len))
165185
.collect();
166186

@@ -435,6 +455,35 @@ mod tests {
435455
(Cost::from_milliweight(empty + 4_000), vec![], Some(3)),
436456
(Cost::from_milliweight(empty + 4_001), vec![], Some(4)),
437457
(Cost::from_milliweight(empty + 50_000), vec![], Some(49)),
458+
// Test around CompactSize boundary (annex_len crossing 252 -> 253)
459+
// deficit = 253: annex_len = 252, compact_size = 1 byte, overhead = 2
460+
(Cost::from_milliweight(empty + 253_000), vec![], Some(252)),
461+
// deficit = 254: annex_len must be 253 (3-byte compact_size), overhead = 4
462+
(Cost::from_milliweight(empty + 254_000), vec![], Some(253)),
463+
// deficit = 255: same boundary case
464+
(Cost::from_milliweight(empty + 255_000), vec![], Some(253)),
465+
// deficit = 256: annex_len = 253, compact_size = 3, exact fit
466+
(Cost::from_milliweight(empty + 256_000), vec![], Some(253)),
467+
// deficit = 257: annex_len = 254
468+
(Cost::from_milliweight(empty + 257_000), vec![], Some(254)),
469+
// Large annex (exercises the 3-byte compact_size path)
470+
(
471+
Cost::from_milliweight(empty + 7_424_000),
472+
vec![],
473+
Some(7_421),
474+
),
475+
// Hash loop example
476+
(
477+
Cost::from_milliweight(8_045_103),
478+
vec![vec![], vec![0; 497], vec![0; 32], vec![0; 33]],
479+
Some(7_424),
480+
),
481+
// Max
482+
(
483+
Cost::CONSENSUS_MAX,
484+
vec![],
485+
Some(3_999_994),
486+
),
438487
];
439488

440489
for (cost, mut witness, maybe_padding) in test_vectors {

0 commit comments

Comments
 (0)