diff --git a/packages/evm/core/src/compression.rs b/packages/evm/core/src/compression.rs index a6563df9e..3a6244daf 100644 --- a/packages/evm/core/src/compression.rs +++ b/packages/evm/core/src/compression.rs @@ -1,53 +1,158 @@ use std::{borrow::Cow, ops::Deref}; -const VERSION: u8 = 1; +// First byte of every stored value tags how the remainder is encoded: +// TAG_RAW: [0][raw bincode] (uncompressed) +// TAG_ZSTD: [1][orig_len: u32 LE][zstd payload] (compressed) +const TAG_RAW: u8 = 0; +const TAG_ZSTD: u8 = 1; + const ZSTD_LEVEL: i32 = 3; +// Compression is only attempted for serialized values at least this large. The frequently-written +// values are poor candidates for two independent reasons: +// - small size (accounts ~70 B, proofs ~120 B): too little context, and zstd's frame overhead +// plus the 4-byte length header outweigh any gain; +// - high-entropy content: keccak code hashes and BLS signatures are effectively random, so there +// is no redundancy to exploit at any size. +// The threshold skips the first case cheaply; the "keep raw unless actually smaller" check on the +// result handles the second. Larger structured values (headers with a sparse logs bloom, +// ABI-padded calldata, receipt logs, bytecode) still compress and are kept only when it shrinks them. +const MIN_COMPRESS_LEN: usize = 256; + #[derive(Debug)] -pub struct CompressedBincode(pub T); -impl<'a, T: serde::Serialize + 'a> heed::BytesEncode<'a> for CompressedBincode { - type EItem = CompressedBincode<&'a T>; +pub struct CompactBincode(pub T); +impl<'a, T: serde::Serialize + 'a> heed::BytesEncode<'a> for CompactBincode { + type EItem = CompactBincode<&'a T>; fn bytes_encode(item: &'a Self::EItem) -> Result, heed::BoxedError> { let raw = bincode::serialize(&item.0)?; - let orig_len = raw.len(); - let compressed = zstd::bulk::compress(&raw, ZSTD_LEVEL)?; - let mut out = Vec::with_capacity(1 + 4 + compressed.len()); + if raw.len() >= MIN_COMPRESS_LEN { + let compressed = zstd::bulk::compress(&raw, ZSTD_LEVEL)?; - // [1 byte version][4 bytes orig_len LE][compressed...] - out.push(VERSION); - out.extend_from_slice(&(orig_len as u32).to_le_bytes()); - out.extend_from_slice(&compressed); + // The compressed layout carries an extra 4-byte original-length header; only keep it + // when the result is genuinely smaller than storing raw (both forms share the 1-byte + // tag, so it cancels out of the comparison). + if compressed.len() + 4 < raw.len() { + let mut out = Vec::with_capacity(1 + 4 + compressed.len()); + out.push(TAG_ZSTD); + out.extend_from_slice(&(raw.len() as u32).to_le_bytes()); + out.extend_from_slice(&compressed); + return Ok(Cow::Owned(out)); + } + } + // Store raw. A value is therefore never persisted larger than its bincode encoding plus the + // single tag byte. + let mut out = Vec::with_capacity(1 + raw.len()); + out.push(TAG_RAW); + out.extend_from_slice(&raw); Ok(Cow::Owned(out)) } } -impl<'a, T: serde::de::DeserializeOwned + 'a> heed::BytesDecode<'a> for CompressedBincode { - type DItem = CompressedBincode; +impl<'a, T: serde::de::DeserializeOwned + 'a> heed::BytesDecode<'a> for CompactBincode { + type DItem = CompactBincode; fn bytes_decode(bytes: &'_ [u8]) -> Result { - let version = bytes[0]; - assert_eq!(version, VERSION, "unsupported version"); - - let mut len_bytes = [0u8; 4]; - len_bytes.copy_from_slice(&bytes[1..5]); - let orig_len = u32::from_le_bytes(len_bytes) as usize; + let (&tag, payload) = bytes + .split_first() + .ok_or("CompressedBincode: empty value")?; - let payload = &bytes[5..]; - let decompressed = zstd::bulk::decompress(payload, orig_len)?; + let deserialized = match tag { + TAG_ZSTD => { + if payload.len() < 4 { + return Err("CompressedBincode: truncated zstd header".into()); + } + let (len_bytes, compressed) = payload.split_at(4); + let orig_len = u32::from_le_bytes(len_bytes.try_into().unwrap()) as usize; + let decompressed = zstd::bulk::decompress(compressed, orig_len)?; + bincode::deserialize(&decompressed)? + } + TAG_RAW => bincode::deserialize(payload)?, + other => return Err(format!("CompressedBincode: unknown tag {other}").into()), + }; - let deserialized = bincode::deserialize(&decompressed)?; - - Ok(CompressedBincode(deserialized)) + Ok(CompactBincode(deserialized)) } } -impl Deref for CompressedBincode { +impl Deref for CompactBincode { type Target = T; fn deref(&self) -> &Self::Target { &self.0 } } + +#[cfg(test)] +mod tests { + use heed::{BytesDecode, BytesEncode}; + + use super::*; + + fn encode(value: &Vec) -> Vec { + let item = CompactBincode(value); + > as BytesEncode>::bytes_encode(&item) + .unwrap() + .into_owned() + } + + fn decode(bytes: &[u8]) -> Vec { + > as BytesDecode>::bytes_decode(bytes) + .unwrap() + .0 + } + + #[test] + fn small_value_is_stored_raw() { + let value = vec![1u8, 2, 3, 4, 5]; + let encoded = encode(&value); + assert_eq!(encoded[0], TAG_RAW); + assert_eq!(decode(&encoded), value); + } + + #[test] + fn large_compressible_value_is_stored_zstd_and_smaller() { + let value = vec![7u8; 4096]; + let encoded = encode(&value); + assert_eq!(encoded[0], TAG_ZSTD); + assert!(encoded.len() < value.len()); + assert_eq!(decode(&encoded), value); + } + + #[test] + fn output_never_exceeds_raw_plus_tag() { + // A large, hard-to-compress value: compression is attempted but must fall back to raw + // rather than store something bigger. + let value: Vec = (0..2048u32) + .map(|i| (i.wrapping_mul(2_654_435_761) >> 13) as u8) + .collect(); + let raw_len = bincode::serialize(&value).unwrap().len(); + + let encoded = encode(&value); + assert!( + encoded.len() <= raw_len + 1, + "encoded {} raw {}", + encoded.len(), + raw_len + ); + assert_eq!(decode(&encoded), value); + } + + #[test] + fn empty_input_is_an_error_not_a_panic() { + assert!(> as BytesDecode>::bytes_decode(&[]).is_err()); + } + + #[test] + fn truncated_zstd_header_is_an_error_not_a_panic() { + // TAG_ZSTD with fewer than the four orig_len header bytes must error, not panic on the slice. + assert!(> as BytesDecode>::bytes_decode(&[TAG_ZSTD, 1, 2]).is_err()); + } + + #[test] + fn unknown_tag_is_an_error_not_a_panic() { + assert!(> as BytesDecode>::bytes_decode(&[9, 0, 0]).is_err()); + } +} diff --git a/packages/evm/core/src/db.rs b/packages/evm/core/src/db.rs index 78954494a..2be8dc0f5 100644 --- a/packages/evm/core/src/db.rs +++ b/packages/evm/core/src/db.rs @@ -22,7 +22,7 @@ use serde::{Deserialize, Serialize}; use crate::{ account::{AccountInfoExtended, StoredAccountInfo}, bytecode::StoredBytecode, - compression::CompressedBincode, + compression::CompactBincode, historical::{AccountHistory, HistoricalAccountData}, legacy::{LegacyAccountAttributes, LegacyAddress, LegacyColdWallet}, logger::{LogLevel, Logger}, @@ -151,19 +151,14 @@ pub(crate) struct CommitReceipts { } pub(crate) struct InnerStorage { - pub accounts: heed::Database>, + pub accounts: heed::Database>, pub accounts_history: Option< - heed::Database< - HeedBlockNumber, - CompressedBincode>, - >, + heed::Database>>, >, - pub commits: heed::Database>, - pub contracts: heed::Database>, - pub legacy_attributes: - heed::Database>, - pub legacy_cold_wallets: - heed::Database>, + pub commits: heed::Database>, + pub contracts: heed::Database>, + pub legacy_attributes: heed::Database>, + pub legacy_cold_wallets: heed::Database>, pub storage: heed::Database< AddressWrapper, StorageEntryWrapper, @@ -172,10 +167,10 @@ pub(crate) struct InnerStorage { >, // Carried over from previous database-service.ts lmdb backend pub state: heed::Database>, - pub proofs: heed::Database>, - pub blocks: heed::Database>, + pub proofs: heed::Database>, + pub blocks: heed::Database>, pub blocks_hash_number: heed::Database, - pub transactions: heed::Database>, + pub transactions: heed::Database>, pub transactions_hash_key: heed::Database>, // } @@ -374,36 +369,35 @@ impl PersistentDB { let tx_env = env.clone(); let mut wtxn = tx_env.write_txn()?; - let accounts = env - .create_database::>( - &mut wtxn, - Some("accounts"), - )?; + let accounts = env.create_database::>( + &mut wtxn, + Some("accounts"), + )?; let (accounts_history_db, accounts_history) = match opts.history_size { Some(history_size) if history_size > 0 => { - let db = env.create_database::>>(&mut wtxn, Some("accounts_history")) ?; (Some(db), Some(AccountHistory::new(history_size))) } _ => (None, None), }; - let commits = env.create_database::>( + let commits = env.create_database::>( &mut wtxn, Some("commits"), )?; - let contracts = env.create_database::>( + let contracts = env.create_database::>( &mut wtxn, Some("contracts"), )?; let legacy_attributes = env - .create_database::>( + .create_database::>( &mut wtxn, Some("legacy_attributes"), )?; let legacy_cold_wallets = env - .create_database::>( + .create_database::>( &mut wtxn, Some("legacy_cold_wallets"), )?; @@ -420,11 +414,11 @@ impl PersistentDB { &mut wtxn, Some("state"), )?; - let proofs = env.create_database::>( + let proofs = env.create_database::>( &mut wtxn, Some("proofs"), )?; - let blocks = env.create_database::>( + let blocks = env.create_database::>( &mut wtxn, Some("blocks"), )?; @@ -432,11 +426,10 @@ impl PersistentDB { &mut wtxn, Some("blocks_hash_number"), )?; - let transactions = env - .create_database::>( - &mut wtxn, - Some("transactions"), - )?; + let transactions = env.create_database::>( + &mut wtxn, + Some("transactions"), + )?; let transactions_hash_key = env .create_database::>( &mut wtxn, @@ -480,7 +473,7 @@ impl PersistentDB { inner.accounts.put( &mut wtxn, &AddressWrapper(genesis_info.account), - &CompressedBincode(&StoredAccountInfo::new( + &CompactBincode(&StoredAccountInfo::new( genesis_info.initial_supply, 0, KECCAK_EMPTY, @@ -863,7 +856,7 @@ impl PersistentDB { inner.accounts.put( rwtxn, &address, - &CompressedBincode(&StoredAccountInfo::new( + &CompactBincode(&StoredAccountInfo::new( account.balance, account.nonce, account.code_hash, @@ -893,11 +886,9 @@ impl PersistentDB { // Update legacy attributes for (address, legacy_attributes) in legacy_attributes.into_iter() { let address = AddressWrapper(*address); - inner.legacy_attributes.put( - rwtxn, - &address, - &CompressedBincode(legacy_attributes), - )?; + inner + .legacy_attributes + .put(rwtxn, &address, &CompactBincode(legacy_attributes))?; } // Update legacy cold wallets @@ -906,7 +897,7 @@ impl PersistentDB { inner.legacy_cold_wallets.put( rwtxn, &address, - &CompressedBincode(legacy_cold_wallets), + &CompactBincode(legacy_cold_wallets), )?; } @@ -915,7 +906,7 @@ impl PersistentDB { inner.contracts.put( rwtxn, &HashWrapper(*hash), - &CompressedBincode(&bytecode.clone().into()), + &CompactBincode(&bytecode.clone().into()), )?; } @@ -994,18 +985,16 @@ impl PersistentDB { assert!(legacy_cold_wallet.merge_info.is_none()); legacy_cold_wallet.merge_info.replace((legacy.0, *address)); - inner.legacy_cold_wallets.put( - rwtxn, - key, - &CompressedBincode(&legacy_cold_wallet), - )?; + inner + .legacy_cold_wallets + .put(rwtxn, key, &CompactBincode(&legacy_cold_wallet))?; // The legacy balance has already been applied to the `PendingCommit`, // thus only the legacy attributes need to be moved to a different storage. inner.legacy_attributes.put( rwtxn, &AddressWrapper(*address), - &CompressedBincode(&legacy_cold_wallet.legacy_attributes), + &CompactBincode(&legacy_cold_wallet.legacy_attributes), )?; } @@ -1019,15 +1008,13 @@ impl PersistentDB { } = commit_data; // Update blocks - inner - .blocks - .put(rwtxn, &key.0, &CompressedBincode(header))?; + inner.blocks.put(rwtxn, &key.0, &CompactBincode(header))?; inner .blocks_hash_number .put(rwtxn, &HashWrapper(header.hash), &key.0)?; // Update proofs - inner.proofs.put(rwtxn, &key.0, &CompressedBincode(proof))?; + inner.proofs.put(rwtxn, &key.0, &CompactBincode(proof))?; // Update transactions for (sequence, _) in transactions.iter().enumerate() { @@ -1043,7 +1030,7 @@ impl PersistentDB { inner.transactions.put( rwtxn, &StringWrapper(key), - &CompressedBincode(transaction), + &CompactBincode(transaction), )?; } @@ -1070,7 +1057,7 @@ impl PersistentDB { inner.commits.put( rwtxn, &key.0, - &CompressedBincode(&CommitReceipts { tx_receipts }), + &CompactBincode(&CommitReceipts { tx_receipts }), )?; Ok(()) @@ -1255,7 +1242,7 @@ mod tests { use crate::{ account::StoredAccountInfo, - compression::CompressedBincode, + compression::CompactBincode, db::{ AddressWrapper, BlockHeaderData, CommitData, CommitKey, CommitReceipts, HashWrapper, LegacyAddressWrapper, MAP_SIZE_UNIT, PendingCommit, PersistentDB, PersistentDBOptions, @@ -1680,7 +1667,7 @@ mod tests { .put( &mut wtxn, &AddressWrapper(*address), - &CompressedBincode(&StoredAccountInfo { + &CompactBincode(&StoredAccountInfo { balance: U256::from(index), nonce: index as u64, ..Default::default() @@ -1694,7 +1681,7 @@ mod tests { .put( &mut wtxn, &AddressWrapper(*address), - &CompressedBincode(&LegacyAccountAttributes::default()), + &CompactBincode(&LegacyAccountAttributes::default()), ) .unwrap(); } @@ -1751,7 +1738,7 @@ mod tests { .put( &mut wtxn, &LegacyAddressWrapper(legacy_address), - &CompressedBincode(&LegacyColdWallet { + &CompactBincode(&LegacyColdWallet { address: legacy_address, balance: U256::from(index), legacy_attributes: Default::default(), @@ -1828,7 +1815,7 @@ mod tests { .borrow_mut() .accounts_history .unwrap() - .put(&mut wtxn, &1, &CompressedBincode(&entries)) + .put(&mut wtxn, &1, &CompactBincode(&entries)) .unwrap(); wtxn.commit().unwrap(); @@ -1970,9 +1957,7 @@ mod tests { .put( &mut wtxn, &HashWrapper(hash), - &CompressedBincode( - &Bytecode::new_raw(Bytes::from_static(&[0, 1, 2, 3])).into(), - ), + &CompactBincode(&Bytecode::new_raw(Bytes::from_static(&[0, 1, 2, 3])).into()), ) .unwrap(); @@ -2028,7 +2013,7 @@ mod tests { .put( &mut wtxn, &1, - &CompressedBincode(&BlockHeaderData { + &CompactBincode(&BlockHeaderData { hash: b256!( "0000000000000000000000000000000000000000000000000000000000000001" ), @@ -2101,7 +2086,7 @@ mod tests { .put( &mut wtxn, &1, - &CompressedBincode(&CommitReceipts { + &CompactBincode(&CommitReceipts { tx_receipts, ..Default::default() }), @@ -2158,7 +2143,7 @@ mod tests { .put( &mut wtxn, &block_number, - &CompressedBincode(&CommitReceipts { + &CompactBincode(&CommitReceipts { tx_receipts: receipts, ..Default::default() }), @@ -2216,7 +2201,7 @@ mod tests { .put( &mut wtxn, &AddressWrapper(address), - &CompressedBincode(&LegacyAccountAttributes { + &CompactBincode(&LegacyAccountAttributes { legacy_nonce: Some(1234), second_public_key: Some("key".into()), multi_signature: None, @@ -2252,7 +2237,7 @@ mod tests { .put( &mut wtxn, &1, - &CompressedBincode(&BlockHeaderData { + &CompactBincode(&BlockHeaderData { hash: b256!( "0000000000000000000000000000000000000000000000000000000000000001" ), @@ -2292,7 +2277,7 @@ mod tests { .put( &mut wtxn, &255, - &CompressedBincode(&BlockHeaderData { + &CompactBincode(&BlockHeaderData { number: 255, hash: b256!( "0000000000000000000000000000000000000000000000000000000000000001" @@ -2346,7 +2331,7 @@ mod tests { .put( &mut wtxn, &1, - &CompressedBincode(&BlockHeaderData { + &CompactBincode(&BlockHeaderData { number: 1, hash, ..Default::default() @@ -2382,7 +2367,7 @@ mod tests { .put( &mut wtxn, &1, - &CompressedBincode(&ProofData { + &CompactBincode(&ProofData { round: 1, validator_set: 1234, ..Default::default() @@ -2421,7 +2406,7 @@ mod tests { .put( &mut wtxn, &StringWrapper(key.clone()), - &CompressedBincode(&TransactionData { + &CompactBincode(&TransactionData { tx_hash: hash, ..Default::default() }), diff --git a/packages/evm/core/src/historical.rs b/packages/evm/core/src/historical.rs index 3fbe087e4..f29408806 100644 --- a/packages/evm/core/src/historical.rs +++ b/packages/evm/core/src/historical.rs @@ -6,7 +6,7 @@ use revm::{ state::AccountInfo, }; -use crate::{compression::CompressedBincode, db::Error}; +use crate::{compression::CompactBincode, db::Error}; #[derive(Clone, Debug, Default, PartialEq, Eq, serde::Serialize, serde::Deserialize)] pub struct HistoricalAccountData { @@ -39,7 +39,7 @@ impl AccountHistory { txn: &mut RwTxn, database: &heed::Database< heed::types::U64, - CompressedBincode>, + CompactBincode>, >, block_number: u64, accounts: Vec<(Address, AccountInfo)>, @@ -58,7 +58,7 @@ impl AccountHistory { .map(|a| (a.0, HistoricalAccountData::from(a.1))) .collect::>(); - database.put(txn, &block_number, &CompressedBincode(&data))?; + database.put(txn, &block_number, &CompactBincode(&data))?; Ok(()) } @@ -68,7 +68,7 @@ impl AccountHistory { txn: &RoTxn, database: &heed::Database< heed::types::U64, - CompressedBincode>, + CompactBincode>, >, block_number: u64, address: &Address,