@@ -3,6 +3,9 @@ use crate::opcodes::*;
33use crate :: types:: PickleValue ;
44use num_bigint:: BigInt ;
55
6+ const MAX_MEMO_SIZE : usize = 100_000 ;
7+ const MAX_BINARY_SIZE : u64 = 256 * 1024 * 1024 ; // 256 MB
8+
69/// Decode pickle bytes into a PickleValue AST.
710///
811/// This implements a subset of the pickle virtual machine sufficient
@@ -99,6 +102,9 @@ impl<'a> Decoder<'a> {
99102 let line = self . read_line ( ) ?;
100103 let s = std:: str:: from_utf8 ( line) . map_err ( |_| CodecError :: InvalidUtf8 ) ?;
101104 let s = s. trim ( ) . trim_end_matches ( 'L' ) ;
105+ if s. len ( ) > 10_000 {
106+ return Err ( CodecError :: InvalidData ( "LONG value too large" . to_string ( ) ) ) ;
107+ }
102108 let val: BigInt = s
103109 . parse ( )
104110 . map_err ( |e| CodecError :: InvalidData ( format ! ( "LONG parse: {e}" ) ) ) ?;
@@ -120,7 +126,11 @@ impl<'a> Decoder<'a> {
120126 }
121127 }
122128 LONG4 => {
123- let n = self . read_i32 ( ) ? as usize ;
129+ let n = self . read_i32 ( ) ?;
130+ if n < 0 {
131+ return Err ( CodecError :: InvalidData ( "negative length in LONG4" . to_string ( ) ) ) ;
132+ }
133+ let n = n as usize ;
124134 let bytes = self . read_bytes ( n) ?;
125135 let val = BigInt :: from_signed_bytes_le ( bytes) ;
126136 if let Ok ( v) = i64:: try_from ( & val) {
@@ -148,7 +158,11 @@ impl<'a> Decoder<'a> {
148158
149159 // -- Strings (Python 2 str / bytes) --
150160 BINSTRING => {
151- let n = self . read_i32 ( ) ? as usize ;
161+ let n = self . read_i32 ( ) ?;
162+ if n < 0 {
163+ return Err ( CodecError :: InvalidData ( "negative length in BINSTRING" . to_string ( ) ) ) ;
164+ }
165+ let n = n as usize ;
152166 let bytes = self . read_bytes ( n) ?. to_vec ( ) ;
153167 self . push ( PickleValue :: Bytes ( bytes) ) ;
154168 }
@@ -193,7 +207,11 @@ impl<'a> Decoder<'a> {
193207 self . push ( PickleValue :: String ( s. to_string ( ) ) ) ;
194208 }
195209 BINUNICODE8 => {
196- let n = self . read_u64 ( ) ? as usize ;
210+ let n = self . read_u64 ( ) ?;
211+ if n > MAX_BINARY_SIZE {
212+ return Err ( CodecError :: InvalidData ( "BINUNICODE8 data too large" . to_string ( ) ) ) ;
213+ }
214+ let n = n as usize ;
197215 let bytes = self . read_bytes ( n) ?;
198216 let s =
199217 std:: str:: from_utf8 ( bytes) . map_err ( |_| CodecError :: InvalidUtf8 ) ?;
@@ -212,7 +230,11 @@ impl<'a> Decoder<'a> {
212230 self . push ( PickleValue :: Bytes ( bytes) ) ;
213231 }
214232 BINBYTES8 => {
215- let n = self . read_u64 ( ) ? as usize ;
233+ let n = self . read_u64 ( ) ?;
234+ if n > MAX_BINARY_SIZE {
235+ return Err ( CodecError :: InvalidData ( "BINBYTES8 data too large" . to_string ( ) ) ) ;
236+ }
237+ let n = n as usize ;
216238 let bytes = self . read_bytes ( n) ?. to_vec ( ) ;
217239 self . push ( PickleValue :: Bytes ( bytes) ) ;
218240 }
@@ -555,17 +577,17 @@ impl<'a> Decoder<'a> {
555577 BINPUT => {
556578 let idx = self . read_u8 ( ) ? as usize ;
557579 let val = self . peek_value ( ) ?. clone ( ) ;
558- self . memo_put ( idx, val) ;
580+ self . memo_put ( idx, val) ? ;
559581 }
560582 LONG_BINPUT => {
561583 let idx = self . read_u32 ( ) ? as usize ;
562584 let val = self . peek_value ( ) ?. clone ( ) ;
563- self . memo_put ( idx, val) ;
585+ self . memo_put ( idx, val) ? ;
564586 }
565587 MEMOIZE => {
566588 let val = self . peek_value ( ) ?. clone ( ) ;
567589 let idx = self . memo . len ( ) ;
568- self . memo_put ( idx, val) ;
590+ self . memo_put ( idx, val) ? ;
569591 }
570592 BINGET => {
571593 let idx = self . read_u8 ( ) ? as usize ;
@@ -585,7 +607,7 @@ impl<'a> Decoder<'a> {
585607 . parse ( )
586608 . map_err ( |e| CodecError :: InvalidData ( format ! ( "PUT index: {e}" ) ) ) ?;
587609 let val = self . peek_value ( ) ?. clone ( ) ;
588- self . memo_put ( idx, val) ;
610+ self . memo_put ( idx, val) ? ;
589611 }
590612 GET => {
591613 let line = self . read_line ( ) ?;
@@ -705,11 +727,15 @@ impl<'a> Decoder<'a> {
705727
706728 // -- Memo operations --
707729
708- fn memo_put ( & mut self , idx : usize , val : PickleValue ) {
730+ fn memo_put ( & mut self , idx : usize , val : PickleValue ) -> Result < ( ) , CodecError > {
731+ if idx >= MAX_MEMO_SIZE {
732+ return Err ( CodecError :: InvalidData ( format ! ( "memo index {idx} exceeds maximum {MAX_MEMO_SIZE}" ) ) ) ;
733+ }
709734 if idx >= self . memo . len ( ) {
710735 self . memo . resize ( idx + 1 , PickleValue :: None ) ;
711736 }
712737 self . memo [ idx] = val;
738+ Ok ( ( ) )
713739 }
714740
715741 fn memo_get ( & self , idx : usize ) -> Result < PickleValue , CodecError > {
@@ -881,4 +907,61 @@ mod tests {
881907 panic ! ( "expected Tuple, got {:?}" , result) ;
882908 }
883909 }
910+
911+ #[ test]
912+ fn test_long4_negative_length ( ) {
913+ // PROTO 2, LONG4 with length=-1 (0xFFFFFFFF as i32)
914+ let data = b"\x80 \x02 \x8b \xff \xff \xff \xff " ;
915+ let err = decode_pickle ( data) . unwrap_err ( ) ;
916+ assert ! ( err. to_string( ) . contains( "negative length" ) ) ;
917+ }
918+
919+ #[ test]
920+ fn test_binstring_negative_length ( ) {
921+ // PROTO 2, BINSTRING with length=-1
922+ let data = b"\x80 \x02 T\xff \xff \xff \xff " ;
923+ let err = decode_pickle ( data) . unwrap_err ( ) ;
924+ assert ! ( err. to_string( ) . contains( "negative length" ) ) ;
925+ }
926+
927+ #[ test]
928+ fn test_memo_index_too_large ( ) {
929+ // PROTO 2, NONE, LONG_BINPUT with index=4_000_000_000
930+ let idx_bytes = 4_000_000_000u32 . to_le_bytes ( ) ;
931+ let mut data = vec ! [ 0x80 , 0x02 , b'N' , b'r' ] ;
932+ data. extend_from_slice ( & idx_bytes) ;
933+ let err = decode_pickle ( & data) . unwrap_err ( ) ;
934+ assert ! ( err. to_string( ) . contains( "memo index" ) ) ;
935+ }
936+
937+ #[ test]
938+ fn test_long_value_too_large ( ) {
939+ // PROTO 2, LONG with huge text representation
940+ let mut data = vec ! [ 0x80 , 0x02 , b'L' ] ;
941+ data. extend_from_slice ( & vec ! [ b'9' ; 20_000 ] ) ;
942+ data. push ( b'\n' ) ;
943+ data. push ( b'.' ) ;
944+ let err = decode_pickle ( & data) . unwrap_err ( ) ;
945+ assert ! ( err. to_string( ) . contains( "too large" ) ) ;
946+ }
947+
948+ #[ test]
949+ fn test_binunicode8_too_large ( ) {
950+ // PROTO 4, BINUNICODE8 with huge length
951+ let mut data = vec ! [ 0x80 , 0x04 ] ;
952+ data. push ( 0x8d ) ; // BINUNICODE8
953+ data. extend_from_slice ( & ( 1u64 << 40 ) . to_le_bytes ( ) ) ; // 1 TB
954+ let err = decode_pickle ( & data) . unwrap_err ( ) ;
955+ assert ! ( err. to_string( ) . contains( "too large" ) ) ;
956+ }
957+
958+ #[ test]
959+ fn test_binbytes8_too_large ( ) {
960+ // PROTO 4, BINBYTES8 with huge length
961+ let mut data = vec ! [ 0x80 , 0x04 ] ;
962+ data. push ( 0x8e ) ; // BINBYTES8
963+ data. extend_from_slice ( & ( 1u64 << 40 ) . to_le_bytes ( ) ) ; // 1 TB
964+ let err = decode_pickle ( & data) . unwrap_err ( ) ;
965+ assert ! ( err. to_string( ) . contains( "too large" ) ) ;
966+ }
884967}
0 commit comments