@@ -35,6 +35,13 @@ struct Decoder<'a> {
3535 memo : Vec < PickleValue > ,
3636 /// Metastack for MARK-based operations (saves/restores stack at MARK)
3737 metastack : Vec < Vec < PickleValue > > ,
38+ /// Tracks which memo indices are bound to each stack slot (parallel to stack).
39+ /// When BINPUT stores from stack top, the memo index is recorded here.
40+ /// After in-place mutations (SETITEMS, APPENDS, etc.), stale memo entries
41+ /// are updated via sync_memo_top().
42+ stack_memo : Vec < Vec < usize > > ,
43+ /// Saved stack_memo during MARK (parallel to metastack).
44+ meta_stack_memo : Vec < Vec < Vec < usize > > > ,
3845}
3946
4047impl < ' a > Decoder < ' a > {
@@ -45,6 +52,8 @@ impl<'a> Decoder<'a> {
4552 stack : Vec :: with_capacity ( 16 ) ,
4653 memo : Vec :: with_capacity ( 16 ) ,
4754 metastack : Vec :: with_capacity ( 4 ) ,
55+ stack_memo : Vec :: with_capacity ( 16 ) ,
56+ meta_stack_memo : Vec :: with_capacity ( 4 ) ,
4857 }
4958 }
5059
@@ -244,6 +253,8 @@ impl<'a> Decoder<'a> {
244253 // Save current stack, start a new one
245254 let old_stack = std:: mem:: take ( & mut self . stack ) ;
246255 self . metastack . push ( old_stack) ;
256+ let old_sm = std:: mem:: take ( & mut self . stack_memo ) ;
257+ self . meta_stack_memo . push ( old_sm) ;
247258 // Don't push Mark itself; everything above the mark
248259 // is captured by the current stack being empty
249260 }
@@ -299,6 +310,7 @@ impl<'a> Decoder<'a> {
299310 ) ) ;
300311 }
301312 }
313+ self . sync_memo_top ( ) ?;
302314 }
303315 APPENDS => {
304316 let items = self . pop_mark ( ) ?;
@@ -323,6 +335,7 @@ impl<'a> Decoder<'a> {
323335 ) ) ;
324336 }
325337 }
338+ self . sync_memo_top ( ) ?;
326339 }
327340
328341 // -- Dict --
@@ -356,6 +369,7 @@ impl<'a> Decoder<'a> {
356369 ) ) ;
357370 }
358371 }
372+ self . sync_memo_top ( ) ?;
359373 }
360374 SETITEMS => {
361375 let items = self . pop_mark ( ) ?;
@@ -381,6 +395,7 @@ impl<'a> Decoder<'a> {
381395 ) ) ;
382396 }
383397 }
398+ self . sync_memo_top ( ) ?;
384399 }
385400
386401 // -- Set/FrozenSet (protocol 4) --
@@ -395,6 +410,7 @@ impl<'a> Decoder<'a> {
395410 "ADDITEMS on non-set" . to_string ( ) ,
396411 ) ) ;
397412 }
413+ self . sync_memo_top ( ) ?;
398414 }
399415 FROZENSET => {
400416 let items = self . pop_mark ( ) ?;
@@ -493,9 +509,9 @@ impl<'a> Decoder<'a> {
493509 }
494510 BUILD => {
495511 let state = self . pop_value ( ) ?;
496- let obj = self . pop_value ( ) ? ;
497- // Save pre-BUILD value so we can update stale memo entries.
498- let old_obj = obj . clone ( ) ;
512+ // Pop object and its memo bindings (so we can transfer them)
513+ let obj_bindings = self . stack_memo . pop ( ) . unwrap_or_default ( ) ;
514+ let obj = self . stack . pop ( ) . ok_or ( CodecError :: StackUnderflow ) ? ;
499515 match obj {
500516 PickleValue :: Global { module, name } => {
501517 self . push ( PickleValue :: Instance ( Box :: new ( InstanceData {
@@ -588,15 +604,12 @@ impl<'a> Decoder<'a> {
588604 } ) ) ) ;
589605 }
590606 }
591- // Update memo: replace stale pre-BUILD entries with the
592- // new post-BUILD value so BINGET returns the correct form.
593- // Move new_val on first match (typically only one entry).
594- let new_val = self . peek_value ( ) ?. clone ( ) ;
595- for entry in self . memo . iter_mut ( ) {
596- if * entry == old_obj {
597- * entry = new_val. clone ( ) ;
598- }
607+ // Transfer memo bindings from the old object to the new
608+ // stack top and update memo entries.
609+ if let Some ( top_bindings) = self . stack_memo . last_mut ( ) {
610+ top_bindings. extend ( obj_bindings) ;
599611 }
612+ self . sync_memo_top ( ) ?;
600613 }
601614 NEWOBJ => {
602615 let args = self . pop_value ( ) ?;
@@ -645,16 +658,19 @@ impl<'a> Decoder<'a> {
645658 let idx = self . read_u8 ( ) ? as usize ;
646659 let val = self . peek_value ( ) ?. clone ( ) ;
647660 self . memo_put ( idx, val) ?;
661+ self . record_memo_binding ( idx) ;
648662 }
649663 LONG_BINPUT => {
650664 let idx = self . read_u32 ( ) ? as usize ;
651665 let val = self . peek_value ( ) ?. clone ( ) ;
652666 self . memo_put ( idx, val) ?;
667+ self . record_memo_binding ( idx) ;
653668 }
654669 MEMOIZE => {
655670 let val = self . peek_value ( ) ?. clone ( ) ;
656671 let idx = self . memo . len ( ) ;
657672 self . memo_put ( idx, val) ?;
673+ self . record_memo_binding ( idx) ;
658674 }
659675 BINGET => {
660676 let idx = self . read_u8 ( ) ? as usize ;
@@ -675,6 +691,7 @@ impl<'a> Decoder<'a> {
675691 . map_err ( |e| CodecError :: InvalidData ( format ! ( "PUT index: {e}" ) ) ) ?;
676692 let val = self . peek_value ( ) ?. clone ( ) ;
677693 self . memo_put ( idx, val) ?;
694+ self . record_memo_binding ( idx) ;
678695 }
679696 GET => {
680697 let line = self . read_line ( ) ?;
@@ -761,10 +778,12 @@ impl<'a> Decoder<'a> {
761778 #[ inline]
762779 fn push ( & mut self , val : PickleValue ) {
763780 self . stack . push ( val) ;
781+ self . stack_memo . push ( Vec :: new ( ) ) ;
764782 }
765783
766784 #[ inline]
767785 fn pop_value ( & mut self ) -> Result < PickleValue , CodecError > {
786+ self . stack_memo . pop ( ) ;
768787 self . stack . pop ( ) . ok_or ( CodecError :: StackUnderflow )
769788 }
770789
@@ -784,10 +803,16 @@ impl<'a> Decoder<'a> {
784803 // This is a pointer swap — no element-by-element drain needed.
785804 let items = std:: mem:: take ( & mut self . stack ) ;
786805
806+ // Also discard the memo bindings for popped items
807+ self . stack_memo . clear ( ) ;
808+
787809 // Restore the previous stack from metastack
788810 if let Some ( old_stack) = self . metastack . pop ( ) {
789811 self . stack = old_stack;
790812 }
813+ if let Some ( old_sm) = self . meta_stack_memo . pop ( ) {
814+ self . stack_memo = old_sm;
815+ }
791816
792817 Ok ( items)
793818 }
@@ -811,6 +836,31 @@ impl<'a> Decoder<'a> {
811836 . cloned ( )
812837 . ok_or_else ( || CodecError :: InvalidData ( format ! ( "memo index {idx} not found" ) ) )
813838 }
839+
840+ /// Record that the current stack top was stored in memo at `idx`.
841+ #[ inline]
842+ fn record_memo_binding ( & mut self , idx : usize ) {
843+ if let Some ( bindings) = self . stack_memo . last_mut ( ) {
844+ bindings. push ( idx) ;
845+ }
846+ }
847+
848+ /// After an in-place mutation of the stack top (SETITEMS, APPENDS, etc.),
849+ /// update any memo entries that were cloned from the pre-mutation state.
850+ fn sync_memo_top ( & mut self ) -> Result < ( ) , CodecError > {
851+ if let Some ( bindings) = self . stack_memo . last ( ) {
852+ if !bindings. is_empty ( ) {
853+ let new_val = self . stack . last ( )
854+ . ok_or ( CodecError :: StackUnderflow ) ?. clone ( ) ;
855+ for & idx in bindings {
856+ if idx < self . memo . len ( ) {
857+ self . memo [ idx] = new_val. clone ( ) ;
858+ }
859+ }
860+ }
861+ }
862+ Ok ( ( ) )
863+ }
814864}
815865
816866/// Convert a flat list [k1, v1, k2, v2, ...] into pairs [(k1, v1), (k2, v2), ...].
@@ -977,6 +1027,97 @@ mod tests {
9771027 }
9781028 }
9791029
1030+ #[ test]
1031+ fn test_memo_updated_after_setitems ( ) {
1032+ // Reproduces issue #18: EMPTY_DICT + BINPUT + SETITEMS leaves stale
1033+ // empty dict in memo. When the same dict is referenced twice (once in
1034+ // a list, once as a dict value), the second reference via BINGET returns
1035+ // the empty snapshot.
1036+ //
1037+ // Pickle VM sequence:
1038+ // PROTO 3
1039+ // EMPTY_DICT -> outer dict
1040+ // MARK
1041+ // BINUNICODE "a"
1042+ // EMPTY_DICT -> inner dict (shared)
1043+ // BINPUT 0 -> memo[0] = empty clone
1044+ // MARK
1045+ // BINUNICODE "x"
1046+ // BININT1 1
1047+ // SETITEMS -> inner dict = {"x": 1}
1048+ // BINUNICODE "b"
1049+ // BINGET 0 -> should be {"x": 1} (was {} before fix)
1050+ // SETITEMS -> outer = {"a": {"x": 1}, "b": {"x": 1}}
1051+ // STOP
1052+ let data: & [ u8 ] = & [
1053+ 0x80 , 0x03 , // PROTO 3
1054+ b'}' , // EMPTY_DICT (outer)
1055+ b'(' , // MARK
1056+ b'X' , 0x01 , 0x00 , 0x00 , 0x00 , b'a' , // BINUNICODE "a"
1057+ b'}' , // EMPTY_DICT (inner)
1058+ b'q' , 0x00 , // BINPUT 0
1059+ b'(' , // MARK
1060+ b'X' , 0x01 , 0x00 , 0x00 , 0x00 , b'x' , // BINUNICODE "x"
1061+ b'K' , 0x01 , // BININT1 1
1062+ b'u' , // SETITEMS (fills inner)
1063+ b'X' , 0x01 , 0x00 , 0x00 , 0x00 , b'b' , // BINUNICODE "b"
1064+ b'h' , 0x00 , // BINGET 0
1065+ b'u' , // SETITEMS (fills outer)
1066+ b'.' , // STOP
1067+ ] ;
1068+ let result = decode_pickle ( data) . unwrap ( ) ;
1069+
1070+ let inner = PickleValue :: Dict ( vec ! [ (
1071+ PickleValue :: String ( "x" . to_string( ) ) ,
1072+ PickleValue :: Int ( 1 ) ,
1073+ ) ] ) ;
1074+ let expected = PickleValue :: Dict ( vec ! [
1075+ ( PickleValue :: String ( "a" . to_string( ) ) , inner. clone( ) ) ,
1076+ ( PickleValue :: String ( "b" . to_string( ) ) , inner) ,
1077+ ] ) ;
1078+ assert_eq ! (
1079+ result, expected,
1080+ "BINGET after SETITEMS should return populated dict, not empty"
1081+ ) ;
1082+ }
1083+
1084+ #[ test]
1085+ fn test_memo_updated_after_appends ( ) {
1086+ // Same pattern but for lists: EMPTY_LIST + BINPUT + APPENDS.
1087+ //
1088+ // Pickle VM sequence:
1089+ // PROTO 3
1090+ // EMPTY_LIST -> inner list (shared)
1091+ // BINPUT 0 -> memo[0] = empty clone
1092+ // MARK
1093+ // BININT1 1
1094+ // BININT1 2
1095+ // APPENDS -> inner list = [1, 2]
1096+ // BINGET 0 -> should be [1, 2]
1097+ // TUPLE2 -> ([1,2], [1,2])
1098+ // STOP
1099+ let data: & [ u8 ] = & [
1100+ 0x80 , 0x03 , // PROTO 3
1101+ b']' , // EMPTY_LIST
1102+ b'q' , 0x00 , // BINPUT 0
1103+ b'(' , // MARK
1104+ b'K' , 0x01 , // BININT1 1
1105+ b'K' , 0x02 , // BININT1 2
1106+ b'e' , // APPENDS
1107+ b'h' , 0x00 , // BINGET 0
1108+ 0x86 , // TUPLE2
1109+ b'.' , // STOP
1110+ ] ;
1111+ let result = decode_pickle ( data) . unwrap ( ) ;
1112+
1113+ let inner = PickleValue :: List ( vec ! [ PickleValue :: Int ( 1 ) , PickleValue :: Int ( 2 ) ] ) ;
1114+ let expected = PickleValue :: Tuple ( vec ! [ inner. clone( ) , inner] ) ;
1115+ assert_eq ! (
1116+ result, expected,
1117+ "BINGET after APPENDS should return populated list, not empty"
1118+ ) ;
1119+ }
1120+
9801121 #[ test]
9811122 fn test_long4_negative_length ( ) {
9821123 // PROTO 2, LONG4 with length=-1 (0xFFFFFFFF as i32)
0 commit comments