Skip to content

Commit b230920

Browse files
authored
Merge pull request #2 from webcloud7/mle-multiple-instances-different-places
Fix shared reference data loss: update memo after BUILD
2 parents 5e8e753 + 38f2a77 commit b230920

3 files changed

Lines changed: 157 additions & 15 deletions

File tree

CHANGES.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## 1.2.1 (unreleased)
44

5-
- nothing yet
5+
- Fix shared reference data loss: update memo after BUILD
66

77
## 1.2.0 (2026-02-10)
88

src/decode.rs

Lines changed: 86 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -414,6 +414,13 @@ impl<'a> Decoder<'a> {
414414
BUILD => {
415415
let state = self.pop_value()?;
416416
let obj = self.pop_value()?;
417+
// Save pre-BUILD value so we can update stale memo entries.
418+
// BINPUT clones the stack top into memo *before* BUILD runs,
419+
// so memo entries still reference the old (e.g. Reduce) value.
420+
// After BUILD transforms it (e.g. to Instance), we must
421+
// propagate the change to memo — mirroring how CPython's
422+
// pickle VM uses object identity (shared references).
423+
let old_obj = obj.clone();
417424
match obj {
418425
PickleValue::Global { module, name } => {
419426
self.push(PickleValue::Instance {
@@ -495,24 +502,24 @@ impl<'a> Decoder<'a> {
495502
});
496503
}
497504
}
505+
// Update memo: replace stale pre-BUILD entries with the
506+
// new post-BUILD value so BINGET returns the correct form.
507+
let new_val = self.peek_value()?.clone();
508+
if old_obj != new_val {
509+
for entry in self.memo.iter_mut() {
510+
if *entry == old_obj {
511+
*entry = new_val.clone();
512+
}
513+
}
514+
}
498515
}
499516
NEWOBJ => {
500517
let args = self.pop_value()?;
501518
let cls = self.pop_value()?;
502-
match cls {
503-
PickleValue::Global { module, name } => {
504-
self.push(PickleValue::Reduce {
505-
callable: Box::new(PickleValue::Global { module, name }),
506-
args: Box::new(args),
507-
});
508-
}
509-
_ => {
510-
self.push(PickleValue::Reduce {
511-
callable: Box::new(cls),
512-
args: Box::new(args),
513-
});
514-
}
515-
}
519+
self.push(PickleValue::Reduce {
520+
callable: Box::new(cls),
521+
args: Box::new(args),
522+
});
516523
}
517524
NEWOBJ_EX => {
518525
let kwargs = self.pop_value()?;
@@ -809,4 +816,69 @@ mod tests {
809816
)])
810817
);
811818
}
819+
820+
#[test]
821+
fn test_memo_updated_after_build() {
822+
// Reproduces the shared-reference bug: NEWOBJ + BINPUT creates a
823+
// Reduce in memo, BUILD transforms the stack top to Instance, but
824+
// memo was stale. After the fix, BINGET should return Instance.
825+
//
826+
// Pickle VM sequence:
827+
// PROTO 3
828+
// GLOBAL "mymod\nMyClass\n"
829+
// EMPTY_TUPLE
830+
// NEWOBJ -> Reduce{Global{mymod,MyClass}, ()}
831+
// BINPUT 0 -> memo[0] = clone of Reduce
832+
// EMPTY_DICT
833+
// SHORT_BINUNICODE "name"
834+
// SHORT_BINUNICODE "test"
835+
// SETITEM -> {"name": "test"}
836+
// BUILD -> Instance{mymod, MyClass, {"name": "test"}}
837+
// BINGET 0 -> should be Instance (was stale Reduce before fix)
838+
// TUPLE2 -> (Instance_from_build, memo_copy)
839+
// STOP
840+
let data: &[u8] = &[
841+
0x80, 0x03, // PROTO 3
842+
b'c', b'm', b'y', b'm', b'o', b'd', b'\n', // GLOBAL module="mymod"
843+
b'M', b'y', b'C', b'l', b's', b'\n', // GLOBAL name="MyCls"
844+
b')', // EMPTY_TUPLE
845+
0x81, // NEWOBJ
846+
b'q', 0x00, // BINPUT 0
847+
b'}', // EMPTY_DICT
848+
0x8c, 0x04, b'n', b'a', b'm', b'e', // SHORT_BINUNICODE "name"
849+
0x8c, 0x04, b't', b'e', b's', b't', // SHORT_BINUNICODE "test"
850+
b's', // SETITEM
851+
b'b', // BUILD
852+
b'h', 0x00, // BINGET 0
853+
0x86, // TUPLE2
854+
b'.', // STOP
855+
];
856+
let result = decode_pickle(data).unwrap();
857+
858+
// Result should be a tuple of two elements
859+
if let PickleValue::Tuple(items) = &result {
860+
assert_eq!(items.len(), 2, "expected 2-tuple");
861+
862+
let expected = PickleValue::Instance {
863+
module: "mymod".to_string(),
864+
name: "MyCls".to_string(),
865+
state: Box::new(PickleValue::Dict(vec![(
866+
PickleValue::String("name".to_string()),
867+
PickleValue::String("test".to_string()),
868+
)])),
869+
};
870+
871+
// First element: the Instance from BUILD (stack top)
872+
assert_eq!(items[0], expected, "BUILD result should be Instance");
873+
874+
// Second element: the memo copy from BINGET — must also be
875+
// Instance after the fix (was stale Reduce before)
876+
assert_eq!(
877+
items[1], expected,
878+
"BINGET after BUILD should return updated Instance, not stale Reduce"
879+
);
880+
} else {
881+
panic!("expected Tuple, got {:?}", result);
882+
}
883+
}
812884
}

tests/test_zodb_records.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,76 @@ def test_datetime_in_state(self, zodb):
301301
assert result == result2
302302

303303

304+
class SharedRefHelper:
305+
"""Non-Persistent helper class for testing shared-reference pickling.
306+
307+
When the same instance appears in two dict values, pickle uses a memo
308+
backreference (BINGET) for the second occurrence. The codec must preserve
309+
state for both occurrences.
310+
"""
311+
312+
def __init__(self, name="<undefined>"):
313+
self.name = name
314+
315+
316+
class TestSharedReferences:
317+
"""Test that objects referenced multiple times via pickle memo preserve state."""
318+
319+
def test_shared_object_in_dict_values(self):
320+
"""Same object in two dict values: both should have full state."""
321+
obj = SharedRefHelper("acl_users")
322+
state = {"first": obj, "second": obj}
323+
data = pickle.dumps(state, protocol=3)
324+
325+
result = zodb_json_codec.pickle_to_dict(data)
326+
327+
first = result["first"]
328+
second = result["second"]
329+
330+
assert "@cls" in first, (
331+
"first occurrence should be Instance (@cls), got: %s" % first
332+
)
333+
assert "@cls" in second, (
334+
"second occurrence should be Instance (@cls), not @reduce. "
335+
"This fails when memo is not updated after BUILD. Got: %s" % second
336+
)
337+
assert first["@s"]["name"] == "acl_users"
338+
assert second["@s"]["name"] == "acl_users"
339+
340+
def test_shared_object_roundtrip(self):
341+
"""Shared reference round-trips through JSON correctly."""
342+
obj = SharedRefHelper("test_hook")
343+
state = {"a": obj, "b": obj}
344+
data = pickle.dumps(state, protocol=3)
345+
346+
json_str = zodb_json_codec.pickle_to_json(data)
347+
parsed = json.loads(json_str)
348+
349+
assert parsed["a"]["@cls"] == [__name__, "SharedRefHelper"]
350+
assert parsed["b"]["@cls"] == [__name__, "SharedRefHelper"]
351+
assert parsed["a"]["@s"]["name"] == "test_hook"
352+
assert parsed["b"]["@s"]["name"] == "test_hook"
353+
354+
def test_shared_object_in_zodb_record(self):
355+
"""Shared references inside a ZODB record state are preserved."""
356+
obj = SharedRefHelper("hook_name")
357+
record = make_zodb_record(
358+
"myapp", "Container",
359+
{"hook_a": obj, "hook_b": obj},
360+
)
361+
result = zodb_json_codec.decode_zodb_record(record)
362+
363+
hook_a = result["@s"]["hook_a"]
364+
hook_b = result["@s"]["hook_b"]
365+
366+
assert "@cls" in hook_a, "hook_a should be Instance"
367+
assert "@cls" in hook_b, (
368+
"hook_b (shared ref) should be Instance, not @reduce"
369+
)
370+
assert hook_a["@s"]["name"] == "hook_name"
371+
assert hook_b["@s"]["name"] == "hook_name"
372+
373+
304374
class TestDecodeZodbRecordForPg:
305375
"""Test decode_zodb_record_for_pg: single-pass decode + refs + null sanitize."""
306376

0 commit comments

Comments
 (0)