Skip to content

Commit 0a4ebab

Browse files
jensens and claude committed
Add decode_zodb_record_for_pg for single-pass PG optimization
New Rust function that combines ZODB record decode + persistent ref extraction + null-byte sanitization in one pass, eliminating two separate Python-level tree walks. Returns (class_mod, class_name, state_dict, refs_list) directly.

- collect_refs_from_pickle_value: pure Rust ref collector on PickleValue
- pickle_value_to_pyobject_pg: null-byte sanitization via @ns markers
- 8 new Rust unit tests, 9 new Python integration tests
- Decode path 2.5x faster (0.35ms → 0.14ms per 100 objects)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cfe99ee commit 0a4ebab

4 files changed

Lines changed: 413 additions & 48 deletions

File tree

python/zodb_json_codec/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from zodb_json_codec._rust import (
44
decode_zodb_record,
5+
decode_zodb_record_for_pg,
56
encode_zodb_record,
67
pickle_to_dict,
78
pickle_to_json,
@@ -15,5 +16,6 @@
1516
"pickle_to_dict",
1617
"dict_to_pickle",
1718
"decode_zodb_record",
19+
"decode_zodb_record_for_pg",
1820
"encode_zodb_record",
1921
]

src/lib.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,42 @@ fn decode_zodb_record(py: Python<'_>, data: &[u8]) -> PyResult<Py<PyAny>> {
7474
Ok(dict.into_any().unbind())
7575
}
7676

77+
/// Decode a ZODB record for PostgreSQL JSONB storage.
78+
///
79+
/// Combines decode + ref extraction + null-byte sanitization in a single pass.
80+
/// Returns: `(class_mod: str, class_name: str, state: dict, refs: list[int])`
81+
///
82+
/// - `state` has null-byte strings replaced with `{"@ns": base64}` markers
83+
/// (PostgreSQL JSONB cannot store `\u0000`)
84+
/// - `refs` contains all persistent reference OIDs as integers (for the
85+
/// `refs` column used by pure-SQL pack)
86+
#[pyfunction]
87+
fn decode_zodb_record_for_pg(py: Python<'_>, data: &[u8]) -> PyResult<Py<PyAny>> {
88+
let (class_val, state_val) = decode_zodb_pickles(data).map_err(CodecError::from)?;
89+
let (module, name) = zodb::extract_class_info(&class_val);
90+
91+
// Collect persistent reference OIDs from the PickleValue tree
92+
let mut refs = Vec::new();
93+
pyconv::collect_refs_from_pickle_value(&state_val, &mut refs);
94+
95+
// BTree-aware state conversion with null-byte sanitization + ref compaction
96+
let state_obj = if let Some(info) = btrees::classify_btree(&module, &name) {
97+
pyconv::btree_state_to_pyobject_pg(py, &info, &state_val, true)?
98+
} else {
99+
pyconv::pickle_value_to_pyobject_pg(py, &state_val, true)?
100+
};
101+
102+
// Build result tuple: (class_mod, class_name, state, refs)
103+
let refs_list = PyList::new(py, &refs)?;
104+
let result = (
105+
module.into_pyobject(py)?,
106+
name.into_pyobject(py)?,
107+
state_obj.into_bound(py),
108+
refs_list.into_any(),
109+
);
110+
Ok(result.into_pyobject(py)?.into_any().unbind())
111+
}
112+
77113
/// Encode a ZODB JSON record back into two concatenated pickles.
78114
/// Uses the direct Py<PyAny> → pickle encoder, bypassing PickleValue allocations.
79115
#[pyfunction]
@@ -116,6 +152,7 @@ fn _rust(m: &Bound<'_, PyModule>) -> PyResult<()> {
116152
m.add_function(wrap_pyfunction!(pickle_to_dict, m)?)?;
117153
m.add_function(wrap_pyfunction!(dict_to_pickle, m)?)?;
118154
m.add_function(wrap_pyfunction!(decode_zodb_record, m)?)?;
155+
m.add_function(wrap_pyfunction!(decode_zodb_record_for_pg, m)?)?;
119156
m.add_function(wrap_pyfunction!(encode_zodb_record, m)?)?;
120157
Ok(())
121158
}

0 commit comments

Comments
 (0)