From 6e8563d6594a718054aa64066ea1d5e315bb2557 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 15:27:13 +0800 Subject: [PATCH 01/20] implement meta share mode for pyfory --- python/pyfory/_fory.py | 57 ++++- python/pyfory/_registry.py | 98 ++++++++ python/pyfory/_serialization.pyx | 306 ++++++++++++++++++++++++- python/pyfory/tests/test_meta_share.py | 165 +++++++++++++ 4 files changed, 615 insertions(+), 11 deletions(-) create mode 100644 python/pyfory/tests/test_meta_share.py diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 6bc87ba1a9..478aaa6909 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -120,6 +120,7 @@ def __init__( language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, + meta_share: bool = False, ): """ :param require_type_registration: @@ -130,6 +131,10 @@ def __init__( Do not disable type registration if you can't ensure your environment are *indeed secure*. We are not responsible for security risks if you disable this option. + :param meta_share: + Whether to enable meta share mode for cross-language serialization. + When enabled, type definitions will be shared between serialization calls + to reduce overhead for repeated types. """ self.language = language self.is_py = language == Language.PYTHON @@ -145,7 +150,7 @@ def __init__( self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self) self.type_resolver.initialize() - self.serialization_context = SerializationContext() + self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( @@ -470,7 +475,7 @@ def read_ref_pyobject(self, buffer): def reset_write(self): self.ref_resolver.reset_write() self.type_resolver.reset_write() - self.serialization_context.reset() + self.serialization_context.reset_write() self.metastring_resolver.reset_write() self.pickler.clear_memo() self._buffer_callback = None @@ -479,7 +484,7 @@ def reset_write(self): def reset_read(self): self.ref_resolver.reset_read() self.type_resolver.reset_read() - self.serialization_context.reset() + self.serialization_context.reset_read() self.metastring_resolver.reset_write() self.unpickler = None self._buffers = None @@ -498,10 +503,16 @@ class SerializationContext: object tree. """ - __slots__ = ("objects",) + __slots__ = ("objects", "meta_context", "scoped_meta_share_enabled") - def __init__(self): + def __init__(self, scoped_meta_share_enabled: bool = False): self.objects = dict() + self.scoped_meta_share_enabled = scoped_meta_share_enabled + if scoped_meta_share_enabled: + from pyfory._serialization import MetaContext + self.meta_context = MetaContext() + else: + self.meta_context = None def add(self, key, obj): self.objects[id(key)] = obj @@ -515,9 +526,45 @@ def __getitem__(self, key): def get(self, key): return self.objects.get(id(key)) + def get_meta_context(self): + """Get the meta context for meta share mode.""" + return self.meta_context + + def set_meta_context(self, meta_context): + """ + Set meta context, which can be used to share data across multiple serialization call. + Note that meta_context will be cleared after the serialization is finished. + Please set the context before every serialization if metaShare is enabled. + """ + assert not self.scoped_meta_share_enabled, "Cannot set meta context when scoped meta share is enabled" + self.meta_context = meta_context + + def reset_write(self): + """Reset write state.""" + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self.meta_context: + self.meta_context.reset_write() + elif not self.scoped_meta_share_enabled: + self.meta_context = None + + def reset_read(self): + """Reset read state.""" + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self.meta_context: + self.meta_context.reset_read() + elif not self.scoped_meta_share_enabled: + self.meta_context = None + def reset(self): + """Reset both read and write state.""" if len(self.objects) > 0: self.objects.clear() + if self.scoped_meta_share_enabled and self.meta_context: + self.meta_context.reset() + elif not self.scoped_meta_share_enabled: + self.meta_context = None _ENABLE_TYPE_REGISTRATION_FORCIBLY = os.getenv("ENABLE_TYPE_REGISTRATION_FORCIBLY", "0") in { diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 2333ff7824..566b4b4eee 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -557,12 +557,24 @@ def write_typeinfo(self, buffer, typeinfo): return type_id = typeinfo.type_id internal_type_id = type_id & 0xFF + + # Check if meta share is enabled first + meta_context = self.fory.serialization_context.get_meta_context() + if meta_context is not None: + self.write_shared_type_meta(buffer, typeinfo) + return + buffer.write_varuint32(type_id) if TypeId.is_namespaced_type(internal_type_id): self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.namespace_bytes) self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.typename_bytes) def read_typeinfo(self, buffer): + # Check if meta share is enabled first + meta_context = self.fory.serialization_context.get_meta_context() + if meta_context is not None: + return self.read_shared_type_meta(buffer) + type_id = buffer.read_varuint32() internal_type_id = type_id & 0xFF if TypeId.is_namespaced_type(internal_type_id): @@ -595,6 +607,92 @@ def get_typeinfo_by_name(self, namespace, typename): def get_meta_compressor(self): return self.meta_compressor + def write_shared_type_meta(self, buffer, typeinfo): + """Write shared type meta information.""" + meta_context = self.fory.serialization_context.get_meta_context() + assert meta_context is not None, "Meta context must be set when meta share is enabled" + + type_id, is_new = meta_context.put_or_get_type_id(typeinfo.cls) + if not is_new: + # Type already sent, just write the ID + buffer.write_varuint32(type_id) + else: + # New type, write ID and store typeinfo for later use + buffer.write_varuint32(type_id) + # Store the typeinfo in meta context for deserialization + meta_context.set_read_type_info(type_id, typeinfo) + + def read_shared_type_meta(self, buffer): + """Read shared type meta information.""" + meta_context = self.fory.serialization_context.get_meta_context() + assert meta_context is not None, "Meta context must be set when meta share is enabled" + + type_id = buffer.read_varuint32() + typeinfo = meta_context.get_read_type_info(type_id) + if typeinfo is None: + # Need to read type definition + typeinfo = self._read_type_info_with_meta_share(meta_context, type_id) + return typeinfo + + def _build_type_def(self, typeinfo): + """Build TypeDef for a TypeInfo.""" + from pyfory.meta.typedef_encoder import encode_typedef + return encode_typedef(self, typeinfo.cls) + + def _read_type_info_with_meta_share(self, meta_context, type_id): + """Read type info with meta share support.""" + # First check if we already have the typeinfo cached + typeinfo = meta_context.get_read_type_info(type_id) + if typeinfo is not None: + return typeinfo + + # If not found, this is an error in our current implementation + raise ValueError(f"Type info not found for ID {type_id}") + + def _create_type_info_from_def(self, type_def): + """Create TypeInfo from TypeDef.""" + # This is a simplified implementation + # In practice, you'd need to create the appropriate serializer based on the type definition + return TypeInfo( + cls=type_def.name, # This would need to be resolved to actual class + type_id=type_def.type_id, + serializer=None, # Would be created based on type_def + namespace_bytes=None, + typename_bytes=None, + dynamic_type=False + ) + + def write_type_defs(self, buffer): + """Write all type definitions that need to be sent.""" + meta_context = self.fory.serialization_context.get_meta_context() + if meta_context is None: + return + + writing_type_defs = meta_context.get_writing_type_defs() + buffer.write_varuint32(len(writing_type_defs)) + + for type_def in writing_type_defs: + # Write type definition header (ID and size) + buffer.write_int64(type_def.id if hasattr(type_def, 'id') else 0) + buffer.write_bytes(type_def.encoded) + + meta_context.clear_writing_type_defs() + + def read_type_defs(self, buffer): + """Read all type definitions from the buffer.""" + meta_context = self.fory.serialization_context.get_meta_context() + if meta_context is None: + return + + num_type_defs = buffer.read_varuint32() + for i in range(num_type_defs): + # Read type definition header + type_def_id = buffer.read_int64() + # Read the encoded type definition + from pyfory.meta.typedef_decoder import decode_typedef + type_def = decode_typedef(buffer, self) + meta_context.add_read_type_def(type_def) + def reset(self): pass diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 315333a704..56b7bd9a78 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -546,12 +546,24 @@ cdef class TypeResolver: cdef: int32_t type_id = typeinfo.type_id int32_t internal_type_id = type_id & 0xFF + + # Check if meta share is enabled first + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is not None: + self.write_shared_type_meta(buffer, typeinfo) + return + buffer.write_varuint32(type_id) if IsNamespacedType(internal_type_id): self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.namespace_bytes) self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.typename_bytes) cpdef inline TypeInfo read_typeinfo(self, Buffer buffer): + # Check if meta share is enabled first + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is not None: + return self.read_shared_type_meta(buffer) + cdef: int32_t type_id = buffer.read_varuint32() if type_id < 0: @@ -580,6 +592,83 @@ cdef class TypeResolver: def get_meta_compressor(self): return self._resolver.get_meta_compressor() + cpdef write_shared_type_meta(self, Buffer buffer, TypeInfo typeinfo): + """Write shared type meta information.""" + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is None: + raise ValueError("Meta context must be set when meta share is enabled") + + type_id, is_new = meta_context.put_or_get_type_id(typeinfo.cls) + if not is_new: + # Type already sent, just write the ID + buffer.write_varuint32(type_id) + else: + # New type, write ID and add to writing queue + buffer.write_varuint32(type_id) + # Store the typeinfo in the read cache for immediate deserialization + meta_context.set_read_type_info(type_id, typeinfo) + type_def = self._resolver._build_type_def(typeinfo) + meta_context.add_writing_type_def(type_def) + + cpdef TypeInfo read_shared_type_meta(self, Buffer buffer): + """Read shared type meta information.""" + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is None: + raise ValueError("Meta context must be set when meta share is enabled") + + type_id = buffer.read_varuint32() + typeinfo = meta_context.get_read_type_info(type_id) + if typeinfo is None: + # Need to read type definition + typeinfo = self._read_type_info_with_meta_share(meta_context, type_id) + return typeinfo + + cpdef write_type_defs(self, Buffer buffer): + """Write all type definitions that need to be sent.""" + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is None: + return + + writing_type_defs = meta_context.get_writing_type_defs() + buffer.write_varuint32(len(writing_type_defs)) + + for type_def in writing_type_defs: + # Write type definition header (ID and size) + buffer.write_int64(type_def.id if hasattr(type_def, 'id') else 0) + buffer.write_bytes(type_def.encoded) + + meta_context.clear_writing_type_defs() + + cpdef read_type_defs(self, Buffer buffer): + """Read all type definitions from the buffer.""" + meta_context = self._resolver.fory.serialization_context.get_meta_context() + if meta_context is None: + return + + num_type_defs = buffer.read_varuint32() + for i in range(num_type_defs): + # Read type definition header + type_def_id = buffer.read_int64() + # Read the encoded type definition + from pyfory.meta.typedef_decoder import decode_typedef + type_def = decode_typedef(buffer, self._resolver) + meta_context.add_read_type_def(type_def) + + def _read_type_info_with_meta_share(self, meta_context, type_id): + """Read type info with meta share support.""" + # First check if we already have the typeinfo cached + typeinfo = meta_context.get_read_type_info(type_id) + if typeinfo is not None: + return typeinfo + + # If not found, this is an error in our current implementation + raise ValueError(f"Type info not found for ID {type_id}") + + def _build_type_def(self, typeinfo): + """Build TypeDef for a TypeInfo.""" + from pyfory.meta.typedef_encoder import encode_typedef + return encode_typedef(self, typeinfo.cls) + cpdef inline reset(self): pass @@ -590,6 +679,176 @@ cdef class TypeResolver: pass +@cython.final +cdef class MetaContext: + """ + Context for sharing type meta across multiple serialization. Type name, field name and field + type will be shared between different serialization. + + This is the Cython-optimized equivalent of Java's MetaContext class. + """ + cdef: + # Types which have sent definitions to peer + # Maps type objects to their assigned IDs + flat_hash_map[uint64_t, int32_t] _c_type_map + + # Type definitions read from peer + vector[PyObject *] _c_read_type_defs + + # Type infos read from peer (cached for performance) + vector[PyObject *] _c_read_type_infos + + # New type definitions which need sending to peer + # This will be filled up when there are new type definitions need sending, + # and will be cleared after writing to buffer + vector[PyObject *] _c_writing_type_defs + + # Counter for assigning new IDs + int32_t _next_id + + # Python objects for compatibility + dict _type_map + list _read_type_defs + list _read_type_infos + list _writing_type_defs + + def __cinit__(self): + self._next_id = 0 + self._type_map = {} + self._read_type_defs = [] + self._read_type_infos = [] + self._writing_type_defs = [] + + cpdef inline int32_t get_type_id(self, type_cls): + """Get the ID for a type, or -1 if not found.""" + cdef uint64_t type_addr = type_cls + cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr) + if it == self._c_type_map.end(): + return -1 + return deref(it).second + + cpdef inline tuple put_or_get_type_id(self, type_cls): + """ + Put a type in the map and return its ID, or get existing ID. + Returns (id, is_new) where is_new indicates if this is a new type. + """ + cdef uint64_t type_addr = type_cls + cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr) + if it != self._c_type_map.end(): + return (deref(it).second, False) + + cdef int32_t new_id = self._next_id + self._c_type_map[type_addr] = new_id + self._next_id += 1 + # Also update Python dict for compatibility + self._type_map[type_cls] = new_id + return (new_id, True) + + cpdef inline add_writing_type_def(self, type_def): + """Add a type definition to the writing queue.""" + self._c_writing_type_defs.push_back( type_def) + Py_INCREF(type_def) + self._writing_type_defs.append(type_def) + + cpdef inline list get_writing_type_defs(self): + """Get all type definitions that need to be written.""" + return self._writing_type_defs + + cpdef inline clear_writing_type_defs(self): + """Clear the writing type definitions queue.""" + cdef PyObject * ptr + for ptr in self._c_writing_type_defs: + Py_XDECREF(ptr) + self._c_writing_type_defs.clear() + self._writing_type_defs.clear() + + cpdef inline add_read_type_def(self, type_def): + """Add a type definition read from peer.""" + self._c_read_type_defs.push_back( type_def) + Py_INCREF(type_def) + self._read_type_defs.append(type_def) + + cpdef inline get_read_type_def(self, int32_t index): + """Get a type definition by index.""" + if 0 <= index < self._c_read_type_defs.size(): + return self._c_read_type_defs[index] + return None + + cpdef inline add_read_type_info(self, type_info): + """Add a type info read from peer.""" + self._c_read_type_infos.push_back( type_info) + Py_INCREF(type_info) + self._read_type_infos.append(type_info) + + cpdef inline get_read_type_info(self, int32_t index): + """Get a type info by index.""" + if 0 <= index < self._c_read_type_infos.size(): + return self._c_read_type_infos[index] + return None + + cpdef inline set_read_type_info(self, int32_t index, type_info): + """Set a type info at a specific index.""" + cdef int32_t current_size = self._c_read_type_infos.size() + while current_size <= index: + self._c_read_type_infos.push_back(NULL) + self._read_type_infos.append(None) + current_size += 1 + + # Decrease ref count of old object if it exists + if self._c_read_type_infos[index] != NULL: + Py_XDECREF(self._c_read_type_infos[index]) + + # Set new object + self._c_read_type_infos[index] = type_info + Py_INCREF(type_info) + self._read_type_infos[index] = type_info + + cpdef inline reset_write(self): + """Reset write state.""" + # In meta share mode, we don't clear the type map to preserve type IDs across serialization calls + # Only clear the writing queue + self.clear_writing_type_defs() + # Note: _next_id is not reset to preserve type ID assignments + + cpdef inline reset_read(self): + """Reset read state.""" + # In meta share mode, we don't clear the read type infos to preserve them across deserialization calls + # Only clear the type definitions if needed + # self._read_type_defs.clear() + # self._read_type_infos.clear() + pass + + cpdef inline reset(self): + """Reset both read and write state.""" + self.reset_write() + self.reset_read() + + def __dealloc__(self): + """Clean up C++ containers and Python object references.""" + cdef PyObject * ptr + + # Clear writing type defs + for ptr in self._c_writing_type_defs: + Py_XDECREF(ptr) + self._c_writing_type_defs.clear() + + # Clear read type defs + for ptr in self._c_read_type_defs: + Py_XDECREF(ptr) + self._c_read_type_defs.clear() + + # Clear read type infos + for ptr in self._c_read_type_infos: + Py_XDECREF(ptr) + self._c_read_type_infos.clear() + + def __repr__(self): + return (f"MetaContext(type_map_size={len(self._type_map)}, " + f"read_defs={len(self._read_type_defs)}, " + f"read_infos={len(self._read_type_infos)}, " + f"writing_defs={len(self._writing_type_defs)})") + + @cython.final cdef class Fory: cdef readonly object language @@ -614,6 +873,7 @@ cdef class Fory: language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, + meta_share: bool = False, ): """ :param require_type_registration: @@ -621,10 +881,14 @@ cdef class Fory: If disabled, unknown insecure types can be deserialized, which can be insecure and cause remote code execution attack if the types `__new__`/`__init__`/`__eq__`/`__hash__` method contain malicious code. - Do not disable type registration if you can't ensure your environment are - *indeed secure*. We are not responsible for security risks if - you disable this option. - """ + Do not disable type registration if you can't ensure your environment are + *indeed secure*. We are not responsible for security risks if + you disable this option. + :param meta_share: + Whether to enable meta share mode for cross-language serialization. + When enabled, type definitions are shared across multiple serialization calls + to reduce overhead for repeated types. + """ self.language = language if _ENABLE_TYPE_REGISTRATION_FORCIBLY or require_type_registration: self.require_type_registration = True @@ -636,7 +900,7 @@ cdef class Fory: self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self) self.type_resolver.initialize() - self.serialization_context = SerializationContext() + self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( @@ -1074,9 +1338,24 @@ cpdef inline read_nullable_pystr(Buffer buffer): @cython.final cdef class SerializationContext: cdef dict objects + cdef readonly bint scoped_meta_share_enabled + cdef object _meta_context - def __init__(self): + def __init__(self, scoped_meta_share_enabled: bool = False): self.objects = dict() + self.scoped_meta_share_enabled = scoped_meta_share_enabled + if scoped_meta_share_enabled: + self._meta_context = MetaContext() + else: + self._meta_context = None + + @property + def meta_context(self): + return self._meta_context + + @meta_context.setter + def meta_context(self, value): + self._meta_context = value def add(self, key, obj): self.objects[id(key)] = obj @@ -1090,10 +1369,25 @@ cdef class SerializationContext: def get(self, key): return self.objects.get(id(key)) + def get_meta_context(self): + return self._meta_context + def reset(self): if len(self.objects) > 0: self.objects.clear() + def reset_write(self): + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self._meta_context is not None: + self._meta_context.reset_write() + + def reset_read(self): + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self._meta_context is not None: + self._meta_context.reset_read() + cdef class Serializer: cdef readonly Fory fory cdef readonly object type_ diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py new file mode 100644 index 0000000000..7476e6bf52 --- /dev/null +++ b/python/pyfory/tests/test_meta_share.py @@ -0,0 +1,165 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest +import dataclasses +from pyfory import Fory, Language +from pyfory.buffer import Buffer +from pyfory.type import TypeId + + +@dataclasses.dataclass +class SimpleDataClass: + name: str + age: int + active: bool + + +@dataclasses.dataclass +class SimpleNestedDataClass: + value: int + name: str + + +class TestMetaShareMode: + + def setup_method(self): + """Setup method to register dataclasses for each test.""" + pass + + def test_meta_share_enabled(self): + """Test that meta share mode can be enabled.""" + fory = Fory(language=Language.XLANG, meta_share=True) + assert fory.serialization_context.scoped_meta_share_enabled + assert fory.serialization_context.meta_context is not None + + def test_meta_share_disabled(self): + """Test that meta share mode can be disabled.""" + fory = Fory(language=Language.XLANG, meta_share=False) + assert not fory.serialization_context.scoped_meta_share_enabled + assert fory.serialization_context.meta_context is None + + def test_simple_dataclass_serialization(self): + """Test serialization of simple dataclass with meta share.""" + fory = Fory(language=Language.XLANG, meta_share=True) + + # Register the dataclass + fory.register_type(SimpleDataClass) + + obj = SimpleDataClass(name="test", age=25, active=True) + buffer = fory.serialize(obj) + + # Deserialize + deserialized = fory.deserialize(buffer) + assert deserialized.name == obj.name + assert deserialized.age == obj.age + assert deserialized.active == obj.active + + def test_multiple_objects_same_type(self): + """Test that multiple objects of same type reuse type definition.""" + fory = Fory(language=Language.XLANG, meta_share=True) + + # Register the dataclass + fory.register_type(SimpleDataClass) + + obj1 = SimpleDataClass(name="test1", age=25, active=True) + obj2 = SimpleDataClass(name="test2", age=30, active=False) + + # Serialize both objects + buffer1 = fory.serialize(obj1) + buffer2 = fory.serialize(obj2) + + # Create a new fory instance with the same meta context for deserialization + fory2 = Fory(language=Language.XLANG, meta_share=True) + fory2.register_type(SimpleDataClass) + # Copy the meta context from the first fory instance + fory2.serialization_context.meta_context = fory.serialization_context.meta_context + + # Deserialize both + deserialized1 = fory2.deserialize(buffer1) + deserialized2 = fory2.deserialize(buffer2) + + assert deserialized1.name == obj1.name + assert deserialized2.name == obj2.name + assert deserialized1.age == obj1.age + assert deserialized2.age == obj2.age + + def test_simple_nested_dataclass_serialization(self): + """Test serialization of simple nested dataclass with meta share.""" + fory = Fory(language=Language.XLANG, meta_share=True) + + # Register the dataclass + fory.register_type(SimpleNestedDataClass) + + obj = SimpleNestedDataClass(value=42, name="test") + + buffer = fory.serialize(obj) + deserialized = fory.deserialize(buffer) + + assert deserialized.value == obj.value + assert deserialized.name == obj.name + + def test_meta_context_type_mapping(self): + """Test that meta context properly maps types to IDs.""" + fory = Fory(language=Language.XLANG, meta_share=True) + meta_context = fory.serialization_context.meta_context + + # Register the dataclass + fory.register_type(SimpleDataClass) + + obj = SimpleDataClass(name="test", age=25, active=True) + buffer = fory.serialize(obj) + + # Check that type was added to meta context + type_id = meta_context.get_type_id(SimpleDataClass) + assert type_id is not None + assert type_id >= 0 + + def test_serialization_without_meta_share(self): + """Test that serialization works without meta share mode.""" + fory = Fory(language=Language.XLANG, meta_share=False) + + # Register the dataclass + fory.register_type(SimpleDataClass) + + obj = SimpleDataClass(name="test", age=25, active=True) + buffer = fory.serialize(obj) + deserialized = fory.deserialize(buffer) + + assert deserialized.name == obj.name + assert deserialized.age == obj.age + assert deserialized.active == obj.active + + def test_meta_context_reset(self): + """Test that meta context is properly reset.""" + fory = Fory(language=Language.XLANG, meta_share=True) + meta_context = fory.serialization_context.meta_context + + # Register the dataclass + fory.register_type(SimpleDataClass) + + obj = SimpleDataClass(name="test", age=25, active=True) + fory.serialize(obj) + + # Check that type was added + type_id = meta_context.get_type_id(SimpleDataClass) + assert type_id is not None + + # Reset and check that type mapping is preserved (meta share behavior) + fory.reset_write() + type_id_after_reset = meta_context.get_type_id(SimpleDataClass) + assert type_id_after_reset is not None # Should be preserved in meta share mode From b797086719986c8a005041c6f6de427a88541553 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 15:39:04 +0800 Subject: [PATCH 02/20] fix typedef --- python/pyfory/_registry.py | 7 ++----- python/pyfory/_serialization.pyx | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 566b4b4eee..defb0f2f42 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -672,8 +672,7 @@ def write_type_defs(self, buffer): buffer.write_varuint32(len(writing_type_defs)) for type_def in writing_type_defs: - # Write type definition header (ID and size) - buffer.write_int64(type_def.id if hasattr(type_def, 'id') else 0) + # Just copy the encoded bytes directly buffer.write_bytes(type_def.encoded) meta_context.clear_writing_type_defs() @@ -686,9 +685,7 @@ def read_type_defs(self, buffer): num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): - # Read type definition header - type_def_id = buffer.read_int64() - # Read the encoded type definition + # Read the encoded type definition directly from pyfory.meta.typedef_decoder import decode_typedef type_def = decode_typedef(buffer, self) meta_context.add_read_type_def(type_def) diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 56b7bd9a78..0c9f1e1020 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -633,8 +633,7 @@ cdef class TypeResolver: buffer.write_varuint32(len(writing_type_defs)) for type_def in writing_type_defs: - # Write type definition header (ID and size) - buffer.write_int64(type_def.id if hasattr(type_def, 'id') else 0) + # Just copy the encoded bytes directly buffer.write_bytes(type_def.encoded) meta_context.clear_writing_type_defs() @@ -647,9 +646,7 @@ cdef class TypeResolver: num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): - # Read type definition header - type_def_id = buffer.read_int64() - # Read the encoded type definition + # Read the encoded type definition directly from pyfory.meta.typedef_decoder import decode_typedef type_def = decode_typedef(buffer, self._resolver) meta_context.add_read_type_def(type_def) From 323aab8e382b6981d1680b08cdf5fcde6db2a73b Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 15:57:38 +0800 Subject: [PATCH 03/20] write/read type defs --- python/pyfory/_fory.py | 30 +++++++++++++++++++++++++++ python/pyfory/_serialization.pyx | 30 +++++++++++++++++++++++++++ python/pyfory/meta/typedef_encoder.py | 2 +- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 478aaa6909..2d5c629d46 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -260,10 +260,26 @@ def _serialize( set_bit(buffer, mask_index, 3) else: clear_bit(buffer, mask_index, 3) + # Reserve space for type definitions offset, similar to Java implementation + type_defs_offset_pos = None + if self.serialization_context.scoped_meta_share_enabled: + type_defs_offset_pos = buffer.writer_index + buffer.write_int32(-1) # Reserve 4 bytes for type definitions offset + if self.language == Language.PYTHON: self.serialize_ref(buffer, obj) else: self.xserialize_ref(buffer, obj) + + # Write type definitions at the end, similar to Java implementation + if self.serialization_context.scoped_meta_share_enabled: + meta_context = self.serialization_context.get_meta_context() + if meta_context is not None and len(meta_context.get_writing_type_defs()) > 0: + # Update the offset to point to current position + current_pos = buffer.writer_index + buffer.put_int32(type_defs_offset_pos, current_pos - type_defs_offset_pos - 4) + self.type_resolver.write_type_defs(buffer) + self.reset_write() if buffer is not self.buffer: return buffer @@ -374,6 +390,20 @@ def _deserialize( self._buffers = iter(buffers) else: assert buffers is None, "buffers should be null when the serialized stream is produced with buffer_callback null." + + # Read type definitions at the start, similar to Java implementation + if self.serialization_context.scoped_meta_share_enabled: + relative_type_defs_offset = buffer.read_int32() + if relative_type_defs_offset != -1: + # Save current reader position + current_reader_index = buffer.reader_index + # Jump to type definitions + buffer.reader_index = current_reader_index + relative_type_defs_offset + # Read type definitions + self.type_resolver.read_type_defs(buffer) + # Jump back to continue with object deserialization + buffer.reader_index = current_reader_index + if is_target_x_lang: obj = self.xdeserialize_ref(buffer) else: diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 0c9f1e1020..853477d65c 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -996,11 +996,27 @@ cdef class Fory: set_bit(buffer, mask_index, 3) else: clear_bit(buffer, mask_index, 3) + # Reserve space for type definitions offset, similar to Java implementation + cdef int32_t type_defs_offset_pos = -1 + if self.serialization_context.scoped_meta_share_enabled: + type_defs_offset_pos = buffer.writer_index + buffer.write_int32(-1) # Reserve 4 bytes for type definitions offset + cdef int32_t start_offset if self.language == Language.PYTHON: self.serialize_ref(buffer, obj) else: self.xserialize_ref(buffer, obj) + + # Write type definitions at the end, similar to Java implementation + if self.serialization_context.scoped_meta_share_enabled: + meta_context = self.serialization_context.get_meta_context() + if meta_context is not None and len(meta_context.get_writing_type_defs()) > 0: + # Update the offset to point to current position + current_pos = buffer.writer_index + buffer.put_int32(type_defs_offset_pos, current_pos - type_defs_offset_pos - 4) + self.type_resolver.write_type_defs(buffer) + if buffer is not self.buffer: return buffer else: @@ -1131,6 +1147,20 @@ cdef class Fory: "buffers should be null when the serialized stream is " "produced with buffer_callback null." ) + + # Read type definitions at the start, similar to Java implementation + if self.serialization_context.scoped_meta_share_enabled: + relative_type_defs_offset = buffer.read_int32() + if relative_type_defs_offset != -1: + # Save current reader position + current_reader_index = buffer.reader_index + # Jump to type definitions + buffer.reader_index = current_reader_index + relative_type_defs_offset + # Read type definitions + self.type_resolver.read_type_defs(buffer) + # Jump back to continue with object deserialization + buffer.reader_index = current_reader_index + if not is_target_x_lang: return self.deserialize_ref(buffer) return self.xdeserialize_ref(buffer) diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index f652fc40f9..3a02b6d718 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -125,7 +125,7 @@ def prepend_header(buffer: bytes, is_compressed: bool, has_fields_meta: bool): result.write_varuint32(meta_size - META_SIZE_MASKS) result.write_bytes(buffer) - return result + return result.to_bytes() def write_namespace(buffer: Buffer, namespace: str): From 02ed68f52edd7488a36471460ac0979920bf31dc Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 15:59:49 +0800 Subject: [PATCH 04/20] clean code --- python/pyfory/_fory.py | 71 +----------------------------------------- 1 file changed, 1 insertion(+), 70 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 2d5c629d46..21d11fde22 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -150,6 +150,7 @@ def __init__( self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self) self.type_resolver.initialize() + from pyfory._serialization import SerializationContext self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) self.buffer = Buffer.allocate(32) if not require_type_registration: @@ -525,76 +526,6 @@ def reset(self): self.reset_read() -class SerializationContext: - """ - A context is used to add some context-related information, so that the - serializers can setup relation between serializing different objects. - The context will be reset after finished serializing/deserializing the - object tree. - """ - - __slots__ = ("objects", "meta_context", "scoped_meta_share_enabled") - - def __init__(self, scoped_meta_share_enabled: bool = False): - self.objects = dict() - self.scoped_meta_share_enabled = scoped_meta_share_enabled - if scoped_meta_share_enabled: - from pyfory._serialization import MetaContext - self.meta_context = MetaContext() - else: - self.meta_context = None - - def add(self, key, obj): - self.objects[id(key)] = obj - - def __contains__(self, key): - return id(key) in self.objects - - def __getitem__(self, key): - return self.objects[id(key)] - - def get(self, key): - return self.objects.get(id(key)) - - def get_meta_context(self): - """Get the meta context for meta share mode.""" - return self.meta_context - - def set_meta_context(self, meta_context): - """ - Set meta context, which can be used to share data across multiple serialization call. - Note that meta_context will be cleared after the serialization is finished. - Please set the context before every serialization if metaShare is enabled. - """ - assert not self.scoped_meta_share_enabled, "Cannot set meta context when scoped meta share is enabled" - self.meta_context = meta_context - - def reset_write(self): - """Reset write state.""" - if len(self.objects) > 0: - self.objects.clear() - if self.scoped_meta_share_enabled and self.meta_context: - self.meta_context.reset_write() - elif not self.scoped_meta_share_enabled: - self.meta_context = None - - def reset_read(self): - """Reset read state.""" - if len(self.objects) > 0: - self.objects.clear() - if self.scoped_meta_share_enabled and self.meta_context: - self.meta_context.reset_read() - elif not self.scoped_meta_share_enabled: - self.meta_context = None - - def reset(self): - """Reset both read and write state.""" - if len(self.objects) > 0: - self.objects.clear() - if self.scoped_meta_share_enabled and self.meta_context: - self.meta_context.reset() - elif not self.scoped_meta_share_enabled: - self.meta_context = None _ENABLE_TYPE_REGISTRATION_FORCIBLY = os.getenv("ENABLE_TYPE_REGISTRATION_FORCIBLY", "0") in { From f098e094512bbff912c1b3e116439787a114e223 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 17:23:48 +0800 Subject: [PATCH 05/20] support create serializer from typedef --- python/pyfory/_registry.py | 47 +++++++++++++++++++-------- python/pyfory/_serialization.pyx | 15 +++++++++ python/pyfory/meta/typedef.py | 13 ++++++-- python/pyfory/meta/typedef_decoder.py | 14 +++++--- python/pyfory/meta/typedef_encoder.py | 13 +++----- python/pyfory/serializer.py | 18 +++++----- 6 files changed, 84 insertions(+), 36 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index defb0f2f42..815afc153a 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -423,6 +423,9 @@ def __register_type( if type_id not in self._type_id_to_typeinfo or not internal: self._type_id_to_typeinfo[type_id] = typeinfo self._types_info[cls] = typeinfo + + if self.meta_share and isinstance(serializer, DataClassSerializer): + self._set_struct_typeinfo(typeinfo) return typeinfo def _next_type_id(self): @@ -502,7 +505,7 @@ def _create_serializer(self, cls): # Use FunctionSerializer for function types (including lambdas) serializer = FunctionSerializer(self.fory, cls) elif dataclasses.is_dataclass(cls): - serializer = DataClassSerializer(self.fory, cls) + serializer = DataClassSerializer(self.fory, cls, xlang=not self.fory.is_py) elif issubclass(cls, enum.Enum): serializer = EnumSerializer(self.fory, cls) elif (hasattr(cls, "__reduce__") and cls.__reduce__ is not object.__reduce__) or ( @@ -535,7 +538,36 @@ def _create_serializer(self, cls): else: serializer = PickleSerializer(self.fory, cls) return serializer + + def _set_struct_typeinfo(self, typeinfo): + assert self.meta_share, "Meta share must be enabled" + from pyfory.meta.typedef_encoder import encode_typedef + type_def = encode_typedef(self, typeinfo.cls) + typeinfo.serializer = type_def.create_serializer(self) + typeinfo.type_def = type_def + def is_registered_by_name(self, cls): + typeinfo = self._types_info.get(cls) + if typeinfo is None: + return False + return TypeId.is_namespaced_type(typeinfo.type_id & 0xFF) + + def is_registered_by_id(self, cls): + typeinfo = self._types_info.get(cls) + if typeinfo is None: + return False + return not TypeId.is_namespaced_type(typeinfo.type_id & 0xFF) + + def get_registered_name(self, cls): + typeinfo = self._types_info.get(cls) + assert typeinfo is not None, f"{cls} not registered" + return typeinfo.decode_namespace(), typeinfo.decode_typename() + + def get_registered_id(self, cls): + typeinfo = self._types_info.get(cls) + assert typeinfo is not None, f"{cls} not registered" + return typeinfo.type_id + def _load_metabytes_to_typeinfo(self, ns_metabytes, type_metabytes): typeinfo = self._ns_type_to_typeinfo.get((ns_metabytes, type_metabytes)) if typeinfo is not None: @@ -649,19 +681,6 @@ def _read_type_info_with_meta_share(self, meta_context, type_id): # If not found, this is an error in our current implementation raise ValueError(f"Type info not found for ID {type_id}") - def _create_type_info_from_def(self, type_def): - """Create TypeInfo from TypeDef.""" - # This is a simplified implementation - # In practice, you'd need to create the appropriate serializer based on the type definition - return TypeInfo( - cls=type_def.name, # This would need to be resolved to actual class - type_id=type_def.type_id, - serializer=None, # Would be created based on type_def - namespace_bytes=None, - typename_bytes=None, - dynamic_type=False - ) - def write_type_defs(self, buffer): """Write all type definitions that need to be sent.""" meta_context = self.fory.serialization_context.get_meta_context() diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 853477d65c..283d433b28 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -399,6 +399,7 @@ cdef class TypeInfo: cdef public MetaStringBytes namespace_bytes cdef public MetaStringBytes typename_bytes cdef public c_bool dynamic_type + cdef public object type_def def __init__( self, @@ -408,6 +409,7 @@ cdef class TypeInfo: namespace_bytes: MetaStringBytes = None, typename_bytes: MetaStringBytes = None, dynamic_type: bool = False, + type_def: object = None ): self.cls = cls self.type_id = type_id @@ -415,6 +417,7 @@ cdef class TypeInfo: self.namespace_bytes = namespace_bytes self.typename_bytes = typename_bytes self.dynamic_type = dynamic_type + self.type_def = type_def def __repr__(self): return f"TypeInfo(cls={self.cls}, type_id={self.type_id}, " \ @@ -527,6 +530,18 @@ cdef class TypeResolver: self._c_types_info[ cls] = type_info self._populate_typeinfo(type_info) return type_info + + def is_registered_by_name(self, cls): + return self._resolver.is_registered_by_name(cls) + + def is_registered_by_id(self, cls): + return self._resolver.is_registered_by_id(cls) + + def get_registered_name(self, cls): + return self._resolver.get_registered_name(cls) + + def get_registered_id(self, cls): + return self._resolver.get_registered_id(cls) cdef inline TypeInfo _load_bytes_to_typeinfo( self, int32_t type_id, MetaStringBytes ns_metabytes, MetaStringBytes type_metabytes): diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index dd786e990e..193d260092 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -43,8 +43,9 @@ class TypeDef: - def __init__(self, name: str, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False): + def __init__(self, name: str, cls: type, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False): self.name = name + self.cls = cls self.type_id = type_id self.fields = fields self.encoded = encoded @@ -54,8 +55,16 @@ def create_fields_serializer(self, resolver): serializers = [field_info.field_type.create_serializer(resolver) for field_info in self.fields] return serializers + def get_field_names(self): + return [field_info.name for field_info in self.fields] + + def create_serializer(self, resolver): + from pyfory.serializer import DataClassSerializer + fory = resolver.fory + return DataClassSerializer(fory, self.cls, xlang=not fory.is_py, field_names=self.get_field_names(), serializers=self.create_fields_serializer(resolver)) + def __repr__(self): - return f"TypeDef(name={self.name}, type_id={self.type_id}, fields={self.fields}, is_compressed={self.is_compressed})" + return f"TypeDef(name={self.name}, cls={self.cls}, type_id={self.type_id}, fields={self.fields}, is_compressed={self.is_compressed})" class FieldInfo: diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index 18f6cdbb43..bbf81989e1 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -36,7 +36,7 @@ NUM_HASH_BITS, FIELD_NAME_ENCODINGS, ) -from pyfory.type import TypeId +from pyfory.type import TypeId, record_class_factory from pyfory.meta.metastring import MetaStringDecoder, Encoding @@ -90,6 +90,7 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: # Check if registered by name is_registered_by_name = (meta_header & REGISTER_BY_NAME_FLAG) != 0 + type_cls = None # Read type info if is_registered_by_name: namespace = read_namespace(meta_buffer) @@ -105,15 +106,20 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: else: type_id = meta_buffer.read_varuint32() type_info = resolver.get_typeinfo_by_id(type_id) - name = type_info.cls.__name__ - + if type_info is not None: + type_cls = type_info.cls + name = type_info.cls.__name__ + else: + name = f"fory.Nonexistent{type_id}" # Read fields info if present field_infos = [] if has_fields_meta: field_infos = read_fields_info(meta_buffer, resolver, name, num_fields) + if type_cls is None: + type_cls = record_class_factory(name, [field_info.name for field_info in field_infos]) # Create TypeDef object - return TypeDef(name, type_id, field_infos, meta_data, is_compressed) + return TypeDef(name, type_cls, type_id, field_infos, meta_data, is_compressed) def read_namespace(buffer: Buffer) -> str: diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index 3a02b6d718..0d26d6ddd9 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -75,17 +75,14 @@ def encode_typedef(type_resolver, cls): buffer.write_varuint32(len(field_infos) - SMALL_NUM_FIELDS_THRESHOLD) # Write type info - type_info = type_resolver.get_typeinfo(cls) - assert type_info.type_id > 0 - - if not TypeId.is_namespaced_type(type_info.type_id): - buffer.write_varuint32(type_info.type_id) - else: + if type_resolver.is_registered_by_name(cls): header |= REGISTER_BY_NAME_FLAG - namespace = type_info.decode_namespace() - typename = type_info.decode_typename() + namespace, typename = type_resolver.get_registered_name(cls) write_namespace(buffer, namespace) write_typename(buffer, typename) + else: + assert type_resolver.is_registered_by_id(cls), "Class must be registered by name or id" + buffer.write_varuint32(type_resolver.get_registered_id(cls)) # Update header byte buffer.put_uint8(0, header) diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 0018232d92..1353ba01c1 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -24,6 +24,7 @@ import pickle import types import typing +from typing_extensions import List import warnings from weakref import WeakValueDictionary @@ -297,21 +298,22 @@ def xread(self, buffer): class DataClassSerializer(Serializer): - def __init__(self, fory, clz: type, xlang: bool = False): + def __init__(self, fory, clz: type, xlang: bool = False, field_names: List[str] = None, serializers: List[Serializer] = None): super().__init__(fory, clz) self._xlang = xlang # This will get superclass type hints too. self._type_hints = typing.get_type_hints(clz) - self._field_names = self._get_field_names(clz) + self._field_names = field_names or self._get_field_names(clz) self._has_slots = hasattr(clz, "__slots__") if self._xlang: - self._serializers = [None] * len(self._field_names) - visitor = ComplexTypeVisitor(fory) - for index, key in enumerate(self._field_names): - serializer = infer_field(key, self._type_hints[key], visitor, types_path=[]) - self._serializers[index] = serializer - self._field_names, self._serializers = _sort_fields(fory.type_resolver, self._field_names, self._serializers) + self._serializers = serializers or [None] * len(self._field_names) + if serializers is None: + visitor = ComplexTypeVisitor(fory) + for index, key in enumerate(self._field_names): + serializer = infer_field(key, self._type_hints[key], visitor, types_path=[]) + self._serializers[index] = serializer + self._field_names, self._serializers = _sort_fields(fory.type_resolver, self._field_names, self._serializers) self._hash = 0 # Will be computed on first xwrite/xread self._generated_xwrite_method = self._gen_xwrite_method() self._generated_xread_method = self._gen_xread_method() From 2d9c3c5eefa19e6e53a8de13292859637c6f6a65 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 18:07:45 +0800 Subject: [PATCH 06/20] build typeinfo from typedef --- python/pyfory/_fory.py | 2 +- python/pyfory/_registry.py | 68 +++++++++++++------- python/pyfory/_serialization.pyx | 15 +---- python/pyfory/meta/typedef.py | 9 +-- python/pyfory/meta/typedef_decoder.py | 10 +-- python/pyfory/meta/typedef_encoder.py | 9 +-- python/pyfory/tests/test_typedef_encoding.py | 3 +- 7 files changed, 65 insertions(+), 51 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 21d11fde22..ac7febf9af 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -148,7 +148,7 @@ def __init__( from pyfory._registry import TypeResolver self.metastring_resolver = MetaStringResolver() - self.type_resolver = TypeResolver(self) + self.type_resolver = TypeResolver(self, meta_share=meta_share) self.type_resolver.initialize() from pyfory._serialization import SerializationContext self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 815afc153a..efa9a50f69 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -104,6 +104,7 @@ class TypeInfo: "namespace_bytes", "typename_bytes", "dynamic_type", + "type_def", ) def __init__( @@ -121,6 +122,7 @@ def __init__( self.namespace_bytes = namespace_bytes self.typename_bytes = typename_bytes self.dynamic_type = dynamic_type + self.type_def = None def __repr__(self): return f"TypeInfo(cls={self.cls}, type_id={self.type_id}, serializer={self.serializer})" @@ -160,9 +162,11 @@ class TypeResolver: "metastring_resolver", "language", "_type_id_to_typeinfo", + "_typedef_cache", + "meta_share", ) - def __init__(self, fory): + def __init__(self, fory, meta_share=False): self.fory = fory self.metastring_resolver = fory.metastring_resolver self.language = fory.language @@ -182,9 +186,12 @@ def __init__(self, fory): self._named_type_to_typeinfo = dict() self.namespace_encoder = MetaStringEncoder(".", "_") self.namespace_decoder = MetaStringDecoder(".", "_") + # Cache for TypeDef and TypeInfo tuples (similar to Java's classIdToDef) + self._typedef_cache = {} self.typename_encoder = MetaStringEncoder("$", "_") self.typename_decoder = MetaStringDecoder("$", "_") self.meta_compressor = DeflaterMetaCompressor() + self.meta_share = meta_share def initialize(self): self._initialize_xlang() @@ -661,25 +668,7 @@ def read_shared_type_meta(self, buffer): type_id = buffer.read_varuint32() typeinfo = meta_context.get_read_type_info(type_id) - if typeinfo is None: - # Need to read type definition - typeinfo = self._read_type_info_with_meta_share(meta_context, type_id) - return typeinfo - - def _build_type_def(self, typeinfo): - """Build TypeDef for a TypeInfo.""" - from pyfory.meta.typedef_encoder import encode_typedef - return encode_typedef(self, typeinfo.cls) - - def _read_type_info_with_meta_share(self, meta_context, type_id): - """Read type info with meta share support.""" - # First check if we already have the typeinfo cached - typeinfo = meta_context.get_read_type_info(type_id) - if typeinfo is not None: - return typeinfo - - # If not found, this is an error in our current implementation - raise ValueError(f"Type info not found for ID {type_id}") + assert typeinfo is not None, f"Type info not found for ID {type_id}" def write_type_defs(self, buffer): """Write all type definitions that need to be sent.""" @@ -691,7 +680,8 @@ def write_type_defs(self, buffer): buffer.write_varuint32(len(writing_type_defs)) for type_def in writing_type_defs: - # Just copy the encoded bytes directly + # Write type ID first, then the encoded bytes + buffer.write_int64(type_def.type_id) buffer.write_bytes(type_def.encoded) meta_context.clear_writing_type_defs() @@ -704,10 +694,38 @@ def read_type_defs(self, buffer): num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): - # Read the encoded type definition directly - from pyfory.meta.typedef_decoder import decode_typedef - type_def = decode_typedef(buffer, self) - meta_context.add_read_type_def(type_def) + # Read type ID first + type_id = buffer.read_int64() + + # Check if we already have this TypeDef cached + if type_id in self._typedef_cache: + # Skip the TypeDef binary for faster performance + type_def, type_info = self._typedef_cache[type_id] + meta_context.add_read_type_def(type_def) + meta_context.set_read_type_info(type_id, type_info) + else: + # Read the TypeDef and create TypeInfo + from pyfory.meta.typedef_decoder import decode_typedef + type_def = decode_typedef(buffer, self) + type_info = self._build_type_info_from_typedef(type_def) + + # Cache the tuple for future use + self._typedef_cache[type_id] = (type_def, type_info) + + meta_context.add_read_type_def(type_def) + meta_context.set_read_type_info(type_id, type_info) + + def _build_type_info_from_typedef(self, type_def): + """Build TypeInfo from TypeDef using TypeDef's create_serializer method.""" + # Create serializer using TypeDef's create_serializer method + serializer = type_def.create_serializer(self) + typeinfo = self._types_info.get(type_def.cls) + ns_metastr = self.namespace_encoder.encode(type_def.namespace or "") + ns_meta_bytes = self.metastring_resolver.get_metastr_bytes(ns_metastr) + type_metastr = self.typename_encoder.encode(type_def.typename) + type_meta_bytes = self.metastring_resolver.get_metastr_bytes(type_metastr) + typeinfo = TypeInfo(type_def.cls, type_def.type_id, serializer, ns_meta_bytes, type_meta_bytes, False) + return typeinfo def reset(self): pass diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 283d433b28..2c271b2a0d 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -447,11 +447,11 @@ cdef class TypeResolver: flat_hash_map[pair[int64_t, int64_t], PyObject *] _c_meta_hash_to_typeinfo MetaStringResolver meta_string_resolver - def __init__(self, fory): + def __init__(self, fory, meta_share=False): self.fory = fory self.metastring_resolver = fory.metastring_resolver from pyfory._registry import TypeResolver - self._resolver = TypeResolver(fory) + self._resolver = TypeResolver(fory, meta_share=meta_share) def initialize(self): self._resolver.initialize() @@ -655,16 +655,7 @@ cdef class TypeResolver: cpdef read_type_defs(self, Buffer buffer): """Read all type definitions from the buffer.""" - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is None: - return - - num_type_defs = buffer.read_varuint32() - for i in range(num_type_defs): - # Read the encoded type definition directly - from pyfory.meta.typedef_decoder import decode_typedef - type_def = decode_typedef(buffer, self._resolver) - meta_context.add_read_type_def(type_def) + self._resolver.read_type_defs(buffer) def _read_type_info_with_meta_share(self, meta_context, type_id): """Read type info with meta share support.""" diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 193d260092..0c6d9b41a1 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -43,8 +43,9 @@ class TypeDef: - def __init__(self, name: str, cls: type, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False): - self.name = name + def __init__(self, namespace: str, typename: str, cls: type, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False): + self.namespace = namespace + self.typename = typename self.cls = cls self.type_id = type_id self.fields = fields @@ -62,9 +63,9 @@ def create_serializer(self, resolver): from pyfory.serializer import DataClassSerializer fory = resolver.fory return DataClassSerializer(fory, self.cls, xlang=not fory.is_py, field_names=self.get_field_names(), serializers=self.create_fields_serializer(resolver)) - + def __repr__(self): - return f"TypeDef(name={self.name}, cls={self.cls}, type_id={self.type_id}, fields={self.fields}, is_compressed={self.is_compressed})" + return f"TypeDef(namespace={self.namespace}, typename={self.typename}, cls={self.cls}, type_id={self.type_id}, fields={self.fields}, is_compressed={self.is_compressed})" class FieldInfo: diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index bbf81989e1..f25b963562 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -95,7 +95,6 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: if is_registered_by_name: namespace = read_namespace(meta_buffer) typename = read_typename(meta_buffer) - name = namespace + "." + typename if namespace else typename # Look up the type_id from namespace and typename type_info = resolver.get_typeinfo_by_name(namespace, typename) if type_info: @@ -108,9 +107,12 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: type_info = resolver.get_typeinfo_by_id(type_id) if type_info is not None: type_cls = type_info.cls - name = type_info.cls.__name__ + namespace = type_info.decode_namespace() + typename = type_info.decode_typename() else: - name = f"fory.Nonexistent{type_id}" + namespace = "fory" + typename = f"Nonexistent{type_id}" + name = namespace + "." + typename if namespace else typename # Read fields info if present field_infos = [] if has_fields_meta: @@ -119,7 +121,7 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: type_cls = record_class_factory(name, [field_info.name for field_info in field_infos]) # Create TypeDef object - return TypeDef(name, type_cls, type_id, field_infos, meta_data, is_compressed) + return TypeDef(namespace, typename, type_cls, type_id, field_infos, meta_data, is_compressed) def read_namespace(buffer: Buffer) -> str: diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index 0d26d6ddd9..5865f09b9a 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -80,10 +80,11 @@ def encode_typedef(type_resolver, cls): namespace, typename = type_resolver.get_registered_name(cls) write_namespace(buffer, namespace) write_typename(buffer, typename) + type_id = TypeId.NAMED_COMPATIBLE_STRUCT else: assert type_resolver.is_registered_by_id(cls), "Class must be registered by name or id" - buffer.write_varuint32(type_resolver.get_registered_id(cls)) - + type_id = type_resolver.get_registered_id(cls) + buffer.write_varuint32(type_id) # Update header byte buffer.put_uint8(0, header) @@ -99,8 +100,8 @@ def encode_typedef(type_resolver, cls): if is_compressed: binary = compressed_binary # Prepend header - binary = prepend_header(binary, is_compressed, len(field_infos) > 0) - return TypeDef(cls.__name__, type_info.type_id, field_infos, binary, is_compressed) + binary = prepend_header(binary, is_compressed, len(field_infos) > 0) + return TypeDef(cls.__name__, cls, type_id, field_infos, binary, is_compressed) def prepend_header(buffer: bytes, is_compressed: bool, has_fields_meta: bool): diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index 70ad351828..8b98b7bd85 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -77,7 +77,8 @@ def test_typedef_creation(): typedef = TypeDef("TestTypeDef", TypeId.STRUCT, fields, b"encoded_data", False) - assert typedef.name == "TestTypeDef" + assert typedef.namespace == "" + assert typedef.typename == "TestTypeDef" assert typedef.type_id == TypeId.STRUCT assert len(typedef.fields) == 2 assert typedef.encoded == b"encoded_data" From 54bef7c6a982ac5cc06778198f487a85bfb4a6a6 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 19:16:09 +0800 Subject: [PATCH 07/20] support skip meta --- python/pyfory/_registry.py | 37 ++++++++++++--------------- python/pyfory/_serialization.pyx | 2 -- python/pyfory/meta/typedef.py | 9 +++++-- python/pyfory/meta/typedef_decoder.py | 19 ++++++++++++-- python/pyfory/meta/typedef_encoder.py | 10 +++++++- 5 files changed, 50 insertions(+), 27 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index efa9a50f69..e5823e1b6b 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -82,6 +82,9 @@ # preserve 0 as flag for type id not set in TypeInfo` NO_TYPE_ID, ) +from pyfory.meta.typedef import TypeDef +from pyfory.meta.typedef_decoder import decode_typedef, skip_typedef +from pyfory.meta.typedef_encoder import encode_typedef try: import numpy as np @@ -115,6 +118,7 @@ def __init__( namespace_bytes=None, typename_bytes=None, dynamic_type: bool = False, + type_def: TypeDef = None, ): self.cls = cls self.type_id = type_id @@ -162,7 +166,7 @@ class TypeResolver: "metastring_resolver", "language", "_type_id_to_typeinfo", - "_typedef_cache", + "_meta_shared_typeinfo", "meta_share", ) @@ -187,7 +191,7 @@ def __init__(self, fory, meta_share=False): self.namespace_encoder = MetaStringEncoder(".", "_") self.namespace_decoder = MetaStringDecoder(".", "_") # Cache for TypeDef and TypeInfo tuples (similar to Java's classIdToDef) - self._typedef_cache = {} + self._meta_shared_typeinfo = {} self.typename_encoder = MetaStringEncoder("$", "_") self.typename_decoder = MetaStringDecoder("$", "_") self.meta_compressor = DeflaterMetaCompressor() @@ -548,7 +552,6 @@ def _create_serializer(self, cls): def _set_struct_typeinfo(self, typeinfo): assert self.meta_share, "Meta share must be enabled" - from pyfory.meta.typedef_encoder import encode_typedef type_def = encode_typedef(self, typeinfo.cls) typeinfo.serializer = type_def.create_serializer(self) typeinfo.type_def = type_def @@ -680,8 +683,7 @@ def write_type_defs(self, buffer): buffer.write_varuint32(len(writing_type_defs)) for type_def in writing_type_defs: - # Write type ID first, then the encoded bytes - buffer.write_int64(type_def.type_id) + # Just copy the encoded bytes directly buffer.write_bytes(type_def.encoded) meta_context.clear_writing_type_defs() @@ -695,36 +697,31 @@ def read_type_defs(self, buffer): num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): # Read type ID first - type_id = buffer.read_int64() - + header = buffer.read_int64() # Check if we already have this TypeDef cached - if type_id in self._typedef_cache: - # Skip the TypeDef binary for faster performance - type_def, type_info = self._typedef_cache[type_id] - meta_context.add_read_type_def(type_def) - meta_context.set_read_type_info(type_id, type_info) + type_info = self._meta_shared_typeinfo.get(header) + if type_info is not None: + meta_context.add_read_type_def(type_info.type_def) + meta_context.set_read_type_info(header, type_info) + skip_typedef(buffer, header) else: # Read the TypeDef and create TypeInfo - from pyfory.meta.typedef_decoder import decode_typedef - type_def = decode_typedef(buffer, self) + type_def = decode_typedef(buffer, self, header=header) type_info = self._build_type_info_from_typedef(type_def) - # Cache the tuple for future use - self._typedef_cache[type_id] = (type_def, type_info) - + self._meta_shared_typeinfo[header] = type_info meta_context.add_read_type_def(type_def) - meta_context.set_read_type_info(type_id, type_info) + meta_context.set_read_type_info(header, type_info) def _build_type_info_from_typedef(self, type_def): """Build TypeInfo from TypeDef using TypeDef's create_serializer method.""" # Create serializer using TypeDef's create_serializer method serializer = type_def.create_serializer(self) - typeinfo = self._types_info.get(type_def.cls) ns_metastr = self.namespace_encoder.encode(type_def.namespace or "") ns_meta_bytes = self.metastring_resolver.get_metastr_bytes(ns_metastr) type_metastr = self.typename_encoder.encode(type_def.typename) type_meta_bytes = self.metastring_resolver.get_metastr_bytes(type_metastr) - typeinfo = TypeInfo(type_def.cls, type_def.type_id, serializer, ns_meta_bytes, type_meta_bytes, False) + typeinfo = TypeInfo(type_def.cls, type_def.type_id, serializer, ns_meta_bytes, type_meta_bytes, False, type_def) return typeinfo def reset(self): diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 2c271b2a0d..cc74a9b42e 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -620,8 +620,6 @@ cdef class TypeResolver: else: # New type, write ID and add to writing queue buffer.write_varuint32(type_id) - # Store the typeinfo in the read cache for immediate deserialization - meta_context.set_read_type_info(type_id, typeinfo) type_def = self._resolver._build_type_def(typeinfo) meta_context.add_writing_type_def(type_def) diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 0c6d9b41a1..b99a3dae93 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -19,8 +19,6 @@ import typing from pyfory.type import TypeId from pyfory._util import Buffer -from pyfory.serializer import MapSerializer, ListSerializer, SetSerializer -from pyfory._struct import _sort_fields, StructTypeIdVisitor, get_field_names from pyfory.type import TypeId, infer_field, is_primitive_type, is_polymorphic_type from pyfory.meta.metastring import Encoding @@ -155,6 +153,8 @@ def __init__( self.element_type = element_type def create_serializer(self, resolver): + from pyfory.serializer import ListSerializer, SetSerializer + if self.type_id == TypeId.LIST: return ListSerializer(resolver.fory, list, self.element_type.create_serializer(resolver)) elif self.type_id == TypeId.SET: @@ -180,6 +180,8 @@ def __init__( def create_serializer(self, resolver): key_serializer = self.key_type.create_serializer(resolver) value_serializer = self.value_type.create_serializer(resolver) + from pyfory.serializer import MapSerializer + return MapSerializer(resolver.fory, dict, key_serializer, value_serializer) def __repr__(self): @@ -202,6 +204,8 @@ def __repr__(self): def build_field_infos(type_resolver, cls): """Build field information for the class.""" + from pyfory._struct import _sort_fields, StructTypeIdVisitor, get_field_names + field_names = get_field_names(cls) type_hints = typing.get_type_hints(cls) @@ -215,6 +219,7 @@ def build_field_infos(type_resolver, cls): field_infos.append(field_info) serializers = [field_info.field_type.create_serializer(type_resolver) for field_info in field_infos] + field_names, serializers = _sort_fields(type_resolver, field_names, serializers) field_infos_map = {field_info.name: field_info for field_info in field_infos} new_field_infos = [] diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index f25b963562..cc65524e29 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -46,7 +46,21 @@ FIELD_NAME_DECODER = MetaStringDecoder("$", "_") -def decode_typedef(buffer: Buffer, resolver) -> TypeDef: +def skip_typedef(buffer: Buffer, header) -> None: + """ + Skip a TypeDef from the buffer. + """ + header = buffer.read_int64() + # Extract components from header + meta_size = header & META_SIZE_MASKS + # If meta size is at maximum, read additional size + if meta_size == META_SIZE_MASKS: + meta_size += buffer.read_varuint32() + # Read meta data + buffer.read_bytes(meta_size) + + +def decode_typedef(buffer: Buffer, resolver, header=None) -> TypeDef: """ Decode a TypeDef from the buffer. @@ -58,7 +72,8 @@ def decode_typedef(buffer: Buffer, resolver) -> TypeDef: The decoded TypeDef. """ # Read global binary header - header = buffer.read_int64() + if header is None: + header = buffer.read_int64() # Extract components from header meta_size = header & META_SIZE_MASKS diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index 5865f09b9a..a150591d5f 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -101,7 +101,15 @@ def encode_typedef(type_resolver, cls): binary = compressed_binary # Prepend header binary = prepend_header(binary, is_compressed, len(field_infos) > 0) - return TypeDef(cls.__name__, cls, type_id, field_infos, binary, is_compressed) + # Extract namespace and typename + if type_resolver.is_registered_by_name(cls): + namespace, typename = type_resolver.get_registered_name(cls) + else: + splits = cls.__name__.rsplit(".", 1) + if len(splits) == 1: + splits.insert(0, "") + namespace, typename = splits + return TypeDef(namespace, typename, cls, type_id, field_infos, binary, is_compressed) def prepend_header(buffer: bytes, is_compressed: bool, has_fields_meta: bool): From dcfeb39c6b7b1ea1cd5a6258da2c3b9ba9ce1cac Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 20:35:31 +0800 Subject: [PATCH 08/20] fix type def read/write --- python/pyfory/_registry.py | 29 +-- python/pyfory/_serialization.pyx | 314 +++++++++---------------------- 2 files changed, 101 insertions(+), 242 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index e5823e1b6b..c698e71cce 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -601,8 +601,7 @@ def write_typeinfo(self, buffer, typeinfo): internal_type_id = type_id & 0xFF # Check if meta share is enabled first - meta_context = self.fory.serialization_context.get_meta_context() - if meta_context is not None: + if self.meta_share: self.write_shared_type_meta(buffer, typeinfo) return @@ -613,8 +612,7 @@ def write_typeinfo(self, buffer, typeinfo): def read_typeinfo(self, buffer): # Check if meta share is enabled first - meta_context = self.fory.serialization_context.get_meta_context() - if meta_context is not None: + if self.meta_share: return self.read_shared_type_meta(buffer) type_id = buffer.read_varuint32() @@ -653,41 +651,37 @@ def write_shared_type_meta(self, buffer, typeinfo): """Write shared type meta information.""" meta_context = self.fory.serialization_context.get_meta_context() assert meta_context is not None, "Meta context must be set when meta share is enabled" - - type_id, is_new = meta_context.put_or_get_type_id(typeinfo.cls) - if not is_new: + index = meta_context.add_writing_type(typeinfo.cls) + if index >= 0: # Type already sent, just write the ID - buffer.write_varuint32(type_id) + buffer.write_varuint32(index) else: + index = -index - 1 # New type, write ID and store typeinfo for later use - buffer.write_varuint32(type_id) + buffer.write_varuint32(index) # Store the typeinfo in meta context for deserialization - meta_context.set_read_type_info(type_id, typeinfo) + meta_context.add_writing_type_def(index, typeinfo.type_def) def read_shared_type_meta(self, buffer): """Read shared type meta information.""" meta_context = self.fory.serialization_context.get_meta_context() assert meta_context is not None, "Meta context must be set when meta share is enabled" - type_id = buffer.read_varuint32() typeinfo = meta_context.get_read_type_info(type_id) assert typeinfo is not None, f"Type info not found for ID {type_id}" + return typeinfo def write_type_defs(self, buffer): """Write all type definitions that need to be sent.""" meta_context = self.fory.serialization_context.get_meta_context() if meta_context is None: return - writing_type_defs = meta_context.get_writing_type_defs() buffer.write_varuint32(len(writing_type_defs)) - for type_def in writing_type_defs: # Just copy the encoded bytes directly buffer.write_bytes(type_def.encoded) - meta_context.clear_writing_type_defs() - def read_type_defs(self, buffer): """Read all type definitions from the buffer.""" meta_context = self.fory.serialization_context.get_meta_context() @@ -701,8 +695,6 @@ def read_type_defs(self, buffer): # Check if we already have this TypeDef cached type_info = self._meta_shared_typeinfo.get(header) if type_info is not None: - meta_context.add_read_type_def(type_info.type_def) - meta_context.set_read_type_info(header, type_info) skip_typedef(buffer, header) else: # Read the TypeDef and create TypeInfo @@ -710,8 +702,7 @@ def read_type_defs(self, buffer): type_info = self._build_type_info_from_typedef(type_def) # Cache the tuple for future use self._meta_shared_typeinfo[header] = type_info - meta_context.add_read_type_def(type_def) - meta_context.set_read_type_info(header, type_info) + meta_context.add_read_type_info(type_info) def _build_type_info_from_typedef(self, type_def): """Build TypeInfo from TypeDef using TypeDef's create_serializer method.""" diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index cc74a9b42e..c58d48287e 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -446,12 +446,14 @@ cdef class TypeResolver: # hash -> TypeInfo flat_hash_map[pair[int64_t, int64_t], PyObject *] _c_meta_hash_to_typeinfo MetaStringResolver meta_string_resolver + SerializationContext serialization_context def __init__(self, fory, meta_share=False): self.fory = fory self.metastring_resolver = fory.metastring_resolver from pyfory._registry import TypeResolver self._resolver = TypeResolver(fory, meta_share=meta_share) + self.serialization_context = fory.serialization_context def initialize(self): self._resolver.initialize() @@ -562,9 +564,7 @@ cdef class TypeResolver: int32_t type_id = typeinfo.type_id int32_t internal_type_id = type_id & 0xFF - # Check if meta share is enabled first - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is not None: + if self.meta_share: self.write_shared_type_meta(buffer, typeinfo) return @@ -574,9 +574,7 @@ cdef class TypeResolver: self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.typename_bytes) cpdef inline TypeInfo read_typeinfo(self, Buffer buffer): - # Check if meta share is enabled first - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is not None: + if self.meta_share: return self.read_shared_type_meta(buffer) cdef: @@ -609,67 +607,36 @@ cdef class TypeResolver: cpdef write_shared_type_meta(self, Buffer buffer, TypeInfo typeinfo): """Write shared type meta information.""" - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is None: - raise ValueError("Meta context must be set when meta share is enabled") - - type_id, is_new = meta_context.put_or_get_type_id(typeinfo.cls) - if not is_new: + meta_context = self.serialization_context.get_meta_context() + assert meta_context is not None, "Meta context must be set when meta share is enabled" + index = meta_context.add_writing_type(typeinfo.cls) + if index >= 0: # Type already sent, just write the ID - buffer.write_varuint32(type_id) + buffer.write_varuint32(index) else: - # New type, write ID and add to writing queue - buffer.write_varuint32(type_id) - type_def = self._resolver._build_type_def(typeinfo) - meta_context.add_writing_type_def(type_def) + index = -index - 1 + # New type, write ID and store typeinfo for later use + buffer.write_varuint32(index) + # Store the typeinfo in meta context for deserialization + meta_context.add_writing_type_def(index, typeinfo.type_def) cpdef TypeInfo read_shared_type_meta(self, Buffer buffer): """Read shared type meta information.""" - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is None: - raise ValueError("Meta context must be set when meta share is enabled") - + meta_context = self.serialization_context.get_meta_context() + assert meta_context is not None, "Meta context must be set when meta share is enabled" type_id = buffer.read_varuint32() typeinfo = meta_context.get_read_type_info(type_id) - if typeinfo is None: - # Need to read type definition - typeinfo = self._read_type_info_with_meta_share(meta_context, type_id) + assert typeinfo is not None, f"Type info not found for ID {type_id}" return typeinfo cpdef write_type_defs(self, Buffer buffer): """Write all type definitions that need to be sent.""" - meta_context = self._resolver.fory.serialization_context.get_meta_context() - if meta_context is None: - return - - writing_type_defs = meta_context.get_writing_type_defs() - buffer.write_varuint32(len(writing_type_defs)) - - for type_def in writing_type_defs: - # Just copy the encoded bytes directly - buffer.write_bytes(type_def.encoded) - - meta_context.clear_writing_type_defs() + self._resolver.write_type_defs(buffer) cpdef read_type_defs(self, Buffer buffer): """Read all type definitions from the buffer.""" self._resolver.read_type_defs(buffer) - def _read_type_info_with_meta_share(self, meta_context, type_id): - """Read type info with meta share support.""" - # First check if we already have the typeinfo cached - typeinfo = meta_context.get_read_type_info(type_id) - if typeinfo is not None: - return typeinfo - - # If not found, this is an error in our current implementation - raise ValueError(f"Type info not found for ID {type_id}") - - def _build_type_def(self, typeinfo): - """Build TypeDef for a TypeInfo.""" - from pyfory.meta.typedef_encoder import encode_typedef - return encode_typedef(self, typeinfo.cls) - cpdef inline reset(self): pass @@ -691,165 +658,119 @@ cdef class MetaContext: cdef: # Types which have sent definitions to peer # Maps type objects to their assigned IDs - flat_hash_map[uint64_t, int32_t] _c_type_map - - # Type definitions read from peer - vector[PyObject *] _c_read_type_defs - - # Type infos read from peer (cached for performance) - vector[PyObject *] _c_read_type_infos - - # New type definitions which need sending to peer - # This will be filled up when there are new type definitions need sending, - # and will be cleared after writing to buffer - vector[PyObject *] _c_writing_type_defs + flat_hash_map[uint64_t, int32_t] _c_type_map # Counter for assigning new IDs int32_t _next_id - - # Python objects for compatibility - dict _type_map - list _read_type_defs - list _read_type_infos list _writing_type_defs + list _read_type_infos def __cinit__(self): self._next_id = 0 - self._type_map = {} - self._read_type_defs = [] - self._read_type_infos = [] self._writing_type_defs = [] + self._read_type_infos = [] - cpdef inline int32_t get_type_id(self, type_cls): - """Get the ID for a type, or -1 if not found.""" - cdef uint64_t type_addr = type_cls - cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr) - if it == self._c_type_map.end(): - return -1 - return deref(it).second - - cpdef inline tuple put_or_get_type_id(self, type_cls): - """ - Put a type in the map and return its ID, or get existing ID. - Returns (id, is_new) where is_new indicates if this is a new type. - """ + cpdef inline int32_t add_writing_type(self, typeinfo): + """Add a type definition to the writing queue.""" + type_cls = typeinfo.cls cdef uint64_t type_addr = type_cls cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr) if it != self._c_type_map.end(): - return (deref(it).second, False) + return deref(it).second - cdef int32_t new_id = self._next_id - self._c_type_map[type_addr] = new_id + self._c_type_map[type_addr] = self._next_id self._next_id += 1 - # Also update Python dict for compatibility - self._type_map[type_cls] = new_id - return (new_id, True) - - cpdef inline add_writing_type_def(self, type_def): - """Add a type definition to the writing queue.""" - self._c_writing_type_defs.push_back( type_def) - Py_INCREF(type_def) + type_def = type_info.type_def self._writing_type_defs.append(type_def) - + cpdef inline list get_writing_type_defs(self): """Get all type definitions that need to be written.""" return self._writing_type_defs - cpdef inline clear_writing_type_defs(self): - """Clear the writing type definitions queue.""" - cdef PyObject * ptr - for ptr in self._c_writing_type_defs: - Py_XDECREF(ptr) - self._c_writing_type_defs.clear() + cpdef inline reset_write(self): + """Reset write state.""" self._writing_type_defs.clear() - - cpdef inline add_read_type_def(self, type_def): - """Add a type definition read from peer.""" - self._c_read_type_defs.push_back( type_def) - Py_INCREF(type_def) - self._read_type_defs.append(type_def) - - cpdef inline get_read_type_def(self, int32_t index): - """Get a type definition by index.""" - if 0 <= index < self._c_read_type_defs.size(): - return self._c_read_type_defs[index] - return None + self._c_type_map.clear() + self._next_id = 0 cpdef inline add_read_type_info(self, type_info): """Add a type info read from peer.""" - self._c_read_type_infos.push_back( type_info) - Py_INCREF(type_info) self._read_type_infos.append(type_info) cpdef inline get_read_type_info(self, int32_t index): """Get a type info by index.""" - if 0 <= index < self._c_read_type_infos.size(): - return self._c_read_type_infos[index] - return None - - cpdef inline set_read_type_info(self, int32_t index, type_info): - """Set a type info at a specific index.""" - cdef int32_t current_size = self._c_read_type_infos.size() - while current_size <= index: - self._c_read_type_infos.push_back(NULL) - self._read_type_infos.append(None) - current_size += 1 - - # Decrease ref count of old object if it exists - if self._c_read_type_infos[index] != NULL: - Py_XDECREF(self._c_read_type_infos[index]) - - # Set new object - self._c_read_type_infos[index] = type_info - Py_INCREF(type_info) - self._read_type_infos[index] = type_info - - cpdef inline reset_write(self): - """Reset write state.""" - # In meta share mode, we don't clear the type map to preserve type IDs across serialization calls - # Only clear the writing queue - self.clear_writing_type_defs() - # Note: _next_id is not reset to preserve type ID assignments + return self._read_type_infos[index] cpdef inline reset_read(self): """Reset read state.""" - # In meta share mode, we don't clear the read type infos to preserve them across deserialization calls - # Only clear the type definitions if needed - # self._read_type_defs.clear() - # self._read_type_infos.clear() - pass + self._read_type_infos.clear() cpdef inline reset(self): """Reset both read and write state.""" self.reset_write() self.reset_read() - def __dealloc__(self): - """Clean up C++ containers and Python object references.""" - cdef PyObject * ptr - - # Clear writing type defs - for ptr in self._c_writing_type_defs: - Py_XDECREF(ptr) - self._c_writing_type_defs.clear() - - # Clear read type defs - for ptr in self._c_read_type_defs: - Py_XDECREF(ptr) - self._c_read_type_defs.clear() - - # Clear read type infos - for ptr in self._c_read_type_infos: - Py_XDECREF(ptr) - self._c_read_type_infos.clear() - def __repr__(self): - return (f"MetaContext(type_map_size={len(self._type_map)}, " + return (f"MetaContext(" f"read_defs={len(self._read_type_defs)}, " f"read_infos={len(self._read_type_infos)}, " f"writing_defs={len(self._writing_type_defs)})") +@cython.final +cdef class SerializationContext: + cdef dict objects + cdef readonly bint scoped_meta_share_enabled + cdef object _meta_context + + def __init__(self, scoped_meta_share_enabled: bool = False): + self.objects = dict() + self.scoped_meta_share_enabled = scoped_meta_share_enabled + if scoped_meta_share_enabled: + self._meta_context = MetaContext() + else: + self._meta_context = None + + @property + def meta_context(self): + return self._meta_context + + @meta_context.setter + def meta_context(self, value): + self._meta_context = value + + def add(self, key, obj): + self.objects[id(key)] = obj + + def __contains__(self, key): + return id(key) in self.objects + + def __getitem__(self, key): + return self.objects[id(key)] + + def get(self, key): + return self.objects.get(id(key)) + + def get_meta_context(self): + return self._meta_context + + def reset(self): + if len(self.objects) > 0: + self.objects.clear() + + def reset_write(self): + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self._meta_context is not None: + self._meta_context.reset_write() + + def reset_read(self): + if len(self.objects) > 0: + self.objects.clear() + if self.scoped_meta_share_enabled and self._meta_context is not None: + self._meta_context.reset_read() + + @cython.final cdef class Fory: cdef readonly object language @@ -1296,7 +1217,7 @@ cdef class Fory: self.ref_resolver.reset_write() self.type_resolver.reset_write() self.metastring_resolver.reset_write() - self.serialization_context.reset() + self.serialization_context.reset_write() self.pickler.clear_memo() self._unsupported_callback = None @@ -1304,7 +1225,7 @@ cdef class Fory: self.ref_resolver.reset_read() self.type_resolver.reset_read() self.metastring_resolver.reset_read() - self.serialization_context.reset() + self.serialization_context.reset_read() self._buffers = None self.unpickler = None self._unsupported_objects = None @@ -1366,59 +1287,6 @@ cpdef inline read_nullable_pystr(Buffer buffer): return None -@cython.final -cdef class SerializationContext: - cdef dict objects - cdef readonly bint scoped_meta_share_enabled - cdef object _meta_context - - def __init__(self, scoped_meta_share_enabled: bool = False): - self.objects = dict() - self.scoped_meta_share_enabled = scoped_meta_share_enabled - if scoped_meta_share_enabled: - self._meta_context = MetaContext() - else: - self._meta_context = None - - @property - def meta_context(self): - return self._meta_context - - @meta_context.setter - def meta_context(self, value): - self._meta_context = value - - def add(self, key, obj): - self.objects[id(key)] = obj - - def __contains__(self, key): - return id(key) in self.objects - - def __getitem__(self, key): - return self.objects[id(key)] - - def get(self, key): - return self.objects.get(id(key)) - - def get_meta_context(self): - return self._meta_context - - def reset(self): - if len(self.objects) > 0: - self.objects.clear() - - def reset_write(self): - if len(self.objects) > 0: - self.objects.clear() - if self.scoped_meta_share_enabled and self._meta_context is not None: - self._meta_context.reset_write() - - def reset_read(self): - if len(self.objects) > 0: - self.objects.clear() - if self.scoped_meta_share_enabled and self._meta_context is not None: - self._meta_context.reset_read() - cdef class Serializer: cdef readonly Fory fory cdef readonly object type_ From 68e6c3b24d520ed8eb26467586bcf4f2b97c633e Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 20:59:52 +0800 Subject: [PATCH 09/20] fix meta share --- python/pyfory/_registry.py | 15 +++-------- python/pyfory/_serialization.pyx | 27 +++++++------------- python/pyfory/meta/typedef_decoder.py | 1 - python/pyfory/tests/test_meta_share.py | 35 -------------------------- 4 files changed, 12 insertions(+), 66 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index c698e71cce..f691de5a2d 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -650,17 +650,7 @@ def get_meta_compressor(self): def write_shared_type_meta(self, buffer, typeinfo): """Write shared type meta information.""" meta_context = self.fory.serialization_context.get_meta_context() - assert meta_context is not None, "Meta context must be set when meta share is enabled" - index = meta_context.add_writing_type(typeinfo.cls) - if index >= 0: - # Type already sent, just write the ID - buffer.write_varuint32(index) - else: - index = -index - 1 - # New type, write ID and store typeinfo for later use - buffer.write_varuint32(index) - # Store the typeinfo in meta context for deserialization - meta_context.add_writing_type_def(index, typeinfo.type_def) + meta_context.write_typeinfo(buffer, typeinfo) def read_shared_type_meta(self, buffer): """Read shared type meta information.""" @@ -690,11 +680,12 @@ def read_type_defs(self, buffer): num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): - # Read type ID first + # Read the header (first 8 bytes) to get the type ID header = buffer.read_int64() # Check if we already have this TypeDef cached type_info = self._meta_shared_typeinfo.get(header) if type_info is not None: + # Skip the rest of the TypeDef binary for faster performance skip_typedef(buffer, header) else: # Read the TypeDef and create TypeInfo diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index c58d48287e..c22c1d381f 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -446,11 +446,13 @@ cdef class TypeResolver: # hash -> TypeInfo flat_hash_map[pair[int64_t, int64_t], PyObject *] _c_meta_hash_to_typeinfo MetaStringResolver meta_string_resolver + c_bool meta_share SerializationContext serialization_context def __init__(self, fory, meta_share=False): self.fory = fory self.metastring_resolver = fory.metastring_resolver + self.meta_share = meta_share from pyfory._registry import TypeResolver self._resolver = TypeResolver(fory, meta_share=meta_share) self.serialization_context = fory.serialization_context @@ -609,16 +611,7 @@ cdef class TypeResolver: """Write shared type meta information.""" meta_context = self.serialization_context.get_meta_context() assert meta_context is not None, "Meta context must be set when meta share is enabled" - index = meta_context.add_writing_type(typeinfo.cls) - if index >= 0: - # Type already sent, just write the ID - buffer.write_varuint32(index) - else: - index = -index - 1 - # New type, write ID and store typeinfo for later use - buffer.write_varuint32(index) - # Store the typeinfo in meta context for deserialization - meta_context.add_writing_type_def(index, typeinfo.type_def) + meta_context.write_typeinfo(buffer, typeinfo) cpdef TypeInfo read_shared_type_meta(self, Buffer buffer): """Read shared type meta information.""" @@ -661,26 +654,25 @@ cdef class MetaContext: flat_hash_map[uint64_t, int32_t] _c_type_map # Counter for assigning new IDs - int32_t _next_id list _writing_type_defs list _read_type_infos def __cinit__(self): - self._next_id = 0 self._writing_type_defs = [] self._read_type_infos = [] - cpdef inline int32_t add_writing_type(self, typeinfo): + cpdef inline int32_t write_typeinfo(self, Buffer buffer, typeinfo): """Add a type definition to the writing queue.""" type_cls = typeinfo.cls cdef uint64_t type_addr = type_cls cdef flat_hash_map[uint64_t, int32_t].iterator it = self._c_type_map.find(type_addr) if it != self._c_type_map.end(): - return deref(it).second + buffer.write_varuint32(deref(it).second) - self._c_type_map[type_addr] = self._next_id - self._next_id += 1 - type_def = type_info.type_def + cdef index = self._c_type_map.size() + buffer.write_varuint32(index) + self._c_type_map[type_addr] = index + type_def = typeinfo.type_def self._writing_type_defs.append(type_def) cpdef inline list get_writing_type_defs(self): @@ -691,7 +683,6 @@ cdef class MetaContext: """Reset write state.""" self._writing_type_defs.clear() self._c_type_map.clear() - self._next_id = 0 cpdef inline add_read_type_info(self, type_info): """Add a type info read from peer.""" diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index cc65524e29..c2ebd6f506 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -50,7 +50,6 @@ def skip_typedef(buffer: Buffer, header) -> None: """ Skip a TypeDef from the buffer. """ - header = buffer.read_int64() # Extract components from header meta_size = header & META_SIZE_MASKS # If meta size is at maximum, read additional size diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 7476e6bf52..3ccf09af04 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -113,22 +113,6 @@ def test_simple_nested_dataclass_serialization(self): assert deserialized.value == obj.value assert deserialized.name == obj.name - def test_meta_context_type_mapping(self): - """Test that meta context properly maps types to IDs.""" - fory = Fory(language=Language.XLANG, meta_share=True) - meta_context = fory.serialization_context.meta_context - - # Register the dataclass - fory.register_type(SimpleDataClass) - - obj = SimpleDataClass(name="test", age=25, active=True) - buffer = fory.serialize(obj) - - # Check that type was added to meta context - type_id = meta_context.get_type_id(SimpleDataClass) - assert type_id is not None - assert type_id >= 0 - def test_serialization_without_meta_share(self): """Test that serialization works without meta share mode.""" fory = Fory(language=Language.XLANG, meta_share=False) @@ -144,22 +128,3 @@ def test_serialization_without_meta_share(self): assert deserialized.age == obj.age assert deserialized.active == obj.active - def test_meta_context_reset(self): - """Test that meta context is properly reset.""" - fory = Fory(language=Language.XLANG, meta_share=True) - meta_context = fory.serialization_context.meta_context - - # Register the dataclass - fory.register_type(SimpleDataClass) - - obj = SimpleDataClass(name="test", age=25, active=True) - fory.serialize(obj) - - # Check that type was added - type_id = meta_context.get_type_id(SimpleDataClass) - assert type_id is not None - - # Reset and check that type mapping is preserved (meta share behavior) - fory.reset_write() - type_id_after_reset = meta_context.get_type_id(SimpleDataClass) - assert type_id_after_reset is not None # Should be preserved in meta share mode From c698ad529b0883eb6214c4341d2f20bb8ef8886b Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 21:40:14 +0800 Subject: [PATCH 10/20] add compatible tests --- python/pyfory/_fory.py | 16 +++--- python/pyfory/_serialization.pyx | 13 ++--- python/pyfory/serializer.py | 37 +++++++++----- python/pyfory/tests/test_meta_share.py | 69 +++++++++++++++++++++++--- 4 files changed, 101 insertions(+), 34 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index ac7febf9af..b6993438d6 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -98,6 +98,7 @@ class Fory: __slots__ = ( "language", "is_py", + "compatbile", "ref_tracking", "ref_resolver", "type_resolver", @@ -113,14 +114,13 @@ class Fory: "_unsupported_objects", "_peer_language", ) - serialization_context: "SerializationContext" def __init__( self, language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, - meta_share: bool = False, + compatbile: bool = False, ): """ :param require_type_registration: @@ -131,14 +131,14 @@ def __init__( Do not disable type registration if you can't ensure your environment are *indeed secure*. We are not responsible for security risks if you disable this option. - :param meta_share: - Whether to enable meta share mode for cross-language serialization. - When enabled, type definitions will be shared between serialization calls - to reduce overhead for repeated types. + :param compatbile: + Whether to enable compatbile mode for cross-language serialization. + When enabled, type forward/backward compatibility for struct fields will be enabled. """ self.language = language self.is_py = language == Language.PYTHON self.require_type_registration = _ENABLE_TYPE_REGISTRATION_FORCIBLY or require_type_registration + self.compatbile = compatbile self.ref_tracking = ref_tracking if self.ref_tracking: self.ref_resolver = MapRefResolver() @@ -148,10 +148,10 @@ def __init__( from pyfory._registry import TypeResolver self.metastring_resolver = MetaStringResolver() - self.type_resolver = TypeResolver(self, meta_share=meta_share) + self.type_resolver = TypeResolver(self, meta_share=compatbile) self.type_resolver.initialize() from pyfory._serialization import SerializationContext - self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) + self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index c22c1d381f..754e45b53f 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -768,6 +768,7 @@ cdef class Fory: cdef readonly c_bool ref_tracking cdef readonly c_bool require_type_registration cdef readonly c_bool is_py + cdef readonly c_bool compatbile cdef readonly MapRefResolver ref_resolver cdef readonly TypeResolver type_resolver cdef readonly MetaStringResolver metastring_resolver @@ -786,7 +787,7 @@ cdef class Fory: language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, - meta_share: bool = False, + compatbile: bool = False, ): """ :param require_type_registration: @@ -797,23 +798,23 @@ cdef class Fory: Do not disable type registration if you can't ensure your environment are *indeed secure*. We are not responsible for security risks if you disable this option. - :param meta_share: - Whether to enable meta share mode for cross-language serialization. - When enabled, type definitions are shared across multiple serialization calls - to reduce overhead for repeated types. + :param compatbile: + Whether to enable compatbile mode for cross-language serialization. + When enabled, type forward/backward compatibility for struct fields will be enabled. """ self.language = language if _ENABLE_TYPE_REGISTRATION_FORCIBLY or require_type_registration: self.require_type_registration = True else: self.require_type_registration = False + self.compatbile = compatbile self.ref_tracking = ref_tracking self.ref_resolver = MapRefResolver(ref_tracking) self.is_py = self.language == Language.PYTHON self.metastring_resolver = MetaStringResolver() self.type_resolver = TypeResolver(self) self.type_resolver.initialize() - self.serialization_context = SerializationContext(scoped_meta_share_enabled=meta_share) + self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 1353ba01c1..6ef15c0f90 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -445,13 +445,14 @@ def _gen_xwrite_method(self): context["_field_names"] = self._field_names context["_type_hints"] = self._type_hints context["_serializers"] = self._serializers - # Compute hash at generation time since we're in xlang mode - if self._hash == 0: - self._hash = _get_hash(self.fory, self._field_names, self._type_hints) stmts = [ f'"""xwrite method for {self.type_}"""', - f"{buffer}.write_int32({self._hash})", ] + if not self.fory.compatbile: + # Compute hash at generation time since we're in xlang mode + if self._hash == 0: + self._hash = _get_hash(self.fory, self._field_names, self._type_hints) + stmts.append(f"{buffer}.write_int32({self._hash})") if not self._has_slots: stmts.append(f"{value_dict} = {value}.__dict__") for index, field_name in enumerate(self._field_names): @@ -489,18 +490,27 @@ def _gen_xread_method(self): context["_field_names"] = self._field_names context["_type_hints"] = self._type_hints context["_serializers"] = self._serializers - # Compute hash at generation time since we're in xlang mode - if self._hash == 0: - self._hash = _get_hash(self.fory, self._field_names, self._type_hints) + + current_class_field_names = set(self._get_field_names(self.type_)) + stmts = [ f'"""xread method for {self.type_}"""', - f"read_hash = {buffer}.read_int32()", - f"if read_hash != {self._hash}:", - f""" raise TypeNotCompatibleError( - f"Hash {{read_hash}} is not consistent with {self._hash} for type {self.type_}")""", + ] + if not self.fory.compatbile: + # Compute hash at generation time since we're in xlang mode + if self._hash == 0: + self._hash = _get_hash(self.fory, self._field_names, self._type_hints) + stmts.extend([ + f"read_hash = {buffer}.read_int32()", + f"if read_hash != {self._hash}:", + f""" raise TypeNotCompatibleError( + f"Hash {{read_hash}} is not consistent with {self._hash} for type {self.type_}")""", + ]) + stmts.extend([ f"{obj} = {obj_class}.__new__({obj_class})", f"{ref_resolver}.reference({obj})", - ] + ]) + if not self._has_slots: stmts.append(f"{obj_dict} = {obj}.__dict__") @@ -509,6 +519,9 @@ def _gen_xread_method(self): context[serializer_var] = self._serializers[index] field_value = f"field_value{index}" stmts.append(f"{field_value} = {fory}.xdeserialize_ref({buffer}, serializer={serializer_var})") + if field_name not in current_class_field_names: + stmts.append(f"# {field_name} is not in {self.type_}") + continue if not self._has_slots: stmts.append(f"{obj_dict}['{field_name}'] = {field_value}") else: diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 3ccf09af04..3624781612 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -15,7 +15,6 @@ # specific language governing permissions and limitations # under the License. -import pytest import dataclasses from pyfory import Fory, Language from pyfory.buffer import Buffer @@ -35,6 +34,21 @@ class SimpleNestedDataClass: name: str +@dataclasses.dataclass +class ExtendedDataClass: + name: str + age: int + active: bool + email: str # Additional field + + +@dataclasses.dataclass +class ReducedDataClass: + name: str + age: int + # Missing 'active' field + + class TestMetaShareMode: def setup_method(self): @@ -43,19 +57,19 @@ def setup_method(self): def test_meta_share_enabled(self): """Test that meta share mode can be enabled.""" - fory = Fory(language=Language.XLANG, meta_share=True) + fory = Fory(language=Language.XLANG, compatbile=True) assert fory.serialization_context.scoped_meta_share_enabled assert fory.serialization_context.meta_context is not None def test_meta_share_disabled(self): """Test that meta share mode can be disabled.""" - fory = Fory(language=Language.XLANG, meta_share=False) + fory = Fory(language=Language.XLANG, compatbile=False) assert not fory.serialization_context.scoped_meta_share_enabled assert fory.serialization_context.meta_context is None def test_simple_dataclass_serialization(self): """Test serialization of simple dataclass with meta share.""" - fory = Fory(language=Language.XLANG, meta_share=True) + fory = Fory(language=Language.XLANG, compatbile=True) # Register the dataclass fory.register_type(SimpleDataClass) @@ -71,7 +85,7 @@ def test_simple_dataclass_serialization(self): def test_multiple_objects_same_type(self): """Test that multiple objects of same type reuse type definition.""" - fory = Fory(language=Language.XLANG, meta_share=True) + fory = Fory(language=Language.XLANG, compatbile=True) # Register the dataclass fory.register_type(SimpleDataClass) @@ -84,7 +98,7 @@ def test_multiple_objects_same_type(self): buffer2 = fory.serialize(obj2) # Create a new fory instance with the same meta context for deserialization - fory2 = Fory(language=Language.XLANG, meta_share=True) + fory2 = Fory(language=Language.XLANG, compatbile=True) fory2.register_type(SimpleDataClass) # Copy the meta context from the first fory instance fory2.serialization_context.meta_context = fory.serialization_context.meta_context @@ -100,7 +114,7 @@ def test_multiple_objects_same_type(self): def test_simple_nested_dataclass_serialization(self): """Test serialization of simple nested dataclass with meta share.""" - fory = Fory(language=Language.XLANG, meta_share=True) + fory = Fory(language=Language.XLANG, compatbile=True) # Register the dataclass fory.register_type(SimpleNestedDataClass) @@ -115,7 +129,7 @@ def test_simple_nested_dataclass_serialization(self): def test_serialization_without_meta_share(self): """Test that serialization works without meta share mode.""" - fory = Fory(language=Language.XLANG, meta_share=False) + fory = Fory(language=Language.XLANG, compatbile=False) # Register the dataclass fory.register_type(SimpleDataClass) @@ -128,3 +142,42 @@ def test_serialization_without_meta_share(self): assert deserialized.age == obj.age assert deserialized.active == obj.active + def test_schema_evolution_more_fields(self): + # Serialize with original schema + fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1.register_type(SimpleDataClass) + + obj = SimpleDataClass(name="test", age=25, active=True) + buffer = fory1.serialize(obj) + + # Deserialize with extended schema (more fields) + fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2.register_type(ExtendedDataClass) + deserialized = fory2.deserialize(buffer) + + # Current behavior: deserialized object is of the new registered type + assert isinstance(deserialized, ExtendedDataClass) + assert deserialized.name == obj.name + assert deserialized.age == obj.age + assert deserialized.active == obj.active + assert not hasattr(deserialized, 'email') + + + def test_schema_evolution_fewer_fields(self): + # Serialize with original schema + fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1.register_type(SimpleDataClass) + obj = SimpleDataClass(name="test", age=25, active=True) + buffer = fory1.serialize(obj) + + # Deserialize with reduced schema (fewer fields) + fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2.register_type(ReducedDataClass) + deserialized = fory2.deserialize(buffer) + + assert isinstance(deserialized, ReducedDataClass) + assert deserialized.name == obj.name + assert deserialized.age == obj.age + # The missing field should not be present + assert not hasattr(deserialized, 'active') + From bab26cea421c5373622dbb834a6ebdf4d6de7004 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 22:02:08 +0800 Subject: [PATCH 11/20] fix cython --- python/pyfory/_fory.py | 2 +- python/pyfory/_registry.py | 8 +++---- python/pyfory/_serialization.pyx | 41 ++++++++++++-------------------- python/pyfory/serializer.py | 2 -- 4 files changed, 20 insertions(+), 33 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index b6993438d6..242a56a48b 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -274,7 +274,7 @@ def _serialize( # Write type definitions at the end, similar to Java implementation if self.serialization_context.scoped_meta_share_enabled: - meta_context = self.serialization_context.get_meta_context() + meta_context = self.serialization_context.meta_context if meta_context is not None and len(meta_context.get_writing_type_defs()) > 0: # Update the offset to point to current position current_pos = buffer.writer_index diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index f691de5a2d..2844dd83bf 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -649,12 +649,12 @@ def get_meta_compressor(self): def write_shared_type_meta(self, buffer, typeinfo): """Write shared type meta information.""" - meta_context = self.fory.serialization_context.get_meta_context() + meta_context = self.fory.serialization_context.meta_context meta_context.write_typeinfo(buffer, typeinfo) def read_shared_type_meta(self, buffer): """Read shared type meta information.""" - meta_context = self.fory.serialization_context.get_meta_context() + meta_context = self.fory.serialization_context.meta_context assert meta_context is not None, "Meta context must be set when meta share is enabled" type_id = buffer.read_varuint32() typeinfo = meta_context.get_read_type_info(type_id) @@ -663,7 +663,7 @@ def read_shared_type_meta(self, buffer): def write_type_defs(self, buffer): """Write all type definitions that need to be sent.""" - meta_context = self.fory.serialization_context.get_meta_context() + meta_context = self.fory.serialization_context.meta_context if meta_context is None: return writing_type_defs = meta_context.get_writing_type_defs() @@ -674,7 +674,7 @@ def write_type_defs(self, buffer): def read_type_defs(self, buffer): """Read all type definitions from the buffer.""" - meta_context = self.fory.serialization_context.get_meta_context() + meta_context = self.fory.serialization_context.meta_context if meta_context is None: return diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 754e45b53f..65c674c6ed 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -609,13 +609,13 @@ cdef class TypeResolver: cpdef write_shared_type_meta(self, Buffer buffer, TypeInfo typeinfo): """Write shared type meta information.""" - meta_context = self.serialization_context.get_meta_context() + meta_context = self.serialization_context.meta_context assert meta_context is not None, "Meta context must be set when meta share is enabled" meta_context.write_typeinfo(buffer, typeinfo) cpdef TypeInfo read_shared_type_meta(self, Buffer buffer): """Read shared type meta information.""" - meta_context = self.serialization_context.get_meta_context() + meta_context = self.serialization_context.meta_context assert meta_context is not None, "Meta context must be set when meta share is enabled" type_id = buffer.read_varuint32() typeinfo = meta_context.get_read_type_info(type_id) @@ -712,23 +712,15 @@ cdef class MetaContext: cdef class SerializationContext: cdef dict objects cdef readonly bint scoped_meta_share_enabled - cdef object _meta_context + cdef public object meta_context def __init__(self, scoped_meta_share_enabled: bool = False): self.objects = dict() self.scoped_meta_share_enabled = scoped_meta_share_enabled if scoped_meta_share_enabled: - self._meta_context = MetaContext() + self.meta_context = MetaContext() else: - self._meta_context = None - - @property - def meta_context(self): - return self._meta_context - - @meta_context.setter - def meta_context(self, value): - self._meta_context = value + self.meta_context = None def add(self, key, obj): self.objects[id(key)] = obj @@ -742,24 +734,21 @@ cdef class SerializationContext: def get(self, key): return self.objects.get(id(key)) - def get_meta_context(self): - return self._meta_context - - def reset(self): + cpdef reset(self): if len(self.objects) > 0: self.objects.clear() - def reset_write(self): + cpdef reset_write(self): if len(self.objects) > 0: self.objects.clear() - if self.scoped_meta_share_enabled and self._meta_context is not None: - self._meta_context.reset_write() + if self.scoped_meta_share_enabled and self.meta_context is not None: + self.meta_context.reset_write() - def reset_read(self): + cpdef reset_read(self): if len(self.objects) > 0: self.objects.clear() - if self.scoped_meta_share_enabled and self._meta_context is not None: - self._meta_context.reset_read() + if self.scoped_meta_share_enabled and self.meta_context is not None: + self.meta_context.reset_read() @cython.final @@ -812,9 +801,9 @@ cdef class Fory: self.ref_resolver = MapRefResolver(ref_tracking) self.is_py = self.language == Language.PYTHON self.metastring_resolver = MetaStringResolver() - self.type_resolver = TypeResolver(self) - self.type_resolver.initialize() self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) + self.type_resolver = TypeResolver(self, meta_share=compatbile) + self.type_resolver.initialize() self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( @@ -927,7 +916,7 @@ cdef class Fory: # Write type definitions at the end, similar to Java implementation if self.serialization_context.scoped_meta_share_enabled: - meta_context = self.serialization_context.get_meta_context() + meta_context = self.serialization_context.meta_context if meta_context is not None and len(meta_context.get_writing_type_defs()) > 0: # Update the offset to point to current position current_pos = buffer.writer_index diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 6ef15c0f90..5bee612899 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -490,9 +490,7 @@ def _gen_xread_method(self): context["_field_names"] = self._field_names context["_type_hints"] = self._type_hints context["_serializers"] = self._serializers - current_class_field_names = set(self._get_field_names(self.type_)) - stmts = [ f'"""xread method for {self.type_}"""', ] From 8595e7437d7292ee7c6de06ed904d3ee31bd726a Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 22:03:28 +0800 Subject: [PATCH 12/20] lint code --- python/pyfory/_fory.py | 13 +++--- python/pyfory/_registry.py | 18 ++++---- python/pyfory/_serializer.py | 50 +++++--------------- python/pyfory/format/__init__.py | 3 +- python/pyfory/format/tests/test_encoder.py | 8 +--- python/pyfory/meta/typedef.py | 11 +++-- python/pyfory/meta/typedef_encoder.py | 2 +- python/pyfory/serializer.py | 24 ++++++---- python/pyfory/tests/benchmark.py | 20 ++------ python/pyfory/tests/record.py | 4 +- python/pyfory/tests/test_buffer.py | 5 +- python/pyfory/tests/test_codegen.py | 4 +- python/pyfory/tests/test_meta_share.py | 53 ++++++++++------------ python/pyfory/tests/test_metastring.py | 4 +- python/pyfory/type.py | 23 +++------- 15 files changed, 91 insertions(+), 151 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index 242a56a48b..d56a20f0fd 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -151,6 +151,7 @@ def __init__( self.type_resolver = TypeResolver(self, meta_share=compatbile) self.type_resolver.initialize() from pyfory._serialization import SerializationContext + self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) self.buffer = Buffer.allocate(32) if not require_type_registration: @@ -266,12 +267,12 @@ def _serialize( if self.serialization_context.scoped_meta_share_enabled: type_defs_offset_pos = buffer.writer_index buffer.write_int32(-1) # Reserve 4 bytes for type definitions offset - + if self.language == Language.PYTHON: self.serialize_ref(buffer, obj) else: self.xserialize_ref(buffer, obj) - + # Write type definitions at the end, similar to Java implementation if self.serialization_context.scoped_meta_share_enabled: meta_context = self.serialization_context.meta_context @@ -280,7 +281,7 @@ def _serialize( current_pos = buffer.writer_index buffer.put_int32(type_defs_offset_pos, current_pos - type_defs_offset_pos - 4) self.type_resolver.write_type_defs(buffer) - + self.reset_write() if buffer is not self.buffer: return buffer @@ -391,7 +392,7 @@ def _deserialize( self._buffers = iter(buffers) else: assert buffers is None, "buffers should be null when the serialized stream is produced with buffer_callback null." - + # Read type definitions at the start, similar to Java implementation if self.serialization_context.scoped_meta_share_enabled: relative_type_defs_offset = buffer.read_int32() @@ -404,7 +405,7 @@ def _deserialize( self.type_resolver.read_type_defs(buffer) # Jump back to continue with object deserialization buffer.reader_index = current_reader_index - + if is_target_x_lang: obj = self.xdeserialize_ref(buffer) else: @@ -526,8 +527,6 @@ def reset(self): self.reset_read() - - _ENABLE_TYPE_REGISTRATION_FORCIBLY = os.getenv("ENABLE_TYPE_REGISTRATION_FORCIBLY", "0") in { "1", "true", diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 2844dd83bf..70b6861c00 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -549,7 +549,7 @@ def _create_serializer(self, cls): else: serializer = PickleSerializer(self.fory, cls) return serializer - + def _set_struct_typeinfo(self, typeinfo): assert self.meta_share, "Meta share must be enabled" type_def = encode_typedef(self, typeinfo.cls) @@ -561,7 +561,7 @@ def is_registered_by_name(self, cls): if typeinfo is None: return False return TypeId.is_namespaced_type(typeinfo.type_id & 0xFF) - + def is_registered_by_id(self, cls): typeinfo = self._types_info.get(cls) if typeinfo is None: @@ -577,7 +577,7 @@ def get_registered_id(self, cls): typeinfo = self._types_info.get(cls) assert typeinfo is not None, f"{cls} not registered" return typeinfo.type_id - + def _load_metabytes_to_typeinfo(self, ns_metabytes, type_metabytes): typeinfo = self._ns_type_to_typeinfo.get((ns_metabytes, type_metabytes)) if typeinfo is not None: @@ -599,12 +599,12 @@ def write_typeinfo(self, buffer, typeinfo): return type_id = typeinfo.type_id internal_type_id = type_id & 0xFF - + # Check if meta share is enabled first if self.meta_share: self.write_shared_type_meta(buffer, typeinfo) return - + buffer.write_varuint32(type_id) if TypeId.is_namespaced_type(internal_type_id): self.metastring_resolver.write_meta_string_bytes(buffer, typeinfo.namespace_bytes) @@ -614,7 +614,7 @@ def read_typeinfo(self, buffer): # Check if meta share is enabled first if self.meta_share: return self.read_shared_type_meta(buffer) - + type_id = buffer.read_varuint32() internal_type_id = type_id & 0xFF if TypeId.is_namespaced_type(internal_type_id): @@ -671,13 +671,13 @@ def write_type_defs(self, buffer): for type_def in writing_type_defs: # Just copy the encoded bytes directly buffer.write_bytes(type_def.encoded) - + def read_type_defs(self, buffer): """Read all type definitions from the buffer.""" meta_context = self.fory.serialization_context.meta_context if meta_context is None: return - + num_type_defs = buffer.read_varuint32() for i in range(num_type_defs): # Read the header (first 8 bytes) to get the type ID @@ -696,7 +696,7 @@ def read_type_defs(self, buffer): meta_context.add_read_type_info(type_info) def _build_type_info_from_typedef(self, type_def): - """Build TypeInfo from TypeDef using TypeDef's create_serializer method.""" + """Build TypeInfo from TypeDef using TypeDef's create_serializer method.""" # Create serializer using TypeDef's create_serializer method serializer = type_def.create_serializer(self) ns_metastr = self.namespace_encoder.encode(type_def.namespace or "") diff --git a/python/pyfory/_serializer.py b/python/pyfory/_serializer.py index dacdae6704..a12b8c0966 100644 --- a/python/pyfory/_serializer.py +++ b/python/pyfory/_serializer.py @@ -53,15 +53,11 @@ # Key is null, value type is declared type, and ref tracking for value is disabled. NULL_KEY_VALUE_DECL_TYPE = KEY_HAS_NULL | VALUE_DECL_TYPE # Key is null, value type is declared type, and ref tracking for value is enabled. -NULL_KEY_VALUE_DECL_TYPE_TRACKING_REF = ( - KEY_HAS_NULL | VALUE_DECL_TYPE | TRACKING_VALUE_REF -) +NULL_KEY_VALUE_DECL_TYPE_TRACKING_REF = KEY_HAS_NULL | VALUE_DECL_TYPE | TRACKING_VALUE_REF # Value is null, key type is declared type, and ref tracking for key is disabled. NULL_VALUE_KEY_DECL_TYPE = VALUE_HAS_NULL | KEY_DECL_TYPE # Value is null, key type is declared type, and ref tracking for key is enabled. -NULL_VALUE_KEY_DECL_TYPE_TRACKING_REF = ( - VALUE_HAS_NULL | KEY_DECL_TYPE | TRACKING_VALUE_REF -) +NULL_VALUE_KEY_DECL_TYPE_TRACKING_REF = VALUE_HAS_NULL | KEY_DECL_TYPE | TRACKING_VALUE_REF class Serializer(ABC): @@ -182,11 +178,7 @@ def read(self, buffer): class DateSerializer(CrossLanguageCompatibleSerializer): def write(self, buffer, value: datetime.date): if not isinstance(value, datetime.date): - raise TypeError( - "{} should be {} instead of {}".format( - value, datetime.date, type(value) - ) - ) + raise TypeError("{} should be {} instead of {}".format(value, datetime.date, type(value))) days = (value - _base_date).days buffer.write_int32(days) @@ -208,9 +200,7 @@ def _get_timestamp(self, value: datetime.datetime): def write(self, buffer, value: datetime.datetime): if not isinstance(value, datetime.datetime): - raise TypeError( - "{} should be {} instead of {}".format(value, datetime, type(value)) - ) + raise TypeError("{} should be {} instead of {}".format(value, datetime, type(value))) # TimestampType represent micro seconds buffer.write_int64(self._get_timestamp(value)) @@ -287,10 +277,7 @@ def write_header(self, buffer, value): collect_flag |= COLLECTION_TRACKING_REF buffer.write_varuint32(len(value)) buffer.write_int8(collect_flag) - if ( - not has_different_type - and (collect_flag & COLLECTION_NOT_DECL_ELEMENT_TYPE) != 0 - ): + if not has_different_type and (collect_flag & COLLECTION_NOT_DECL_ELEMENT_TYPE) != 0: self.type_resolver.write_typeinfo(buffer, elem_typeinfo) return collect_flag, elem_typeinfo @@ -385,9 +372,7 @@ def _read_different_types(self, buffer, len_, collection_): for _ in range(len_): self._add_element( collection_, - get_next_element( - buffer, self.ref_resolver, self.type_resolver, self.is_py - ), + get_next_element(buffer, self.ref_resolver, self.type_resolver, self.is_py), ) def xwrite(self, buffer, value): @@ -532,12 +517,8 @@ def write(self, buffer, o): type_resolver.write_typeinfo(buffer, value_typeinfo) value_serializer = value_typeinfo.serializer - key_write_ref = ( - key_serializer.need_to_write_ref if key_serializer else False - ) - value_write_ref = ( - value_serializer.need_to_write_ref if value_serializer else False - ) + key_write_ref = key_serializer.need_to_write_ref if key_serializer else False + value_write_ref = value_serializer.need_to_write_ref if value_serializer else False if key_write_ref: chunk_header |= TRACKING_KEY_REF if value_write_ref: @@ -547,18 +528,11 @@ def write(self, buffer, o): chunk_size = 0 while chunk_size < MAX_CHUNK_SIZE: - if ( - key is None - or value is None - or type(key) is not key_cls - or type(value) is not value_cls - ): + if key is None or value is None or type(key) is not key_cls or type(value) is not value_cls: break if not key_write_ref or not ref_resolver.write_ref_or_null(buffer, key): self._write_obj(key_serializer, buffer, key) - if not value_write_ref or not ref_resolver.write_ref_or_null( - buffer, value - ): + if not value_write_ref or not ref_resolver.write_ref_or_null(buffer, value): value_serializer.write(buffer, value) chunk_size += 1 @@ -583,9 +557,7 @@ def read(self, buffer): if size != 0: chunk_header = buffer.read_uint8() key_serializer, value_serializer = self.key_serializer, self.value_serializer - deserialize_ref = ( - fory.deserialize_ref if self.fory.is_py else fory.xdeserialize_ref - ) + deserialize_ref = fory.deserialize_ref if self.fory.is_py else fory.xdeserialize_ref while size > 0: while True: key_has_null = (chunk_header & KEY_HAS_NULL) != 0 diff --git a/python/pyfory/format/__init__.py b/python/pyfory/format/__init__.py index 3bc70502fc..f6fd1d8f5a 100644 --- a/python/pyfory/format/__init__.py +++ b/python/pyfory/format/__init__.py @@ -41,8 +41,7 @@ ) except (ImportError, AttributeError) as e: warnings.warn( - f"Fory format initialization failed, please ensure pyarrow is installed " - f"with version which fory is compiled with: {e}", + f"Fory format initialization failed, please ensure pyarrow is installed with version which fory is compiled with: {e}", RuntimeWarning, stacklevel=2, ) diff --git a/python/pyfory/format/tests/test_encoder.py b/python/pyfory/format/tests/test_encoder.py index ac9dbdfdce..b4b01fc1c9 100644 --- a/python/pyfory/format/tests/test_encoder.py +++ b/python/pyfory/format/tests/test_encoder.py @@ -63,9 +63,7 @@ def test_encoder_with_schema(): @require_pyarrow def test_dict(): dict_ = {"f1": 1, "f2": "str"} - encoder = pyfory.create_row_encoder( - pa.schema([("f1", pa.int32()), ("f2", pa.utf8())]) - ) + encoder = pyfory.create_row_encoder(pa.schema([("f1", pa.int32()), ("f2", pa.utf8())])) row = encoder.to_row(dict_) new_obj = encoder.from_row(row) assert new_obj.f1 == dict_["f1"] @@ -74,9 +72,7 @@ def test_dict(): @require_pyarrow def test_ints(): - cls = pyfory.record_class_factory( - "TestNumeric", ["f" + str(i) for i in range(1, 9)] - ) + cls = pyfory.record_class_factory("TestNumeric", ["f" + str(i) for i in range(1, 9)]) schema = pa.schema( [ ("f1", pa.int64()), diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index b99a3dae93..aab925cc0a 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -41,7 +41,9 @@ class TypeDef: - def __init__(self, namespace: str, typename: str, cls: type, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False): + def __init__( + self, namespace: str, typename: str, cls: type, type_id: int, fields: List["FieldInfo"], encoded: bytes = None, is_compressed: bool = False + ): self.namespace = namespace self.typename = typename self.cls = cls @@ -59,9 +61,12 @@ def get_field_names(self): def create_serializer(self, resolver): from pyfory.serializer import DataClassSerializer + fory = resolver.fory - return DataClassSerializer(fory, self.cls, xlang=not fory.is_py, field_names=self.get_field_names(), serializers=self.create_fields_serializer(resolver)) - + return DataClassSerializer( + fory, self.cls, xlang=not fory.is_py, field_names=self.get_field_names(), serializers=self.create_fields_serializer(resolver) + ) + def __repr__(self): return f"TypeDef(namespace={self.namespace}, typename={self.typename}, cls={self.cls}, type_id={self.type_id}, fields={self.fields}, is_compressed={self.is_compressed})" diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index a150591d5f..b0680fca6c 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -100,7 +100,7 @@ def encode_typedef(type_resolver, cls): if is_compressed: binary = compressed_binary # Prepend header - binary = prepend_header(binary, is_compressed, len(field_infos) > 0) + binary = prepend_header(binary, is_compressed, len(field_infos) > 0) # Extract namespace and typename if type_resolver.is_registered_by_name(cls): namespace, typename = type_resolver.get_registered_name(cls) diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 5bee612899..50762691af 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -449,7 +449,7 @@ def _gen_xwrite_method(self): f'"""xwrite method for {self.type_}"""', ] if not self.fory.compatbile: - # Compute hash at generation time since we're in xlang mode + # Compute hash at generation time since we're in xlang mode if self._hash == 0: self._hash = _get_hash(self.fory, self._field_names, self._type_hints) stmts.append(f"{buffer}.write_int32({self._hash})") @@ -498,16 +498,20 @@ def _gen_xread_method(self): # Compute hash at generation time since we're in xlang mode if self._hash == 0: self._hash = _get_hash(self.fory, self._field_names, self._type_hints) - stmts.extend([ - f"read_hash = {buffer}.read_int32()", - f"if read_hash != {self._hash}:", - f""" raise TypeNotCompatibleError( + stmts.extend( + [ + f"read_hash = {buffer}.read_int32()", + f"if read_hash != {self._hash}:", + f""" raise TypeNotCompatibleError( f"Hash {{read_hash}} is not consistent with {self._hash} for type {self.type_}")""", - ]) - stmts.extend([ - f"{obj} = {obj_class}.__new__({obj_class})", - f"{ref_resolver}.reference({obj})", - ]) + ] + ) + stmts.extend( + [ + f"{obj} = {obj_class}.__new__({obj_class})", + f"{ref_resolver}.reference({obj})", + ] + ) if not self._has_slots: stmts.append(f"{obj_dict} = {obj}.__dict__") diff --git a/python/pyfory/tests/benchmark.py b/python/pyfory/tests/benchmark.py index ab6abe1b70..75c883296e 100644 --- a/python/pyfory/tests/benchmark.py +++ b/python/pyfory/tests/benchmark.py @@ -33,13 +33,9 @@ def test_encode(): assert foo == encoder.from_row(row) t1 = timeit.timeit(lambda: encoder.to_row(foo), number=iter_nums) - print( - "encoder take {0} for {1} times, avg: {2}".format(t1, iter_nums, t1 / iter_nums) - ) + print("encoder take {0} for {1} times, avg: {2}".format(t1, iter_nums, t1 / iter_nums)) t2 = timeit.timeit(lambda: pickle.dumps(foo), number=iter_nums) - print( - "pickle take {0} for {1} times, avg: {2}".format(t2, iter_nums, t2 / iter_nums) - ) + print("pickle take {0} for {1} times, avg: {2}".format(t2, iter_nums, t2 / iter_nums)) @pytest.mark.skip(reason="take too long") @@ -51,18 +47,10 @@ def test_decode(): row = encoder.to_row(foo) assert foo == encoder.from_row(row) t1 = timeit.timeit(lambda: encoder.from_row(row), number=iter_nums) - print( - "encoder take {0} for {1} times, avg: {2}, size {3}".format( - t1, iter_nums, t1 / iter_nums, row.size_bytes() - ) - ) + print("encoder take {0} for {1} times, avg: {2}, size {3}".format(t1, iter_nums, t1 / iter_nums, row.size_bytes())) pickled_data = pickle.dumps(foo) t2 = timeit.timeit(lambda: pickle.loads(pickled_data), number=iter_nums) - print( - "pickle take {0} for {1} times, avg: {2}, size {3}".format( - t2, iter_nums, t2 / iter_nums, len(pickled_data) - ) - ) + print("pickle take {0} for {1} times, avg: {2}, size {3}".format(t2, iter_nums, t2 / iter_nums, len(pickled_data))) if __name__ == "__main__": diff --git a/python/pyfory/tests/record.py b/python/pyfory/tests/record.py index 2f56a9ad81..31ebd66a81 100644 --- a/python/pyfory/tests/record.py +++ b/python/pyfory/tests/record.py @@ -117,9 +117,7 @@ def foo_schema(): ("f4", pa.map_(pa.string(), pa.int32())), ("f5", pa.list_(pa.int32())), ("f6", pa.int32()), - pa.field( - "f7", bar_struct, metadata={"cls": fory.get_qualified_classname(Bar)} - ), + pa.field("f7", bar_struct, metadata={"cls": fory.get_qualified_classname(Bar)}), ], metadata={"cls": fory.get_qualified_classname(Foo)}, ) diff --git a/python/pyfory/tests/test_buffer.py b/python/pyfory/tests/test_buffer.py index 3ba9c388ed..cefd6abf5a 100644 --- a/python/pyfory/tests/test_buffer.py +++ b/python/pyfory/tests/test_buffer.py @@ -217,10 +217,7 @@ def check_varuint64(buf: Buffer, value: int, bytes_written: int): assert buf.writer_index == buf.reader_index assert value == varint # test slow read branch in `read_varint64` - assert ( - buf.slice(reader_index, buf.reader_index - reader_index).read_varuint64() - == value - ) + assert buf.slice(reader_index, buf.reader_index - reader_index).read_varuint64() == value def test_write_buffer(): diff --git a/python/pyfory/tests/test_codegen.py b/python/pyfory/tests/test_codegen.py index 3b2243b29a..b73d2465e2 100644 --- a/python/pyfory/tests/test_codegen.py +++ b/python/pyfory/tests/test_codegen.py @@ -43,8 +43,6 @@ def _debug_compiled(x): def test_compile_function(): - code, func = codegen.compile_function( - "test_compile_function", ["x"], ["print(1)", "print(2)", "return x"], {} - ) + code, func = codegen.compile_function("test_compile_function", ["x"], ["print(1)", "print(2)", "return x"], {}) print(code) assert func(100) == 100 diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 3624781612..5c20b56d9b 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -50,11 +50,10 @@ class ReducedDataClass: class TestMetaShareMode: - def setup_method(self): """Setup method to register dataclasses for each test.""" pass - + def test_meta_share_enabled(self): """Test that meta share mode can be enabled.""" fory = Fory(language=Language.XLANG, compatbile=True) @@ -70,13 +69,13 @@ def test_meta_share_disabled(self): def test_simple_dataclass_serialization(self): """Test serialization of simple dataclass with meta share.""" fory = Fory(language=Language.XLANG, compatbile=True) - + # Register the dataclass fory.register_type(SimpleDataClass) - + obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory.serialize(obj) - + # Deserialize deserialized = fory.deserialize(buffer) assert deserialized.name == obj.name @@ -86,27 +85,27 @@ def test_simple_dataclass_serialization(self): def test_multiple_objects_same_type(self): """Test that multiple objects of same type reuse type definition.""" fory = Fory(language=Language.XLANG, compatbile=True) - + # Register the dataclass fory.register_type(SimpleDataClass) - + obj1 = SimpleDataClass(name="test1", age=25, active=True) obj2 = SimpleDataClass(name="test2", age=30, active=False) - + # Serialize both objects buffer1 = fory.serialize(obj1) buffer2 = fory.serialize(obj2) - + # Create a new fory instance with the same meta context for deserialization fory2 = Fory(language=Language.XLANG, compatbile=True) fory2.register_type(SimpleDataClass) # Copy the meta context from the first fory instance fory2.serialization_context.meta_context = fory.serialization_context.meta_context - + # Deserialize both deserialized1 = fory2.deserialize(buffer1) deserialized2 = fory2.deserialize(buffer2) - + assert deserialized1.name == obj1.name assert deserialized2.name == obj2.name assert deserialized1.age == obj1.age @@ -115,29 +114,29 @@ def test_multiple_objects_same_type(self): def test_simple_nested_dataclass_serialization(self): """Test serialization of simple nested dataclass with meta share.""" fory = Fory(language=Language.XLANG, compatbile=True) - + # Register the dataclass fory.register_type(SimpleNestedDataClass) - + obj = SimpleNestedDataClass(value=42, name="test") - + buffer = fory.serialize(obj) deserialized = fory.deserialize(buffer) - + assert deserialized.value == obj.value assert deserialized.name == obj.name def test_serialization_without_meta_share(self): """Test that serialization works without meta share mode.""" fory = Fory(language=Language.XLANG, compatbile=False) - + # Register the dataclass fory.register_type(SimpleDataClass) - + obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory.serialize(obj) deserialized = fory.deserialize(buffer) - + assert deserialized.name == obj.name assert deserialized.age == obj.age assert deserialized.active == obj.active @@ -146,22 +145,21 @@ def test_schema_evolution_more_fields(self): # Serialize with original schema fory1 = Fory(language=Language.XLANG, compatbile=True) fory1.register_type(SimpleDataClass) - + obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory1.serialize(obj) - + # Deserialize with extended schema (more fields) fory2 = Fory(language=Language.XLANG, compatbile=True) fory2.register_type(ExtendedDataClass) deserialized = fory2.deserialize(buffer) - + # Current behavior: deserialized object is of the new registered type assert isinstance(deserialized, ExtendedDataClass) assert deserialized.name == obj.name assert deserialized.age == obj.age assert deserialized.active == obj.active - assert not hasattr(deserialized, 'email') - + assert not hasattr(deserialized, "email") def test_schema_evolution_fewer_fields(self): # Serialize with original schema @@ -169,15 +167,14 @@ def test_schema_evolution_fewer_fields(self): fory1.register_type(SimpleDataClass) obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory1.serialize(obj) - + # Deserialize with reduced schema (fewer fields) fory2 = Fory(language=Language.XLANG, compatbile=True) - fory2.register_type(ReducedDataClass) + fory2.register_type(ReducedDataClass) deserialized = fory2.deserialize(buffer) - + assert isinstance(deserialized, ReducedDataClass) assert deserialized.name == obj.name assert deserialized.age == obj.age # The missing field should not be present - assert not hasattr(deserialized, 'active') - + assert not hasattr(deserialized, "active") diff --git a/python/pyfory/tests/test_metastring.py b/python/pyfory/tests/test_metastring.py index f21e09585c..d470de2994 100644 --- a/python/pyfory/tests/test_metastring.py +++ b/python/pyfory/tests/test_metastring.py @@ -196,7 +196,5 @@ def test_non_ascii_encoding_and_non_utf8(): non_ascii_string = "こんにちは" # Non-ASCII string - with pytest.raises( - ValueError, match="Unsupported character for LOWER_SPECIAL encoding: こ" - ): + with pytest.raises(ValueError, match="Unsupported character for LOWER_SPECIAL encoding: こ"): encoder.encode_with_encoding(non_ascii_string, Encoding.LOWER_SPECIAL) diff --git a/python/pyfory/type.py b/python/pyfory/type.py index 7018f504fb..02d0c8a23c 100644 --- a/python/pyfory/type.py +++ b/python/pyfory/type.py @@ -129,6 +129,7 @@ class TypeId: Fory type for cross-language serialization. See `org.apache.fory.types.Type` """ + UNKNOWN = -1 # null value NA = 0 @@ -356,7 +357,7 @@ def is_map_type(type_): return issubclass(type_, typing.Dict) except TypeError: return False - + _polymorphic_type_ids = { TypeId.STRUCT, @@ -401,30 +402,18 @@ def visit_other(self, field_name, type_, types_path=None): def infer_field(field_name, type_, visitor: TypeVisitor, types_path=None): types_path = list(types_path or []) types_path.append(type_) - origin = ( - typing.get_origin(type_) - if hasattr(typing, "get_origin") - else getattr(type_, "__origin__", type_) - ) + origin = typing.get_origin(type_) if hasattr(typing, "get_origin") else getattr(type_, "__origin__", type_) origin = origin or type_ - args = ( - typing.get_args(type_) - if hasattr(typing, "get_args") - else getattr(type_, "__args__", ()) - ) + args = typing.get_args(type_) if hasattr(typing, "get_args") else getattr(type_, "__args__", ()) if args: if origin is list or origin == typing.List: elem_type = args[0] return visitor.visit_list(field_name, elem_type, types_path=types_path) elif origin is dict or origin == typing.Dict: key_type, value_type = args - return visitor.visit_dict( - field_name, key_type, value_type, types_path=types_path - ) + return visitor.visit_dict(field_name, key_type, value_type, types_path=types_path) else: - raise TypeError( - f"Collection types should be {list, dict} instead of {type_}" - ) + raise TypeError(f"Collection types should be {list, dict} instead of {type_}") else: if is_function(origin) or not hasattr(origin, "__annotations__"): return visitor.visit_other(field_name, type_, types_path=types_path) From d16417effcce45bc952b1abdf7d5ed7f7ee4e22f Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 22:06:48 +0800 Subject: [PATCH 13/20] fix imports --- python/pyfory/serializer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 50762691af..52ddd92820 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -24,7 +24,7 @@ import pickle import types import typing -from typing_extensions import List +from typing import List import warnings from weakref import WeakValueDictionary From f5d1e4e79b9dcac08c7967c56b5310bcf2493705 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 22:40:27 +0800 Subject: [PATCH 14/20] support nested struct --- python/pyfory/_registry.py | 32 +++-- python/pyfory/_struct.py | 3 +- python/pyfory/meta/typedef.py | 7 +- python/pyfory/tests/test_meta_share.py | 117 +++++++++++++++++++ python/pyfory/tests/test_typedef_encoding.py | 2 +- python/pyfory/type.py | 9 ++ 6 files changed, 154 insertions(+), 16 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 70b6861c00..93be843b0a 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -76,6 +76,7 @@ Float32Type, Float64Type, load_class, + is_struct_type, ) from pyfory._fory import ( DYNAMIC_TYPE_ID, @@ -367,7 +368,7 @@ def _register_xtype( serializer = FunctionSerializer(self.fory, cls) type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8) + TypeId.EXT) else: - serializer = DataClassSerializer(self.fory, cls, xlang=True) + serializer = None type_id = TypeId.NAMED_STRUCT if type_id is None else ((type_id << 8) + TypeId.STRUCT) elif not internal: type_id = TypeId.NAMED_EXT if type_id is None else ((type_id << 8) + TypeId.EXT) @@ -434,9 +435,6 @@ def __register_type( if type_id not in self._type_id_to_typeinfo or not internal: self._type_id_to_typeinfo[type_id] = typeinfo self._types_info[cls] = typeinfo - - if self.meta_share and isinstance(serializer, DataClassSerializer): - self._set_struct_typeinfo(typeinfo) return typeinfo def _next_type_id(self): @@ -474,7 +472,7 @@ def get_typeinfo(self, cls, create=True): type_info = self._types_info.get(cls) if type_info is not None: if type_info.serializer is None: - type_info.serializer = self._create_serializer(cls) + self._set_typeinfo(type_info) return type_info elif not create: return None @@ -505,6 +503,20 @@ def get_typeinfo(self, cls, create=True): serializer=serializer, ) + def _set_typeinfo(self, typeinfo): + type_id = typeinfo.type_id & 0xff + if is_struct_type(type_id): + if self.meta_share: + type_def = encode_typedef(self, typeinfo.cls) + typeinfo.serializer = type_def.create_serializer(self) + typeinfo.type_def = type_def + else: + typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=not self.fory.is_py) + else: + typeinfo.serializer = self._create_serializer(typeinfo.cls) + + return typeinfo + def _create_serializer(self, cls): for clz in cls.__mro__: type_info = self._types_info.get(clz) @@ -516,7 +528,8 @@ def _create_serializer(self, cls): # Use FunctionSerializer for function types (including lambdas) serializer = FunctionSerializer(self.fory, cls) elif dataclasses.is_dataclass(cls): - serializer = DataClassSerializer(self.fory, cls, xlang=not self.fory.is_py) + # lazy create serializer to handle nested struct fields. + serializer = None elif issubclass(cls, enum.Enum): serializer = EnumSerializer(self.fory, cls) elif (hasattr(cls, "__reduce__") and cls.__reduce__ is not object.__reduce__) or ( @@ -550,12 +563,6 @@ def _create_serializer(self, cls): serializer = PickleSerializer(self.fory, cls) return serializer - def _set_struct_typeinfo(self, typeinfo): - assert self.meta_share, "Meta share must be enabled" - type_def = encode_typedef(self, typeinfo.cls) - typeinfo.serializer = type_def.create_serializer(self) - typeinfo.type_def = type_def - def is_registered_by_name(self, cls): typeinfo = self._types_info.get(cls) if typeinfo is None: @@ -649,6 +656,7 @@ def get_meta_compressor(self): def write_shared_type_meta(self, buffer, typeinfo): """Write shared type meta information.""" + assert typeinfo.type_def is not None, "Type info must be set when meta share is enabled" meta_context = self.fory.serialization_context.meta_context meta_context.write_typeinfo(buffer, typeinfo) diff --git a/python/pyfory/_struct.py b/python/pyfory/_struct.py index 4e251c1c8b..ef1cdc68a4 100644 --- a/python/pyfory/_struct.py +++ b/python/pyfory/_struct.py @@ -243,7 +243,8 @@ def visit_dict(self, field_name, key_type, value_type, types_path=None): return TypeId.MAP, key_ids, value_ids def visit_customized(self, field_name, type_, types_path=None): - return None, None + typeinfo = self.fory.type_resolver.get_typeinfo(type_) + return [typeinfo.type_id] def visit_other(self, field_name, type_, types_path=None): from pyfory.serializer import PickleSerializer # Local import diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index aab925cc0a..0ad2e7df80 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -19,7 +19,7 @@ import typing from pyfory.type import TypeId from pyfory._util import Buffer -from pyfory.type import TypeId, infer_field, is_primitive_type, is_polymorphic_type +from pyfory.type import TypeId, infer_field, is_primitive_type, is_polymorphic_type, is_struct_type from pyfory.meta.metastring import Encoding @@ -237,12 +237,15 @@ def build_field_infos(type_resolver, cls): def build_field_type(type_resolver, field_name: str, type_hint, visitor): """Build field type from type hint.""" type_ids = infer_field(field_name, type_hint, visitor) + print(f"=??????????=> {field_name, type_hint, visitor,type_ids}") return build_field_type_from_type_ids(type_resolver, field_name, type_ids, visitor) def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids, visitor): tracking_ref = type_resolver.fory.ref_tracking type_id = type_ids[0] + if type_id is not None and type_id >= 0: + type_id = type_id & 0xff morphic = not is_polymorphic_type(type_id) if type_id in [TypeId.SET, TypeId.LIST]: elem_type = build_field_type_from_type_ids(type_resolver, field_name, type_ids[1], visitor) @@ -254,7 +257,7 @@ def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids, vis elif type_id in [TypeId.UNKNOWN, TypeId.EXT, TypeId.STRUCT, TypeId.NAMED_STRUCT, TypeId.COMPATIBLE_STRUCT, TypeId.NAMED_COMPATIBLE_STRUCT]: return DynamicFieldType(type_id, False, True, tracking_ref) else: - assert is_primitive_type(type_id) or type_id in [TypeId.STRING, TypeId.ENUM, TypeId.NAMED_ENUM], ( + assert is_primitive_type(type_id) or type_id in [TypeId.STRING, TypeId.ENUM, TypeId.NAMED_ENUM] or is_struct_type(type_id), ( f"Unknown type: {type_id} for field: {field_name}" ) return FieldType(type_id, morphic, True, tracking_ref) diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 5c20b56d9b..f212f3e0da 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -16,9 +16,11 @@ # under the License. import dataclasses +from typing import List, Dict from pyfory import Fory, Language from pyfory.buffer import Buffer from pyfory.type import TypeId +import pyfory @dataclasses.dataclass @@ -49,6 +51,46 @@ class ReducedDataClass: # Missing 'active' field +@dataclasses.dataclass +class NestedStructClass: + name: str + nested: SimpleNestedDataClass + + +@dataclasses.dataclass +class NestedStructClassInconsistent: + name: str + nested: ExtendedDataClass # Different nested type + + +@dataclasses.dataclass +class ListFieldsClass: + name: str + int_list: List[pyfory.Int32Type] + str_list: List[str] + + +@dataclasses.dataclass +class ListFieldsClassInconsistent: + name: str + int_list: List[str] # Changed from Int32Type to str + str_list: List[pyfory.Int32Type] # Changed from str to Int32Type + + +@dataclasses.dataclass +class DictFieldsClass: + name: str + int_dict: Dict[str, pyfory.Int32Type] + str_dict: Dict[str, str] + + +@dataclasses.dataclass +class DictFieldsClassInconsistent: + name: str + int_dict: Dict[str, str] # Changed from Int32Type to str + str_dict: Dict[str, pyfory.Int32Type] # Changed from str to Int32Type + + class TestMetaShareMode: def setup_method(self): """Setup method to register dataclasses for each test.""" @@ -178,3 +220,78 @@ def test_schema_evolution_fewer_fields(self): assert deserialized.age == obj.age # The missing field should not be present assert not hasattr(deserialized, "active") + + def test_schema_inconsistent_nested_struct(self): + """Test schema inconsistency with nested struct types.""" + # Serialize with original schema + fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1.register_type(NestedStructClass) + fory1.register_type(SimpleNestedDataClass) + + obj = NestedStructClass( + name="test", + nested=SimpleNestedDataClass(value=42, name="nested_test") + ) + buffer = fory1.serialize(obj) + + # Deserialize with inconsistent schema (different nested type) + fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2.register_type(NestedStructClassInconsistent) + fory2.register_type(ExtendedDataClass) + + # This should handle the schema inconsistency gracefully + deserialized = fory2.deserialize(buffer) + assert isinstance(deserialized, NestedStructClassInconsistent) + assert deserialized.name == obj.name + # The nested field type has changed, so we expect different behavior + assert hasattr(deserialized, "nested") + + def test_schema_inconsistent_list_fields(self): + """Test schema inconsistency with List field types.""" + # Serialize with original schema + fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1.register_type(ListFieldsClass) + + obj = ListFieldsClass( + name="test", + int_list=[1, 2, 3], + str_list=["a", "b", "c"] + ) + buffer = fory1.serialize(obj) + + # Deserialize with inconsistent schema (swapped List types) + fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2.register_type(ListFieldsClassInconsistent) + + # This should handle the schema inconsistency gracefully + deserialized = fory2.deserialize(buffer) + assert isinstance(deserialized, ListFieldsClassInconsistent) + assert deserialized.name == obj.name + # The field types have been swapped, so we expect different behavior + assert hasattr(deserialized, "int_list") + assert hasattr(deserialized, "str_list") + + def test_schema_inconsistent_dict_fields(self): + """Test schema inconsistency with Dict field types.""" + # Serialize with original schema + fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1.register_type(DictFieldsClass) + + obj = DictFieldsClass( + name="test", + int_dict={"key1": 1, "key2": 2}, + str_dict={"key1": "value1", "key2": "value2"} + ) + buffer = fory1.serialize(obj) + + # Deserialize with inconsistent schema (swapped Dict value types) + fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2.register_type(DictFieldsClassInconsistent) + + # This should handle the schema inconsistency gracefully + deserialized = fory2.deserialize(buffer) + assert isinstance(deserialized, DictFieldsClassInconsistent) + assert deserialized.name == obj.name + # The field value types have been swapped, so we expect different behavior + assert hasattr(deserialized, "int_dict") + assert hasattr(deserialized, "str_dict") diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index 8b98b7bd85..9ecca4c42d 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -75,7 +75,7 @@ def test_typedef_creation(): FieldInfo("age", FieldType(TypeId.INT32, True, True, False), "TestTypeDef"), ] - typedef = TypeDef("TestTypeDef", TypeId.STRUCT, fields, b"encoded_data", False) + typedef = TypeDef("", "TestTypeDef", TypeId.STRUCT, fields, b"encoded_data", False) assert typedef.namespace == "" assert typedef.typename == "TestTypeDef" diff --git a/python/pyfory/type.py b/python/pyfory/type.py index 02d0c8a23c..4df2806623 100644 --- a/python/pyfory/type.py +++ b/python/pyfory/type.py @@ -369,11 +369,20 @@ def is_map_type(type_): TypeId.UNKNOWN, } +_struct_type_ids = { + TypeId.STRUCT, + TypeId.COMPATIBLE_STRUCT, + TypeId.NAMED_STRUCT, + TypeId.NAMED_COMPATIBLE_STRUCT, +} def is_polymorphic_type(type_id: int) -> bool: return type_id in _polymorphic_type_ids +def is_struct_type(type_id: int) -> bool: + return type_id in _struct_type_ids + def is_subclass(from_type, to_type): try: return issubclass(from_type, to_type) From 658d3e654252f7f18c9e0e4b41234da3bc591e35 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 23:23:32 +0800 Subject: [PATCH 15/20] fix tests --- python/pyfory/_registry.py | 2 ++ python/pyfory/meta/typedef.py | 5 ++++- python/pyfory/meta/typedef_decoder.py | 2 +- python/pyfory/meta/typedef_encoder.py | 3 ++- python/pyfory/tests/test_typedef_encoding.py | 2 +- 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 93be843b0a..51f7791e67 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -480,6 +480,8 @@ def get_typeinfo(self, cls, create=True): raise TypeUnregisteredError(f"{cls} not registered") logger.info("Type %s not registered", cls) serializer = self._create_serializer(cls) + if serializer is None: + serializer = DataClassSerializer(self.fory, cls, xlang=not self.fory.is_py) type_id = None if self.language == Language.PYTHON: if isinstance(serializer, EnumSerializer): diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 0ad2e7df80..34318e4962 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -134,7 +134,10 @@ def xread_with_type(cls, buffer: Buffer, resolver, xtype_id: int, is_nullable: b elif xtype_id == TypeId.UNKNOWN: return DynamicFieldType(xtype_id, False, is_nullable, is_tracking_ref) else: - return FieldType(xtype_id, False, is_nullable, is_tracking_ref) + # For primitive types, determine if they are monomorphic based on the type + from pyfory.type import is_polymorphic_type + is_monomorphic = not is_polymorphic_type(xtype_id) + return FieldType(xtype_id, is_monomorphic, is_nullable, is_tracking_ref) def create_serializer(self, resolver): if self.type_id in [TypeId.EXT, TypeId.STRUCT, TypeId.NAMED_STRUCT, TypeId.COMPATIBLE_STRUCT, TypeId.NAMED_COMPATIBLE_STRUCT, TypeId.UNKNOWN]: diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index c2ebd6f506..147535d329 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -196,7 +196,7 @@ def read_field_info(buffer: Buffer, resolver, defined_class: str) -> FieldInfo: field_name_size += 1 encoding = FIELD_NAME_ENCODINGS[field_name_encoding] is_nullable = (header & 0b10) != 0 - is_tracking_ref = header & 0b1 + is_tracking_ref = (header & 0b1) != 0 # Read field type info (without flags since they're in the header) xtype_id = buffer.read_varuint32() diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index b0680fca6c..1e9c2907e6 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -80,7 +80,8 @@ def encode_typedef(type_resolver, cls): namespace, typename = type_resolver.get_registered_name(cls) write_namespace(buffer, namespace) write_typename(buffer, typename) - type_id = TypeId.NAMED_COMPATIBLE_STRUCT + # Use the actual type_id from the resolver, not a generic one + type_id = type_resolver.get_registered_id(cls) else: assert type_resolver.is_registered_by_id(cls), "Class must be registered by name or id" type_id = type_resolver.get_registered_id(cls) diff --git a/python/pyfory/tests/test_typedef_encoding.py b/python/pyfory/tests/test_typedef_encoding.py index 9ecca4c42d..b53fa0986c 100644 --- a/python/pyfory/tests/test_typedef_encoding.py +++ b/python/pyfory/tests/test_typedef_encoding.py @@ -75,7 +75,7 @@ def test_typedef_creation(): FieldInfo("age", FieldType(TypeId.INT32, True, True, False), "TestTypeDef"), ] - typedef = TypeDef("", "TestTypeDef", TypeId.STRUCT, fields, b"encoded_data", False) + typedef = TypeDef("", "TestTypeDef", None, TypeId.STRUCT, fields, b"encoded_data", False) assert typedef.namespace == "" assert typedef.typename == "TestTypeDef" From aa7ef5a89b451c17a3cc4e56e87dcadfd7a7074c Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Tue, 9 Sep 2025 23:52:23 +0800 Subject: [PATCH 16/20] fix test error --- python/pyfory/_registry.py | 10 ++++++---- python/pyfory/_serialization.pyx | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 51f7791e67..95a73c9b42 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -86,6 +86,7 @@ from pyfory.meta.typedef import TypeDef from pyfory.meta.typedef_decoder import decode_typedef, skip_typedef from pyfory.meta.typedef_encoder import encode_typedef +from pyfory.type import TypeId try: import numpy as np @@ -480,8 +481,6 @@ def get_typeinfo(self, cls, create=True): raise TypeUnregisteredError(f"{cls} not registered") logger.info("Type %s not registered", cls) serializer = self._create_serializer(cls) - if serializer is None: - serializer = DataClassSerializer(self.fory, cls, xlang=not self.fory.is_py) type_id = None if self.language == Language.PYTHON: if isinstance(serializer, EnumSerializer): @@ -530,8 +529,11 @@ def _create_serializer(self, cls): # Use FunctionSerializer for function types (including lambdas) serializer = FunctionSerializer(self.fory, cls) elif dataclasses.is_dataclass(cls): - # lazy create serializer to handle nested struct fields. - serializer = None + if not self.meta_share: + serializer = DataClassSerializer(self.fory, cls, xlang=not self.fory.is_py) + else: + # lazy create serializer to handle nested struct fields. + serializer = None elif issubclass(cls, enum.Enum): serializer = EnumSerializer(self.fory, cls) elif (hasattr(cls, "__reduce__") and cls.__reduce__ is not object.__reduce__) or ( diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 65c674c6ed..596ea571ad 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -525,7 +525,7 @@ cdef class TypeResolver: if type_info.serializer is not None: return type_info else: - type_info.serializer = self._resolver._create_serializer(cls) + type_info.serializer = self._resolver.get_typeinfo(cls).serializer return type_info elif not create: return None From 67e964ddbeb07460ff51896317e9ea3e25c50ae3 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 10 Sep 2025 00:05:31 +0800 Subject: [PATCH 17/20] skip cython format check --- ci/format.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/format.sh b/ci/format.sh index b80b26a9d8..fed6ab9df7 100755 --- a/ci/format.sh +++ b/ci/format.sh @@ -125,11 +125,11 @@ format_files() { format_all_scripts() { echo "$(date)" "Ruff format...." - git ls-files -- '*.py' '*.pyx' '*.pxd' '*.pxi' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \ + git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs -P 10 \ ruff format echo "$(date)" "Ruff check...." - git ls-files -- '*.py' '*.pyx' '*.pxd' '*.pxi' "${GIT_LS_EXCLUDES[@]}" | xargs \ + git ls-files -- '*.py' "${GIT_LS_EXCLUDES[@]}" | xargs \ ruff check --fix } @@ -193,10 +193,10 @@ format_changed() { # exist on both branches. MERGEBASE="$(git merge-base origin/main HEAD)" - if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.py' '*.pyx' '*.pxd' '*.pxi' &>/dev/null; then - git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \ + if ! git diff --diff-filter=ACRM --quiet --exit-code "$MERGEBASE" -- '*.py' &>/dev/null; then + git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \ ruff format - git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' '*.pyx' '*.pxd' '*.pxi' | xargs -P 5 \ + git diff --name-only --diff-filter=ACRM "$MERGEBASE" -- '*.py' | xargs -P 5 \ ruff check --fix fi From e186f6c3201b3a911605b2d958279e04478a4f8a Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 10 Sep 2025 00:27:10 +0800 Subject: [PATCH 18/20] lint code --- python/pyfory/_registry.py | 4 ++-- python/pyfory/meta/typedef.py | 5 +++-- python/pyfory/tests/test_meta_share.py | 17 +++-------------- python/pyfory/type.py | 2 ++ 4 files changed, 10 insertions(+), 18 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index 95a73c9b42..dc53bbdf57 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -505,7 +505,7 @@ def get_typeinfo(self, cls, create=True): ) def _set_typeinfo(self, typeinfo): - type_id = typeinfo.type_id & 0xff + type_id = typeinfo.type_id & 0xFF if is_struct_type(type_id): if self.meta_share: type_def = encode_typedef(self, typeinfo.cls) @@ -515,7 +515,7 @@ def _set_typeinfo(self, typeinfo): typeinfo.serializer = DataClassSerializer(self.fory, typeinfo.cls, xlang=not self.fory.is_py) else: typeinfo.serializer = self._create_serializer(typeinfo.cls) - + return typeinfo def _create_serializer(self, cls): diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 34318e4962..6ea3cc2035 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -136,6 +136,7 @@ def xread_with_type(cls, buffer: Buffer, resolver, xtype_id: int, is_nullable: b else: # For primitive types, determine if they are monomorphic based on the type from pyfory.type import is_polymorphic_type + is_monomorphic = not is_polymorphic_type(xtype_id) return FieldType(xtype_id, is_monomorphic, is_nullable, is_tracking_ref) @@ -240,7 +241,7 @@ def build_field_infos(type_resolver, cls): def build_field_type(type_resolver, field_name: str, type_hint, visitor): """Build field type from type hint.""" type_ids = infer_field(field_name, type_hint, visitor) - print(f"=??????????=> {field_name, type_hint, visitor,type_ids}") + print(f"=??????????=> {field_name, type_hint, visitor, type_ids}") return build_field_type_from_type_ids(type_resolver, field_name, type_ids, visitor) @@ -248,7 +249,7 @@ def build_field_type_from_type_ids(type_resolver, field_name: str, type_ids, vis tracking_ref = type_resolver.fory.ref_tracking type_id = type_ids[0] if type_id is not None and type_id >= 0: - type_id = type_id & 0xff + type_id = type_id & 0xFF morphic = not is_polymorphic_type(type_id) if type_id in [TypeId.SET, TypeId.LIST]: elem_type = build_field_type_from_type_ids(type_resolver, field_name, type_ids[1], visitor) diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index f212f3e0da..350f2c9115 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -228,10 +228,7 @@ def test_schema_inconsistent_nested_struct(self): fory1.register_type(NestedStructClass) fory1.register_type(SimpleNestedDataClass) - obj = NestedStructClass( - name="test", - nested=SimpleNestedDataClass(value=42, name="nested_test") - ) + obj = NestedStructClass(name="test", nested=SimpleNestedDataClass(value=42, name="nested_test")) buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (different nested type) @@ -252,11 +249,7 @@ def test_schema_inconsistent_list_fields(self): fory1 = Fory(language=Language.XLANG, compatbile=True) fory1.register_type(ListFieldsClass) - obj = ListFieldsClass( - name="test", - int_list=[1, 2, 3], - str_list=["a", "b", "c"] - ) + obj = ListFieldsClass(name="test", int_list=[1, 2, 3], str_list=["a", "b", "c"]) buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (swapped List types) @@ -277,11 +270,7 @@ def test_schema_inconsistent_dict_fields(self): fory1 = Fory(language=Language.XLANG, compatbile=True) fory1.register_type(DictFieldsClass) - obj = DictFieldsClass( - name="test", - int_dict={"key1": 1, "key2": 2}, - str_dict={"key1": "value1", "key2": "value2"} - ) + obj = DictFieldsClass(name="test", int_dict={"key1": 1, "key2": 2}, str_dict={"key1": "value1", "key2": "value2"}) buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (swapped Dict value types) diff --git a/python/pyfory/type.py b/python/pyfory/type.py index 4df2806623..1ff93e509f 100644 --- a/python/pyfory/type.py +++ b/python/pyfory/type.py @@ -376,6 +376,7 @@ def is_map_type(type_): TypeId.NAMED_COMPATIBLE_STRUCT, } + def is_polymorphic_type(type_id: int) -> bool: return type_id in _polymorphic_type_ids @@ -383,6 +384,7 @@ def is_polymorphic_type(type_id: int) -> bool: def is_struct_type(type_id: int) -> bool: return type_id in _struct_type_ids + def is_subclass(from_type, to_type): try: return issubclass(from_type, to_type) From ea761f400eaa49138a4660b1580fbb4982c96b94 Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 10 Sep 2025 00:33:22 +0800 Subject: [PATCH 19/20] fix lint error --- python/pyfory/_registry.py | 1 - python/pyfory/meta/typedef.py | 2 +- python/pyfory/meta/typedef_decoder.py | 3 --- python/pyfory/meta/typedef_encoder.py | 1 - python/pyfory/tests/test_meta_share.py | 2 -- 5 files changed, 1 insertion(+), 8 deletions(-) diff --git a/python/pyfory/_registry.py b/python/pyfory/_registry.py index dc53bbdf57..8b421f036e 100644 --- a/python/pyfory/_registry.py +++ b/python/pyfory/_registry.py @@ -86,7 +86,6 @@ from pyfory.meta.typedef import TypeDef from pyfory.meta.typedef_decoder import decode_typedef, skip_typedef from pyfory.meta.typedef_encoder import encode_typedef -from pyfory.type import TypeId try: import numpy as np diff --git a/python/pyfory/meta/typedef.py b/python/pyfory/meta/typedef.py index 6ea3cc2035..1f054c1564 100644 --- a/python/pyfory/meta/typedef.py +++ b/python/pyfory/meta/typedef.py @@ -19,7 +19,7 @@ import typing from pyfory.type import TypeId from pyfory._util import Buffer -from pyfory.type import TypeId, infer_field, is_primitive_type, is_polymorphic_type, is_struct_type +from pyfory.type import infer_field, is_primitive_type, is_polymorphic_type, is_struct_type from pyfory.meta.metastring import Encoding diff --git a/python/pyfory/meta/typedef_decoder.py b/python/pyfory/meta/typedef_decoder.py index 147535d329..2cf5633c7e 100644 --- a/python/pyfory/meta/typedef_decoder.py +++ b/python/pyfory/meta/typedef_decoder.py @@ -25,15 +25,12 @@ from pyfory._util import Buffer from pyfory.meta.typedef import TypeDef, FieldInfo, FieldType from pyfory.meta.typedef import ( - FieldInfo, - TypeDef, SMALL_NUM_FIELDS_THRESHOLD, REGISTER_BY_NAME_FLAG, FIELD_NAME_SIZE_THRESHOLD, COMPRESS_META_FLAG, HAS_FIELDS_META_FLAG, META_SIZE_MASKS, - NUM_HASH_BITS, FIELD_NAME_ENCODINGS, ) from pyfory.type import TypeId, record_class_factory diff --git a/python/pyfory/meta/typedef_encoder.py b/python/pyfory/meta/typedef_encoder.py index 1e9c2907e6..7d8b5fdb3e 100644 --- a/python/pyfory/meta/typedef_encoder.py +++ b/python/pyfory/meta/typedef_encoder.py @@ -33,7 +33,6 @@ from pyfory.meta.metastring import MetaStringEncoder from pyfory._util import Buffer -from pyfory.type import TypeId from pyfory.lib.mmh3 import hash_buffer diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 350f2c9115..63febfc6df 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -18,8 +18,6 @@ import dataclasses from typing import List, Dict from pyfory import Fory, Language -from pyfory.buffer import Buffer -from pyfory.type import TypeId import pyfory From 50f5b39f703b6b65194862427440020b6b8c2d0c Mon Sep 17 00:00:00 2001 From: chaokunyang Date: Wed, 10 Sep 2025 00:37:10 +0800 Subject: [PATCH 20/20] fix compatible name --- python/pyfory/_fory.py | 14 +++++------ python/pyfory/_serialization.pyx | 14 +++++------ python/pyfory/serializer.py | 4 +-- python/pyfory/tests/test_meta_share.py | 34 +++++++++++++------------- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/python/pyfory/_fory.py b/python/pyfory/_fory.py index d56a20f0fd..b05b37ffcf 100644 --- a/python/pyfory/_fory.py +++ b/python/pyfory/_fory.py @@ -98,7 +98,7 @@ class Fory: __slots__ = ( "language", "is_py", - "compatbile", + "compatible", "ref_tracking", "ref_resolver", "type_resolver", @@ -120,7 +120,7 @@ def __init__( language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, - compatbile: bool = False, + compatible: bool = False, ): """ :param require_type_registration: @@ -131,14 +131,14 @@ def __init__( Do not disable type registration if you can't ensure your environment are *indeed secure*. We are not responsible for security risks if you disable this option. - :param compatbile: - Whether to enable compatbile mode for cross-language serialization. + :param compatible: + Whether to enable compatible mode for cross-language serialization. When enabled, type forward/backward compatibility for struct fields will be enabled. """ self.language = language self.is_py = language == Language.PYTHON self.require_type_registration = _ENABLE_TYPE_REGISTRATION_FORCIBLY or require_type_registration - self.compatbile = compatbile + self.compatible = compatible self.ref_tracking = ref_tracking if self.ref_tracking: self.ref_resolver = MapRefResolver() @@ -148,11 +148,11 @@ def __init__( from pyfory._registry import TypeResolver self.metastring_resolver = MetaStringResolver() - self.type_resolver = TypeResolver(self, meta_share=compatbile) + self.type_resolver = TypeResolver(self, meta_share=compatible) self.type_resolver.initialize() from pyfory._serialization import SerializationContext - self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) + self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatible) self.buffer = Buffer.allocate(32) if not require_type_registration: warnings.warn( diff --git a/python/pyfory/_serialization.pyx b/python/pyfory/_serialization.pyx index 596ea571ad..458eb640f6 100644 --- a/python/pyfory/_serialization.pyx +++ b/python/pyfory/_serialization.pyx @@ -757,7 +757,7 @@ cdef class Fory: cdef readonly c_bool ref_tracking cdef readonly c_bool require_type_registration cdef readonly c_bool is_py - cdef readonly c_bool compatbile + cdef readonly c_bool compatible cdef readonly MapRefResolver ref_resolver cdef readonly TypeResolver type_resolver cdef readonly MetaStringResolver metastring_resolver @@ -776,7 +776,7 @@ cdef class Fory: language=Language.PYTHON, ref_tracking: bool = False, require_type_registration: bool = True, - compatbile: bool = False, + compatible: bool = False, ): """ :param require_type_registration: @@ -787,8 +787,8 @@ cdef class Fory: Do not disable type registration if you can't ensure your environment are *indeed secure*. We are not responsible for security risks if you disable this option. - :param compatbile: - Whether to enable compatbile mode for cross-language serialization. + :param compatible: + Whether to enable compatible mode for cross-language serialization. When enabled, type forward/backward compatibility for struct fields will be enabled. """ self.language = language @@ -796,13 +796,13 @@ cdef class Fory: self.require_type_registration = True else: self.require_type_registration = False - self.compatbile = compatbile + self.compatible = compatible self.ref_tracking = ref_tracking self.ref_resolver = MapRefResolver(ref_tracking) self.is_py = self.language == Language.PYTHON self.metastring_resolver = MetaStringResolver() - self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatbile) - self.type_resolver = TypeResolver(self, meta_share=compatbile) + self.serialization_context = SerializationContext(scoped_meta_share_enabled=compatible) + self.type_resolver = TypeResolver(self, meta_share=compatible) self.type_resolver.initialize() self.buffer = Buffer.allocate(32) if not require_type_registration: diff --git a/python/pyfory/serializer.py b/python/pyfory/serializer.py index 52ddd92820..c19bfc777b 100644 --- a/python/pyfory/serializer.py +++ b/python/pyfory/serializer.py @@ -448,7 +448,7 @@ def _gen_xwrite_method(self): stmts = [ f'"""xwrite method for {self.type_}"""', ] - if not self.fory.compatbile: + if not self.fory.compatible: # Compute hash at generation time since we're in xlang mode if self._hash == 0: self._hash = _get_hash(self.fory, self._field_names, self._type_hints) @@ -494,7 +494,7 @@ def _gen_xread_method(self): stmts = [ f'"""xread method for {self.type_}"""', ] - if not self.fory.compatbile: + if not self.fory.compatible: # Compute hash at generation time since we're in xlang mode if self._hash == 0: self._hash = _get_hash(self.fory, self._field_names, self._type_hints) diff --git a/python/pyfory/tests/test_meta_share.py b/python/pyfory/tests/test_meta_share.py index 63febfc6df..d405b24dc0 100644 --- a/python/pyfory/tests/test_meta_share.py +++ b/python/pyfory/tests/test_meta_share.py @@ -96,19 +96,19 @@ def setup_method(self): def test_meta_share_enabled(self): """Test that meta share mode can be enabled.""" - fory = Fory(language=Language.XLANG, compatbile=True) + fory = Fory(language=Language.XLANG, compatible=True) assert fory.serialization_context.scoped_meta_share_enabled assert fory.serialization_context.meta_context is not None def test_meta_share_disabled(self): """Test that meta share mode can be disabled.""" - fory = Fory(language=Language.XLANG, compatbile=False) + fory = Fory(language=Language.XLANG, compatible=False) assert not fory.serialization_context.scoped_meta_share_enabled assert fory.serialization_context.meta_context is None def test_simple_dataclass_serialization(self): """Test serialization of simple dataclass with meta share.""" - fory = Fory(language=Language.XLANG, compatbile=True) + fory = Fory(language=Language.XLANG, compatible=True) # Register the dataclass fory.register_type(SimpleDataClass) @@ -124,7 +124,7 @@ def test_simple_dataclass_serialization(self): def test_multiple_objects_same_type(self): """Test that multiple objects of same type reuse type definition.""" - fory = Fory(language=Language.XLANG, compatbile=True) + fory = Fory(language=Language.XLANG, compatible=True) # Register the dataclass fory.register_type(SimpleDataClass) @@ -137,7 +137,7 @@ def test_multiple_objects_same_type(self): buffer2 = fory.serialize(obj2) # Create a new fory instance with the same meta context for deserialization - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(SimpleDataClass) # Copy the meta context from the first fory instance fory2.serialization_context.meta_context = fory.serialization_context.meta_context @@ -153,7 +153,7 @@ def test_multiple_objects_same_type(self): def test_simple_nested_dataclass_serialization(self): """Test serialization of simple nested dataclass with meta share.""" - fory = Fory(language=Language.XLANG, compatbile=True) + fory = Fory(language=Language.XLANG, compatible=True) # Register the dataclass fory.register_type(SimpleNestedDataClass) @@ -168,7 +168,7 @@ def test_simple_nested_dataclass_serialization(self): def test_serialization_without_meta_share(self): """Test that serialization works without meta share mode.""" - fory = Fory(language=Language.XLANG, compatbile=False) + fory = Fory(language=Language.XLANG, compatible=False) # Register the dataclass fory.register_type(SimpleDataClass) @@ -183,14 +183,14 @@ def test_serialization_without_meta_share(self): def test_schema_evolution_more_fields(self): # Serialize with original schema - fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1 = Fory(language=Language.XLANG, compatible=True) fory1.register_type(SimpleDataClass) obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory1.serialize(obj) # Deserialize with extended schema (more fields) - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(ExtendedDataClass) deserialized = fory2.deserialize(buffer) @@ -203,13 +203,13 @@ def test_schema_evolution_more_fields(self): def test_schema_evolution_fewer_fields(self): # Serialize with original schema - fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1 = Fory(language=Language.XLANG, compatible=True) fory1.register_type(SimpleDataClass) obj = SimpleDataClass(name="test", age=25, active=True) buffer = fory1.serialize(obj) # Deserialize with reduced schema (fewer fields) - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(ReducedDataClass) deserialized = fory2.deserialize(buffer) @@ -222,7 +222,7 @@ def test_schema_evolution_fewer_fields(self): def test_schema_inconsistent_nested_struct(self): """Test schema inconsistency with nested struct types.""" # Serialize with original schema - fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1 = Fory(language=Language.XLANG, compatible=True) fory1.register_type(NestedStructClass) fory1.register_type(SimpleNestedDataClass) @@ -230,7 +230,7 @@ def test_schema_inconsistent_nested_struct(self): buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (different nested type) - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(NestedStructClassInconsistent) fory2.register_type(ExtendedDataClass) @@ -244,14 +244,14 @@ def test_schema_inconsistent_nested_struct(self): def test_schema_inconsistent_list_fields(self): """Test schema inconsistency with List field types.""" # Serialize with original schema - fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1 = Fory(language=Language.XLANG, compatible=True) fory1.register_type(ListFieldsClass) obj = ListFieldsClass(name="test", int_list=[1, 2, 3], str_list=["a", "b", "c"]) buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (swapped List types) - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(ListFieldsClassInconsistent) # This should handle the schema inconsistency gracefully @@ -265,14 +265,14 @@ def test_schema_inconsistent_list_fields(self): def test_schema_inconsistent_dict_fields(self): """Test schema inconsistency with Dict field types.""" # Serialize with original schema - fory1 = Fory(language=Language.XLANG, compatbile=True) + fory1 = Fory(language=Language.XLANG, compatible=True) fory1.register_type(DictFieldsClass) obj = DictFieldsClass(name="test", int_dict={"key1": 1, "key2": 2}, str_dict={"key1": "value1", "key2": "value2"}) buffer = fory1.serialize(obj) # Deserialize with inconsistent schema (swapped Dict value types) - fory2 = Fory(language=Language.XLANG, compatbile=True) + fory2 = Fory(language=Language.XLANG, compatible=True) fory2.register_type(DictFieldsClassInconsistent) # This should handle the schema inconsistency gracefully