Skip to content

Commit 16f20bf

Browse files
author
gabriel
committed
Add serialisation
1 parent 98975b5 commit 16f20bf

2 files changed

Lines changed: 57 additions & 4 deletions

File tree

dataframely/collection/collection.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
from dataframely._storage.constants import COLLECTION_METADATA_KEY
3333
from dataframely._storage.delta import DeltaStorageBackend
3434
from dataframely._storage.parquet import ParquetStorageBackend
35-
from dataframely._typing import LazyFrame, Validation
35+
from dataframely._typing import DataFrame, LazyFrame, Validation
3636
from dataframely.exc import (
3737
DeserializationError,
3838
ValidationError,
@@ -841,6 +841,7 @@ def serialize(cls) -> str:
841841
name: {
842842
"schema": info.schema._as_dict(),
843843
"is_optional": info.is_optional,
844+
"is_lazy": info.is_lazy,
844845
"ignored_in_filters": info.ignored_in_filters,
845846
"inline_for_sampling": info.inline_for_sampling,
846847
}
@@ -1329,11 +1330,14 @@ def deserialize_collection(data: str, strict: bool = True) -> type[Collection] |
13291330

13301331
annotations: dict[str, Any] = {}
13311332
for name, info in decoded["members"].items():
1332-
lf_type = LazyFrame[_schema_from_dict(info["schema"])] # type: ignore
1333+
schema = _schema_from_dict(info["schema"])
1334+
# Default to lazy for backwards compatibility with old serialized data
1335+
is_lazy = info.get("is_lazy", True)
1336+
frame_type = LazyFrame[schema] if is_lazy else DataFrame[schema] # type: ignore
13331337
if info["is_optional"]:
1334-
lf_type = lf_type | None # type: ignore
1338+
frame_type = frame_type | None # type: ignore
13351339
annotations[name] = Annotated[
1336-
lf_type,
1340+
frame_type,
13371341
CollectionMember(
13381342
ignored_in_filters=info["ignored_in_filters"],
13391343
inline_for_sampling=info["inline_for_sampling"],

tests/collection/test_serialization.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,55 @@ def test_roundtrip_matches(collection: type[dy.Collection]) -> None:
8484
assert collection.matches(decoded)
8585

8686

87+
# --------------------------------- DATAFRAME MEMBERS -------------------------------- #
88+
89+
90+
class EagerSchema(dy.Schema):
    """Minimal schema with a single ``Int64`` primary-key column."""

    id = dy.Int64(primary_key=True)
92+
93+
94+
class MixedEagerCollection(dy.Collection):
    """Collection that has one eager and one lazy member."""

    # Annotated as a DataFrame, i.e. the eager member.
    eager: dy.DataFrame[EagerSchema]
    # Annotated as a LazyFrame, i.e. the lazy member.
    lazy: dy.LazyFrame[EagerSchema]
99+
100+
101+
def test_serialize_includes_is_lazy() -> None:
102+
"""Serialization includes the is_lazy field for each member."""
103+
serialized = MixedEagerCollection.serialize()
104+
decoded = json.loads(serialized)
105+
106+
assert decoded["members"]["eager"]["is_lazy"] is False
107+
assert decoded["members"]["lazy"]["is_lazy"] is True
108+
109+
110+
def test_roundtrip_dataframe_members() -> None:
    """Serializing and deserializing keeps each member's eager/lazy kind."""
    restored = dy.deserialize_collection(MixedEagerCollection.serialize())

    # The restored collection must match the original definition ...
    assert MixedEagerCollection.matches(restored)
    # ... and each member must keep the laziness it was declared with.
    assert not restored.members()["eager"].is_lazy
    assert restored.members()["lazy"].is_lazy
118+
119+
120+
def test_deserialize_without_is_lazy_defaults_to_lazy() -> None:
    """Payloads lacking ``is_lazy`` (the old format) deserialize to lazy members."""
    schema = create_schema("schema1", {"a": dy.Int64()})
    payload = json.loads(create_collection("test", {"s1": schema}).serialize())

    # Strip the flag so the payload looks like it was written before the
    # ``is_lazy`` field existed.
    payload["members"]["s1"].pop("is_lazy")
    legacy_serialized = json.dumps(payload)

    restored = dy.deserialize_collection(legacy_serialized)
    assert restored.members()["s1"].is_lazy is True
134+
135+
87136
# ----------------------------- DESERIALIZATION FAILURES ----------------------------- #
88137

89138

0 commit comments

Comments
 (0)