66import pytest
77
88import dataframely as dy
9+ from dataframely .exc import ImplementationError
910from dataframely .random import Generator
11+ from dataframely .testing .factory import create_collection_raw
1012
1113
1214class MyFirstSchema (dy .Schema ):
@@ -37,6 +39,21 @@ def _preprocess_sample(
3739 return sample
3840
3941
class MyInlinedCollection(dy.Collection):
    """Two-member collection whose ``first`` member is marked ``inline_for_sampling``."""

    # Member annotated with `dy.CollectionMember(inline_for_sampling=True)`.
    first: Annotated[
        dy.LazyFrame[MyFirstSchema],
        dy.CollectionMember(inline_for_sampling=True),
    ]
    # Member declared without any extra `CollectionMember` annotation.
    second: dy.LazyFrame[MySecondSchema]

    @classmethod
    def _preprocess_sample(
        cls, sample: dict[str, Any], index: int, generator: Generator
    ) -> dict[str, Any]:
        """Store ``index`` under the ``"a"`` key of ``sample`` and return it.

        Args:
            sample: The sample dictionary; it is modified in place.
            index: Value written to ``sample["a"]``.
            generator: Unused by this implementation.

        Returns:
            The same ``sample`` object that was passed in.
        """
        sample.update(a=index)
        return sample
55+
56+
4057class SmallCollection (dy .Collection ):
4158 first : dy .LazyFrame [MyFirstSchema ]
4259
@@ -100,6 +117,22 @@ def test_sample_with_overrides() -> None:
100117 assert collection .second .collect ()["c" ].to_list () == [3 , 4 , 6 ]
101118
102119
def test_sample_inline_with_overrides() -> None:
    """Sample ``MyInlinedCollection`` with overrides and check both members.

    The overrides give ``"b"`` at the top level of each entry and a list of
    ``"c"`` values under ``"second"``.
    """
    collection = MyInlinedCollection.sample(
        overrides=[
            {"b": 4, "second": [{"c": 3}, {"c": 4}]},
            {"b": 8, "second": [{"c": 6}]},
        ]
    )

    # Materialize each member once and inspect the resulting columns.
    first = collection.first.collect()
    assert first["a"].to_list() == [0, 1]
    assert first["b"].to_list() == [4, 8]

    assert collection.second is not None
    second = collection.second.collect()
    assert second["a"].to_list() == [0, 0, 1]
    # NOTE(review): this is an inequality on purpose in the original; presumably
    # the top-level "b" override must not be copied into `second` -- confirm.
    assert second["b"].to_list() != [4, 4, 8]
    assert second["c"].to_list() == [3, 4, 6]
134+
135+
103136@pytest .mark .parametrize ("n" , [0 , 1000 ])
104137def test_sample_without_dependent_members (n : int ) -> None :
105138 collection = SmallCollection .sample (n )
@@ -125,3 +158,34 @@ def test_sample_no_common_primary_key() -> None:
def test_sample_no_overwrite() -> None:
    """``IncompleteCollection.sample()`` must raise the "must be overwritten" ``ValueError``."""
    expected_message = r"`_preprocess_sample` must be overwritten"
    with pytest.raises(ValueError, match=expected_message):
        IncompleteCollection.sample()
161+
162+
def test_invalid_inline_for_sampling() -> None:
    """Creating a collection with only ``second`` inlined must raise ``ImplementationError``.

    The error message is expected to mention "its primary key is a superset".
    """
    expected_message = r"its primary key is a superset"
    with pytest.raises(ImplementationError, match=expected_message):
        # Only `second` carries the inline marker; `first` is a plain member.
        members = {
            "first": dy.LazyFrame[MyFirstSchema],
            "second": Annotated[
                dy.LazyFrame[MySecondSchema],
                dy.CollectionMember(inline_for_sampling=True),
            ],
        }
        create_collection_raw("test", members)
175+
176+
def test_duplicate_column_inlined_for_sampling() -> None:
    """Inlining two members that share a schema must raise ``ImplementationError``.

    Both members use ``MyFirstSchema`` and both are marked
    ``inline_for_sampling``; the error message is expected to mention
    "clashes with a column name".
    """
    expected_message = r"clashes with a column name"
    with pytest.raises(ImplementationError, match=expected_message):
        # Each member gets its own `CollectionMember` instance, in the same
        # "first", "second" order as a literal mapping would have.
        members = {
            member_name: Annotated[
                dy.LazyFrame[MyFirstSchema],
                dy.CollectionMember(inline_for_sampling=True),
            ]
            for member_name in ("first", "second")
        }
        create_collection_raw("test", members)
0 commit comments