66import pytest
77
88import dataframely as dy
9+ from dataframely .exc import ImplementationError
910from dataframely .random import Generator
11+ from dataframely .testing import create_collection
12+ from dataframely .testing .factory import create_collection_raw
1013
1114
1215class MyFirstSchema (dy .Schema ):
@@ -37,6 +40,21 @@ def _preprocess_sample(
3740 return sample
3841
3942
class MyInlinedCollection(dy.Collection):
    """Two-member collection whose ``first`` member is inlined for sampling.

    Used by the sampling tests to exercise overrides in which the columns of
    ``first`` are given at the top level of each sample rather than under a
    ``"first"`` key.
    """

    # Flagged with ``inline_for_sampling=True`` via its collection-member
    # annotation.
    first: Annotated[
        dy.LazyFrame[MyFirstSchema],
        dy.CollectionMember(inline_for_sampling=True),
    ]
    # Plain member without any extra collection-member options.
    second: dy.LazyFrame[MySecondSchema]

    @classmethod
    def _preprocess_sample(
        cls, sample: dict[str, Any], index: int, generator: Generator
    ) -> dict[str, Any]:
        """Store the sample's position under key ``"a"`` and return the sample.

        Args:
            sample: The sample to preprocess; modified in place.
            index: Position of the sample; written to ``sample["a"]``.
            generator: Unused by this implementation.

        Returns:
            The same ``sample`` object, now containing ``"a"``.
        """
        sample.update({"a": index})
        return sample
56+
57+
4058class SmallCollection (dy .Collection ):
4159 first : dy .LazyFrame [MyFirstSchema ]
4260
@@ -100,6 +118,21 @@ def test_sample_with_overrides():
100118 assert collection .second .collect ()["c" ].to_list () == [3 , 4 , 6 ]
101119
102120
def test_sample_inline_with_overrides():
    """Sample ``MyInlinedCollection`` with overrides and check both members.

    Each override supplies ``b`` at the top level (rather than under a
    ``"first"`` key) and a list of rows for ``second``. The test expects one
    ``first`` row per override and one ``second`` row per listed entry, with
    column ``a`` carrying the index of the originating override.
    """
    sample_overrides = [
        {"b": 4, "second": [{"c": 3}, {"c": 4}]},
        {"b": 8, "second": [{"c": 6}]},
    ]
    collection = MyInlinedCollection.sample(overrides=sample_overrides)

    # One row per override: `a` is the override's index, `b` the given value.
    first_rows = collection.first.collect()
    assert first_rows["a"].to_list() == [0, 1]
    assert first_rows["b"].to_list() == [4, 8]

    assert collection.second is not None
    # Two rows stem from the first override and one from the second.
    second_rows = collection.second.collect()
    assert second_rows["a"].to_list() == [0, 0, 1]
    assert second_rows["c"].to_list() == [3, 4, 6]
134+
135+
103136@pytest .mark .parametrize ("n" , [0 , 1000 ])
104137def test_sample_without_dependent_members (n : int ):
105138 collection = SmallCollection .sample (n )
@@ -125,3 +158,34 @@ def test_sample_no_common_primary_key():
def test_sample_no_overwrite():
    """``IncompleteCollection.sample()`` must fail with a ``ValueError``.

    The error message is expected to state that ``_preprocess_sample`` must
    be overwritten.
    """
    expected_message = r"`_preprocess_sample` must be overwritten"
    with pytest.raises(ValueError, match=expected_message):
        IncompleteCollection.sample()
161+
162+
def test_invalid_inline_for_sampling():
    """Inlining the ``MySecondSchema`` member for sampling must be rejected.

    Building a collection in which ``second`` is flagged with
    ``inline_for_sampling=True`` is expected to raise ``ImplementationError``
    with a message containing "its primary key is a superset".
    """
    expected_message = r"its primary key is a superset"
    with pytest.raises(ImplementationError, match=expected_message):
        # Built inside the `raises` block so that the exception scope stays
        # as wide as a single inline call expression.
        members = {
            "first": dy.LazyFrame[MyFirstSchema],
            "second": Annotated[
                dy.LazyFrame[MySecondSchema],
                dy.CollectionMember(inline_for_sampling=True),
            ],
        }
        create_collection_raw("test", members)
175+
176+
def test_duplicate_column_inlined_for_sampling():
    """Two inlined members with the same schema must be rejected.

    Both ``first`` and ``second`` use ``MyFirstSchema`` and are flagged with
    ``inline_for_sampling=True``; building the collection is expected to
    raise ``ImplementationError`` with a message containing
    "clashes with a column name".
    """
    expected_message = r"clashes with a column name"
    with pytest.raises(ImplementationError, match=expected_message):
        # Both members share one equivalent annotation; it is constructed
        # inside the `raises` block to keep the exception scope unchanged.
        inlined_first_schema = Annotated[
            dy.LazyFrame[MyFirstSchema],
            dy.CollectionMember(inline_for_sampling=True),
        ]
        members = {
            "first": inlined_first_schema,
            "second": Annotated[
                dy.LazyFrame[MyFirstSchema],
                dy.CollectionMember(inline_for_sampling=True),
            ],
        }
        create_collection_raw("test", members)
0 commit comments