@@ -25,22 +25,27 @@ def counting_loader(file_resource, current_file, **kwargs):
class SimpleSource(Source):
    """Test source that emits a configurable number of dataset resources.

    Every emitted ``DatasetResource`` has exactly one file attached, and that
    file is loaded through ``counting_loader``.
    """

    provider = "test_provider"

    def __init__(self, name, n_datasets=1):
        """Initialise the source.

        Args:
            name: Forwarded unchanged to ``Source.__init__``.
            n_datasets: Number of resources ``find_datasets`` will yield.
        """
        super().__init__(name)
        self.n_datasets = n_datasets

    def find_datasets(
        self, dataset_type, data_spec_versions, dataset_collection_metadata, **kwargs
    ):
        """Yield ``self.n_datasets`` resources with ``item_id`` counting from 0.

        The parameters are part of the method signature but are not read
        here; the produced resources depend only on ``self.n_datasets``.
        """
        for index in range(self.n_datasets):
            resource_id = {"item_id": index}
            resource = DatasetResource(
                dataset_resource_id=resource_id,
                provider=self.provider,
                dataset_type="test",
                name=f"item-{index}",
            )

            # One file per resource; FIXED_TIME keeps last_modified constant
            # from one call to the next.
            file_spec = {
                "last_modified": FIXED_TIME,
                "data_feed_key": "f1",
                "data_spec_version": "v1",
                "file_loader": counting_loader,
            }
            resource.add_file(**file_spec)

            yield resource
4449
4550
4651def _setup (engine ):
@@ -99,3 +104,47 @@ def test_invalidate_revision_triggers_refetch(engine):
99104 # Second run: should refetch
100105 engine .run ()
101106 assert call_count == 2 , "Dataset with invalidated revision should be refetched"
107+
108+
def test_invalidate_revisions_batch(engine):
    """invalidate_revisions works on multiple datasets at once."""
    global call_count
    call_count = 0

    expected = 5

    def stored_datasets():
        # Materialise the collection so it can be counted and iterated.
        collection = engine.store.get_dataset_collection(
            provider="test_provider", dataset_type="test"
        )
        return list(collection)

    spec_versions = DataSpecVersionCollection.from_dict({"default": {"v1"}})
    plan = IngestionPlan(
        source=SimpleSource("s", n_datasets=expected),
        fetch_policy=FetchPolicy(),
        dataset_type="test",
        selectors=[Selector.build({}, data_spec_versions=spec_versions)],
        data_spec_versions=spec_versions,
    )
    engine.add_ingestion_plan(plan)

    # Initial ingestion: one loader call per dataset.
    engine.run()
    assert call_count == 5

    # Invalidate every stored dataset in a single call.
    to_invalidate = stored_datasets()
    assert len(to_invalidate) == 5
    engine.store.invalidate_revisions(to_invalidate, reason="Batch test")

    # Re-read from the store and check each dataset was marked invalid.
    for dataset in stored_datasets():
        assert dataset.current_revision.state == RevisionState.VALIDATION_FAILED
        assert dataset.last_modified_at is None

    # Second ingestion: every invalidated dataset is fetched again.
    engine.run()
    assert call_count == 10, "All 5 invalidated datasets should be refetched"
0 commit comments