@@ -10662,3 +10662,121 @@ def _run_restatement_plan(tmp_path: Path, config: Config, q: queue.Queue):
1066210662 assert len (model_a .intervals )
1066310663
1066410664 set_console (orig_console )
10665+
10666+
def test_seed_model_metadata_update_does_not_trigger_backfill(tmp_path: Path):
    """
    Scenario:
        - Create a seed model; perform initial population
        - Modify the model with a metadata-only change and trigger a plan
        - Move the clock forward three days and plan again
        - Change the seed data itself and plan again

    Outcome:
        - The seed model is modified (metadata-only) but this should NOT trigger backfill
        - There should be no missing_intervals on the plan to backfill
        - The passage of time alone yields neither changes nor missing intervals
        - Changing the seed data is a direct modification and DOES trigger backfill
    """

    models_path = tmp_path / "models"
    seeds_path = tmp_path / "seeds"
    models_path.mkdir()
    seeds_path.mkdir()

    seed_model_path = models_path / "seed.sql"
    seed_path = seeds_path / "seed_data.csv"

    # Key under which the seed model's snapshot is looked up in ctx.snapshots.
    seed_snapshot_name = '"memory"."test"."source_data"'

    seed_path.write_text("\n".join(["id,name", "1,test"]))

    seed_model_path.write_text("""
    MODEL (
        name test.source_data,
        kind SEED (
            path '../seeds/seed_data.csv'
        )
    );
    """)

    config = Config(
        gateways={"": GatewayConfig(connection=DuckDBConnectionConfig())},
        model_defaults=ModelDefaultsConfig(dialect="duckdb", start="2024-01-01"),
    )
    ctx = Context(paths=tmp_path, config=config)

    # initial plan: the brand new seed model is directly modified and needs populating
    plan = ctx.plan(auto_apply=True)

    original_seed_snapshot = ctx.snapshots[seed_snapshot_name]
    assert plan.directly_modified == {original_seed_snapshot.snapshot_id}
    assert plan.metadata_updated == set()
    assert plan.missing_intervals

    # prove data loaded
    assert ctx.engine_adapter.fetchall("select id, name from memory.test.source_data") == [
        (1, "test")
    ]

    # prove no diff
    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert not plan.has_changes
    assert not plan.missing_intervals

    # make metadata-only change (only a description is added)
    seed_model_path.write_text("""
    MODEL (
        name test.source_data,
        kind SEED (
            path '../seeds/seed_data.csv'
        ),
        description 'updated by test'
    );
    """)

    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert plan.has_changes

    new_seed_snapshot = ctx.snapshots[seed_snapshot_name]
    assert (
        new_seed_snapshot.version == original_seed_snapshot.version
    )  # should be using the same physical table
    assert (
        new_seed_snapshot.snapshot_id != original_seed_snapshot.snapshot_id
    )  # but still be different due to the metadata change
    assert plan.directly_modified == set()
    assert plan.metadata_updated == {new_seed_snapshot.snapshot_id}

    # there should be no missing intervals to backfill since all we did is update a description
    assert not plan.missing_intervals

    # there should still be no diff or missing intervals in 3 days time
    assert new_seed_snapshot.model.interval_unit.is_day
    with time_machine.travel(timedelta(days=3)):
        ctx.clear_caches()
        ctx.load()
        plan = ctx.plan(auto_apply=True)
        assert not plan.has_changes
        assert not plan.missing_intervals

    # change seed data
    seed_path.write_text("\n".join(["id,name", "1,test", "2,updated"]))

    # new plan - NOW we should backfill because data changed
    ctx.load()
    plan = ctx.plan(auto_apply=True)
    assert plan.has_changes

    updated_seed_snapshot = ctx.snapshots[seed_snapshot_name]

    # All three snapshots must be pairwise distinct. A chained `a != b != c` would only
    # compare neighbours and never check the first against the last, so count unique ids.
    distinct_snapshot_ids = {
        updated_seed_snapshot.snapshot_id,
        new_seed_snapshot.snapshot_id,
        original_seed_snapshot.snapshot_id,
    }
    assert len(distinct_snapshot_ids) == 3

    assert not updated_seed_snapshot.forward_only
    assert plan.directly_modified == {updated_seed_snapshot.snapshot_id}
    assert plan.metadata_updated == set()
    assert plan.missing_intervals

    # prove backfilled data loaded; explicit ordering so the comparison against an ordered
    # list does not depend on the order in which the engine happens to return rows
    assert ctx.engine_adapter.fetchall(
        "select id, name from memory.test.source_data order by id"
    ) == [
        (1, "test"),
        (2, "updated"),
    ]
0 commit comments