@@ -112,6 +112,130 @@ def test_allow_insert(clean_autopopulate, subject, experiment):
112112 experiment .insert1 (key )
113113
114114
def test_populate_antijoin_with_secondary_attrs(clean_autopopulate, subject, experiment):
    """Check pending-key bookkeeping across a partial and then a full populate.

    After populating a limited number of keys, both the explicit antijoin
    ``key_source - target`` and ``progress()`` must report exactly the keys
    that are still unpopulated; after a full populate nothing may remain.

    Each populated key is expected to contribute
    ``experiment.fake_experiments_per_subject`` rows to the table.
    """
    populated_calls = 2

    # Preconditions: upstream data exists and the target starts out empty.
    assert subject, "root tables are empty"
    assert not experiment, "table already filled?"

    total_keys = len(experiment.key_source)
    assert total_keys > 0

    # Stage 1: populate only a subset of the key source.
    experiment.populate(max_calls=populated_calls)
    expected_rows = populated_calls * experiment.fake_experiments_per_subject
    assert len(experiment) == expected_rows

    # The antijoin must yield only the keys that have not been populated yet.
    expected_pending = total_keys - populated_calls
    pending_count = len(experiment.key_source - experiment)
    assert pending_count == expected_pending, (
        f"Antijoin returned {pending_count} pending keys, expected {expected_pending} ."
    )

    # progress() must agree with the antijoin.
    remaining, total = experiment.progress()
    assert total == total_keys
    assert remaining == expected_pending

    # Stage 2: populate everything else; no key may be left pending.
    experiment.populate()
    leftover_count = len(experiment.key_source - experiment)
    assert leftover_count == 0, (
        f"Antijoin returned {leftover_count} pending keys after full populate, expected 0."
    )
144+
145+
def test_populate_antijoin_overlapping_attrs(prefix, connection_test):
    """Regression test: antijoin with overlapping secondary attribute names.

    This reproduces the bug where `key_source - self` returns ALL keys instead
    of just unpopulated ones. The condition is:

    1. key_source returns secondary attributes (e.g., num_samples, quality)
    2. The target table has secondary attributes with the SAME NAMES
    3. The VALUES differ between source and target after populate

    Without .proj() on the target, SQL matches on ALL common column names
    (including secondary attrs), so different values mean no match, and all
    keys appear "pending" even after populate.

    Real-world example: LightningPoseOutput (key_source) has num_frames,
    quality, processing_datetime as secondary attrs. InitialContainer (target)
    also has those same-named columns with different values.

    The test creates its own schema and always drops it on exit, including
    when declaring the tables fails.
    """
    # The schema name is a database identifier: join prefix and suffix
    # directly, with no whitespace in between.
    test_schema = dj.Schema(f"{prefix}_antijoin_overlap", connection=connection_test)

    # Everything after schema creation runs under try/finally so that a
    # failure while declaring the tables cannot leave the schema behind.
    try:

        @test_schema
        class Sensor(dj.Lookup):
            definition = """
            sensor_id : int32
            ---
            num_samples : int32
            quality : decimal(4,2)
            """
            contents = [
                (1, 100, 0.95),
                (2, 200, 0.87),
                (3, 150, 0.92),
                (4, 175, 0.89),
            ]

        @test_schema
        class ProcessedSensor(dj.Computed):
            definition = """
            -> Sensor
            ---
            num_samples : int32 # same name as Sensor's secondary attr
            quality : decimal(4,2) # same name as Sensor's secondary attr
            result : decimal(8,2)
            """

            @property
            def key_source(self):
                return Sensor()  # returns sensor_id + num_samples + quality

            def make(self, key):
                # Fetch source data (key only contains PK after projection)
                source = (Sensor() & key).fetch1()
                # Values intentionally differ from source — this is what triggers
                # the bug: the antijoin tries to match on num_samples and quality
                # too, and since values differ, no match is found.
                self.insert1(
                    dict(
                        sensor_id=key["sensor_id"],
                        num_samples=source["num_samples"] * 2,
                        quality=float(source["quality"]) + 0.05,
                        result=float(source["num_samples"]) * float(source["quality"]),
                    )
                )

        # Partially populate (2 out of 4)
        ProcessedSensor().populate(max_calls=2)
        assert len(ProcessedSensor()) == 2

        total_keys = len(ProcessedSensor().key_source)
        assert total_keys == 4

        # The critical test: populate() must correctly identify remaining keys.
        # Before the fix, populate() used `key_source - self` which matched on
        # num_samples and quality too, returning all 4 keys as "pending".
        ProcessedSensor().populate()
        assert len(ProcessedSensor()) == 4, (
            f"After full populate, expected 4 entries but got {len(ProcessedSensor())} . "
            f"populate() likely re-processed already-completed keys."
        )

        # Verify progress reports 0 remaining
        remaining, total = ProcessedSensor().progress()
        assert remaining == 0, f"Expected 0 remaining, got {remaining} "
        assert total == 4

        # Verify antijoin with .proj() is correct
        pending = ProcessedSensor().key_source - ProcessedSensor().proj()
        assert len(pending) == 0
    finally:
        test_schema.drop(prompt=False)
237+
238+
115239def test_load_dependencies (prefix , connection_test ):
116240 schema = dj .Schema (f"{ prefix } _load_dependencies_populate" , connection = connection_test )
117241
0 commit comments