@@ -258,8 +258,7 @@ def _add_warehouse_labels(self, node_store: Any, num_nodes: int,
258258 create_silo_labels : bool = True ,
259259 create_anomaly_labels : bool = True ,
260260 use_dummy_fallback : bool = False ) -> None :
261- """
262- Add warehouse task labels with 'Ready-for-Real-Data' pattern.
261+ """Add warehouse task labels with 'Ready-for-Real-Data' pattern.
263262
264263 Precedence order: Real Data > Structural Inference > Dummy
265264 Fallback > None
@@ -274,7 +273,6 @@ def _add_warehouse_labels(self, node_store: Any, num_nodes: int,
274273 create_anomaly_labels: Whether to create anomaly detection labels
275274 use_dummy_fallback: Whether to use dummy data as last resort
276275 """
277-
278276 # ETL Lineage Labels
279277 if create_lineage_labels :
280278 lineage_labels = self ._get_lineage_labels (table_name , db ,
@@ -299,8 +297,8 @@ def _get_lineage_labels(
299297 self , table_name : Optional [str ], db : Any , num_nodes : int ,
300298 use_dummy_fallback : bool ) -> Optional [torch .Tensor ]:
301299 """Get ETL lineage labels with precedence: real > inferred >
302- dummy > None."""
303-
300+ dummy > None.
301+ """
304302 # Method 1: Check for real lineage data
305303 if self ._has_real_lineage (db , table_name ):
306304 return self ._load_real_lineage (db , table_name )
@@ -326,8 +324,8 @@ def _get_silo_labels(self, table_name: Optional[str], db: Any,
326324 num_nodes : int ,
327325 use_dummy_fallback : bool ) -> Optional [torch .Tensor ]:
328326 """Get silo detection labels with precedence: real > inferred >
329- dummy > None."""
330-
327+ dummy > None.
328+ """
331329 # Method 1: Check for real silo data
332330 if self ._has_real_silo_data (db , table_name ):
333331 return self ._load_real_silo_labels (db , table_name )
@@ -341,8 +339,8 @@ def _get_anomaly_labels(
341339 self , table_name : Optional [str ], db : Any , num_nodes : int ,
342340 use_dummy_fallback : bool ) -> Optional [torch .Tensor ]:
343341 """Get anomaly detection labels with precedence: real > inferred >
344- dummy > None."""
345-
342+ dummy > None.
343+ """
346344 # Method 1: Check for real anomaly data
347345 if self ._has_real_anomaly_data (db , table_name ):
348346 return self ._load_real_anomaly_labels (db , table_name )
@@ -366,8 +364,7 @@ def _get_anomaly_labels(
366364
367365 # Real data checking methods
368366 def _has_real_lineage (self , db : Any , table_name : Optional [str ]) -> bool :
369- """
370- Check if real ETL lineage data is available.
367+ """Check if real ETL lineage data is available.
371368
372369 Args:
373370 db: RelBench database object
@@ -381,8 +378,7 @@ def _has_real_lineage(self, db: Any, table_name: Optional[str]) -> bool:
381378 and 'etl_stages' in db .lineage_metadata [table_name ])
382379
383380 def _has_real_silo_data (self , db : Any , table_name : Optional [str ]) -> bool :
384- """
385- Check if real silo detection data is available.
381+ """Check if real silo detection data is available.
386382
387383 Args:
388384 db: RelBench database object
@@ -396,8 +392,7 @@ def _has_real_silo_data(self, db: Any, table_name: Optional[str]) -> bool:
396392
397393 def _has_real_anomaly_data (self , db : Any ,
398394 table_name : Optional [str ]) -> bool :
399- """
400- Check if real anomaly detection data is available.
395+ """Check if real anomaly detection data is available.
401396
402397 Args:
403398 db: RelBench database object
@@ -481,7 +476,6 @@ def _infer_lineage_from_structure(self, table_name: str,
481476 def _infer_silo_from_connectivity (self , table_name : str , db : Any ,
482477 num_nodes : int ) -> torch .Tensor :
483478 """Infer silo detection labels from table connectivity."""
484-
485479 # Count connections to other tables
486480 connections = 0
487481
@@ -524,9 +518,8 @@ def _infer_anomalies_from_statistics(self, table_name: str,
524518
525519 if IQR > 0 : # Avoid division by zero
526520 # Mark outliers as anomalies
527- outlier_mask = ((values <
528- (Q1 - 1.5 * IQR )) | (values >
529- (Q3 + 1.5 * IQR )))
521+ outlier_mask = ((values < (Q1 - 1.5 * IQR )) |
522+ (values > (Q3 + 1.5 * IQR )))
530523
531524 # Update anomaly labels for outlier rows
532525 outlier_indices = table_df [col ].index [table_df [col ].isin (
0 commit comments