Skip to content

Commit 4e4e854

Browse files
committed
refactor: remove overlapping examples and prepare for G-Retriever integration
1 parent d2e6abd commit 4e4e854

5 files changed

Lines changed: 33 additions & 160 deletions

File tree

examples/llm/relbench_example.py

Lines changed: 0 additions & 41 deletions
This file was deleted.

examples/relbench/00_llm_gnn_bridge.ipynb

Lines changed: 0 additions & 37 deletions
This file was deleted.

examples/relbench/02_train_rgcn.py

Lines changed: 0 additions & 77 deletions
This file was deleted.

torch_geometric/datasets/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
# RelBench integration utilities (optional dependencies)
2828
try:
29-
from .relbench import RelBenchDataset, RelBenchProcessor, create_relbench_hetero_data, get_warehouse_task_info # noqa: F401
29+
from .relbench import RelBenchDataset, RelBenchProcessor, create_relbench_hetero_data, get_warehouse_task_info, prepare_for_gretriever # noqa: F401
3030
_relbench_available = True
3131
except ImportError:
3232
_relbench_available = False
@@ -260,4 +260,5 @@
260260
"RelBenchProcessor",
261261
"create_relbench_hetero_data",
262262
"get_warehouse_task_info",
263+
"prepare_for_gretriever",
263264
])

torch_geometric/datasets/relbench.py

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
11
"""RelBench integration utilities for PyTorch Geometric.
22
33
Provides utilities for converting RelBench datasets to PyG HeteroData objects
4-
with semantic embeddings and graph structure for warehouse applications.
4+
with semantic embeddings and warehouse-specific enhancements.
55
6-
TODO: Add subgraph sampling utilities for inference
7-
TODO: Implement configurable edge weighting schemes
8-
TODO: Add support for lineage tracking
6+
Complements examples/rdl.py with G-Retriever preparation and warehouse tasks.
97
"""
108

119
import warnings
@@ -1044,6 +1042,35 @@ def process(self) -> None:
10441042
torch.save((collated_data, slices), self.processed_paths[0])
10451043

10461044

1045+
def prepare_for_gretriever(
    hetero_data: HeteroData,
    *,
    embedding_type: str = 'sbert',
    default_embedding_dim: int = 384,
) -> Tuple[HeteroData, Dict[str, Any]]:
    """Prepare RelBench HeteroData for G-Retriever training.

    Tags ``hetero_data`` with marker attributes and builds a metadata
    dictionary describing it. The input object is modified **in place**;
    the ``HeteroData`` in the returned tuple is the same object that was
    passed in, not a copy.

    Args:
        hetero_data: HeteroData object from RelBench integration.
        embedding_type: Label stored in ``hetero_data.embedding_type``.
            Defaults to ``'sbert'``, the value this function previously
            hard-coded.
        default_embedding_dim: Value reported as
            ``metadata['embedding_dim']`` when ``hetero_data`` has no
            ``embedding_dim`` attribute. Defaults to ``384``.
            NOTE(review): 384 presumably matches the SBERT encoder used
            upstream -- confirm against the embedding code.

    Returns:
        Tuple of (enhanced_hetero_data, metadata_dict). The metadata
        dictionary has the keys ``embedding_dim``, ``node_types``,
        ``edge_types``, ``warehouse_tasks``, ``recommended_qa_pairs`` and
        ``conversion_ready``.
    """
    metadata = {
        # Fall back to the default when the graph carries no explicit
        # ``embedding_dim`` attribute.
        'embedding_dim': getattr(
            hetero_data, 'embedding_dim', default_embedding_dim),
        'node_types': list(hetero_data.node_types),
        'edge_types': list(hetero_data.edge_types),
        'warehouse_tasks': ['lineage', 'silo', 'anomaly'],
        'recommended_qa_pairs': get_warehouse_task_info(),
        # Set unconditionally: no validation of ``hetero_data`` happens here.
        'conversion_ready': True,
    }

    # Add G-Retriever specific attributes (mutates the caller's object).
    hetero_data.gretriever_ready = True
    hetero_data.embedding_type = embedding_type
    hetero_data.warehouse_enhanced = True

    return hetero_data, metadata
1072+
1073+
10471074
# Backward compatibility aliases
10481075
RelBenchHeteroDataProcessor = RelBenchProcessor
10491076
create_hetero_data_from_relbench = create_relbench_hetero_data

0 commit comments

Comments
 (0)