Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
9a3f31b
speed up of feature encodings
Coerulatus Mar 27, 2026
a668a4b
sparse implementation for positional encodings
Coerulatus Mar 27, 2026
f4947ef
fixed docs for feature encodings
Coerulatus Mar 27, 2026
9d169e3
random walk reduced memory usage. debug set to false
Coerulatus Mar 30, 2026
4d4fa40
added gpu support to PreProcessor
Coerulatus Mar 30, 2026
4b4de6b
renamed device to preprocessor_device to avoid conflicts
Coerulatus Mar 30, 2026
5756b1a
delete_checkpoint_after_test added to run config
Coerulatus Mar 31, 2026
29f4528
gpu and entity selection
Coerulatus Mar 31, 2026
b9401c7
fixed runs logging. fixed data processing with filelock
Coerulatus Apr 1, 2026
204e888
changes for experiments
Coerulatus Apr 1, 2026
656b331
type casting for tests
Coerulatus Apr 1, 2026
e329c45
Fix PROTEINS experiments
gbg141 Apr 1, 2026
21e72d9
HOPSE-M script
gbg141 Apr 1, 2026
c1e4f61
Topotune script
gbg141 Apr 6, 2026
b167082
Resume flag in experiments scripts
gbg141 Apr 6, 2026
21633a1
Add TDC ADME dataset loader and configs
LouisVanLangendonck Apr 9, 2026
eafc897
Add PyTDC dependencies and fix import issues
LouisVanLangendonck Apr 9, 2026
f185d64
Merge remote-tracking branch 'origin/hopse_sparse' into hopse_cyp2c19…
LouisVanLangendonck Apr 9, 2026
8dc2861
Set node features as float not long
LouisVanLangendonck Apr 9, 2026
e17d363
fix toml dependency
LouisVanLangendonck Apr 11, 2026
0e13fe4
Fix hopse_m.sh
gbg141 Apr 11, 2026
1f852c8
Merge pull request #304 from geometric-intelligence/hopse_cyp2c19_dat…
LouisVanLangendonck Apr 11, 2026
9ca4ff4
HOPSE-M encoding ablation script
gbg141 Apr 13, 2026
45fc53b
Update hopse-m ablation script
gbg141 Apr 13, 2026
66636cc
Add hopse plotting scripts and update .gitignore
LouisVanLangendonck Apr 14, 2026
f5b642e
Baselines' scripts
gbg141 Apr 16, 2026
c8a9fcf
Fix bug in SaNN preprocessing
gbg141 Apr 16, 2026
9971530
Merge branch 'hopse_sparse' of https://github.com/geometric-intellige…
gbg141 Apr 16, 2026
b1682d6
GIN script
gbg141 Apr 16, 2026
b6730a2
Merge branch 'hopse_sparse' of https://github.com/geometric-intellige…
gbg141 Apr 16, 2026
a98137c
hopse_g script
Coerulatus Apr 17, 2026
762461b
Merge branch 'hopse_sparse' of https://github.com/geometric-intellige…
Coerulatus Apr 17, 2026
7f962f5
Update wandb loading and analysis
LouisVanLangendonck Apr 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,11 @@ notebooks/tmp
/tutorials/lightning_logs/
/tutorials/datasets/

# scripts
scripts/hopse_plotting/csvs
scripts/hopse_plotting/plots
scripts/hopse_plotting/tables

# wandb
wandb/
result_BREC/
Expand Down
36 changes: 36 additions & 0 deletions configs/dataset/graph/BBB_Martins.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config
loader:
_target_: topobench.data.loaders.ADMEDatasetLoader
parameters:
data_domain: graph
data_type: ADME
data_name: BBB_Martins
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters
parameters:
num_features:
- 9 # OGB atom features
- 3 # OGB edge features
num_classes: 2
task: classification
loss_type: cross_entropy
monitor_metric: accuracy
task_level: graph
max_dim_if_lifted: 3
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}

# Splits - using fixed scaffold split from TDC
split_params:
learning_setting: inductive
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
data_seed: 0
split_type: fixed # TDC provides predefined scaffold splits
k: 10
train_prop: 0.5

# Dataloader parameters
dataloader_params:
batch_size: 64
num_workers: 0
pin_memory: False
36 changes: 36 additions & 0 deletions configs/dataset/graph/CYP3A4_Veith.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config
loader:
_target_: topobench.data.loaders.ADMEDatasetLoader
parameters:
data_domain: graph
data_type: ADME
data_name: CYP3A4_Veith
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters
parameters:
num_features:
- 9 # OGB atom features
- 3 # OGB edge features
num_classes: 2
task: classification
loss_type: cross_entropy
monitor_metric: accuracy
task_level: graph
max_dim_if_lifted: 3
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}

# Splits - using fixed scaffold split from TDC
split_params:
learning_setting: inductive
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
data_seed: 0
split_type: fixed # TDC provides predefined scaffold splits
k: 10
train_prop: 0.5

# Dataloader parameters
dataloader_params:
batch_size: 64
num_workers: 0
pin_memory: False
36 changes: 36 additions & 0 deletions configs/dataset/graph/Caco2_Wang.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config
loader:
_target_: topobench.data.loaders.ADMEDatasetLoader
parameters:
data_domain: graph
data_type: ADME
data_name: Caco2_Wang
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters
parameters:
num_features:
- 9 # OGB atom features
- 3 # OGB edge features
num_classes: 1 # Regression task
task: regression
loss_type: mse
monitor_metric: mae
task_level: graph
max_dim_if_lifted: 3
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}

# Splits - using fixed scaffold split from TDC
split_params:
learning_setting: inductive
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
data_seed: 0
split_type: fixed # TDC provides predefined scaffold splits
k: 10
train_prop: 0.5

# Dataloader parameters
dataloader_params:
batch_size: 64
num_workers: 0
pin_memory: False
36 changes: 36 additions & 0 deletions configs/dataset/graph/Clearance_Hepatocyte_AZ.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dataset loader config
loader:
_target_: topobench.data.loaders.ADMEDatasetLoader
parameters:
data_domain: graph
data_type: ADME
data_name: Clearance_Hepatocyte_AZ
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}

# Dataset parameters
parameters:
num_features:
- 9 # OGB atom features
- 3 # OGB edge features
num_classes: 1 # Regression task
task: regression
loss_type: mse
monitor_metric: mae
task_level: graph
max_dim_if_lifted: 3
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}

# Splits - using fixed scaffold split from TDC
split_params:
learning_setting: inductive
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
data_seed: 0
split_type: fixed # TDC provides predefined scaffold splits
k: 10
train_prop: 0.5

# Dataloader parameters
dataloader_params:
batch_size: 64
num_workers: 0
pin_memory: False
4 changes: 2 additions & 2 deletions configs/hydra/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ defaults:

# output directory, generated dynamically on each run
run:
dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}_${pid:}
sweep:
dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}_${pid:}
subdir: ${hydra.job.num}

job_logging:
Expand Down
1 change: 1 addition & 0 deletions configs/run.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ train: True
# evaluate on test set, using best model weights achieved during training
# lightning chooses best weights based on the metric specified in checkpoint callback
test: True
delete_checkpoint_after_test: False

# simply provide checkpoint path to resume training
ckpt_path: null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ parameters:
include_eigenvalues: false
include_first: false
concat_to_x: false
device: 'cpu' # Force CPU for eigen computations
RWSE:
max_pe_dim: 10
concat_to_x: false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ transform_type: "data manipulation"
max_hop: 1
use_initial_features: True
complex_dim: ${oc.select:dataset.parameters.max_dim_if_lifted,3}
in_channels: ${infer_in_khop_feature_dim:${model.feature_encoder.dataset_in_channels},${.max_hop}}
in_channels: ${infer_in_khop_feature_dim:${model.feature_encoder.dataset_in_channels},${.max_hop},${.complex_dim}}
max_rank: 2
# in_features: ${infer_in_sann_khop_feature_dim:${model},${3}}

Expand Down
4 changes: 4 additions & 0 deletions configs/transforms/model_dataset_defaults/gps_PROTEINS.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defaults:
- data_manipulations: identity # PROTEINS needs the identity transform to avoid adding a random float feature to the feature matrix
- data_manipulations@CombinedPSEs: combined_positional_and_structural_encodings
- liftings@_here_: ${get_required_lifting:${dataset},${model}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defaults:
- data_manipulations: identity # PROTEINS needs the identity transform to avoid adding a random float feature to the feature matrix
- liftings@_here_: ${get_required_lifting:${dataset},${model}}
- data_manipulations@hopse_encoding: add_gpse_information
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defaults:
- data_manipulations: identity # PROTEINS needs the identity transform to avoid adding a random float feature to the feature matrix
- liftings@_here_: ${get_required_lifting:${dataset},${model}}
- data_manipulations@hopse_encoding: hopse_ps_information
28 changes: 0 additions & 28 deletions configs/transforms/model_dataset_defaults/hopse_m_ZINC.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,6 @@ defaults:
- liftings@_here_: ${get_required_lifting:${dataset},${model}}
- data_manipulations@hopse_encoding: hopse_ps_information

hopse_encoding:
pe_types:
- 'RWSE'
- 'ElstaticPE'
- 'HKdiagSE'
- 'LapPE'


# Different PE types produce encodings of different sizes; unify them to a common dimension.
target_pe_dim: 20

# LapPE config
laplacian_norm_type: 'sym'
posenc_LapPE_eigen_max_freqs: 18
posenc_LapPE_eigen_eigvec_norm: 'L2'
posenc_LapPE_eigen_skip_zero_freq: True
posenc_LapPE_eigen_eigvec_abs: True

# RWSE config
kernel_param_RWSE:
- 2
- 20

# HKdiagSE config
kernel_param_HKdiagSE:
- 1
- 22

one_hot_node_degree_features:
degrees_field: x
features_field: x
4 changes: 4 additions & 0 deletions configs/transforms/model_dataset_defaults/sann_PROTEINS.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
defaults:
- data_manipulations: identity # PROTEINS needs the identity transform to avoid adding a random float feature to the feature matrix
- liftings@_here_: ${get_required_lifting:${dataset},${model}}
- data_manipulations@sann_encoding: precompute_khop_features
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ dependencies=[
"torch-scatter",
"torch-sparse",
"torch-cluster",
"rdkit-pypi",
"PyTDC==1.1.15",
# PyTDC imports pkg_resources; setuptools>=82 dropped it.
"setuptools>=69,<82",
]

[project.optional-dependencies]
Expand Down
Loading
Loading