Skip to content

Commit 9e99402

Browse files
authored
Merge pull request #296 from geometric-intelligence/hopse_sparse
Hopse sparse: added sparse implementations for feature encodings
2 parents 22df89e + 7f962f5 commit 9e99402

47 files changed

Lines changed: 10728 additions & 425 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,11 @@ notebooks/tmp
189189
/tutorials/lightning_logs/
190190
/tutorials/datasets/
191191

192+
# scripts
193+
scripts/hopse_plotting/csvs
194+
scripts/hopse_plotting/plots
195+
scripts/hopse_plotting/tables
196+
192197
# wandb
193198
wandb/
194199
result_BREC/
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Dataset loader config
2+
loader:
3+
_target_: topobench.data.loaders.ADMEDatasetLoader
4+
parameters:
5+
data_domain: graph
6+
data_type: ADME
7+
data_name: BBB_Martins
8+
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
9+
10+
# Dataset parameters
11+
parameters:
12+
num_features:
13+
- 9 # OGB atom features
14+
- 3 # OGB edge features
15+
num_classes: 2
16+
task: classification
17+
loss_type: cross_entropy
18+
monitor_metric: accuracy
19+
task_level: graph
20+
max_dim_if_lifted: 3
21+
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}
22+
23+
# Splits - using fixed scaffold split from TDC
24+
split_params:
25+
learning_setting: inductive
26+
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
27+
data_seed: 0
28+
split_type: fixed # TDC provides predefined scaffold splits
29+
k: 10
30+
train_prop: 0.5
31+
32+
# Dataloader parameters
33+
dataloader_params:
34+
batch_size: 64
35+
num_workers: 0
36+
pin_memory: False
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Dataset loader config
2+
loader:
3+
_target_: topobench.data.loaders.ADMEDatasetLoader
4+
parameters:
5+
data_domain: graph
6+
data_type: ADME
7+
data_name: CYP3A4_Veith
8+
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
9+
10+
# Dataset parameters
11+
parameters:
12+
num_features:
13+
- 9 # OGB atom features
14+
- 3 # OGB edge features
15+
num_classes: 2
16+
task: classification
17+
loss_type: cross_entropy
18+
monitor_metric: accuracy
19+
task_level: graph
20+
max_dim_if_lifted: 3
21+
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}
22+
23+
# Splits - using fixed scaffold split from TDC
24+
split_params:
25+
learning_setting: inductive
26+
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
27+
data_seed: 0
28+
split_type: fixed # TDC provides predefined scaffold splits
29+
k: 10
30+
train_prop: 0.5
31+
32+
# Dataloader parameters
33+
dataloader_params:
34+
batch_size: 64
35+
num_workers: 0
36+
pin_memory: False
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Dataset loader config
2+
loader:
3+
_target_: topobench.data.loaders.ADMEDatasetLoader
4+
parameters:
5+
data_domain: graph
6+
data_type: ADME
7+
data_name: Caco2_Wang
8+
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
9+
10+
# Dataset parameters
11+
parameters:
12+
num_features:
13+
- 9 # OGB atom features
14+
- 3 # OGB edge features
15+
num_classes: 1 # Regression task
16+
task: regression
17+
loss_type: mse
18+
monitor_metric: mae
19+
task_level: graph
20+
max_dim_if_lifted: 3
21+
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}
22+
23+
# Splits - using fixed scaffold split from TDC
24+
split_params:
25+
learning_setting: inductive
26+
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
27+
data_seed: 0
28+
split_type: fixed # TDC provides predefined scaffold splits
29+
k: 10
30+
train_prop: 0.5
31+
32+
# Dataloader parameters
33+
dataloader_params:
34+
batch_size: 64
35+
num_workers: 0
36+
pin_memory: False
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Dataset loader config
2+
loader:
3+
_target_: topobench.data.loaders.ADMEDatasetLoader
4+
parameters:
5+
data_domain: graph
6+
data_type: ADME
7+
data_name: Clearance_Hepatocyte_AZ
8+
data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type}
9+
10+
# Dataset parameters
11+
parameters:
12+
num_features:
13+
- 9 # OGB atom features
14+
- 3 # OGB edge features
15+
num_classes: 1 # Regression task
16+
task: regression
17+
loss_type: mse
18+
monitor_metric: mae
19+
task_level: graph
20+
max_dim_if_lifted: 3
21+
preserve_edge_attr_if_lifted: ${set_preserve_edge_attr:${model.model_name},True}
22+
23+
# Splits - using fixed scaffold split from TDC
24+
split_params:
25+
learning_setting: inductive
26+
data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name}
27+
data_seed: 0
28+
split_type: fixed # TDC provides predefined scaffold splits
29+
k: 10
30+
train_prop: 0.5
31+
32+
# Dataloader parameters
33+
dataloader_params:
34+
batch_size: 64
35+
num_workers: 0
36+
pin_memory: False

configs/hydra/default.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@ defaults:
77

88
# output directory, generated dynamically on each run
99
run:
10-
dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}
10+
dir: ${paths.log_dir}/${task_name}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}_${pid:}
1111
sweep:
12-
dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}
12+
dir: ${paths.log_dir}/${task_name}/multiruns/${now:%Y-%m-%d}_${now:%H-%M-%S}_${pid:}
1313
subdir: ${hydra.job.num}
1414

1515
job_logging:

configs/run.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ train: True
4141
# evaluate on test set, using best model weights achieved during training
4242
# lightning chooses best weights based on the metric specified in checkpoint callback
4343
test: True
44+
delete_checkpoint_after_test: False
4445

4546
# simply provide checkpoint path to resume training
4647
ckpt_path: null

configs/transforms/data_manipulations/hopse_ps_information.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ parameters:
4242
include_eigenvalues: false
4343
include_first: false
4444
concat_to_x: false
45+
device: 'cpu' # Force CPU for eigenvalue/eigenvector computations
4546
RWSE:
4647
max_pe_dim: 10
4748
concat_to_x: false

configs/transforms/data_manipulations/precompute_khop_features.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ transform_type: "data manipulation"
33
max_hop: 1
44
use_initial_features: True
55
complex_dim: ${oc.select:dataset.parameters.max_dim_if_lifted,3}
6-
in_channels: ${infer_in_khop_feature_dim:${model.feature_encoder.dataset_in_channels},${.max_hop}}
6+
in_channels: ${infer_in_khop_feature_dim:${model.feature_encoder.dataset_in_channels},${.max_hop},${.complex_dim}}
77
max_rank: 2
88
# in_features: ${infer_in_sann_khop_feature_dim:${model},${3}}
99

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
defaults:
2+
- data_manipulations: identity # PROTEINS dataset needs the identity transform to avoid adding a random float feature to the feature matrix
3+
- data_manipulations@CombinedPSEs: combined_positional_and_structural_encodings
4+
- liftings@_here_: ${get_required_lifting:${dataset},${model}}

0 commit comments

Comments
 (0)