diff --git a/tools/ms2deepscore/macros.xml b/tools/ms2deepscore/macros.xml index 12ef78fe0..8f200a81a 100644 --- a/tools/ms2deepscore/macros.xml +++ b/tools/ms2deepscore/macros.xml @@ -1,9 +1,14 @@ - 2.0.0 - 1.16.2 + 2.6.0 + 1.19.1 + + + + tmp + tmp + + + ms2deepscore @@ -56,7 +68,7 @@
- + @@ -67,19 +79,28 @@
- - - - - - + + + + + +
- - - + + + @@ -88,11 +109,23 @@ - + - +
+ +
+ + + + +
diff --git a/tools/ms2deepscore/ms2deepscore_config_generator.xml b/tools/ms2deepscore/ms2deepscore_config_generator.xml index fbf01b953..16df61b65 100644 --- a/tools/ms2deepscore/ms2deepscore_config_generator.xml +++ b/tools/ms2deepscore/ms2deepscore_config_generator.xml @@ -1,4 +1,4 @@ - + Generates model parameters for MS2DeepScore in JSON format macros.xml @@ -13,7 +13,10 @@ - + + + + import numpy as np from typing import Optional @@ -38,11 +41,13 @@ params = { "mz_bin_width": $tensorization_settings.mz_bin_width, "intensity_scaling": $tensorization_settings.intensity_scaling, "batch_size": $training_settings.batch_size, - "average_pairs_per_bin": $training_settings.average_pairs_per_bin, - "same_prob_bins": np.array([(0, 0.2), (0.2, 1.0)]), "random_seed": random_seed, "fingerprint_type": "$tensorization_settings.fingerprint_type", - "fingerprint_nbits": $tensorization_settings.fingerprint_nbits + "fingerprint_nbits": $tensorization_settings.fingerprint_nbits, + "average_inchikey_sampling_count": $data_generator_settings.average_inchikey_sampling_count, + "max_inchikey_sampling": $data_generator_settings.max_inchikey_sampling_count, + "max_pairs_per_bin": $data_generator_settings.max_pairs_per_bin, + "max_pair_resampling": $data_generator_settings.max_pair_resampling, } settings = SettingsMS2Deepscore(**params) @@ -55,19 +60,31 @@ settings.save_to_file("$output_file") - + - - - - - - - - +
+ + + + + + + + +
+
+ + + + +
+
+ + +
diff --git a/tools/ms2deepscore/ms2deepscore_similarity.xml b/tools/ms2deepscore/ms2deepscore_similarity.xml index e5146d277..bc25404ff 100644 --- a/tools/ms2deepscore/ms2deepscore_similarity.xml +++ b/tools/ms2deepscore/ms2deepscore_similarity.xml @@ -1,4 +1,4 @@ - + Compute similarity scores using a pre-trained MS2DeepScore model macros.xml @@ -14,6 +14,9 @@ + + + @init_logger@ @@ -71,16 +74,20 @@ scores.to_json("$similarity_scores") - - - + + + + + - - + + + + diff --git a/tools/ms2deepscore/ms2deepscore_training.xml b/tools/ms2deepscore/ms2deepscore_training.xml index 17081c9a8..872001eb9 100644 --- a/tools/ms2deepscore/ms2deepscore_training.xml +++ b/tools/ms2deepscore/ms2deepscore_training.xml @@ -1,4 +1,4 @@ - + Compute similarity scores using a pre-trained MS2DeepScore model macros.xml @@ -16,6 +16,9 @@ cp $spectra processing/input."$spectra.ext"; python3 ${python_wrapper} ]]> + + + import onnx @@ -63,15 +66,15 @@ torch.onnx.export( - + - - - - + + + + diff --git a/tools/ms2deepscore/test-data/Galaxy4-[Model Parameter JSON].json b/tools/ms2deepscore/test-data/Galaxy4-[Model Parameter JSON].json new file mode 100644 index 000000000..3bf719cbb --- /dev/null +++ b/tools/ms2deepscore/test-data/Galaxy4-[Model Parameter JSON].json @@ -0,0 +1,49 @@ +{ + "base_dims": [ + 2000 + ], + "embedding_dim": 400, + "ionisation_mode": "both", + "train_binning_layer": false, + "train_binning_layer_group_size": 20, + "train_binning_layer_output_per_group": 2, + "dropout_rate": 0.01, + "learning_rate": 0.00025, + "epochs": 10, + "patience": 5, + "loss_function": "mse", + "weighting_factor": 0, + "model_file_name": "ms2deepscore_model.pt", + "history_plot_file_name": "history.svg", + "time_stamp": "2025_06_12_17_53_28", + "min_mz": 10, + "max_mz": 1000, + "mz_bin_width": 0.1, + "intensity_scaling": 0.5, + "additional_metadata": [], + "batch_size": 64, + "num_turns": 1, + "shuffle": true, + "use_fixed_set": false, + "average_pairs_per_bin": 20, + "max_pairs_per_bin": 100, + "same_prob_bins": [ + [ + 0.0, + 0.2 + ], + [ + 0.2, + 1.0 + ] + ], + "include_diagonal": true, + "random_seed": null, + "fingerprint_type": "daylight", + "fingerprint_nbits": 2048, + "augment_removal_max": 0.2, + "augment_removal_intensity": 0.2, + "augment_intensity": 0.2, + "augment_noise_max": 10, + "augment_noise_intensity": 0.02 +} \ No newline at end of file diff --git a/tools/ms2deepscore/test-data/Model_Parameter_JSON.json b/tools/ms2deepscore/test-data/Model_Parameter_JSON.json index fad876bfa..5e24428e7 100644 --- a/tools/ms2deepscore/test-data/Model_Parameter_JSON.json +++ b/tools/ms2deepscore/test-data/Model_Parameter_JSON.json @@ -5,18 +5,19 @@ ], "embedding_dim": 15, "ionisation_mode": "negative", + "activation_function": "relu", "train_binning_layer": false, "train_binning_layer_group_size": 20, "train_binning_layer_output_per_group": 2, "dropout_rate": 0.0, "learning_rate": 0.00025, "epochs": 2, - "patience": 20, + "patience": 2, "loss_function": "mse", "weighting_factor": 0, "model_file_name": "ms2deepscore_model.pt", "history_plot_file_name": "history.svg", - "time_stamp": "2024_08_16_07_50_22", + "time_stamp": "2025_06_23_11_16_54", "min_mz": 10, "max_mz": 1000, "mz_bin_width": 0.1, @@ -26,20 +27,55 @@ "num_turns": 1, "shuffle": true, "use_fixed_set": false, - "average_pairs_per_bin": 2, - "max_pairs_per_bin": 100, + "average_inchikey_sampling_count": 0, + "max_inchikey_sampling": 110, + "max_pairs_per_bin": 200, "same_prob_bins": [ [ - 0.0, - 0.2 + 0.8, + 0.9 ], [ - 0.2, + 0.7, + 0.8 + ], + [ + 0.9, 1.0 + ], + [ + 0.6, + 0.7 + ], + [ + 0.5, + 0.6 + ], + [ + 0.4, + 0.5 + ], + [ + 0.3, + 0.4 + ], + [ + 0.2, + 0.3 + ], + [ + 0.1, + 0.2 + ], + [ + -0.01, + 0.1 ] ], "include_diagonal": true, + "val_spectra_per_inchikey": 1, "random_seed": 42, + "max_pair_resampling": 100, "fingerprint_type": "daylight", "fingerprint_nbits": 2048, "augment_removal_max": 0.2, diff --git a/tools/ms2deepscore/test-data/Model_Parameter_JSON_test.json b/tools/ms2deepscore/test-data/Model_Parameter_JSON_test.json new file mode 100644 index 000000000..74fd0e160 --- /dev/null +++ b/tools/ms2deepscore/test-data/Model_Parameter_JSON_test.json @@ -0,0 +1,54 @@ +{ + "base_dims": [ + 20, + 20 + ], + "embedding_dim": 15, + "ionisation_mode": "negative", + "activation_function": "relu", + "train_binning_layer": false, + "train_binning_layer_group_size": 20, + "train_binning_layer_output_per_group": 2, + "dropout_rate": 0.0, + "learning_rate": 0.00025, + "epochs": 1, + "patience": 2, + "loss_function": "mse", + "weighting_factor": 0, + "model_file_name": "ms2deepscore_model.pt", + "history_plot_file_name": "history.svg", + "time_stamp": "2025_06_23_11_16_54", + "min_mz": 10, + "max_mz": 1000, + "mz_bin_width": 0.1, + "intensity_scaling": 0.5, + "additional_metadata": [], + "batch_size": 2, + "num_turns": 1, + "shuffle": true, + "use_fixed_set": false, + "average_inchikey_sampling_count": 1, + "max_inchikey_sampling": 110, + "max_pairs_per_bin": 200, + "same_prob_bins": [ + [ + -0.01, + 0.5 + ], + [ + 0.5, + 1.0 + ] + ], + "include_diagonal": true, + "val_spectra_per_inchikey": 1, + "random_seed": 42, + "max_pair_resampling": 100, + "fingerprint_type": "daylight", + "fingerprint_nbits": 2048, + "augment_removal_max": 0.2, + "augment_removal_intensity": 0.2, + "augment_intensity": 0.2, + "augment_noise_max": 10, + "augment_noise_intensity": 0.02 +} \ No newline at end of file