Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions tools/ms2deepscore/macros.xml
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
<macros>
<token name="@TOOL_VERSION@">2.0.0</token>
<token name="@ONNX_VERSION@">1.16.2</token>
<token name="@TOOL_VERSION@">2.6.0</token>
<token name="@ONNX_VERSION@">1.19.1</token>

<xml name="creator">
<creator>
<person
givenName="Helge"
familyName="Hecht"
url="https://github.com/hechth"
identifier="0000-0001-6744-996X" />
<person
givenName="Zargham"
familyName="Ahmad"
Expand All @@ -16,6 +21,13 @@
</creator>
</xml>

<!-- Environment variables shared by the MS2DeepScore tools (expanded in each tool wrapper): NUMBA_CACHE_DIR and MPLCONFIGDIR are both set to the relative path "tmp". NOTE(review): presumably this keeps the Numba cache and the Matplotlib config inside a writable job directory; confirm. -->
<xml name="environment_variables">
<environment_variables>
<environment_variable name="NUMBA_CACHE_DIR">tmp</environment_variable>
<environment_variable name="MPLCONFIGDIR">tmp</environment_variable>
</environment_variables>
</xml>

<xml name="edam">
<xrefs>
<xref type="bio.tools">ms2deepscore</xref>
Expand Down Expand Up @@ -56,7 +68,7 @@
<xml name="config_generator">
<section name="model_structure" title="Model Structure" expanded="true">
<repeat name="layers" title="Layer" min="1" default="1" >
<param name="dims" type="integer" label="Dimensions" min = "0" value="2000" help="Size of the in-between layer to add." />
<param name="dims" type="integer" label="Dimensions" min="1" value="2000" max="10000" help="Size of the in-between layer to add." />
</repeat>
<param name="embedding_dim" type="integer" label="Embedding Dimension" value="400" help="The dimension of the final embedding layer." />
<param name="ionisation_mode" type="select" label="Ionisation Mode">
Expand All @@ -67,19 +79,28 @@
</section>

<section name="tensorization_settings" title="Tensorization Settings" expanded="true">
<param name="min_mz" type="integer" label="Min m/z" value="10" />
<param name="max_mz" type="integer" label="Max m/z" value="1000" />
<param name="mz_bin_width" type="float" label="m/z Bin Width" value="0.1" />
<param name="intensity_scaling" type="float" label="Intensity Scaling" value="0.5" />
<param name="fingerprint_type" type="text" value="daylight" label="Fingerprint Type" help="The fingerprint type that should be used for tanimoto score calculations." />
<param name="fingerprint_nbits" type="integer" label="Fingerprint Number of Bits" value="2048" help="The number of bits to use for the fingerprint." />
<param name="min_mz" type="integer" label="Min m/z" min="0" max="1000" value="10"
help="Minimum m/z value to consider for tensorization." />
<param name="max_mz" type="integer" label="Max m/z" min="0" max="2000" value="1000"
help="Maximum m/z value to consider for tensorization." />
<!-- The min="0" bound is inclusive, so a bin width of exactly 0 would pass it; the validator below rejects 0 explicitly because a zero-width bin cannot be used to bin the m/z range. -->
<param name="mz_bin_width" type="float" label="m/z Bin Width" min="0" max="1" value="0.1"
help="Width of each m/z bin for tensorization. Must be greater than 0.">
    <validator type="in_range" min="0" max="1" exclude_min="true" message="m/z bin width must be greater than 0 and at most 1." />
</param>
<param name="intensity_scaling" type="float" label="Intensity Scaling" min="0" max="1" value="0.5"
help="Scaling factor applied to intensities during tensorization." />
<param name="fingerprint_type" type="text" value="daylight" label="Fingerprint Type"
help="The fingerprint type that should be used for tanimoto score calculations." />
<param name="fingerprint_nbits" type="integer" label="Fingerprint Number of Bits" value="2048"
help="The number of bits to use for the fingerprint." />
</section>


<section name="training_settings" title="Training Settings" expanded="false">
<param name="dropout_rate" type="float" label="Dropout Rate" value="0.0" />
<param name="learning_rate" type="float" label="Learning Rate" value="0.00025" />
<param name="epochs" type="integer" label="Epochs" value="250" />
<param name="dropout_rate" type="float" label="Dropout Rate" value="0.0"
help="Dropout rate to use during training to prevent overfitting." />
<param name="learning_rate" type="float" label="Learning Rate" value="0.00025"
help="Learning rate for the optimizer during training." />
<param name="epochs" type="integer" label="Epochs" value="250"
help="Number of training epochs." />
<param name="patience" type="integer" label="Patience" value="20" help="How long the model should keep training if validation does not improve" />
<param name="loss_function" type="select" label="Loss Function">
<option value="mse" selected="true">Mean Squared Error (mse)</option>
Expand All @@ -88,11 +109,23 @@
<option value="risk_mae">Risk Aware MAE (risk_aware_mae)</option>
<option value="risk_mse">Risk Aware MSE (risk_aware_mse)</option>
</param>
<param name="weighting_factor" type="integer" label="Weighting Factor" value="0" />
<param name="weighting_factor" type="integer" label="Weighting Factor" value="0"
help="Weighting factor for the loss function." />
<param name="batch_size" type="integer" value="32" label="Batch Size" help="Number of pairs per batch" />
<param name="average_pairs_per_bin" type="integer" value="20" label="Average pairs per bin" help="The aimed average number of pairs of spectra per spectrum in each bin." />
<!-- <param name="average_pairs_per_bin" type="integer" value="20" label="Average pairs per bin" help="The aimed average number of pairs of spectra per spectrum in each bin." /> -->
<param name="random_seed" type="text" label="Random seed" value="None" help="Specify random seed for reproducible random number generation." />
</section>

<!-- Settings controlling how spectrum pairs are sampled. All four values are counts, so they are constrained to be non-negative (min="0"), in line with the bounds on the other numeric parameters of this macro. -->
<section name="data_generator_settings" title="Data Generator Settings" expanded="false">
    <param name="average_inchikey_sampling_count" type="integer" label="Average InChIKey Sampling Count" min="0" value="100"
           help="The average number of inchikeys that should be sampled for each bin. This is used to create pairs of spectra." />
    <param name="max_inchikey_sampling_count" type="integer" label="Max InChIKey Sampling Count" min="0" value="110"
           help="The maximum number of inchikeys that should be sampled for each bin. This is used to create pairs of spectra." />
    <param name="max_pairs_per_bin" type="integer" label="Max Pairs per Bin" min="0" value="200"
           help="The maximum number of pairs of spectra that should be created for each bin. This is used to create pairs of spectra." />
    <param name="max_pair_resampling" type="integer" label="Max Pair Resampling" min="0" value="10000000"
           help="The maximum number of times a pair of spectra can be resampled. This is used to create pairs of spectra." />
</section>
</xml>

<xml name="citations">
Expand Down
45 changes: 31 additions & 14 deletions tools/ms2deepscore/ms2deepscore_config_generator.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="ms2deepscore_config_generator" name="MS2DeepScore Configuration Generator" version="@TOOL_VERSION@+galaxy0">
<tool id="ms2deepscore_config_generator" name="MS2DeepScore Configuration Generator" version="@TOOL_VERSION@+galaxy0" profile="23.0" license="MIT">
<description>Generates model parameters for MS2DeepScore in JSON format</description>
<macros>
<import>macros.xml</import>
Expand All @@ -13,7 +13,10 @@
<command detect_errors="exit_code"><![CDATA[
python3 ${python_wrapper}
]]></command>
<configfiles>

<expand macro="environment_variables" />

<configfiles>
<configfile name="python_wrapper">
import numpy as np
from typing import Optional
Expand All @@ -38,11 +41,13 @@ params = {
"mz_bin_width": $tensorization_settings.mz_bin_width,
"intensity_scaling": $tensorization_settings.intensity_scaling,
"batch_size": $training_settings.batch_size,
"average_pairs_per_bin": $training_settings.average_pairs_per_bin,
"same_prob_bins": np.array([(0, 0.2), (0.2, 1.0)]),
"random_seed": random_seed,
"fingerprint_type": "$tensorization_settings.fingerprint_type",
"fingerprint_nbits": $tensorization_settings.fingerprint_nbits
"fingerprint_nbits": $tensorization_settings.fingerprint_nbits,
"average_inchikey_sampling_count": $data_generator_settings.average_inchikey_sampling_count,
"max_inchikey_sampling": $data_generator_settings.max_inchikey_sampling_count,
"max_pairs_per_bin": $data_generator_settings.max_pairs_per_bin,
"max_pair_resampling": $data_generator_settings.max_pair_resampling,
}

settings = SettingsMS2Deepscore(**params)
Expand All @@ -55,19 +60,31 @@ settings.save_to_file("$output_file")
</inputs>

<outputs>
<data name="output_file" format="json" label="Model Parameter JSON" />
<data name="output_file" format="json" label="MS2DeepScore Model Configuration" />
</outputs>

<tests>
<test expect_num_outputs="1">
<param name="layers_0|dims" value="20"/>
<param name="layers_1|dims" value="20"/>
<param name="embedding_dim" value="15" />
<param name="ionisation_mode" value="negative" />
<param name="epochs" value="2" />
<param name="batch_size" value="2" />
<param name="average_pairs_per_bin" value="2" />
<param name="random_seed" value="42"/>
<section name="model_structure">
<repeat name="layers">
<param name="dims" value="20" />
</repeat>
<repeat name="layers">
<param name="dims" value="20" />
</repeat>
<param name="embedding_dim" value="15" />
<param name="ionisation_mode" value="negative" />
</section>
<section name="training_settings">
<param name="epochs" value="2" />
<param name="batch_size" value="2" />
<param name="random_seed" value="42"/>
<param name="patience" value="2"/>
</section>
<section name="data_generator_settings">
<param name="average_inchikey_sampling_count" value="0"/>
<param name="max_pair_resampling" value="100"/>
</section>
<output name="output_file" value="Model_Parameter_JSON.json" ftype="json" compare="diff" lines_diff="2"/>
</test>
</tests>
Expand Down
19 changes: 13 additions & 6 deletions tools/ms2deepscore/ms2deepscore_similarity.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="ms2deepscore_similarity" name="MS2DeepScore Similarity" version="@TOOL_VERSION@+galaxy0">
<tool id="ms2deepscore_similarity" name="MS2DeepScore Similarity" version="@TOOL_VERSION@+galaxy0" profile="23.0" license="MIT">
<description>Compute similarity scores using a pre-trained MS2DeepScore model</description>
<macros>
<import>macros.xml</import>
Expand All @@ -14,6 +14,9 @@
<command detect_errors="exit_code"><![CDATA[
python3 ${python_wrapper}
]]></command>

<expand macro="environment_variables"/>

<configfiles>
<configfile name="python_wrapper">
@init_logger@
Expand Down Expand Up @@ -71,16 +74,20 @@ scores.to_json("$similarity_scores")

<tests>
<test expect_num_outputs="1">
<param name="use_scores" value="False"/>
<param name="references" value="inp_filtered_library.msp" ftype="msp"/>
<param name="queries" value="inp_filtered_spectra.msp" ftype="msp"/>
<conditional name="scores">
<param name="use_scores" value="False"/>
<param name="references" value="inp_filtered_library.msp" ftype="msp"/>
<param name="queries" value="inp_filtered_spectra.msp" ftype="msp"/>
</conditional>
<param name="model" value="Trained_model.onnx" ftype="onnx"/>
<param name="model_param" value="Model_Parameter_JSON.json" ftype="json"/>
<output name="similarity_scores" value="msp_json_score_out.json" ftype="json" compare="sim_size" />
</test>
<test expect_num_outputs="1">
<param name="use_scores" value="True"/>
<param name="scores_in" value="ri_match_60.json" ftype="json"/>
<conditional name="scores">
<param name="use_scores" value="True"/>
<param name="scores_in" value="ri_match_60.json" ftype="json"/>
</conditional>
<param name="model" value="Trained_model.onnx" ftype="onnx"/>
<param name="model_param" value="Model_Parameter_JSON.json" ftype="json"/>
<output name="similarity_scores" value="usescore_json_score_out.json" ftype="json" compare="sim_size" />
Expand Down
15 changes: 9 additions & 6 deletions tools/ms2deepscore/ms2deepscore_training.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="ms2deepscore_training" name="MS2DeepScore Model Training" version="@TOOL_VERSION@+galaxy0">
<tool id="ms2deepscore_training" name="MS2DeepScore Model Training" version="@TOOL_VERSION@+galaxy0" profile="23.0" license="MIT">
<description>Train an MS2DeepScore model and export it in ONNX format</description>
<macros>
<import>macros.xml</import>
Expand All @@ -16,6 +16,9 @@
cp $spectra processing/input."$spectra.ext";
python3 ${python_wrapper}
]]></command>

<expand macro="environment_variables"/>

<configfiles>
<configfile name="python_wrapper">
import onnx
Expand Down Expand Up @@ -63,15 +66,15 @@ torch.onnx.export(
</inputs>

<outputs>
<data label="Trained model" name="onnx_trained_model" format="onnx"/>
<data name="onnx_trained_model" format="onnx"/>
</outputs>

<tests>
<test expect_num_outputs="1">
<param name="spectra" value="clean_spectra.mgf" ftype="mgf"/>
<param name="model_param" value="Model_Parameter_JSON.json" ftype="json" />
<param name="validation_split_fraction" value="5"/>
<output name="onnx_trained_model" value="Trained_model.onnx" ftype="onnx" compare="sim_size"/>
<param name="spectra" ftype="msp" location='https://galaxy-umsa.grid.cesnet.cz/api/datasets/fa57798f4c6835f5/display?to_ext=msp'/>
<param name="model_param" value="Model_Parameter_JSON_test.json" ftype="json" />
<param name="validation_split_fraction" value="20"/>
<output name="onnx_trained_model" location='https://galaxy-umsa.grid.cesnet.cz/api/datasets/feaa5809758067ab/display?to_ext=onnx' ftype="onnx" compare="sim_size"/>
</test>
</tests>

Expand Down
49 changes: 49 additions & 0 deletions tools/ms2deepscore/test-data/Galaxy4-[Model Parameter JSON].json
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{
"base_dims": [
2000
],
"embedding_dim": 400,
"ionisation_mode": "both",
"train_binning_layer": false,
"train_binning_layer_group_size": 20,
"train_binning_layer_output_per_group": 2,
"dropout_rate": 0.01,
"learning_rate": 0.00025,
"epochs": 10,
"patience": 5,
"loss_function": "mse",
"weighting_factor": 0,
"model_file_name": "ms2deepscore_model.pt",
"history_plot_file_name": "history.svg",
"time_stamp": "2025_06_12_17_53_28",
"min_mz": 10,
"max_mz": 1000,
"mz_bin_width": 0.1,
"intensity_scaling": 0.5,
"additional_metadata": [],
"batch_size": 64,
"num_turns": 1,
"shuffle": true,
"use_fixed_set": false,
"average_pairs_per_bin": 20,
"max_pairs_per_bin": 100,
"same_prob_bins": [
[
0.0,
0.2
],
[
0.2,
1.0
]
],
"include_diagonal": true,
"random_seed": null,
"fingerprint_type": "daylight",
"fingerprint_nbits": 2048,
"augment_removal_max": 0.2,
"augment_removal_intensity": 0.2,
"augment_intensity": 0.2,
"augment_noise_max": 10,
"augment_noise_intensity": 0.02
}
50 changes: 43 additions & 7 deletions tools/ms2deepscore/test-data/Model_Parameter_JSON.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,19 @@
],
"embedding_dim": 15,
"ionisation_mode": "negative",
"activation_function": "relu",
"train_binning_layer": false,
"train_binning_layer_group_size": 20,
"train_binning_layer_output_per_group": 2,
"dropout_rate": 0.0,
"learning_rate": 0.00025,
"epochs": 2,
"patience": 20,
"patience": 2,
"loss_function": "mse",
"weighting_factor": 0,
"model_file_name": "ms2deepscore_model.pt",
"history_plot_file_name": "history.svg",
"time_stamp": "2024_08_16_07_50_22",
"time_stamp": "2025_06_23_11_16_54",
"min_mz": 10,
"max_mz": 1000,
"mz_bin_width": 0.1,
Expand All @@ -26,20 +27,55 @@
"num_turns": 1,
"shuffle": true,
"use_fixed_set": false,
"average_pairs_per_bin": 2,
"max_pairs_per_bin": 100,
"average_inchikey_sampling_count": 0,
"max_inchikey_sampling": 110,
"max_pairs_per_bin": 200,
"same_prob_bins": [
[
0.0,
0.2
0.8,
0.9
],
[
0.2,
0.7,
0.8
],
[
0.9,
1.0
],
[
0.6,
0.7
],
[
0.5,
0.6
],
[
0.4,
0.5
],
[
0.3,
0.4
],
[
0.2,
0.3
],
[
0.1,
0.2
],
[
-0.01,
0.1
]
],
"include_diagonal": true,
"val_spectra_per_inchikey": 1,
"random_seed": 42,
"max_pair_resampling": 100,
"fingerprint_type": "daylight",
"fingerprint_nbits": 2048,
"augment_removal_max": 0.2,
Expand Down
Loading
Loading