-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathnextflow_schema.json
More file actions
289 lines (289 loc) · 13.5 KB
/
nextflow_schema.json
File metadata and controls
289 lines (289 loc) · 13.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://raw.githubusercontent.com/openscpca-nf/main/nextflow_schema.json",
"title": "openscpca-nf pipeline parameters",
"description": "A workflow to run modules from the OpenScPCA project.",
"type": "object",
"$defs": {
"input_and_output_locations": {
"title": "Input and Output Locations",
"type": "object",
"description": "",
"default": "",
"properties": {
"workflow": {
"type": "string",
"default": "default",
"options": ["default", "test", "simulate"],
"description": "Workflow to run, either 'default' for the standard workflow, 'test' for a workflow with parameters set for testing, or 'simulate' to run the workflow on simulated data. Default is 'default'."
},
"release_bucket": {
"type": "string",
"default": "s3://openscpca-data-release",
"format": "directory-path",
"description": "Base URI for input data from OpenScPCA",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"release_prefix": {
"type": "string",
"default": "2025-06-30",
"format": "directory-path",
"description": "Prefix for the specific release used as input"
},
"results_bucket": {
"type": "string",
"default": "s3://openscpca-nf-workflow-results-staging",
"description": "Base URI for results output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"sim_bucket": {
"type": "string",
"default": "s3://openscpca-test-data-release-staging",
"description": "Base URI for simulated data output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"annotations_bucket": {
"type": "string",
"default": "s3://openscpca-celltype-annotations-public-access",
"description": "Base URI for saving cell type annotations output",
"help_text": "Standard configurations will use an S3 bucket, but local paths can also be used."
},
"project": {
"type": "string",
"default": "all",
"description": "Projects that will be used in the workflow run",
"help_text": "Default is to run all projects. To run a single project or specified projects, include them as a list of SCPCP ids, separated by commas, semicolons, or spaces.",
"pattern": "^[aA][lL][lL]$|^(SCPCP\\d{6}([,; ]+|$))+$"
},
"gtf_file": {
"type": "string",
"default": "s3://scpca-nf-references/homo_sapiens/ensembl-104/annotation/Homo_sapiens.GRCh38.104.gtf.gz",
"description": "Path or URI to GTF file corresponding to reference genome build used with scpca-nf"
},
"cytoband_file": {
"type": "string",
"default": "ftp://hgdownload.cse.ucsc.edu/goldenPath/hg38/database/cytoBand.txt.gz",
"description": "Path or URI to cytoband file corresponding to reference genome build used with scpca-nf"
}
}
},
"containers": {
"title": "Containers",
"type": "object",
"description": "Locations for Docker images used by workflow processes",
"default": "",
"properties": {
"pullthrough_registry": {
"type": "string",
"description": "address of docker pull-through registry, if using",
"help_text": "If using a pull-through registry, this is the address that will be used as prefix, i.e., '<account_id>.dkr.ecr.<region>.amazonaws.com'."
},
"python_container": {
"type": "string",
"default": "docker.io/library/python:3.11"
},
"scpcatools_slim_container": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-slim:v0.4.3"
},
"scpcatools_reports_container": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-reports:v0.4.3"
},
"scpcatools_anndata_container": {
"type": "string",
"default": "ghcr.io/alexslemonade/scpcatools-anndata:v0.4.3"
},
"simulate_sce_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/simulate-sce:v0.2.4"
},
"doublet_detection_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/doublet-detection:v0.2.4",
"description": "Docker container for the doublet-detection module to run in"
},
"seurat_conversion_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/seurat-conversion:v0.2.4",
"description": "Docker container for the seurat-conversion module to run in"
},
"consensus_cell_type_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/cell-type-consensus:v0.2.4",
"description": "Docker container for the cell-type-consensus module to run in"
},
"cell_type_ewing_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/cell-type-ewings:v0.2.4",
"description": "Docker container for the cell-type-ewings module to run in"
},
"cell_type_nb_04_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/cell-type-neuroblastoma-04:v0.2.4"
},
"cell_type_scimilarity_container": {
"type": "string",
"default": "public.ecr.aws/openscpca/cell-type-scimilarity:v0.2.4"
}
}
},
"module_specific_parameters": {
"title": "Module-specific parameters",
"type": "object",
"description": "Parameters used by individual modules",
"default": "",
"properties": {
"merge_reuse": {
"type": "boolean",
"description": "Whether to reuse previous merge data output"
},
"merge_max_libraries": {
"type": "integer",
"default": 75,
"description": "Maximum number of libraries to merge into a single object"
},
"merge_hvg": {
"type": "integer",
"default": 2000,
"description": "Number of highly variable genes to use when performing dimensionality reduction on merged objects"
},
"cell_type_blueprint_ref_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/blueprint-mapped-ontologies.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "BlueprintEncodeData cell type reference file"
},
"cell_type_panglao_ref_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "PanglaoDB cell type reference file"
},
"cell_type_consensus_ref_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Consensus cell types reference file"
},
"cell_type_consensus_validation_marker_genes_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/validation-markers.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table of marker genes for validation groups used to validate consensus cell types"
},
"cell_type_consensus_all_marker_genes_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/consensus-markers.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table of marker genes for all consensus cell types"
},
"cell_type_ewings_auc_max_rank": {
"type": "integer",
"default": 425,
"description": "Number of detected genes to use as the threshold for the max AUC rank when running AUCell"
},
"cell_type_ewings_msigdb_list": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-ewings/references/msigdb-gene-sets.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table of MSigDB gene sets"
},
"cell_type_ewings_ews_high_list": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-ewings/references/gene_signatures/aynaud-ews-targets.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table with custom gene set expressed in EWS-FLI1 high tumor cells"
},
"cell_type_ewings_ews_low_list": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-ewings/references/gene_signatures/wrenn-nt5e-genes.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table with custom gene set expressed in EWS-FLI1 low tumor cells"
},
"cell_type_ewings_marker_gene_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-ewings/references/tumor-cell-state-markers.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table with custom gene sets specific to Ewing sarcoma tumor cells"
},
"cell_type_ewings_auc_thresholds_file": {
"type": "string",
"default": "${projectDir}/modules/cell-type-ewings/resources/auc-thresholds.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "Table with AUC thresholds to use for each gene set to define cell states"
},
"cell_type_scimilarity_model": {
"type": "string",
"default": "s3://scpca-references/celltype/scimilarity_references/model_v1.1",
"format": "directory-path",
"description": "URI to SCimilarity model"
},
"cell_type_scimilarity_ontology_ref_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-scimilarity/references/scimilarity-mapped-ontologies.tsv",
"pattern": "\\.tsv$",
"format": "file-path",
"mimetype": "text/tab-separated-values",
"description": "TSV file with SCimilarity annotation labels and associated cell ontology identifiers"
},
"cell_type_nb_04_scanvi_pp_threshold": {
"type": "number",
"default": 0.75,
"description": "Posterior probability threshold for annotating cells with scANVI/scArches"
},
"cell_type_nb_04_label_map_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-neuroblastoma-04/references/nbatlas-label-map.tsv",
"description": "Path or URL to TSV mapping NBAtlas labels across levels of organization"
},
"cell_type_nb_04_validation_group_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-consensus/references/consensus-validation-groups.tsv",
"description": "Path or URL to TSV mapping consensus cell types to broad validation groups"
},
"cell_type_nb_04_ontology_map_file": {
"type": "string",
"default": "https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/refs/tags/v0.2.4/analyses/cell-type-neuroblastoma-04/references/nbatlas-ontology-ids.tsv",
"description": "Path or URL to TSV mapping NBAtlas labels to ontology ids"
},
"cell_type_nb_04_nbatlas_url": {
"type": "string",
"default": "https://data.mendeley.com/public-files/datasets/yhcf6787yp/files/f5969395-5f6e-4c5d-a61a-5894773d0fee/file_downloaded",
"description": "Path or URL to the released NBAtlas object"
}
}
}
},
"allOf": [
{
"$ref": "#/$defs/input_and_output_locations"
},
{
"$ref": "#/$defs/containers"
},
{
"$ref": "#/$defs/module_specific_parameters"
}
]
}