-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.nf
More file actions
143 lines (121 loc) · 4.66 KB
/
main.nf
File metadata and controls
143 lines (121 loc) · 4.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// Use the DSL2 syntax (process / workflow blocks) for this script.
nextflow.enable.dsl = 2
// Required. QuPath project file; the workflow aborts when it is unset or does
// not exist, and it is handed to QuPath through `--project`.
params.project = null
// QuPath launcher to execute. The default is an absolute path to a QuPath
// 0.6.0 installation; override it when running on another system.
params.qupath_bin = "/stornext/System/data/software/rhel/9/base/tools/QuPath/0.6.0/bin/QuPath"
// Groovy script run via `QuPath script`. The workflow tries the value as
// given first and then relative to projectDir.
params.script = "${projectDir}/bin/import_large_geojson.groovy"
// Required. Directory whose files ending in `.geojson` or `.geojson.gz` are
// listed to decide which image stems get processed.
params.geojson_dir = null
// Exported to the Groovy script as CLEAR_EXISTING; its effect is defined by
// that script (presumably removes existing objects before import — confirm).
params.clear_existing = true
// File-name template in which `{stem}` stands for the image stem. Used by the
// workflow to extract stems from file names and exported as FILE_PATTERN.
params.file_pattern = "{stem}.geojson"
// Exported to the Groovy script as RESOLVE_HIERARCHY; its effect is defined by
// that script.
params.resolve_hierarchy = true
// Directory the per-image log files are published to.
params.outdir = "results"
// publishDir mode used when publishing the log files.
params.publish_dir_mode = "copy"
// NOTE(review): not referenced by the process or workflow in this file;
// presumably consumed by configuration or a validation plugin — confirm.
params.validate_params = true
// Runs the QuPath Groovy import script once for a single image stem.
//
// Input (one tuple of plain values, nothing is staged into the work dir):
//   project_path      - QuPath project file passed to `--project`
//   qupath_bin        - QuPath launcher to execute
//   script_path       - Groovy script run through `QuPath script`
//   geojson_dir       - directory holding the GeoJSON files (GEOJSON_DIR)
//   clear_existing    - forwarded to the script as CLEAR_EXISTING
//   file_pattern      - forwarded to the script as FILE_PATTERN
//   resolve_hierarchy - forwarded to the script as RESOLVE_HIERARCHY
//   image_stem        - stem selecting the image (IMAGE_STEM); also the task tag
//
// Output: the combined stdout/stderr of QuPath, captured in
// `qupath_geojson_import_<sanitised stem>.log` and published to params.outdir.
//
// The task fails fast (exit 1) when the project, launcher, script or GeoJSON
// directory is missing, and `pipefail` propagates a QuPath failure through `tee`.
//
// NOTE(review): every task is pointed at the same QuPath project file. Whether
// concurrent runs against one project are safe depends on the Groovy script and
// on QuPath itself — confirm, or limit parallelism (e.g. `maxForks 1`).
process IMPORT_LARGE_GEOJSON {
    tag "${image_stem}"
    label 'process_heavy'
    publishDir "${params.outdir}", mode: params.publish_dir_mode

    input:
    tuple val(project_path), val(qupath_bin), val(script_path), val(geojson_dir), val(clear_existing), val(file_pattern), val(resolve_hierarchy), val(image_stem)

    output:
    path "*.log"

    script:
    // Log file name: keep only characters that are safe in a file name.
    def safeTag = image_stem.replaceAll('[^a-zA-Z0-9_.-]', '_')
    // Render a value as a single-quoted bash word. The stem comes from file
    // names on disk, so characters such as `\$`, backticks, quotes or
    // backslashes must reach the script verbatim instead of being expanded.
    def shq = { value -> "'" + value.toString().replace("'", "'\\''") + "'" }
    """
    set -euo pipefail

    project_path=${shq(project_path)}
    qupath_bin=${shq(qupath_bin)}
    script_path=${shq(script_path)}
    geojson_dir=${shq(geojson_dir)}

    if [[ ! -f "\$project_path" ]]; then
        echo "ERROR: QuPath project not found: \$project_path" >&2
        exit 1
    fi
    if [[ ! -x "\$qupath_bin" ]]; then
        echo "ERROR: QuPath binary is not executable: \$qupath_bin" >&2
        exit 1
    fi
    if [[ ! -f "\$script_path" ]]; then
        echo "ERROR: Groovy script not found: \$script_path" >&2
        exit 1
    fi
    if [[ ! -d "\$geojson_dir" ]]; then
        echo "ERROR: GeoJSON directory not found: \$geojson_dir" >&2
        exit 1
    fi

    # Settings are handed to the Groovy script through the environment.
    export GEOJSON_DIR="\$geojson_dir"
    export CLEAR_EXISTING=${shq(clear_existing)}
    export FILE_PATTERN=${shq(file_pattern)}
    export RESOLVE_HIERARCHY=${shq(resolve_hierarchy)}
    export IMAGE_STEM=${shq(image_stem)}

    "\$qupath_bin" script "\$script_path" --project "\$project_path" \
        2>&1 | tee "qupath_geojson_import_${safeTag}.log"
    """
}
// Entry workflow: validates the parameters, derives one image stem per GeoJSON
// file found in params.geojson_dir and runs IMPORT_LARGE_GEOJSON once per
// unique stem.
//
// Aborts with an explicit error when:
//   - --project or --geojson_dir is missing,
//   - the project file, QuPath binary, GeoJSON directory or Groovy script does
//     not exist,
//   - file_pattern does not contain the `{stem}` placeholder,
//   - the directory holds no .geojson / .geojson.gz files, or none of their
//     names match file_pattern.
workflow {
    if (!params.project) {
        error "Missing required parameter: --project"
    }
    if (!params.geojson_dir) {
        error "Missing required parameter: --geojson_dir"
    }

    def projectFile = file(params.project)
    if (!projectFile.exists()) {
        error "Project file does not exist: ${params.project}"
    }
    def qupathExe = file(params.qupath_bin)
    if (!qupathExe.exists()) {
        error "QuPath binary does not exist: ${params.qupath_bin}"
    }
    def geojsonDirFile = file(params.geojson_dir)
    if (!geojsonDirFile.exists()) {
        error "GeoJSON directory does not exist: ${params.geojson_dir}"
    }

    // The script is looked up as given first, then relative to projectDir.
    def scriptParam = params.script.toString()
    def scriptCandidates = [
        file(scriptParam),
        file("${projectDir}/${scriptParam}")
    ]
    def scriptFile = scriptCandidates.find { candidate -> candidate.exists() }
    if (!scriptFile) {
        error "Groovy script does not exist: ${params.script} (tried: ${scriptCandidates*.toString().join(', ')})"
    }

    // Boolean flags may arrive as strings (e.g. a quoted value in a params
    // file). A plain `as boolean` treats every non-empty string, including
    // "false", as true, so false-looking strings are recognised explicitly;
    // every other value keeps its usual Groovy truthiness.
    def asFlag = { value ->
        value instanceof CharSequence
            ? !(value.toString().trim().toLowerCase() in ['', 'false', '0', 'no', 'n', 'off'])
            : (value as boolean)
    }
    def clearExistingParam = asFlag(params.get('clear_existing', true))
    def filePatternParam = params.get('file_pattern', '{stem}.geojson').toString()
    def resolveHierarchyParam = asFlag(params.get('resolve_hierarchy', true))

    // Build the regex that extracts the image stem from a GeoJSON file name.
    // E.g. "{stem}.geojson" → stem is captured in front of a literal ".geojson".
    // Every literal segment is quoted so that characters such as '+', '(' or
    // '[' in file_pattern are matched verbatim instead of acting as regex syntax.
    def stemToken = '{stem}'
    if (!filePatternParam.contains(stemToken)) {
        error "Parameter --file_pattern must contain the ${stemToken} placeholder: ${filePatternParam}"
    }
    def stemRegex = java.util.regex.Pattern.compile(
        filePatternParam
            .split(java.util.regex.Pattern.quote(stemToken), -1)
            .collect { segment -> java.util.regex.Pattern.quote(segment) }
            .join('(.+)')
    )

    // Scan geojson_dir for GeoJSON files, extract one stem per file,
    // then launch a parallel QuPath process per image.
    // NOTE: Channel.fromPath glob can silently return empty on some NFS/VAST
    // filesystems; File.listFiles() is more reliable in that environment.
    def geojsonFiles = geojsonDirFile.listFiles()
        ?.findAll { f -> f.name.endsWith('.geojson') || f.name.endsWith('.geojson.gz') }
        ?: []
    if (geojsonFiles.isEmpty()) {
        error "No .geojson or .geojson.gz files found in: ${params.geojson_dir}"
    }

    // Try matching against the original file name first (e.g. file_pattern =
    // "{stem}.geojson.gz"), then fall back to the .gz-stripped name (e.g.
    // file_pattern = "{stem}.geojson"). Returns null when neither matches.
    def extractStem = { fileName ->
        def matcher = [fileName, fileName.replaceAll(/\.gz$/, '')]
            .collect { candidate -> stemRegex.matcher(candidate) }
            .find { m -> m.matches() }
        // Compare against null explicitly: Groovy truth on a Matcher calls
        // find(), which would advance past the successful full match.
        matcher == null ? null : matcher.group(1)
    }

    def imageStems = geojsonFiles
        .collect { f -> extractStem(f.name) }
        .findAll { stem -> stem != null }
        .unique()
    if (imageStems.isEmpty()) {
        // Without this check the run would finish "successfully" with no tasks.
        error "No GeoJSON file name in ${params.geojson_dir} matches file_pattern: ${filePatternParam}"
    }

    // Channel.from is deprecated; fromList emits each stem as its own item.
    Channel
        .fromList(imageStems)
        .map { stem ->
            tuple(
                projectFile.toString(),
                qupathExe.toString(),
                scriptFile.toString(),
                geojsonDirFile.toString(),
                clearExistingParam,
                filePatternParam,
                resolveHierarchyParam,
                stem
            )
        }
        | IMPORT_LARGE_GEOJSON
}