Skip to content

Commit 87cb790

Browse files
committed
run: support registered workflow Snakefile and configfile
Allow maintainers to define paths to each workflow's Snakefile and configfile in the nextstrain-pathogen.yaml. Using mpox as an example, this allows the named workflows to use the shared Snakefile with their own custom configfiles, without needing to create a separate Snakefile for each workflow.

```
---
compatibility:
  nextstrain run:
    ingest: ~
    phylogenetic/all-clades:
      snakefile: phylogenetic/Snakefile
      configfile: phylogenetic/defaults/mpxv/config.yaml
    phylogenetic/clade-I:
      snakefile: phylogenetic/Snakefile
      configfile: phylogenetic/defaults/clade-i/config.yaml
    phylogenetic/clade-IIb:
      snakefile: phylogenetic/Snakefile
      configfile: phylogenetic/defaults/hmpxv1/config.yaml
    phylogenetic/lineage-B.1:
      snakefile: phylogenetic/Snakefile
      configfile: phylogenetic/defaults/hmpxv1_big/config.yaml
```
1 parent bb36adf commit 87cb790

2 files changed

Lines changed: 44 additions & 9 deletions

File tree

nextstrain/cli/command/run.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -231,27 +231,42 @@ def run(opts):
231231
if opts.workflow not in pathogen.registered_workflows():
232232
print(f"The {opts.workflow!r} workflow is not registered as a compatible workflow, but trying to run anyways.")
233233

234-
workflow_directory = pathogen.workflow_path(opts.workflow)
234+
workflow_files = pathogen.workflow_files(opts.workflow)
235+
workflow_snakefile = workflow_files["snakefile"]
235236

236-
if not workflow_directory.is_dir() or not (workflow_directory / "Snakefile").is_file():
237+
if not workflow_snakefile.is_file():
237238
raise UserError(f"""
238-
No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"in {str(workflow_directory)!r}" if DEBUGGING else "locally"}.
239+
No {opts.workflow!r} workflow for pathogen {opts.pathogen!r} found {f"(Snakefile {workflow_snakefile!r} does not exist)" if DEBUGGING else "locally"}.
239240
240241
Maybe you need to update to a newer version of the pathogen?
241242
242243
Hint: to update the pathogen, run `nextstrain update {shquote(pathogen.name)}`.
243244
""")
244245

246+
if workflow_configfile := workflow_files["configfile"]:
247+
assert workflow_configfile.is_file(), \
248+
f"Workflow's registered config file {workflow_configfile!r} does not exist."
249+
245250
# The pathogen volume is the pathogen directory (i.e. repo).
246-
# The workflow volume is the workflow directory within the pathogen directory.
247251
# The build volume is the user's analysis directory and will be the working directory.
248-
pathogen_volume, workflow_volume = build.pathogen_volumes(workflow_directory, name = "pathogen")
252+
pathogen_volume, _ = build.pathogen_volumes(pathogen.path, name = "pathogen")
249253
build_volume = NamedVolume("build", opts.analysis_directory)
250254

251255
# for containerized runtimes (e.g. Docker, Singularity, and AWS Batch)
252256
opts.volumes.append(pathogen_volume)
253257
opts.volumes.append(build_volume)
254258

259+
# Resolve paths for workflow files
260+
resolved_pathogen = (
261+
docker.mount_point(pathogen_volume)
262+
if opts.__runner__ in {docker, singularity, aws_batch} else
263+
pathogen_volume.src.resolve(strict = True)
264+
)
265+
resolved_snakefile = resolved_pathogen / workflow_snakefile.relative_to(pathogen.path)
266+
resolved_configfile = None
267+
if workflow_configfile:
268+
resolved_configfile = resolved_pathogen / workflow_configfile.relative_to(pathogen.path)
269+
255270
print(f"Running the {opts.workflow!r} workflow for pathogen {pathogen}")
256271

257272
# Set up Snakemake invocation.
@@ -276,10 +291,10 @@ def run(opts):
276291

277292
# Workdir will be the analysis volume (/nextstrain/build in a
278293
# containerized runtime), so explicitly point to the Snakefile.
279-
"--snakefile=%s/Snakefile" % (
280-
docker.mount_point(workflow_volume)
281-
if opts.__runner__ in {docker, singularity, aws_batch} else
282-
workflow_volume.src.resolve(strict = True)),
294+
"--snakefile=%s" % (resolved_snakefile),
295+
296+
*(["--configfile=%s" % (resolved_configfile)]
297+
if resolved_configfile else []),
283298

284299
# Pass thru appropriate resource options.
285300
#

nextstrain/cli/pathogens.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,26 @@ def workflow_path(self, workflow: str) -> Path:
326326
return self.path / workflow
327327

328328

329+
def workflow_files(self, workflow: str) -> Dict:
    """
    Resolve the Snakefile and config file paths for *workflow*.

    Starts from the conventional defaults (``<workflow path>/Snakefile``
    and no config file) and overrides them with the ``snakefile`` and
    ``configfile`` entries of the workflow's registration, when present.
    Registered paths are joined onto :attr:`.path`.

    Returns a dict with two keys: ``"snakefile"`` (always a path) and
    ``"configfile"`` (a path, or ``None`` when none is registered).
    Neither path is checked for existence here.
    """
    files = {
        "snakefile": self.workflow_path(workflow) / "Snakefile",
        "configfile": None,
    }

    registration = self.registered_workflows().get(workflow)

    # A workflow may be unregistered, registered with a null value (e.g.
    # `ingest: ~`), or registered with some other non-mapping value (e.g.
    # `ingest: true`).  None of those can carry file overrides, so keep the
    # defaults instead of failing on a missing `.get` attribute.
    if not isinstance(registration, dict):
        return files

    if snakefile := registration.get("snakefile"):
        files["snakefile"] = self.path / snakefile

    if configfile := registration.get("configfile"):
        files["configfile"] = self.path / configfile

    return files
347+
348+
329349
def setup(self, dry_run: bool = False, force: bool = False) -> SetupStatus:
330350
"""
331351
Downloads and installs this pathogen version from :attr:`.url`.

0 commit comments

Comments
 (0)