Skip to content

Commit 1deaceb

Browse files
committed
Add support for nextstrain run
Workflow changes to support running the workflows with the `nextstrain run` command. I specifically chose _not_ to declare compatibility for `nextstrain run` in the `nextstrain-pathogen.yaml` because the phylogenetic and nextclade workflows are incomplete in this guide. Once workflow-specific compatibility is supported,¹ we can declare compatibility for the ingest workflow. ¹ <nextstrain/cli#472>
1 parent 4c8872c commit 1deaceb

5 files changed

Lines changed: 51 additions & 22 deletions

File tree

ingest/Snakefile

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@ and defines its default outputs.
55
# Utility functions shared across all workflows.
66
include: "../shared/vendored/snakemake/config.smk"
77

8-
# The workflow filepaths are written relative to this Snakefile's base directory
9-
workdir: workflow.current_basedir
8+
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
9+
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
10+
11+
# Use custom configuration from analysis directory (i.e. working dir), if any.
12+
if os.path.exists("config.yaml"):
13+
configfile: "config.yaml"
1014

11-
# Use default configuration values. Override with Snakemake's --configfile/--config options.
12-
configfile: "defaults/config.yaml"
1315

1416
# This is the default rule that Snakemake will run when there are no specified targets.
1517
# The default output of the ingest workflow is usually the curated metadata and sequences.
@@ -74,4 +76,10 @@ else:
7476
if "custom_rules" in config:
7577
for rule_file in config["custom_rules"]:
7678

77-
include: rule_file
79+
# Relative custom rule paths in the config are relative to the analysis
80+
# directory (i.e. the current working directory, or workdir, usually
81+
# given by --directory), but the "include" directive treats relative
82+
# paths as relative to the workflow (e.g. workflow.current_basedir).
83+
# Convert to an absolute path based on the analysis/current directory
84+
# to avoid this mismatch of expectations.
85+
include: os.path.join(os.getcwd(), rule_file)

ingest/defaults/config.yaml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,10 @@ ncbi_datasets_fields:
3535

3636
# Config parameters related to the curate pipeline
3737
curate:
38-
# The path to the local geolocation rules within the pathogen repo
39-
# The path should be relative to the ingest directory.
40-
local_geolocation_rules: "defaults/geolocation_rules.tsv"
38+
# The path to the local geolocation rules for this pathogen.
39+
# The path should be relative to the working directory (e.g. --directory).
40+
# If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
41+
local_geolocation_rules: "geolocation_rules.tsv"
4142
# List of field names to change where the key is the original field name and the value is the new field name
4243
# The original field names should match the ncbi_datasets_fields provided above.
4344
# This is the first step in the pipeline, so any references to field names in the configs below should use the new field names
@@ -90,8 +91,9 @@ curate:
9091
# Name to use for the generated abbreviated authors field
9192
abbr_authors_field: "authors"
9293
# Path to the manual annotations file
93-
# The path should be relative to the ingest directory
94-
annotations: "defaults/annotations.tsv"
94+
# The path should be relative to the working directory (e.g. --directory).
95+
# If the path doesn't exist in the working directory, the file in the workflow's defaults/ directory is used instead (if it exists).
96+
annotations: "annotations.tsv"
9597
# The ID field in the metadata to use to merge the manual annotations
9698
annotations_id: "accession"
9799
# The ID field in the metadata to use as the sequence id in the output FASTA file

ingest/rules/curate.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ def format_field_map(field_map: dict[str, str]) -> list[str]:
3232
rule curate:
3333
input:
3434
sequences_ndjson="data/ncbi.ndjson",
35-
geolocation_rules=config["curate"]["local_geolocation_rules"],
36-
annotations=config["curate"]["annotations"],
35+
geolocation_rules=resolve_config_path(config["curate"]["local_geolocation_rules"]),
36+
annotations=resolve_config_path(config["curate"]["annotations"]),
3737
output:
3838
metadata="data/all_metadata.tsv",
3939
sequences="results/sequences.fasta",

nextclade/Snakefile

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
This is the main Nextclade Snakefile that orchestrates the workflow to produce
33
a Nextclade dataset.
44
"""
5-
# The workflow filepaths are written relative to this Snakefile's base directory
6-
workdir: workflow.current_basedir
5+
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
6+
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
77

8-
# Use default configuration values. Override with Snakemake's --configfile/--config options.
9-
configfile: "defaults/config.yaml"
8+
# Use custom configuration from analysis directory (i.e. working dir), if any.
9+
if os.path.exists("config.yaml"):
10+
configfile: "config.yaml"
1011

1112
# This is the default rule that Snakemake will run when there are no specified targets.
1213
# The default output of the Nextclade workflow is usually the produced Nextclade dataset.
@@ -17,6 +18,10 @@ rule all:
1718
# Fill in paths to the final exported Nextclade dataset.
1819

1920

21+
# Snakemake files with generic functions that are shared across pathogens
22+
# Use `resolve_config_path` to resolve file paths for config files
23+
include: "../shared/vendored/snakemake/config.smk"
24+
2025
# These rules are imported in the order that they are expected to run.
2126
# Each Snakefile will have documented inputs and outputs that should be kept as
2227
# consistent interfaces across pathogen repos. This allows us to define typical
@@ -46,4 +51,10 @@ include: "rules/export.smk"
4651
if "custom_rules" in config:
4752
for rule_file in config["custom_rules"]:
4853

49-
include: rule_file
54+
# Relative custom rule paths in the config are relative to the analysis
55+
# directory (i.e. the current working directory, or workdir, usually
56+
# given by --directory), but the "include" directive treats relative
57+
# paths as relative to the workflow (e.g. workflow.current_basedir).
58+
# Convert to an absolute path based on the analysis/current directory
59+
# to avoid this mismatch of expectations.
60+
include: os.path.join(os.getcwd(), rule_file)

phylogenetic/Snakefile

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,12 @@
22
This is the main phylogenetic Snakefile that orchestrates the full phylogenetic
33
workflow and defines its default output(s).
44
"""
5-
# The workflow filepaths are written relative to this Snakefile's base directory
6-
workdir: workflow.current_basedir
5+
# Use default configuration values. Extend with Snakemake's --configfile/--config options.
6+
configfile: os.path.join(workflow.basedir, "defaults/config.yaml")
77

8-
# Use default configuration values. Override with Snakemake's --configfile/--config options.
9-
configfile: "defaults/config.yaml"
8+
# Use custom configuration from analysis directory (i.e. working dir), if any.
9+
if os.path.exists("config.yaml"):
10+
configfile: "config.yaml"
1011

1112

1213
# This is the default rule that Snakemake will run when there are no specified targets.
@@ -21,6 +22,7 @@ rule all:
2122

2223

2324
# Snakemake files with generic functions that are shared across pathogens
25+
# Use `resolve_config_path` to resolve file paths for config files
2426
include: "../shared/vendored/snakemake/config.smk"
2527
include: "../shared/vendored/snakemake/remote_files.smk"
2628

@@ -54,4 +56,10 @@ include: "rules/export.smk"
5456
if "custom_rules" in config:
5557
for rule_file in config["custom_rules"]:
5658

57-
include: rule_file
59+
# Relative custom rule paths in the config are relative to the analysis
60+
# directory (i.e. the current working directory, or workdir, usually
61+
# given by --directory), but the "include" directive treats relative
62+
# paths as relative to the workflow (e.g. workflow.current_basedir).
63+
# Convert to an absolute path based on the analysis/current directory
64+
# to avoid this mismatch of expectations.
65+
include: os.path.join(os.getcwd(), rule_file)

0 commit comments

Comments
 (0)