Skip to content

Commit 3556b10

Browse files
authored
Ingest: fetch and append pathoplexus global lineage calls #40
2 parents 20237ec + a347e7c commit 3556b10

4 files changed

Lines changed: 81 additions & 4 deletions

File tree

ingest/defaults/config.yaml

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,4 +131,9 @@ curate:
131131

132132
nextclade:
133133
nextclade_dataset_path: '../nextclade/dataset'
134-
nextclade_field: 'clade_membership'
134+
nextclade_field: 'clade_membership'
135+
136+
pathoplexus:
137+
URL: 'https://lapis.pathoplexus.org/west-nile/sample/details'
138+
fields: 'insdcAccessionBase,lineage'
139+
accession_field: 'insdcAccessionBase'

ingest/rules/nextclade.smk

Lines changed: 72 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,55 @@ like to customize the rules:
1313
https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
1414
"""
1515

16+
rule pathoplexus_classify:
17+
"""
18+
Pulls global lineage calls from Pathoplexus API
19+
"""
20+
output:
21+
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
22+
params:
23+
URL=config["pathoplexus"]["URL"],
24+
fields=config["pathoplexus"]["fields"],
25+
accession_field=config["pathoplexus"]["accession_field"],
26+
id_field=config["curate"]["output_id_field"],
27+
shell:
28+
"""
29+
curl "{params.URL}?dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
30+
| uniq \
31+
| csvtk -t rename -f {params.accession_field} -n {params.id_field} \
32+
> {output.pathoplexus_tsv}
33+
"""
34+
35+
rule select_USA_potential_samples:
36+
"""
37+
Select 1A or "unassigned" sequences from the USA
38+
"""
39+
input:
40+
sequences="results/sequences.fasta",
41+
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
42+
output:
43+
potential_1A_samples="data/pathoplexus_results/potential_1A_samples.tsv",
44+
sequences="data/potential_1A_sequences.fasta",
45+
params:
46+
id_field=config["curate"]["output_id_field"],
47+
shell:
48+
"""
49+
tsv-filter -H \
50+
--not-regex 'lineage:1B|[2,3,4,5,6,7,8]' \
51+
{input.pathoplexus_tsv} \
52+
> {output.potential_1A_samples}
53+
54+
augur filter \
55+
--sequences {input.sequences} \
56+
--metadata {output.potential_1A_samples} \
57+
--metadata-id-column {params.id_field} \
58+
--output-sequences {output.sequences}
59+
"""
60+
1661
rule nextclade_classify:
1762
#Classifies sequences into clades using Nextclade
1863
input:
19-
sequences="results/sequences.fasta",
64+
sequences="data/potential_1A_sequences.fasta",
2065
dataset=config["nextclade"]["nextclade_dataset_path"],
2166
output:
2267
nextclade_tsv="data/nextclade_results/nextclade.tsv",
@@ -55,7 +100,7 @@ rule append_nextclade_columns:
55100
metadata="data/raw_metadata.tsv",
56101
nextclade_subtypes="data/nextclade_clades.tsv",
57102
output:
58-
metadata_all="results/metadata.tsv",
103+
metadata_all="data/metadata_nextclade.tsv",
59104
params:
60105
id_field=config["curate"]["output_id_field"],
61106
nextclade_field=config["nextclade"]["nextclade_field"],
@@ -69,3 +114,28 @@ rule append_nextclade_columns:
69114
{input.metadata} \
70115
> {output.metadata_all}
71116
"""
117+
118+
rule append_pathoplexus_columns:
119+
"""
120+
Append the pathoplexus results to the metadata
121+
"""
122+
input:
123+
metadata="data/metadata_nextclade.tsv",
124+
pathoplexus_tsv="data/pathoplexus_results/global_lineages.tsv",
125+
output:
126+
metadata="results/metadata.tsv",
127+
params:
128+
id_field=config["curate"]["output_id_field"],
129+
pathoplexus_field=config["curate"]["output_id_field"],
130+
shell:
131+
r"""
132+
augur merge \
133+
--metadata \
134+
metadata={input.metadata:q} \
135+
pathoplexus={input.pathoplexus_tsv:q} \
136+
--metadata-id-columns \
137+
metadata={params.id_field:q} \
138+
pathoplexus={params.pathoplexus_field:q} \
139+
--output-metadata {output.metadata:q} \
140+
--no-source-columns
141+
"""

phylogenetic/defaults/auspice_config.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
{"key": "state", "title": "State", "type": "categorical"},
88
{"key": "division", "title": "Division", "type": "categorical"},
99
{"key": "location", "title": "Location", "type": "categorical"},
10-
{"key": "lineage", "title": "Strain", "type": "categorical"},
10+
{"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"},
1111
{"key": "clade_membership", "title": "Clade", "type": "categorical"},
1212
{"key": "author", "title": "Authors", "type": "categorical"},
1313
{"key": "host", "title": "Host Species", "type": "categorical"}

phylogenetic/defaults/auspice_config_global.json

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
{"key": "num_date", "title": "Sampling Date", "type": "continuous"},
66
{"key": "region", "title": "Region", "type": "categorical"},
77
{"key": "country", "title": "Country", "type": "categorical"},
8+
{"key": "lineage", "title": "Pathoplexus lineage", "type": "categorical"},
9+
{"key": "clade_membership", "title": "Clade", "type": "categorical"},
810
{"key": "author", "title": "Authors", "type": "categorical"},
911
{"key": "host", "title": "Host Species", "type": "categorical"}
1012
],

0 commit comments

Comments
 (0)