@@ -13,10 +13,55 @@ like to customize the rules:
1313https://docs.nextstrain.org/projects/nextclade/page/user/nextclade-cli.html
1414"""
1515
rule pathoplexus_classify:
    """
    Download global lineage calls from the Pathoplexus API.

    Requests the columns listed in config["pathoplexus"]["fields"] as TSV
    from config["pathoplexus"]["URL"], collapses adjacent duplicate lines,
    and renames the Pathoplexus accession column
    (config["pathoplexus"]["accession_field"]) to the workflow's ID column
    (config["curate"]["output_id_field"]).

    Output:
        pathoplexus_tsv: TSV of Pathoplexus records keyed by the workflow's
            ID column.
    """
    output:
        pathoplexus_tsv = "data/pathoplexus_results/global_lineages.tsv",
    params:
        URL = config["pathoplexus"]["URL"],
        # NOTE(review): interpolated verbatim into the query string, so this
        # is presumably a single comma-separated string -- confirm in config.
        fields = config["pathoplexus"]["fields"],
        accession_field = config["pathoplexus"]["accession_field"],
        id_field = config["curate"]["output_id_field"],
    # --fail makes curl exit non-zero on an HTTP error status, so an error
    # response body is never passed downstream as if it were TSV data;
    # --silent/--show-error drop the progress meter but keep error messages;
    # --location follows redirects.
    # `uniq` only removes *adjacent* duplicate lines; non-adjacent duplicates
    # in the API response are kept.
    shell:
        """
        curl --fail --silent --show-error --location \
            "{params.URL}?dataFormat=TSV&downloadAsFile=false&fields={params.fields}" \
        | uniq \
        | csvtk -t rename -f {params.accession_field} -n {params.id_field} \
        > {output.pathoplexus_tsv}
        """
34+
rule select_USA_potential_samples:
    """
    Select sequences that Pathoplexus has not assigned to a non-1A lineage.

    Step 1 drops every row of the Pathoplexus table whose `lineage` value
    matches `1B` or contains a digit 2-8; the rows that remain (e.g. `1A`
    or "unassigned") are written to `potential_1A_samples`.
    Step 2 runs `augur filter` with that table as metadata to write the
    corresponding subset of the input FASTA.

    NOTE(review): the rule name says "USA", but no country filter is applied
    in this rule -- presumably results/sequences.fasta is already restricted
    upstream; confirm.

    Input:
        sequences: FASTA of candidate sequences.
        pathoplexus_tsv: lineage table produced by `pathoplexus_classify`.
    Output:
        potential_1A_samples: filtered lineage table.
        sequences: FASTA restricted to the IDs in `potential_1A_samples`.
    """
    input:
        sequences = "results/sequences.fasta",
        pathoplexus_tsv = "data/pathoplexus_results/global_lineages.tsv",
    output:
        potential_1A_samples = "data/pathoplexus_results/potential_1A_samples.tsv",
        sequences = "data/potential_1A_sequences.fasta",
    params:
        id_field = config["curate"]["output_id_field"],
    # The character class is written as a range, `[2-8]`. The earlier form
    # `[2,3,4,5,6,7,8]` also matched a literal comma, which would have
    # excluded any lineage value that merely contained a comma.
    # The pattern is intentionally left unanchored, as before.
    shell:
        """
        tsv-filter -H \
            --not-regex 'lineage:1B|[2-8]' \
            {input.pathoplexus_tsv} \
        > {output.potential_1A_samples}

        augur filter \
            --sequences {input.sequences} \
            --metadata {output.potential_1A_samples} \
            --metadata-id-column {params.id_field} \
            --output-sequences {output.sequences}
        """
60+
1661rule nextclade_classify :
1762 #Classifies sequences into clades using Nextclade
1863 input :
19- sequences = "results/sequences .fasta" ,
64+ sequences = "data/potential_1A_sequences .fasta" ,
2065 dataset = config ["nextclade" ]["nextclade_dataset_path" ],
2166 output :
2267 nextclade_tsv = "data/nextclade_results/nextclade.tsv" ,
@@ -55,7 +100,7 @@ rule append_nextclade_columns:
55100 metadata = "data/raw_metadata.tsv" ,
56101 nextclade_subtypes = "data/nextclade_clades.tsv" ,
57102 output :
58- metadata_all = "results/metadata .tsv" ,
103+ metadata_all = "data/metadata_nextclade .tsv" ,
59104 params :
60105 id_field = config ["curate" ]["output_id_field" ],
61106 nextclade_field = config ["nextclade" ]["nextclade_field" ],
@@ -69,3 +114,28 @@ rule append_nextclade_columns:
69114 {input.metadata} \
70115 > {output.metadata_all}
71116 """
117+
rule append_pathoplexus_columns:
    """
    Join the Pathoplexus lineage table onto the Nextclade-annotated metadata.

    Both tables are passed to `augur merge` as named sources (`metadata` and
    `pathoplexus`), each keyed by its own ID column, and the merged table is
    written to results/metadata.tsv without per-source indicator columns
    (`--no-source-columns`).
    """
    input:
        metadata = "data/metadata_nextclade.tsv",
        pathoplexus_tsv = "data/pathoplexus_results/global_lineages.tsv",
    output:
        metadata = "results/metadata.tsv",
    params:
        # Both sources are keyed by the same configured column name.
        id_field = config["curate"]["output_id_field"],
        pathoplexus_field = config["curate"]["output_id_field"],
    shell:
        r"""
        augur merge \
            --metadata \
                metadata={input.metadata:q} \
                pathoplexus={input.pathoplexus_tsv:q} \
            --metadata-id-columns \
                metadata={params.id_field:q} \
                pathoplexus={params.pathoplexus_field:q} \
            --output-metadata {output.metadata:q} \
            --no-source-columns
        """
0 commit comments