-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile
More file actions
69 lines (60 loc) · 1.79 KB
/
Copy pathSnakefile
File metadata and controls
69 lines (60 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
"""
1. Download Nextclade dataset
2. Run Nextclade on the dataset reference
3. Move reference and translations to output folder
"""
import json
# Echo the workflow configuration to stdout every time this Snakefile is parsed.
# `config` is not defined in this file; presumably it is the global that
# Snakemake injects (--config / --configfile) -- confirm.
# NOTE(review): looks like a debugging aid; it writes to stdout, which may mix
# with other output Snakemake emits there -- confirm it is still wanted.
print(config)
# Aggregate target: requests the post-processed artefacts directory, i.e. the
# same path that rule `postprocess` declares as its output.
# The sub-folder name is read from config["output_dir"].
rule all:
    input:
        artefacts="artefacts/" + config["output_dir"],
# Fetch the Nextclade dataset named by config["dataset_name"] into ./dataset.
# When config contains "dataset_server", its value is forwarded to Nextclade
# through the --server flag; otherwise no server flag is passed.
rule download_dataset:
    output:
        "dataset/pathogen.json",
        reference="dataset/reference.fasta",
        genome_annotation="dataset/genome_annotation.gff3",
    params:
        dataset_name=config["dataset_name"],
        # Expands to an empty string unless a server is configured, so the
        # last line of the command below simply disappears in that case.
        dataset_server=(
            ""
            if "dataset_server" not in config
            else "--server " + config["dataset_server"]
        ),
    shell:
        """
        nextclade dataset get \
            --name {params.dataset_name} \
            --output-dir dataset \
            {params.dataset_server}
        """
# Run Nextclade using the dataset reference both as the reference and as the
# query sequence, writing translations into ./output, then copy the reference
# FASTA next to them.
rule run_nextclade:
    input:
        reference="dataset/reference.fasta",
        genome_annotation="dataset/genome_annotation.gff3",
    output:
        translations=directory("output"),
        reference="output/reference.fasta",
    params:
        # The literal "{cds}" placeholder is handed to Nextclade unchanged.
        # Presumably Nextclade substitutes the CDS name per output file, and
        # the value is wrapped in a function so Snakemake does not try to
        # resolve "{cds}" as a wildcard -- confirm.
        translation_template=lambda wildcards: "output/{cds}.fasta",
    shell:
        """
        nextclade run \
            --input-ref {input.reference} \
            --input-annotation {input.genome_annotation} \
            --output-translations {params.translation_template} \
            -- {input.reference}
        cp {input.reference} {output.reference}
        """
# Convert every FASTA in ./output (the translations plus the copied reference)
# into a bare sequence file of the same name under artefacts/<output_dir>.
# Each result holds only the sequence: no header line, no line wrapping and no
# trailing newline.
#
# All shell expansions are double-quoted so that paths containing whitespace
# or glob characters are passed through as single arguments instead of being
# word-split by bash.
rule postprocess:
    input:
        translations="output",
    output:
        directory("artefacts/" + config["output_dir"]),
    shell:
        """
        mkdir -p "{output}"
        for f in "{input.translations}"/*.fasta; do
            # Remove the header, use single line sequence, and ensure no trailing newlines
            seqkit seq -sw0 "$f" | tr -d '\n' > "{output}/$(basename "$f")"
        done
        """