Skip to content

Commit 331bdbc

Browse files
authored
Merge pull request #446 from nextstrain/andv
andv: initial addition
2 parents 41c473c + ca1b713 commit 331bdbc

48 files changed

Lines changed: 16003 additions & 9149 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

data/nextstrain/collection.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,9 @@
9797
"nextstrain/flu/h2n2/na",
9898
"nextstrain/flu/h2n2/mp",
9999
"nextstrain/flu/h2n2/ns",
100-
"nextstrain/wnv/all-lineages"
100+
"nextstrain/wnv/all-lineages",
101+
"nextstrain/orthohantavirus/andv/l",
102+
"nextstrain/orthohantavirus/andv/m",
103+
"nextstrain/orthohantavirus/andv/s"
101104
]
102105
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
## Unreleased
2+
3+
- initial release
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Andes virus segment L dataset
2+
3+
| Key | Value |
4+
| :-- | :-- |
5+
| name | Andes virus segment L Tree |
6+
| authors | [Nextstrain](https://nextstrain.org) |
7+
| reference | NC_003468 |
8+
| workflow | https://github.com/nextstrain/andv/tree/main/nextclade |
9+
| path | `nextstrain/orthohantavirus/andv/l` |
10+
11+
12+
13+
## What are Nextclade datasets?
14+
15+
Read more about Nextclade datasets in the Nextclade documentation: https://docs.nextstrain.org/projects/nextclade/en/stable/user/datasets.html
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
##gff-version 3
2+
#!gff-spec-version 1.21
3+
#!processor NCBI annotwriter
4+
##sequence-region NC_003468.2 1 6562
5+
##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=1980456
6+
NC_003468.2 RefSeq region 1 6562 . + . ID=NC_003468.2:1..6562;Dbxref=taxon:1980456;Name=L;gbkey=Src;genome=genomic;mol_type=genomic RNA;old-name=Andes virus;segment=L;strain=Chile-9717869
7+
NC_003468.2 RefSeq CDS 36 6497 . + 0 Name=RdRp;gbkey=CDS;locus_tag=ANDVsLgp1;protein_id=NP_604473.1;product=RNA polymerase;ID=cds-NP_604473.1;Dbxref=GenBank:NP_604473.1,GeneID:991234
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
{
2+
"$schema": "https://raw.githubusercontent.com/nextstrain/nextclade/refs/heads/release/packages/nextclade-schemas/input-pathogen-json.schema.json",
3+
"files": {
4+
"reference": "reference.fasta",
5+
"pathogenJson": "pathogen.json",
6+
"genomeAnnotation": "genome_annotation.gff3",
7+
"treeJson": "tree.json",
8+
"examples": "sequences.fasta",
9+
"readme": "README.md",
10+
"changelog": "CHANGELOG.md"
11+
},
12+
"schemaVersion": "3.0.0",
13+
"defaultCds": "RdRp",
14+
"attributes": {
15+
"name": "Andesvirus (segment L)",
16+
"reference name": "Chile-9717869",
17+
"reference accession": "NC_003468"
18+
},
19+
"experimental": true,
20+
"alignmentParams": {
21+
"penaltyGapExtend": 1,
22+
"penaltyGapOpen": 8,
23+
"penaltyGapOpenInFrame": 9,
24+
"penaltyGapOpenOutOfFrame": 10,
25+
"penaltyMismatch": 1,
26+
"scoreMatch": 5,
27+
"retryReverseComplement": true,
28+
"allowedMismatches": 10,
29+
"minSeedCover": 0.12,
30+
"minLength": 100
31+
},
32+
"qc": {
33+
"missingData": {
34+
"enabled": true,
35+
"missingDataThreshold": 200,
36+
"scoreBias": 100
37+
},
38+
"mixedSites": {
39+
"enabled": true,
40+
"mixedSitesThreshold": 4
41+
},
42+
"frameShifts": {
43+
"enabled": true
44+
},
45+
"stopCodons": {
46+
"enabled": true
47+
},
48+
"privateMutations": {
49+
"enabled": true,
50+
"cutoff": 150,
51+
"typical": 100,
52+
"weightLabeledSubstitutions": 1,
53+
"weightReversionSubstitutions": 1,
54+
"weightUnlabeledSubstitutions": 1
55+
},
56+
"snpClusters": {
57+
"enabled": true,
58+
"clusterCutOff": 15,
59+
"scoreWeight": 50,
60+
"windowSize": 50
61+
}
62+
}
63+
}
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
>NC_003468.2 Andes virus segment L, complete genome
2+
TAGTAGTAGACTCCGGGATAGAAAAAGTTAGAAAAATGGAAAAGTATAGAGAGATTCATC
3+
AGAGAGTTAGGGACCTTGCACCTGGAACGGTATCAGCATTAGAATGCATAGATCTACTGG
4+
ATAGGCTCTACGCTGTCAGACATGACCTGGTTGACCAGATGATAAAACATGACTGGTCTG
5+
ATAATAAAGATGTAGAAAGACCTATAGGTCAAGTTTTACTGATGGCTGGCATACCTAATG
6+
ATATTATACAAGGCATGGAGAAGAAGATTATACCAAATAGCCCTTCTGGACAAGTATTGA
7+
AAAGCTTTTTCCGAATGACACCAGATAATTATAAAATTACAGGTAACTTGATTGAGTTTA
8+
TTGAAGTGACTGTAACAGCTGATGTGTCACGAGGTATTAGGGAGAAGAAAATAAAGTATG
9+
AAGGAGGCCTCCAATTTGTTGAGCACTTACTGGAAACTGAATCAAGGAAGGGTAATATAC
10+
CGCAACCTTATAAAATAACATTCTCAGTGGTTGCAGTTAAAACAGATGGATCAAACATCT
11+
CGACTCAGTGGCCCAGTCGGAGGAACGATGGGGTAGTTCAGCACATGCGTCTAGTCCAAG
12+
CTGATATAAATTATGTCAGAGAGCATTTAATAAAGTTAGATGAGAGAGCATCTTTGGAGG
13+
CAATGTTTAACTTAAAGTTCCATGTATCAGGCCCTAAACTGAGATACTTTAACATCCCTG
14+
ATTATAGACCACAGCAGCTATGTGAACCACGGATTGACAACTTAATACAATATTGCAAGA
15+
ATTGGTTGACAAAAGAACATAAGTTTGTATTCAAAGAAGTCAGTGGAGCTAATGTGATTC
16+
AAGCATTTGAGAGTCATGAACAGTTACATTTACAGAAATACAACGAATCACGAAAACCAA
17+
GAAATTTTTTACTCTTGCAGCTTACAGTGCAAGGGGCATATCTACCATCAACAATCAGTT
18+
CTGACCAGTGCAATACTAGGATTGGGTGTCTAGAAATATCAAAAAACCAACCAGAAACAC
19+
CAGTACAGATGCTTGCATTGGATATATCTTATAAGTATCTGAGTCTTACAAGGGATGAGT
20+
TGATCAATTATTATAGCCCTAGAGTGCACTTTCAATCGAGCCCTAATGTGAAGGAACCAG
21+
GGACACTGAAGTTAGGATTATCACAATTAAATCCACTCTCTAAATCAATTCTTGACAATG
22+
TTGGAAAGCATAAAAAGGATAAAGGATTATTTGGTGAGATCATAGATAGCATAAATGTGG
23+
CAAGTCAAATACAGATCAATGCATGTGCAAAAATAATTGAGCAGATCTTATCAAATCTTG
24+
AAATAAACATTGGAGAAATAAATGCTAGTATGCCTTCTCCTAATAAGACAACAGGTGTAG
25+
ATGACCTGTTAAATAAATTTTATGATAATGAGCTTGGTAAATATATGTTATCCATTCTGA
26+
GGAAAACAGCAGCATGGCATATAGGCCATCTAGTCAGAGATATCACAGAAAGTTTAATTG
27+
CACATGCTGGGCTGCGCCGTTCTAAATATTGGTCAGTACATGCATATGACCATGGGAATG
28+
TAATTTTGTTTATCTTGCCATCAAAGTCACTAGAGGTAGTAGGTTCTTATATAAGGTATT
29+
TCACAGTATTTAAAGATGGTATAGGGTTGATAGACGCAGATAATATTGATTCTAAGGCCG
30+
AAATTGATGGTGTCACCTGGTGTTATTCTAAGGTCATGAGTATTGATTTAAACAGGTTAT
31+
TGGCTTTGAACATAGCTTTTGAGAAGTCACTTCTTGCCACGGCTACATGGTTCCAATATT
32+
ATACTGAAGACCAAGGCCATTTTCCCCTTCAACATGCATTAAGGTCAATCTTTTCTTTCC
33+
ACTTTTTACTCTGTGTGTCACAAAAGATGAAGCTATGTGCAATATTTGATAACCTTCGTT
34+
ATCTGATACCATCAGTAACATCTTTGTACTCTGGGTACGAGTTGTTAATAGAAAAATTCT
35+
TTGAGAGACCATTTAAGAGTTCACTGGATGTATACCTTTATTCTATCATAAAATCTCTAT
36+
TAATTAGTTTGGCACAAAATAATAAAGTTCGATTTTACTCAAGAGTTCGTTTGTTAGGAT
37+
TGACAGTTGATCACTCCACGGTCGGAGCAAGTGGTGTTTACCCCTCTTTAATGTCCCGTG
38+
TTGTTTACAAACATTACAGAAGTTTAATCTCTGAAGCTACAACTTGTTTTTTCTTATTTG
39+
AAAAGGGTTTGCATGGGAATTTACCAGAAGAGGCTAAAATACATCTTGAAACCATTGAAT
40+
GGGCTCGGAAGTTCCAGGAGAAAGAAAAACAATATGGTGATATTCTTCTAAAGGAAGGCT
41+
ATACAATTGAATCTGTAATCAATGGAGAAGTTGATGTAGAACAACAGCTTTTTTGTCAGG
42+
AGGTCTCAGAGCTAAGTGCACAAGAGCTCAACAAATATTTACAGGCAAAATCTCAAGTTT
43+
TATGTGCTAATATCATGAATAAACACTGGGACAAACCATATTTCAGTCAAACACGCAATA
44+
TCAGTCTCAAGGGAATGTCTGGGGCATTGCAAGAGGATGGACATTTAGCTGCTAGTGTGA
45+
CACTGATTGAAGCAATTAGGTTTTTAAATAGATCACAAACCAATCCAAATGTTATTGATA
46+
TGTATGAGCAGACTAAACAATCAAAGGCACAAGCTAGGATTGTTAGGAAATACCAGAGAA
47+
CAGAAGCAGATAGAGGATTTTTTATCACAACATTACCAACTAGGGTGCGATTAGAAATAA
48+
TAGAAGATTATTTCGATGCAATTGCAAAGGTTGTGCCTGAAGAATATATTTCTTATGGTG
49+
GGGATAAAAAAGTTCTAAATATTCAGAATGCACTAGAGAAAGCACTTAGATGGGCATCTG
50+
GAGTATCAGAAATTACAACAAGCACTGGTAAAAGCATCAAGTTTAAGCGGAAATTAATGT
51+
ATGTTAGTGCTGATGCCACAAAATGGTCACCAGGAGATAATTCTGCTAAGTTTAGGAGAT
52+
TTACACAAGCAATATATGATGGCTTATCAGACAACAAACTGAAATGTTGTGTTGTTGATG
53+
CATTACGTAACATTTATGAGACTGAATTTTTTATGTCCAGGAAATTACACCGATATATTG
54+
ATAGTATGGAAAATCATTCAGATGCGGTTGAAGATTTCTTGGCATTTTTCTCAAATGGAG
55+
TCTCAGCCAATGTAAAGGGAAACTGGCTTCAAGGGAACTTAAATAAATGCTCATCATTAT
56+
TTGGTGCTGCTGTCTCATTACTTTTTCGGGAGGTCTGGAAACAATTGTTTCCAGAATTAG
57+
AGTGTTTTTTTGAATTTGCACATCATTCAGATGATGCATTGTTCATTTATGGCTATCTGG
58+
AGCCTGAAGATGATGGAACAGATTGGTTTTTGTATGTATCACAGCAGATACAGGCAGGAA
59+
ACTTTCATTGGCATGCTATAAATCAAGAGATGTGGAAGAGCATGTTTAATCTACATGAGC
60+
ACTTACTATTAATGGGTTCTATTAAAGTGTCACCTAAGAAGACAACAGTATCACCTACTA
61+
ATGCAGAATTTCTTTCTACTTTTTTTGAAGGTTGTGCTGTGTCAATCCCTTTTGTTAAAA
62+
TCTTACTGGGTTCATTATCAGATCTTCCTGGGTTAGGTTTCTTTGATGATTTAGCAGCAG
63+
CACAAAGCAGATGTGTAAAGTCACTAGATTTGGGTGCTTGCCCACAATTAGCTCAACTAG
64+
CTATAGTATTATGCACAAGCAAAGTTGAGAGGTTGTATGGTACTGCTGATGGAATGGTAA
65+
ACTCTCCAACAGCATTCCTTAAGGTGAATAAAGCACACGTACCAGTACCACTTGGTGGTG
66+
ATGGCTCAATGTCTATTATGGAGCTTGCAACAGCTGGTTTTGGGATGGCAGATAAGAATA
67+
TTTTAAAAAATGCATTCATATCTTATAAGCATACTCGTAGAGATGGTGATAGGTACGTAT
68+
TGGGTTTATTTAAATTTTTGATGTCATTAAGTGAGGATGTATTCCAGCACGACCGATTGG
69+
GTGAGTTTAGTTTTGTAGGTAAAGTTCAATGGAAAGTGTTCACTCCTAAAGCTGAATTTG
70+
AATTTCATGATCAATTTTCACATAATTATTTATTAGAGTGGACACGTCAACATCCTGTGT
71+
ATGACTATATTATTCCTAGAAATAGAGATAATTTGCTTGTATACCTTGTAAGAAAGTTGA
72+
ATGATCCTAGCATCATTACAGCTATGACTATGCAGTCACCATTACAACTTCGTTTCCGTA
73+
TGCAAGCAAAGCAACATATGAAAGTATGCCGGTATGAAGGTGAATGGGTCACATTCAGGG
74+
AGGTACTTGCTGCAGCTGATAGTTTTGCTACGAGTTACCAACCTACTGAAAGGGACATGG
75+
ATCTCTTTAATACACTTGTAAGTTGTACATTTTCTAAAGAGTATGCTTGGAAAGACTTTT
76+
TAAATGAAGTAAGGTGTGAGGTCTTAACAACAAGACATGTACATAGGCCTAAAATTGCTA
77+
GGACATTCACTGTTAGAGAAAAGGACCAGGCTATACAAAATCCAATAAATTCGGTGATTG
78+
GCTATAAGTATGCTCTTACAGTGGATGAAGTCAGTGATGTTCTTGATAGTGCATTCTTCC
79+
CAGAGTCTCTATCTGCAGACTTACAGGTTATGAAAGATGGAGTTTACAGAGAATTAGGAC
80+
TTGATATAAGTTCTCCTGAAGTCCTAAAACGCATAGCACCACTATTATATAAGGCAGGAA
81+
GGTCACGTGTTGTTATTGTGGAAGGAAATGTAGAAGGGACAGCTGAGTCAATCTGTAGTT
82+
ATTGGCTCAAGACAATGTCACTGATTAAAACAATCAGAGTAAGACCTAAGAAGGAGGTAC
83+
TGAAAGCTATGTCTTTATATAGTGTTAAAGAAAATATTGGATTGCAGGATGATATTGCAG
84+
CAACTCGACTATGCATAGAAATCTGGAGATGGTGTAAGGCAAATGAACAGGATGTTAAAG
85+
AATGGCTAACATCTCTGTACTTTGAAAAACAGACATTGATGGATTGGGTAGAAAGGTTTA
86+
GAAGGAAAGGAGTTGTTCCTATTGATCCTGAAATACAATGTATTGGCCTACTCTTATATG
87+
ATGTATTAGGTTATAAAAGTGTGTTACAAATGCAAGCAAACCGAAGAGCCTATTCAGGTA
88+
AGCAATATGATGCATACTGTGTGCAAACATATAACGAGGAAACAAAACTATATGAAGGTG
89+
ACCTTCGTGTTACTTTTAATTTTGGTTTAGATTGTGCAAGGTTAGAAGTTTTTTGGGATA
90+
AAAAAGAGTATATCTTAGAGACATCTATCACCCAACGACATGTGTTGCGGTTACTGATGG
91+
AAGAAGTGTCACAAGAATTAATTAGATGTGGAATGAGATTCAAAACAGAGCAAGTCAATC
92+
AAACTCGGAGCTTAGTGTTATTCAAAACAGAGGCTGGTTTTGAATGGGGTAAGCCTAATG
93+
TGCCATGTATTGTATATAAACACTGTGTCTTGAGAACTGGGCTTCGTACGAAACAGCCAA
94+
TTAATAAAGAGTTCATGATAAATGTACAAAGTGATGGTTTCCGTGCAATAGCACAGATGG
95+
ATATTGAGAGTCCACGGTTCTTGTTAGCACATGCATATCATACACTGCGTGATATTAGAT
96+
ATCAAGCAGTGCAGGCAGTAGGGAATGTATGGTTTAAAACAGAACAGCACAAACTATTTA
97+
TTAACCCAATTATATCATCAGGGCTTTTAGAAAACTTTATGAAAGGCTTACCTGCTGCCA
98+
TACCTCCTGCTGCATATTCCCTCATAATGAACAAGGCTAAGATTTCTGTGGATTTGTTTA
99+
TGTTCAATGAGCTATTAGCACTTATAAATAGGAATAATATCCTCAACCTTGATGGGATTG
100+
AAGAAACATCTGAAGGTTATAGTACTGTGACATCAATGTCTAGCAAGCAGTGGTCTGAAG
101+
AGATGAGTTTAATGTCTGATGATGATATTGATGATATGGAGGACTTTACTATAGCACTGG
102+
ATGATATTGACTTTGAACAAATAAATTTGGAAGAGGATATACAACACTTTCTGCAGGATG
103+
AATCAGCATATGTTGGTGATTTATTGATTCAGACAGAAGACATTGAGGTTAAAAAGATAC
104+
GTGGGGTGACAAGAGTATTAGAGCCAGTCAAGCTATTAAAAAGCTGGGTTTCTAAAGGCC
105+
TTGCTATAGACAAAGTATACAATCCTATCGGGATAATCTTAATGGCAAGATACATGTCAA
106+
AAACATACAATTTCAGTTCAACACCTCTTGCACTATTAAATCCATATGACTTGACAGAAC
107+
TTGAAAGTGTTGTAAAGGGATGGGGAGAAACTGTAAATGATCGATTCAAAGATTTAGATA
108+
TTGAGGCACAAACAGTTGTTAAAGAAAAGGGTGTACAGCCAGAAGATGTACTCCCTGATT
109+
CATTATTCTCTTTCAGGCATGTTGATGTTTTGCTGCGAAGGTTGTTCCCGCGTGACCCTG
110+
TATCAACATTCTATTAGTGGATTTTATACCTTATTCATACAGTATGTATATTGTAGTGTT
111+
CTTTTCCCGGAGCATACTACTA

0 commit comments

Comments
 (0)