Skip to content

Commit 8fcdbee

Browse files
committed
countfa only loads one char at a time
1 parent 7405600 commit 8fcdbee

2 files changed

Lines changed: 37 additions & 2 deletions

File tree

README

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ countfa
2525
Counts the number of characters per sequence in a fasta file. For each sequence,
2626
the name followed by the character count is written to stdout. The aim is to
2727
count sequence lengths without taking up too much memory. To this end, only one
28-
line is kept in memory at a time. Though this means that if the sequence is not
29-
split up with newlines, the entire thing will be loaded into memory.
28+
character is loaded into memory at a time.
3029

3130
Example usage:
3231

src/countfa.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,41 @@ void usage() {
3939

4040
/**
 * Count the sequence characters of every record in a FASTA stream.
 *
 * The stream is consumed one character at a time, so memory use does not
 * depend on line or sequence length. For each record the header line
 * (including its leading '>') is written to stdout, followed on the next
 * line by the number of non-whitespace characters in its sequence.
 *
 * Edge cases:
 *  - Records with an empty sequence report a count of 0, so every name is
 *    always followed by exactly one count line.
 *  - Sequence text that appears before the first header is reported as a
 *    bare count with no name line.
 *  - A header that ends the input without a newline is still terminated
 *    and reported with a count of 0.
 *  - Carriage returns are dropped from names and, like all other
 *    whitespace, never counted, so CRLF files yield correct lengths.
 *  - A '>' inside a header line is part of the name; a '>' anywhere else
 *    starts a new record.
 *
 * @param input stream to read FASTA text from; read until get() fails.
 */
void do_countfa(std::istream &input) {

    bool at_name{false};      // currently inside a '>' header line
    bool have_record{false};  // at least one header has been seen so far
    // Wide unsigned counter: a plain int overflows on sequences > ~2.1 Gbp.
    unsigned long long counter{0};
    char l;

    // Whitespace that may appear inside sequence data and must not be counted.
    const auto is_blank = [](char c) {
        return c == ' ' || c == '\n' || c == '\r' ||
               c == '\t' || c == '\v' || c == '\f';
    };

    while (input.get(l)) {

        // A '>' outside a header closes the previous record and opens a new one.
        if (l == '>' && !at_name) {
            if (have_record || counter > 0) std::cout << counter << '\n';
            counter = 0;
            have_record = true;
            at_name = true;
        }

        if (at_name) {
            if (l == '\n') {
                // End of the header line: sequence data follows.
                at_name = false;
                std::cout << '\n';
            } else if (l != '\r') {
                // Echo the name verbatim, minus any CR from CRLF line endings.
                std::cout << l;
            }
        } else if (!is_blank(l)) {
            ++counter;
        }

    }

    // Input ended in the middle of a header: terminate the name line.
    if (at_name) std::cout << '\n';

    // Report the final record (or header-less leading sequence data).
    if (have_record || counter > 0) std::cout << counter << '\n';

}
71+
72+
/*
73+
void do_countfa(istream &input) {
74+
75+
// this version is faster, but potentially loads entire sequence in memory
76+
4277
string name, line;
4378
int counter{0};
4479
@@ -81,6 +116,7 @@ void do_countfa(istream &input) {
81116
}
82117
83118
}
119+
*/
84120

85121
int main(int argc, char **argv) {
86122

0 commit comments

Comments
 (0)