Skip to content

Commit 172b8bd

Browse files
committed
countfa v1.0, changed output of shuffler -f
1 parent 484f9c6 commit 172b8bd

5 files changed

Lines changed: 167 additions & 5 deletions

File tree

ChangeLog

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
2019-04-26 Benjamin Jean-Marie Tremblay <benjmtremblay@gmail.com>
2+
3+
* Added countfa v1.0
4+
* shuffler -f now inserts newlines every 80 characters
5+
16
2019-04-25 Benjamin Jean-Marie Tremblay <benjmtremblay@gmail.com>
27

38
* Added seqgen v1.0

Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
SRC := countlets.cpp klets.cpp shuffler.cpp shuffle_euler.cpp \
2-
shuffle_linear.cpp shuffle_markov.cpp seqgen.cpp
2+
shuffle_linear.cpp shuffle_markov.cpp seqgen.cpp countfa.cpp
33
OBJ_COUNTLETS := countlets.o klets.o
44
OBJ_SHUFFLER := shuffler.o klets.o shuffle_euler.o shuffle_linear.o \
55
shuffle_markov.o
66
OBJ_SEQGEN := seqgen.o
7+
OBJ_COUNTFA := countfa.o
78
CC := g++
89

910
all: build install
@@ -12,6 +13,10 @@ build:
1213
cd src;\
1314
$(CC) --std=c++11 -O2 -Wall -c $(SRC)
1415

16+
countfa:
17+
cd src;\
18+
$(CC) $(OBJ_COUNTFA) -o ../bin/countfa
19+
1520
countlets:
1621
cd src;\
1722
$(CC) $(OBJ_COUNTLETS) -o ../bin/countlets
@@ -27,7 +32,7 @@ seqgen:
2732
makebin:
2833
mkdir -p bin
2934

30-
install: makebin countlets shuffler seqgen
35+
install: makebin countfa countlets shuffler seqgen
3136

3237
clean:
3338
cd src;\

README

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,30 @@ Installation
1414

1515
The following binaries are created:
1616

17+
bin/countfa
1718
bin/countlets
1819
bin/seqgen
1920
bin/shuffler
2021

2122
Run these without any arguments or with the -h flag to see usage.
2223

2324

25+
countfa
26+
-------
27+
28+
Counts the number of characters per sequence in a fasta file. For each sequence,
29+
the name followed by the character count is printed to stdout.
30+
31+
Example usage:
32+
33+
printf ">1\nACAAG\n>2\nGCCCGGTTAT\n" | bin/countfa
34+
35+
>1
36+
5
37+
>2
38+
10
39+
40+
2441
countlets
2542
---------
2643

src/countfa.cpp

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
/*
2+
* Copyright (C) 2019 Benjamin Jean-Marie Tremblay
3+
*
4+
* This file is part of sequenceshuffler.
5+
*
6+
* sequenceshuffler is free software: you can redistribute it and/or modify
7+
* it under the terms of the GNU General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* sequenceshuffler is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License for more details.
15+
*
16+
* You should have received a copy of the GNU General Public License
17+
* along with sequenceshuffler. If not, see <https://www.gnu.org/licenses/>.
18+
*
19+
*/
20+
21+
#include <iostream>
22+
#include <fstream>
23+
#include <unistd.h>
24+
#include <string>
25+
using namespace std;
26+
27+
/*
 * Print the countfa help text to stdout.
 *
 * The text lists the two supported invocation styles (a file given with -i,
 * or fasta data piped on stdin) and the available flags.
 */
void usage() {
  printf(
    "countfa v1.0 Copyright (C) 2019 Benjamin Jean-Marie Tremblay \n"
    " \n"
    " Usage: countfa -i [filename] \n"
    " cat [filename] | countfa \n"
    " \n"
    " -i <str> Input filename. File must be fasta-formatted. Alternatively, takes \n"
    " input from a pipe. \n"
    " -h Print usage and exit. \n"
  );
}
39+
40+
/*
 * Print the name and character count of every record in a fasta stream.
 *
 * For each record the header line (the line starting with '>') is written to
 * stdout, followed by a line holding the number of sequence characters that
 * belong to it. Spaces inside sequence lines are not counted.
 *
 * Parsing rules:
 *   - A header line or an empty line ends the current record and prints it.
 *   - Lines seen while no record is open (before the first header, or after
 *     an empty line) are ignored.
 *   - A sequence line made up only of spaces discards the current record
 *     without printing it.
 *   - A trailing '\r' (CRLF line endings) is stripped before the line is
 *     interpreted, so it is neither counted nor printed.
 *
 * input: stream to read fasta-formatted text from.
 */
void do_countfa(std::istream &input) {

  std::string name, line;
  std::string::size_type counter{0};

  // Test the stream itself rather than .good(): reading a final line that has
  // no trailing newline sets eofbit, and .good() would then drop that line.
  while (std::getline(input, line)) {

    if (!line.empty() && line.back() == '\r') {
      line.pop_back();
    }

    if (line.empty() || line[0] == '>') {

      // Close the current record. The count is always printed, even when it
      // is zero, so the output stays strictly paired as name / count.
      if (!name.empty()) {
        std::cout << name << '\n' << counter << '\n';
      }
      // An empty line leaves no record open; a header opens the next one.
      name = line;
      counter = 0;

    } else if (!name.empty()) {

      std::string::size_type residues{0};
      for (const char c : line) {
        if (c != ' ') {
          ++residues;
        }
      }

      if (residues == 0) {
        // Spaces-only line: drop the record that was being read.
        name.clear();
        counter = 0;
      } else {
        counter += residues;
      }

    }

  }

  // Flush the record that was still open when the input ended.
  if (!name.empty()) {
    std::cout << name << '\n' << counter << '\n';
  }

}
84+
85+
int main(int argc, char **argv) {
86+
87+
int opt;
88+
bool has_file{false};
89+
ifstream seqfile;
90+
91+
while ((opt = getopt(argc, argv, "i:h")) != -1) {
92+
switch (opt) {
93+
case 'i': if (optarg) {
94+
seqfile.open(optarg);
95+
if (seqfile.bad()) {
96+
cerr << "Error: file not found" << endl;
97+
exit(EXIT_FAILURE);
98+
}
99+
has_file = true;
100+
}
101+
break;
102+
case 'h': usage();
103+
return 0;
104+
}
105+
}
106+
107+
if (!has_file) {
108+
if (isatty(STDIN_FILENO)) {
109+
usage();
110+
exit(EXIT_FAILURE);
111+
}
112+
do_countfa(cin);
113+
} else {
114+
do_countfa(seqfile);
115+
}
116+
117+
return 0;
118+
119+
}

src/shuffler.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ void usage() {
4141
" \n"
4242
" -i <str> Input filename. All white space will be removed. Alternatively, can \n"
4343
" take string input from a pipe. \n"
44-
" -o <str> Output filename. Alternatively, prints to stdout. \n"
44+
" -o <str> Output filename. Alternatively, prints to stdout. For fasta input, a\n"
45+
" newline is inserted every 80 characters. \n"
4546
" -k <int> K-let size. Defaults to 1. \n"
4647
" -s <int> RNG seed number. Defaults to time in seconds. \n"
4748
" -m Use the markov shuffling method. Defaults to euler. \n"
@@ -291,6 +292,7 @@ int main(int argc, char **argv) {
291292
}
292293

293294
for (int i = 0; i < fa_names.size(); ++i) {
295+
294296
vector<char> letters2(fa_seqs[i].begin(), fa_seqs[i].end());
295297
if (k >= letters2.size()) {
296298
cerr << "Error: sequence length must be greater than k" << endl;
@@ -300,9 +302,23 @@ int main(int argc, char **argv) {
300302
outletters = do_shuffle(letters2, k, gen, false, method_i);
301303

302304
if (has_out) {
303-
outfile << fa_names[i] << "\n" << outletters << "\n";
305+
outfile << fa_names[i] << endl;
306+
for (int j = 0; j < outletters.length(); ++j) {
307+
if (j % 80 == 0 && j != 0) {
308+
outfile << endl;
309+
}
310+
outfile << outletters[j];
311+
}
312+
outfile << endl;
304313
} else {
305-
cout << fa_names[i] << "\n" << outletters << "\n";
314+
cout << fa_names[i] << endl;
315+
for (int j = 0; j < outletters.length(); ++j) {
316+
if (j % 80 == 0 && j != 0) {
317+
cout << endl;
318+
}
319+
cout << outletters[j];
320+
}
321+
cout << endl;
306322
}
307323

308324
}

0 commit comments

Comments
 (0)