Skip to content

Commit 14a5f18

Browse files
committed
int->long
1 parent 11fb515 commit 14a5f18

9 files changed

Lines changed: 75 additions & 74 deletions

File tree

ChangeLog

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
2019-05-04 Benjamin Jean-Marie Tremblay <benjmtremblay@gmail.com>
22

3+
* converted a lot of int variables to long
34
* faster char to int conversion in klets.cpp
45
* countlets is much faster when providing alphabet (using unordered_map)
56
* countlets version bumped to 1.3

src/countlets.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,15 @@ void usage() {
4949
);
5050
}
5151

52-
unordered_map<string, unsigned int> count_stream(istream &input, vector<string> klets,
52+
unordered_map<string, unsigned long> count_stream(istream &input, vector<string> klets,
5353
unsigned int k) {
5454

5555
char l;
5656

5757
string let;
5858
let.reserve(k + 1);
5959

60-
unordered_map<string, unsigned int> counts;
60+
unordered_map<string, unsigned long> counts;
6161
counts.reserve(klets.size());
6262
for (size_t i = 0; i < klets.size(); ++i) {
6363
counts[klets[i]] = 0;
@@ -156,7 +156,7 @@ int main(int argc, char **argv) {
156156

157157
/* this version loads the entire sequence into memory */
158158

159-
vector<unsigned int> counts;
159+
vector<unsigned long> counts;
160160
vector<char> letters;
161161
size_t seqlen;
162162
char l;
@@ -199,7 +199,7 @@ int main(int argc, char **argv) {
199199

200200
/* this version only keeps k+1 characters in memory */
201201

202-
unordered_map<string, unsigned int> counts;
202+
unordered_map<string, unsigned long> counts;
203203

204204
if (alph.length() < 1) {
205205
cerr << "Error: could not parse -a option" << '\n';

src/countwin.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ void usage() {
4949
);
5050
}
5151

52-
string make_row(string START, string STOP, vector<unsigned int> counts,
52+
string make_row(string START, string STOP, vector<unsigned long> counts,
5353
vector<string> klets, bool nozero) {
5454

5555
string out;
@@ -63,11 +63,11 @@ string make_row(string START, string STOP, vector<unsigned int> counts,
6363

6464
}
6565

66-
string extract_window(istream &input, unsigned int window) {
66+
string extract_window(istream &input, unsigned long window) {
6767

6868
string out;
6969
char l;
70-
unsigned int counter{0};
70+
unsigned long counter{0};
7171
out.reserve(window);
7272

7373
while (input >> l) {
@@ -84,9 +84,9 @@ int main(int argc, char **argv) {
8484

8585
int ku{1};
8686
unsigned int k;
87-
unsigned int START{1};
87+
unsigned long START{1};
8888
int opt;
89-
unsigned int window, STOP, step;
89+
unsigned long window, step, STOP;
9090
size_t alphlen;
9191
string alph, seq;
9292
ifstream infile;
@@ -95,7 +95,7 @@ int main(int argc, char **argv) {
9595
vector<string> klets;
9696
set<unsigned int> lets_set;
9797
vector<char> lets_uniq;
98-
vector<unsigned int> counts;
98+
vector<unsigned long> counts;
9999

100100
while ((opt = getopt(argc, argv, "i:o:a:k:w:s:nh")) != -1) {
101101
switch (opt) {
@@ -129,13 +129,13 @@ int main(int argc, char **argv) {
129129
break;
130130

131131
case 'w': if (optarg) {
132-
window = atoi(optarg);
132+
window = atol(optarg);
133133
has_win = true;
134134
}
135135
break;
136136

137137
case 's': if (optarg) {
138-
step = atoi(optarg);
138+
step = atol(optarg);
139139
has_step = true;
140140
}
141141
break;

src/klets.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ using Clock = chrono::high_resolution_clock;
3535
vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {
3636

3737
size_t alphlen = lets_uniq.size();
38-
unsigned int nlets = pow(alphlen, k);
39-
unsigned int let_i, counter, step;
38+
unsigned long nlets = pow(alphlen, k);
39+
unsigned long let_i, counter, step;
4040
vector<string> klets(nlets, "");
4141

4242
/* perhaps a bit primitive, but it works */
@@ -49,7 +49,7 @@ vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {
4949

5050
while (counter < nlets) {
5151

52-
for (unsigned int j = 0; j < step; ++j) {
52+
for (unsigned long j = 0; j < step; ++j) {
5353
klets[counter] += lets_uniq[let_i];
5454
++counter;
5555
}
@@ -67,7 +67,7 @@ vector<string> make_klets(vector<char> lets_uniq, unsigned int k) {
6767

6868
}
6969

70-
vector<unsigned int> count_klets(vector<char> letters, vector<char> lets_uniq,
70+
vector<unsigned long> count_klets(vector<char> letters, vector<char> lets_uniq,
7171
unsigned int k, size_t alphlen) {
7272

7373
/* Scales very well with increasing k, but requires having the entire
@@ -80,9 +80,9 @@ vector<unsigned int> count_klets(vector<char> letters, vector<char> lets_uniq,
8080
#endif
8181

8282
size_t seqlen = letters.size();
83-
unsigned int nlets = pow(alphlen, k);
84-
unsigned int l, counter;
85-
vector<unsigned int> let_counts(nlets, 0);
83+
unsigned long nlets = pow(alphlen, k);
84+
unsigned long l, counter;
85+
vector<unsigned long> let_counts(nlets, 0);
8686
vector<unsigned int> intletters;
8787
intletters.reserve(seqlen);
8888
unordered_map<char, unsigned int> let2int;

src/klets.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626

2727
std::vector<std::string> make_klets(std::vector<char> lets_uniq, unsigned int k);
2828

29-
std::vector<unsigned int> count_klets(std::vector<char> letters,
29+
std::vector<unsigned long> count_klets(std::vector<char> letters,
3030
std::vector<char> lets_uniq, unsigned int k, size_t alphlen);
3131

3232
#endif

src/shuffle_euler.cpp

Lines changed: 35 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -35,19 +35,19 @@ using namespace std;
3535
using Clock = chrono::high_resolution_clock;
3636
#endif
3737

38-
vector<vector<unsigned int>> make_edgelist(vector<unsigned int> let_counts,
39-
unsigned int nletsm1, size_t alphlen) {
38+
vector<vector<unsigned long>> make_edgelist(vector<unsigned long> let_counts,
39+
unsigned long nletsm1, size_t alphlen) {
4040

4141
/* 1D vector<int> --> 2D vector<vector<int>>
4242
* The first layer elements are vertices, second layer are the edges.
4343
*/
4444

4545
/* TODO: find a cheaper alternative */
4646

47-
vector<vector<unsigned int>> edgelist(nletsm1, vector<unsigned int>(alphlen));
48-
unsigned int counter{0};
47+
vector<vector<unsigned long>> edgelist(nletsm1, vector<unsigned long>(alphlen));
48+
unsigned long counter{0};
4949

50-
for (unsigned int i = 0; i < nletsm1; ++i) {
50+
for (unsigned long i = 0; i < nletsm1; ++i) {
5151

5252
for (size_t j = 0; j < alphlen; ++j) {
5353
edgelist[i][j] = let_counts[counter];
@@ -60,16 +60,16 @@ vector<vector<unsigned int>> make_edgelist(vector<unsigned int> let_counts,
6060

6161
}
6262

63-
vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned int lasti,
64-
unsigned int nletsm1, default_random_engine gen, size_t alphlen, unsigned int k,
65-
vector<bool> empty_vertices, bool verbose) {
63+
vector<unsigned long> find_euler(vector<vector<unsigned long>> edgelist,
64+
unsigned long lasti, unsigned long nletsm1, default_random_engine gen,
65+
size_t alphlen, unsigned int k, vector<bool> empty_vertices, bool verbose) {
6666

67-
unsigned int u;
68-
unsigned int nletsm2 = pow(alphlen, k - 2);
69-
unsigned int good_v{0}, counter{0};
67+
unsigned long u;
68+
unsigned long nletsm2 = pow(alphlen, k - 2);
69+
unsigned long good_v{0}, counter{0};
7070
vector<bool> vertices(nletsm1, false);
71-
vector<unsigned int> last_letsi(nletsm1, 0);
72-
vector<unsigned int> next_let_i;
71+
vector<unsigned long> last_letsi(nletsm1, 0);
72+
vector<unsigned long> next_let_i;
7373
next_let_i.reserve(nletsm1);
7474

7575
/* The idea is to go through and make sure that every last letter for each
@@ -83,26 +83,26 @@ vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned
8383
vertices[lasti] = true; /* tree root */
8484

8585
/* I don't think there's a formula for this, so just prepare these beforehand */
86-
for (unsigned int i = 0; i < nletsm1; ++i) {
86+
for (unsigned long i = 0; i < nletsm1; ++i) {
8787
next_let_i.push_back(counter * alphlen);
8888
if (counter == nletsm2 - 1) counter = 0;
8989
else ++counter;
9090
}
9191

92-
for (unsigned int i = 0; i < nletsm1; ++i) {
92+
for (unsigned long i = 0; i < nletsm1; ++i) {
9393
if (empty_vertices[i]) vertices[i] = true; /* ignore unconnected vertices */
9494
else ++good_v;
9595
}
9696

9797
if (verbose) cerr << " Total vertices to travel: " << good_v << endl;
9898

99-
for (unsigned int i = 0; i < nletsm1; ++i) {
99+
for (unsigned long i = 0; i < nletsm1; ++i) {
100100

101101
u = i;
102102

103103
while (!vertices[u]) {
104104
/* pick a random possible edge from the vertex */
105-
discrete_distribution<unsigned int> next_let(edgelist[u].begin(), edgelist[u].end());
105+
discrete_distribution<unsigned long> next_let(edgelist[u].begin(), edgelist[u].end());
106106
last_letsi[u] = next_let(gen);
107107
/* now follow the edge to the next vertex */
108108
if (k == 2)
@@ -130,20 +130,20 @@ vector<unsigned int> find_euler(vector<vector<unsigned int>> edgelist, unsigned
130130

131131
}
132132

133-
vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist,
134-
vector<unsigned int> last_letsi, unsigned int nletsm1, size_t alphlen,
135-
unsigned int lasti, default_random_engine gen, vector<bool> empty_vertices) {
133+
vector<vector<unsigned long>> fill_vertices(vector<vector<unsigned long>> edgelist,
134+
vector<unsigned long> last_letsi, unsigned long nletsm1, size_t alphlen,
135+
unsigned long lasti, default_random_engine gen, vector<bool> empty_vertices) {
136136

137137
/* The incoming edgelist is just a set of counts for each letter. This
138138
* will actually create vectors of letter indices based on counts.
139139
*/
140140

141141
/* TODO: find a cheaper alternative */
142142

143-
vector<vector<unsigned int>> edgelist2(nletsm1);
144-
unsigned int b;
143+
vector<vector<unsigned long>> edgelist2(nletsm1);
144+
unsigned long b;
145145

146-
for (unsigned int i = 0; i < nletsm1; ++i) {
146+
for (unsigned long i = 0; i < nletsm1; ++i) {
147147

148148
if (empty_vertices[i]) continue;
149149

@@ -152,7 +152,7 @@ vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist
152152
for (size_t j = 0; j < alphlen; ++j) {
153153

154154
b = edgelist[i][j];
155-
for (unsigned int h = 0; h < b; ++h) {
155+
for (unsigned long h = 0; h < b; ++h) {
156156
edgelist2[i].push_back(j);
157157
}
158158

@@ -169,15 +169,15 @@ vector<vector<unsigned int>> fill_vertices(vector<vector<unsigned int>> edgelist
169169

170170
}
171171

172-
vector<unsigned int> walk_euler(vector<vector<unsigned int>> edgelist,
172+
vector<unsigned long> walk_euler(vector<vector<unsigned long>> edgelist,
173173
size_t seqlen, vector<char> lets_uniq, string firstl) {
174174

175175
size_t alphlen = lets_uniq.size();
176176
size_t nletsm1 = edgelist.size();
177-
unsigned int current{0};
177+
unsigned long current{0};
178178
size_t n = firstl.length();
179-
vector<unsigned int> edgelist_counter(nletsm1, 0);
180-
vector<unsigned int> out_i;
179+
vector<unsigned long> edgelist_counter(nletsm1, 0);
180+
vector<unsigned long> out_i;
181181
out_i.reserve(seqlen);
182182

183183
/* initialize shuffled sequence with starting vertex */
@@ -219,14 +219,14 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
219219
#endif
220220

221221
size_t seqlen = letters.size();
222-
unsigned int nlets, nletsm1;
222+
unsigned long nlets, nletsm1;
223223
size_t alphlen;
224-
unsigned int lasti{0};
225-
vector<unsigned int> last_letsi, out_i;
226-
vector<unsigned int> let_counts;
224+
unsigned long lasti{0};
225+
vector<unsigned long> last_letsi, out_i;
226+
vector<unsigned long> let_counts;
227227
vector<char> lets_uniq;
228228
set<unsigned int> lets_set;
229-
vector<vector<unsigned int>> edgelist;
229+
vector<vector<unsigned long>> edgelist;
230230
string firstl, out;
231231

232232
/* the first and last letters remain unchanged; these are special vertices
@@ -275,7 +275,7 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
275275
*/
276276
vector<bool> empty_vertices;
277277
empty_vertices.reserve(nletsm1);
278-
for (unsigned int i = 0; i < nletsm1; ++i) {
278+
for (unsigned long i = 0; i < nletsm1; ++i) {
279279
empty_vertices.push_back(true);
280280
for (size_t j = 0; j < alphlen; ++j) {
281281
if (edgelist[i][j] > 0) {
@@ -303,7 +303,7 @@ string shuffle_euler(vector<char> letters, default_random_engine gen, unsigned i
303303
#endif
304304

305305
/* delete last edges from edge pool */
306-
vector<vector<unsigned int>> edgelist2;
306+
vector<vector<unsigned long>> edgelist2;
307307
for (size_t i = 0; i < last_letsi.size(); ++i) {
308308
if (i != lasti) --edgelist[i][last_letsi[i]];
309309
}

src/shuffle_linear.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,
2929

3030
/* variables */
3131

32-
unsigned int seqlen1 = letters.size();
33-
unsigned int seqlen2{seqlen1 / k};
34-
unsigned int seqrem{seqlen1 % k};
35-
unsigned int seqremlen{seqlen1 - seqrem};
32+
unsigned long seqlen1 = letters.size();
33+
unsigned long seqlen2{seqlen1 / k};
34+
unsigned long seqrem{seqlen1 % k};
35+
unsigned long seqremlen{seqlen1 - seqrem};
3636

3737
if (verbose) {
3838
cerr << " Times split: " << seqlen2 << endl;
@@ -42,20 +42,20 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,
4242
string out;
4343
out.reserve(seqlen1);
4444

45-
vector<unsigned int> seqindex;
45+
vector<unsigned long> seqindex;
4646
seqindex.reserve(seqlen2);
4747

4848
/* shuffle index */
4949

50-
for (unsigned int i = 0; i < seqlen2; ++i) {
50+
for (unsigned long i = 0; i < seqlen2; ++i) {
5151
seqindex.push_back(i * k);
5252
}
5353

5454
shuffle(seqindex.begin(), seqindex.end(), gen);
5555

5656
/* build output string from shuffled index */
5757

58-
for (unsigned int i = 0; i < seqlen2; ++i) {
58+
for (unsigned long i = 0; i < seqlen2; ++i) {
5959
for (unsigned int j = 0; j < k; ++j) {
6060
out += letters[seqindex[i] + j];
6161
}
@@ -64,7 +64,7 @@ string shuffle_linear(vector<char> letters, default_random_engine gen,
6464
/* add leftover letters */
6565

6666
if (seqrem > 0) {
67-
for (unsigned int i = seqremlen; i < seqlen1; ++i) {
67+
for (unsigned long i = seqremlen; i < seqlen1; ++i) {
6868
out += letters[i];
6969
}
7070
}

0 commit comments

Comments
 (0)