Skip to content

Commit f03664b

Browse files
Update smith_waterman.cpp
1 parent 4b9df49 commit f03664b

1 file changed

Lines changed: 29 additions & 23 deletions

File tree

dynamic_programming/smith_waterman.cpp

Lines changed: 29 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -121,19 +121,19 @@ smith_waterman(const std::string &query, const std::string &subject,
121121
int match_mismatch = score[i - 1][j - 1] +
122122
score_function(query[i - 1], subject[j - 1],
123123
match_score, mismatch_score);
124-
int delete_gap = score[i - 1][j] + gap_score;
125-
int insert_gap = score[i][j - 1] + gap_score;
124+
int gap_subject = score[i - 1][j] + gap_score;
125+
int gap_query = score[i][j - 1] + gap_score;
126126

127127
// Take maximum of all options, including 0 (local alignment)
128-
int max_score = std::max({match_mismatch, delete_gap, insert_gap, 0});
128+
int max_score = std::max({match_mismatch, gap_subject, gap_query, 0});
129129
score[i][j] = max_score;
130130

131131
// Store direction for traceback
132132
if (max_score == 0) {
133133
direction[i][j] = NONE;
134134
} else if (max_score == match_mismatch) {
135135
direction[i][j] = DIAGONAL;
136-
} else if (max_score == delete_gap) {
136+
} else if (max_score == gap_subject) {
137137
direction[i][j] = UP;
138138
} else {
139139
direction[i][j] = LEFT;
@@ -184,8 +184,9 @@ traceback(const std::vector<std::vector<int>> &score,
184184
}
185185

186186
// Traceback from maximum score position
187-
std::string align1;
188-
std::string align2;
187+
// Build alignment strings efficiently using vectors
188+
std::vector<char> align1_vec;
189+
std::vector<char> align2_vec;
189190
size_t i = i_max;
190191
size_t j = j_max;
191192

@@ -194,29 +195,34 @@ traceback(const std::vector<std::vector<int>> &score,
194195
switch (direction[i][j]) {
195196
case DIAGONAL:
196197
// Match or mismatch
197-
align1 = query[i - 1] + align1;
198-
align2 = subject[j - 1] + align2;
198+
align1_vec.push_back(query[i - 1]);
199+
align2_vec.push_back(subject[j - 1]);
199200
--i;
200201
--j;
201202
break;
202203
case UP:
203204
// Gap in subject
204-
align1 = query[i - 1] + align1;
205-
align2 = '-' + align2;
205+
align1_vec.push_back(query[i - 1]);
206+
align2_vec.push_back('-');
206207
--i;
207208
break;
208209
case LEFT:
209210
// Gap in query
210-
align1 = '-' + align1;
211-
align2 = subject[j - 1] + align2;
211+
align1_vec.push_back('-');
212+
align2_vec.push_back(subject[j - 1]);
212213
--j;
213214
break;
214215
default:
215216
break;
216217
}
217218
}
218219

219-
return {align1, align2};
220+
// Reverse vectors and construct strings
221+
std::reverse(align1_vec.begin(), align1_vec.end());
222+
std::reverse(align2_vec.begin(), align2_vec.end());
223+
224+
return {std::string(align1_vec.begin(), align1_vec.end()),
225+
std::string(align2_vec.begin(), align2_vec.end())};
220226
}
221227

222228
} // namespace smith_waterman
@@ -244,12 +250,12 @@ static void test() {
244250
assert(!result2.second.empty());
245251
std::cout << "Test 2 passed: Partial match\n";
246252

247-
// Test 3: Traceback verification
248-
auto [score3, dir3] = smith_waterman("AGCT", "AGCT");
249-
auto result3 = traceback(score3, dir3, "AGCT", "AGCT");
250-
assert(result3.first == "AGCT");
251-
assert(result3.second == "AGCT");
252-
std::cout << "Test 3 passed: Traceback verification\n";
253+
// Test 3: Different sequences with common subsequence
254+
auto [score3, dir3] = smith_waterman("ACACACTA", "AGCACACA");
255+
auto result3 = traceback(score3, dir3, "ACACACTA", "AGCACACA");
256+
assert(!result3.first.empty());
257+
assert(!result3.second.empty());
258+
std::cout << "Test 3 passed: Common subsequence alignment\n";
253259

254260
// Test 4: No match scenario
255261
auto [score4, dir4] = smith_waterman("AAAA", "TTTT");
@@ -265,13 +271,13 @@ static void test() {
265271
assert(result5.second.empty());
266272
std::cout << "Test 5 passed: Empty string handling\n";
267273

268-
// Test 6: Sequences with gaps
269-
auto [score6, dir6] = smith_waterman("AGCT", "AGT");
270-
auto result6 = traceback(score6, dir6, "AGCT", "AGT");
274+
// Test 6: Longer sequences with multiple gaps
275+
auto [score6, dir6] = smith_waterman("GCATGCT", "GATTACA");
276+
auto result6 = traceback(score6, dir6, "GCATGCT", "GATTACA");
271277
assert(!result6.first.empty());
272278
assert(!result6.second.empty());
273279
assert(result6.first.length() == result6.second.length());
274-
std::cout << "Test 6 passed: Sequences with gaps\n";
280+
std::cout << "Test 6 passed: Longer sequences with gaps\n";
275281

276282
// Test 7: Custom scoring parameters
277283
auto [score7, dir7] = smith_waterman("AGCT", "AGCT", 3, -2, -2);

0 commit comments

Comments (0)