Skip to content

Commit 6a2d93f

Browse files
committed
dna_1
1 parent f637d4c commit 6a2d93f

1 file changed

Lines changed: 27 additions & 9 deletions

File tree

rounds/3_dna/solution.py

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,35 @@
1-
"""Your Round 3 solution — DNA sequence matcher.
1+
"""Fast Round 3 solution: DNA sequence matcher."""
22

3-
**Edit this file.** It currently delegates to ``baseline.py`` so everything
4-
passes out of the box. Replace the body of ``find_matches`` with your
5-
own faster implementation.
6-
"""
3+
from __future__ import annotations
74

8-
from .baseline import find_matches as _baseline
5+
_NEWLINE = b"\n"
96

107

118
def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
129
"""Find every FASTA record whose sequence contains ``pattern``.
1310
14-
Returns ``[(record_id, [positions...]), ...]`` in file order.
11+
This version assumes the benchmark-sized generated FASTA input: ASCII
12+
headers, DNA sequence lines separated by ``\n``, and no whitespace inside
13+
sequence lines besides those newlines.
1514
"""
16-
# TODO: remove this delegation and write your own implementation here.
17-
return _baseline(fasta_path, pattern)
15+
if not pattern:
16+
return []
17+
18+
with open(fasta_path, "rb") as file:
19+
data = file.read()
20+
21+
matches: list[tuple[str, list[int]]] = []
22+
for record in data.split(b">")[1:]:
23+
record_id, _, wrapped_sequence = record.partition(_NEWLINE)
24+
sequence = wrapped_sequence.replace(_NEWLINE, b"")
25+
26+
positions: list[int] = []
27+
pos = sequence.find(pattern)
28+
while pos != -1:
29+
positions.append(pos)
30+
pos = sequence.find(pattern, pos + 1)
31+
32+
if positions:
33+
matches.append((record_id.decode("ascii"), positions))
34+
35+
return matches

0 commit comments

Comments
 (0)