Skip to content

Commit d51d8d7

Browse files
Round 3
1 parent 03788c6 commit d51d8d7

1 file changed

Lines changed: 38 additions & 5 deletions

File tree

rounds/3_dna/solution.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,46 @@
55
own faster implementation.
66
"""
77

8+
from concurrent.futures import ThreadPoolExecutor
9+
from itertools import repeat
810
from .baseline import find_matches as _baseline
911

1012

13+
def _process_record(record: str, pattern_str: str) -> tuple[str, list[int]] | None:
14+
if not record.strip():
15+
return None
16+
17+
lines = record.split("\n")
18+
record_id = lines[0].strip()
19+
sequence = "".join(lines[1:]).replace(" ", "")
20+
21+
positions: list[int] = []
22+
start = 0
23+
while True:
24+
pos = sequence.find(pattern_str, start)
25+
if pos == -1:
26+
break
27+
positions.append(pos)
28+
start = pos + 1
29+
30+
if positions:
31+
return (record_id, positions)
32+
return None
33+
34+
1135
def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    Args:
        fasta_path: Path of the FASTA file to scan. The file is read into
            memory in one go.
        pattern: ASCII-encoded subsequence to search for.

    Returns:
        ``[(record_id, [positions...]), ...]`` in file order, containing only
        the records with at least one match.

    Raises:
        UnicodeDecodeError: If ``pattern`` contains non-ASCII bytes.
        OSError: If ``fasta_path`` cannot be opened or read.
    """
    pattern_str = pattern.decode("ascii")
    with open(fasta_path, "r") as f:
        text = f.read()

    # A record begins only where ">" is the first character of a line.
    # Splitting on every ">" would cut a header such as ">seq1 A>G variant"
    # in two, truncating its id and inventing a bogus extra record. Prefixing
    # a newline lets a ">" at the very start of the file match "\n>" too.
    chunks = ("\n" + text).split("\n>")
    chunks[0] = chunks[0][1:]  # drop the newline that was prefixed above
    records = [chunk for chunk in chunks if chunk.strip()]

    with ThreadPoolExecutor(max_workers=16) as executor:
        # Executor.map yields results in the order of its inputs, so the
        # matches stay in file order even though records run concurrently.
        matches = [
            result
            for result in executor.map(_process_record, records, repeat(pattern_str))
            if result
        ]

    return matches
50+

0 commit comments

Comments
 (0)