Skip to content

Commit 12310e9

Browse files
committed
Just bytes
1 parent 8b3c68e commit 12310e9

1 file changed

Lines changed: 37 additions & 2 deletions

File tree

rounds/3_dna/solution.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,47 @@
66
"""
77

88
from .baseline import find_matches as _baseline
9+
from threading import Thread
10+
import numpy as np
911

1012

1113
def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    The file is read once in binary mode and searched as raw bytes; only the
    ids of matching records are decoded to ``str``.

    Args:
        fasta_path: Path of the FASTA file to scan.
        pattern: Byte string to look for inside each record's sequence.

    Returns:
        ``[(record_id, [positions...]), ...]`` in file order. Positions are
        0-based offsets into the record's sequence with all line breaks and
        blanks removed; overlapping occurrences are all reported. Records
        without a match are omitted.

    Raises:
        OSError: If ``fasta_path`` cannot be opened or read.
        UnicodeDecodeError: If the header of a matching record is not ASCII.
    """
    with open(fasta_path, "rb") as f:
        text = f.read()

    # A header is a '>' at the *start of a line*. Splitting on "\n>" (instead
    # of every '>') keeps headers whose description contains '>' in one piece.
    records = text.split(b"\n>")

    # The first chunk has no preceding newline: it is either the first record
    # (still carrying its '>') or stray text before any header, which is
    # discarded.
    first = records[0]
    records[0] = first[1:] if first.startswith(b">") else b""

    matches: list[tuple[str, list[int]]] = []
    for record in records:
        if not record.strip():
            continue

        # A record looks like b"<id>\n<seq line 1>\n<seq line 2>\n...".
        header, _, body = record.partition(b"\n")

        # Drop line breaks (LF and CRLF) and blanks in a single pass so the
        # sequence is contiguous and positions are not shifted by stray '\r'.
        sequence = body.translate(None, b" \t\r\n")

        # Restart the search one byte past each hit so overlapping matches
        # are reported too.
        positions: list[int] = []
        pos = sequence.find(pattern)
        while pos != -1:
            positions.append(pos)
            pos = sequence.find(pattern, pos + 1)

        if positions:
            # Decode the id only for records that are actually returned.
            matches.append((header.strip().decode("ascii"), positions))
    return matches

0 commit comments

Comments
 (0)