Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,4 @@ scripts/
```

Each round's `data/` directory is generated locally and gitignored.
This is trevored-py's PR
52 changes: 48 additions & 4 deletions rounds/3_dna/solution.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,57 @@
own faster implementation.
"""

from .baseline import find_matches as _baseline


def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    The file is read in binary mode, one line at a time. A line starting
    with ``>`` opens a new record; the record ID is the remainder of that
    line decoded as ASCII. Every other line is appended, without its line
    terminator, to the current record's sequence, so a match may span a
    line break in the file. Lines that appear before the first header
    belong to no record and are ignored.

    Args:
        fasta_path: Path of the FASTA file to scan.
        pattern: Byte string to look for inside each record's sequence.

    Returns:
        ``[(record_id, [positions...]), ...]`` in file order, where the
        positions are the offsets reported by ``find_all`` for the joined
        sequence. Records without any match are left out.

    Raises:
        OSError: If ``fasta_path`` cannot be opened.
        UnicodeDecodeError: If a header line contains non-ASCII bytes.
    """
    results: list[tuple[str, list[int]]] = []

    def emit(record_id: str, parts: list[bytes]) -> None:
        # Search one finished record and keep it only if the pattern occurs.
        positions = find_all(b"".join(parts), pattern)
        if positions:
            results.append((record_id, positions))

    current_id = None
    chunks: list[bytes] = []

    with open(fasta_path, "rb") as f:
        for raw in f:
            # Strip "\r" as well as "\n": in binary mode a CRLF file would
            # otherwise leave "\r" inside sequences (breaking matches that
            # span lines and shifting positions) and at the end of IDs.
            line = raw.rstrip(b"\r\n")

            if line.startswith(b">"):
                if current_id is not None:
                    emit(current_id, chunks)
                current_id = line[1:].decode("ascii")
                chunks = []
            else:
                chunks.append(line)

    # The last record has no following header to trigger its search.
    if current_id is not None:
        emit(current_id, chunks)

    return results


def find_all(seq: bytes, pattern: bytes) -> list[int]:
    """Return the start offset of every occurrence of ``pattern`` in ``seq``.

    Occurrences may overlap: after each hit the search resumes one byte past
    the hit's start, not past its end. Offsets come back in ascending order,
    and the list is empty when ``pattern`` does not occur.
    """
    hits: list[int] = []
    locate = seq.find

    offset = locate(pattern)
    while offset != -1:
        hits.append(offset)
        # Step by a single byte so overlapping occurrences are found too.
        offset = locate(pattern, offset + 1)

    return hits
1 change: 1 addition & 0 deletions rounds/3_dna/target_b.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
target = bytearray([65, 71, 84, 67, 67, 71, 84, 65])
61 changes: 61 additions & 0 deletions rounds/3_dna/working_solution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""Your Round 3 solution — DNA sequence matcher.

This file holds a standalone implementation of ``find_matches``: it parses
the FASTA file itself and does not delegate to ``baseline.py``.
"""


def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    The file is read in binary mode, one line at a time. A line starting
    with ``>`` opens a new record; the record ID is the remainder of that
    line decoded as ASCII. Every other line is appended, without its line
    terminator, to the current record's sequence, so a match may span a
    line break in the file. Lines that appear before the first header
    belong to no record and are ignored.

    Args:
        fasta_path: Path of the FASTA file to scan.
        pattern: Byte string to look for inside each record's sequence.

    Returns:
        ``[(record_id, [positions...]), ...]`` in file order, where the
        positions are the offsets reported by ``find_all`` for the joined
        sequence. Records without any match are left out.

    Raises:
        OSError: If ``fasta_path`` cannot be opened.
        UnicodeDecodeError: If a header line contains non-ASCII bytes.
    """
    results: list[tuple[str, list[int]]] = []

    def emit(record_id: str, parts: list[bytes]) -> None:
        # Search one finished record and keep it only if the pattern occurs.
        positions = find_all(b"".join(parts), pattern)
        if positions:
            results.append((record_id, positions))

    current_id = None
    chunks: list[bytes] = []

    with open(fasta_path, "rb") as f:
        for raw in f:
            # Strip "\r" as well as "\n": in binary mode a CRLF file would
            # otherwise leave "\r" inside sequences (breaking matches that
            # span lines and shifting positions) and at the end of IDs.
            line = raw.rstrip(b"\r\n")

            if line.startswith(b">"):
                if current_id is not None:
                    emit(current_id, chunks)
                current_id = line[1:].decode("ascii")
                chunks = []
            else:
                chunks.append(line)

    # The last record has no following header to trigger its search.
    if current_id is not None:
        emit(current_id, chunks)

    return results


def find_all(seq: bytes, pattern: bytes) -> list[int]:
    """Collect the offsets at which ``pattern`` starts inside ``seq``.

    The scan restarts one byte after each match start, so overlapping
    matches are all reported. The result is in ascending order and is
    empty when there is no match.
    """
    offsets: list[int] = []
    cursor = 0

    while (hit := seq.find(pattern, cursor)) != -1:
        offsets.append(hit)
        # Advance by one byte only, keeping overlapping matches reachable.
        cursor = hit + 1

    return offsets