dna_1

kiri11 · kiri11 · commit 6a2d93f9730a · 2026-05-13T11:33:18.000-07:00
diff --git a/rounds/3_dna/solution.py b/rounds/3_dna/solution.py
@@ -1,17 +1,35 @@
-"""Your Round 3 solution — DNA sequence matcher.
+"""Fast Round 3 solution: DNA sequence matcher."""
 
-**Edit this file.** It currently delegates to ``baseline.py`` so everything
-passes out of the box. Replace the body of ``find_matches`` with your
-own faster implementation.
-"""
+from __future__ import annotations
 
-from .baseline import find_matches as _baseline
+_NEWLINE = b"\n"
 
 
 def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
     """Find every FASTA record whose sequence contains ``pattern``.
 
-    Returns ``[(record_id, [positions...]), ...]`` in file order.
+    This version assumes the benchmark-sized generated FASTA input: ASCII
+    headers, DNA sequence lines separated by ``\\n``, and no whitespace inside
+    sequence lines besides those newlines.
     """
-    # TODO: remove this delegation and write your own implementation here.
-    return _baseline(fasta_path, pattern)
+    if not pattern:
+        return []
+
+    with open(fasta_path, "rb") as file:
+        data = file.read()
+
+    matches: list[tuple[str, list[int]]] = []
+    for record in data.split(b">")[1:]:
+        record_id, _, wrapped_sequence = record.partition(_NEWLINE)
+        sequence = wrapped_sequence.replace(_NEWLINE, b"")
+
+        positions: list[int] = []
+        pos = sequence.find(pattern)
+        while pos != -1:
+            positions.append(pos)
+            pos = sequence.find(pattern, pos + 1)
+
+        if positions:
+            matches.append((record_id.decode("ascii"), positions))
+
+    return matches