Skip to content

Commit d54ecc6

Browse files
committed
use regex and threadpool
1 parent f3da8d0 commit d54ecc6

3 files changed

Lines changed: 115 additions & 4 deletions

File tree

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,10 @@ version = "0.1.0"
44
description = "Python Performance Lab: Sharpening Your Instincts — PyCon US 2026 tutorial"
55
readme = "README.md"
66
requires-python = ">=3.13"
7-
dependencies = ["numpy>=2.0"]
7+
dependencies = [
8+
"numpy>=2.0",
9+
"regex>=2026.5.9",
10+
]
811

912
[dependency-groups]
1013
dev = ["pytest>=8.0", "pytest-codspeed>=5.0.1"]

rounds/3_dna/solution.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,44 @@
66
"""
77

88
from .baseline import find_matches as _baseline
9+
import regex
10+
from multiprocessing.pool import ThreadPool
11+
12+
def match(record, pattern_str):
    """Locate every occurrence of ``pattern_str`` in one FASTA record.

    ``record`` is the text of a single record with the leading ``>`` already
    removed: a header line followed by zero or more sequence lines.

    Returns ``(record_id, positions)``. ``record_id`` is the stripped header
    line, and ``positions`` holds the 0-based start offset of every match
    (overlapping matches included) within the sequence once all whitespace
    has been removed. A blank record yields ``(None, [])``.
    """
    if not record.strip():
        return None, []

    # The first line is the header; everything after it is sequence data.
    header, _, body = record.partition("\n")
    record_id = header.strip()

    # Remove *all* whitespace, not just "\n" and " ": a stray "\r" (CRLF
    # files) or tab left inside the sequence would shift reported offsets and
    # hide matches that span a line break.
    sequence = "".join(body.split())

    # NOTE(review): pattern_str is interpreted as a regular expression, so any
    # regex metacharacters in it are not matched literally -- confirm this is
    # intended, otherwise wrap it in regex.escape().
    return record_id, [
        hit.start()
        for hit in regex.finditer(pattern_str, sequence, overlapped=True)
    ]
927

1028

1129
def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
    """Find every FASTA record whose sequence contains ``pattern``.

    Reads the whole file at ``fasta_path``, splits it into records, and
    searches each record with ``match`` on a thread pool.

    Returns ``[(record_id, [positions...]), ...]`` in file order; records
    without any match are omitted.

    Raises ``UnicodeDecodeError`` if ``pattern`` is not ASCII.
    """
    pattern_str = pattern.decode("ascii")
    with open(fasta_path, "r") as f:
        text = f.read()

    # A record begins at a ">" that starts a line. Splitting on every ">"
    # would also cut headers whose description contains ">" (e.g. "A->B")
    # into bogus records. Prefixing "\n" lets the very first header match the
    # same "\n>" delimiter; whatever precedes the first header is not a
    # record and is discarded.
    _preamble, *records = ("\n" + text).split("\n>")
    if not records:
        return []

    args = [(record, pattern_str) for record in records]

    # starmap returns results in input order, which preserves file order.
    # Never start more workers than there are records to search.
    with ThreadPool(min(10, len(records))) as pool:
        hits = pool.starmap(match, args)

    # Keep only the records that actually matched.
    return [(record_id, match_inds) for record_id, match_inds in hits if match_inds]

0 commit comments

Comments
 (0)