Skip to content

Commit be73d34

Browse files
committed
dna threads
Signed-off-by: Drew Wock <dwock@esri.com>
1 parent d2b310d commit be73d34

1 file changed

Lines changed: 31 additions & 18 deletions

File tree

rounds/3_dna/solution.py

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,24 @@
66
"""
77

88
import re
9+
from concurrent.futures import ThreadPoolExecutor
10+
11+
# Characters deleted from a record body to obtain the contiguous sequence:
# line feeds (the line separators), carriage returns (left behind by CRLF
# line endings) and spaces.
_SEQUENCE_NOISE = str.maketrans("", "", "\n\r ")


def find_match(args: "tuple[re.Pattern[str], str]") -> "tuple[str, list[int]] | None":
    """Search one FASTA record for every occurrence of a compiled pattern.

    The two inputs are packed into a single tuple so the function can be
    called with one argument, either positionally or as ``args=...``.

    Args:
        args: ``(regex, record)`` where ``regex`` is a compiled pattern and
            ``record`` is the text of one record without its leading ``>``,
            i.e. ``"<id>\\n<seq line 1>\\n<seq line 2>\\n..."``.

    Returns:
        ``(record_id, positions)`` where ``positions`` holds the 0-based
        start offset of each non-overlapping match in the joined sequence,
        or ``None`` when the sequence contains no match.
    """
    regex, record = args

    # The id is the first line; everything after the first newline is the
    # sequence, possibly wrapped over several lines.
    header, _, body = record.partition("\n")
    record_id = header.strip()

    # Join the wrapped lines into one string. Carriage returns are removed
    # as well so that a match spanning a CRLF line break is still found and
    # the reported offsets refer to sequence characters only.
    sequence = body.translate(_SEQUENCE_NOISE)

    positions = [m.start() for m in regex.finditer(sequence)]
    return (record_id, positions) if positions else None
26+
927

1028
def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]:
1129
"""Find every FASTA record whose sequence contains ``pattern``.
@@ -22,22 +40,17 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
2240
pattern_str = pattern.decode('ascii')
2341
regex = re.compile(pattern_str)
2442

25-
# Step 2: split the file on '>' to peel off one record at a time. The
26-
# first element is the chunk before any header (empty for well-formed
27-
# files) and is skipped by the ``.strip()`` guard below.
28-
for record in text.split(">"):
29-
if not record.strip():
30-
continue
31-
32-
# Step 3: a record looks like ``"<id>\n<seq line 1>\n<seq line 2>\n..."``.
33-
# The id is the first line; the remaining lines are joined back into a
34-
# single contiguous sequence string.
35-
lines = record.split("\n")
36-
record_id = lines[0].strip()
37-
sequence = "".join(lines[1:]).replace(" ", "")
38-
39-
positions: list[int] = []
40-
positions = [m.start() for m in regex.finditer(sequence)]
41-
if positions:
42-
matches.append((record_id, positions))
43+
with ThreadPoolExecutor() as ex:
44+
futures = []
45+
for record in text.split(">"):
46+
if not record.strip():
47+
continue
48+
49+
t = ex.submit(find_match, args=(regex,record))
50+
futures.append(t)
51+
52+
for t in futures:
53+
result = t.result()
54+
if result:
55+
matches.append(result)
4356
return matches

0 commit comments

Comments
 (0)