@@ -25,6 +25,7 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
2525 # Step 2: split the file on '>' to peel off one record at a time. The
2626 # first element is the chunk before any header (empty for well-formed
2727 # files) and is skipped by the ``.strip()`` guard below.
28+ sequences = []
2829 for record in text .split (b">" ):
2930 if not record .strip ():
3031 continue
@@ -35,18 +36,26 @@ def find_matches(fasta_path: str, pattern: bytes) -> list[tuple[str, list[int]]]
3536 lines = record .split (b"\n " )
3637 record_id = lines [0 ].strip ().decode ("ascii" )
3738 sequence = b"" .join (lines [1 :]).replace (b" " , b"" ).decode ("ascii" )
38-
39- # Step 4: walk the sequence with ``str.find()``, advancing one byte
40- # past each hit so overlapping matches are reported too.
41- positions : list [int ] = []
42- start = 0
43- while True :
44- pos = sequence .find (pattern_str , start )
45- if pos == - 1 :
46- break
47- positions .append (pos )
48- start = pos + 1
49-
50- if positions :
51- matches .append ((record_id , positions ))
39+ sequences .append ((record_id , sequence ))
40+
41+ threads = []
42+ for record_id , sequence in sequences :
43+ thread = Thread (target = match_record , args = (record_id , sequence , pattern_str , matches ))
44+ thread .start ()
45+ threads .append (thread )
46+ for thread in threads :
47+ thread .join ()
5248 return matches
49+
def match_record(record_id, sequence, pattern_str, matches):
    """Record every offset at which ``pattern_str`` occurs in ``sequence``.

    The sequence is scanned left to right with ``find()``. After each hit
    the search resumes one character further along (not past the end of
    the hit), so overlapping occurrences are reported as well.

    Args:
        record_id: Identifier stored alongside the offsets.
        sequence: Text to search.
        pattern_str: Text to look for.
        matches: List that receives ``(record_id, offsets)`` when at least
            one occurrence exists; it is left untouched otherwise.

    Returns:
        None. The result is delivered by appending to ``matches``.
    """
    hits: list[int] = []
    cursor = sequence.find(pattern_str)
    while cursor != -1:
        hits.append(cursor)
        # Step by one character only, so a hit that overlaps this one is
        # still found on the next search.
        cursor = sequence.find(pattern_str, cursor + 1)

    # Records without any occurrence contribute nothing to the results.
    if not hits:
        return
    matches.append((record_id, hits))
0 commit comments