Skip to content

Commit 4124a06

Browse files
author
Jacob Summerville
committed
round 2
1 parent 7a26897 commit 4124a06

1 file changed

Lines changed: 53 additions & 9 deletions

File tree

rounds/2_corruption/solution.py

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,58 @@
1-
"""Your Round 2 solution corruption scanner.
1+
"""Your Round 2 solution - corruption scanner."""
22

3-
**Edit this file.** It currently delegates to ``baseline.py`` so everything
4-
passes out of the box. Replace the body of ``find_corruptions`` with your
5-
own faster implementation.
6-
"""
3+
from __future__ import annotations
74

8-
from .baseline import find_corruptions as _baseline
5+
import mmap
6+
7+
8+
# Number of bytes compared per chunk. Identical chunks are skipped with one
# bytes comparison; only chunks that differ are scanned byte by byte.
_BLOCK_SIZE = 4096


def _diff_ranges(
    ref: mmap.mmap, cor: mmap.mmap, size: int, block_size: int
) -> list[tuple[int, int]]:
    """Return the differing ``(offset, length)`` runs of two ``size``-byte buffers."""
    ranges: list[tuple[int, int]] = []
    # -1 means there is no currently open corruption range.
    run_start = -1

    for block_start in range(0, size, block_size):
        block_end = min(block_start + block_size, size)
        ref_block = ref[block_start:block_end]
        cor_block = cor[block_start:block_end]

        # Most blocks are identical, so skip them with a single bytes
        # comparison instead of a Python loop over each byte.
        if ref_block == cor_block:
            if run_start != -1:
                # The open run ended exactly on the previous block's edge.
                ranges.append((run_start, block_start - run_start))
                run_start = -1
            continue

        # Only scan inside blocks that actually differ, reusing the slices
        # taken above. run_start lives outside this loop so that a range can
        # continue across block edges.
        for pos, (ref_byte, cor_byte) in enumerate(
            zip(ref_block, cor_block), block_start
        ):
            if ref_byte != cor_byte:
                if run_start == -1:
                    run_start = pos
            elif run_start != -1:
                ranges.append((run_start, pos - run_start))
                run_start = -1

    # Close a corruption range that reaches the end of the data.
    if run_start != -1:
        ranges.append((run_start, size - run_start))

    return ranges


def find_corruptions(ref_path: str, cor_path: str) -> list[tuple[int, int]]:
    """Return ``[(offset, length), ...]`` for every differing byte range.

    Both files are memory-mapped read-only and compared ``_BLOCK_SIZE`` bytes
    at a time. Adjacent differing bytes are merged into a single range, and
    ranges are reported in ascending offset order.

    Args:
        ref_path: Path to the reference (uncorrupted) file.
        cor_path: Path to the possibly corrupted file.

    Returns:
        A list of ``(offset, length)`` tuples, one per maximal run of bytes
        that differ between the two files. Empty when the files are
        identical or both are empty.

    Raises:
        ValueError: If the two files differ in length.
        OSError: If either file cannot be opened or mapped.
    """
    with open(ref_path, "rb") as ref_file, open(cor_path, "rb") as cor_file:
        # Seeking to the end (whence=2) returns the file size, which is the
        # single source of truth checked before mapping.
        size = ref_file.seek(0, 2)
        if size != cor_file.seek(0, 2):
            raise ValueError("reference and corrupted files differ in length")
        if size == 0:
            # Nothing to compare; return before attempting to map empty files.
            return []

        ref_file.seek(0)
        cor_file.seek(0)

        with mmap.mmap(ref_file.fileno(), 0, access=mmap.ACCESS_READ) as ref:
            with mmap.mmap(cor_file.fileno(), 0, access=mmap.ACCESS_READ) as cor:
                return _diff_ranges(ref, cor, size, _BLOCK_SIZE)

0 commit comments

Comments
 (0)