File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 55own faster implementation.
66"""
77
8+ from concurrent .futures import ProcessPoolExecutor
9+ import os
10+
11+ CHUNK_SIZE = 8 * 1024 * 1024
812
def compute_histogram(
    path: str, *, chunk_size: int = 8 * 1024 * 1024
) -> dict[bytes, int]:
    """Return the frequency of every 2-byte bigram in the file at ``path``.

    Bigrams overlap: a file of ``n`` bytes contributes ``n - 1`` bigrams
    (bytes 0-1, 1-2, 2-3, ...). Files shorter than two bytes therefore
    produce an empty histogram.

    The file is streamed in blocks of ``chunk_size`` bytes, so peak memory
    is bounded by the block size rather than by the size of the file.

    Args:
        path: Path of the file to scan; it is opened in binary mode.
        chunk_size: Number of bytes requested per read. Must be positive.

    Returns:
        A dict mapping each bigram that occurs at least once (as a 2-byte
        ``bytes`` object) to its number of occurrences. Keys are inserted
        in ascending big-endian order.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b"" immediately, which would silently yield an
        # empty histogram for a non-empty file.
        raise ValueError("chunk_size must be a positive integer")

    # One slot per possible bigram, indexed by (first_byte << 8) | second_byte.
    counts = [0] * 65536

    # Rolling window holding the most recent bytes; -1 means nothing has
    # been read yet. It lives outside the read loop so that a bigram which
    # straddles two chunks is still counted.
    window = -1

    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            byte_iter = iter(chunk)
            if window < 0:
                # The very first byte of the file only seeds the window;
                # it does not complete a bigram on its own.
                window = next(byte_iter)
            for byte in byte_iter:
                # Shift the previous byte into the high half, drop anything
                # older, and place the new byte in the low half.
                window = ((window << 8) & 0xFF00) | byte
                counts[window] += 1

    return {
        index.to_bytes(2, byteorder="big"): count
        for index, count in enumerate(counts)
        if count
    }
You can’t perform that action at this time.
0 commit comments