File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 55own faster implementation.
66"""
77
def compute_histogram(path: str) -> dict[bytes, int]:
    """Return the frequency of every 2-byte bigram in the file at ``path``.

    Bigrams overlap: a file containing ``b"abc"`` contributes ``b"ab"`` and
    ``b"bc"``. Files shorter than two bytes yield an empty dict.

    Args:
        path: Filesystem path of the file to scan; it is opened in binary
            mode and read into memory in full.

    Returns:
        A dict mapping each 2-byte sequence that occurs in the file to the
        number of times it occurs. Keys appear in order of first occurrence.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    with open(path, "rb") as f:
        data = f.read()

    # A memoryview slice offsets the buffer by one byte without copying it
    # (``data[1:]`` would duplicate the whole file in memory). Iterating
    # bytes / a byte-format memoryview yields ints, so each pair is a tuple
    # of two ints in 0..255, counted by Counter's C-accelerated loop.
    pair_counts = Counter(zip(data, memoryview(data)[1:]))

    # bytes((a, b)) is the same 2-byte key as ((a << 8) | b).to_bytes(2, "big").
    return {bytes(pair): count for pair, count in pair_counts.items()}
You can’t perform that action at this time.
0 commit comments