File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 66"""
77
88
9- def compute_histogram (path : str ) -> dict [bytes , int ]:
9+ # def compute_histogram(path: str) -> dict[bytes, int]:
1010 """Frequency of every 2-byte bigram in the file at ``path``."""
11- # TODO: Add comment to push to branch
12- from .baseline import compute_histogram as _baseline
11+ # NOTE: ChatGPT was used to optimize the byte-pair histogram below.
12+ # from .baseline import compute_histogram as _baseline
1313
14- return _baseline (path )
14+ #return _baseline(path)
15+
16+ from array import array
17+
18+
def compute_histogram(path: str) -> list[int]:
    """Count every overlapping 2-byte bigram in the file at ``path``.

    The whole file is read into memory in binary mode, then each adjacent
    byte pair ``(byte1, byte2)`` increments one slot of the result.

    Result index:
        index = (byte1 << 8) | byte2

    Example:
        b"AB" -> (65 << 8) | 66

    Args:
        path: Filesystem path of the file to scan.

    Returns:
        A list of exactly 65536 counts. Files shorter than two bytes
        contain no bigrams and yield an all-zero table.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        data = f.read()

    # A plain list keeps the return type identical on every path (matching
    # the annotation) and, unlike a fixed-width C array, cannot overflow.
    counts = [0] * 65536

    # Pair each byte with its successor. The memoryview slice avoids copying
    # the buffer, and zip() yields nothing for inputs shorter than two bytes,
    # so no explicit length guard is needed.
    for first, second in zip(data, memoryview(data)[1:]):
        counts[(first << 8) | second] += 1

    return counts
You can’t perform that action at this time.
0 commit comments