77from math import ceil , log
88
99
# an easy way to store positive infinity in a manner compatible with Python 2.x
INF = float('inf')
1112
1213
1314def _encoding_ratio (base_from , base_to , chunk_sizes ):
1415 """
1516 An algorithm for finding the most efficient encoding ratio
1617 from one base to another within a range limit.
1718 """
19+ # a ratio of 1:Infinity is the theoretical worst possible ratio
1820 best_ratio = (1.0 , INF )
1921 for s in chunk_sizes :
2022 # validate each chunk size here
2123 if not isinstance (s , int ):
2224 raise TypeError ('chunk sizes must be list of ints' )
25+ '''
26+ base_from ** s is the total number of values represented by the input
27+ base and chunk size
28+
29+ base_to logarithm of this number, rounded to ceiling is the minimum
30+ number of symbols required in the output ratio to store this number of
31+ values (it might be able to store more than needed, but that doesn't
32+ matter)
33+ '''
2334 match = ceil (log (base_from ** s , base_to ))
35+ # the efficiency ratio is input:output
2436 ratio = (float (s ), match )
37+ # ratio efficiences can be compared by dividing them like fractions
2538 if (ratio [0 ] / ratio [1 ]) > (best_ratio [0 ] / best_ratio [1 ]):
39+ # this is the new best ratio found so far
2640 best_ratio = ratio
2741 return (int (best_ratio [0 ]), int (best_ratio [1 ]))
2842
@@ -37,17 +51,22 @@ def best_ratio(input_base, output_bases, chunk_sizes):
3751 if not isinstance (input_base , int ):
3852 raise TypeError ('input base must be of int type' )
3953
54+ # we will store the most efficient output base here
4055 encoder = 0
56+ # a ratio of 1:Infinity is the theoretical worst possible ratio
4157 best_ratio = (1.0 , INF )
4258 for base_to in output_bases :
4359 # validate each output base here
4460 if not isinstance (base_to , int ):
4561 raise TypeError ('output bases must be list of ints' )
62+ # get the best encoding ratio for this base out of all chunk sizes
4663 ratio = _encoding_ratio (input_base , base_to , chunk_sizes )
64+ # if it's more efficient, then set it as the most efficient one yet
4765 if (
4866 (float (ratio [0 ]) / float (ratio [1 ])) >
4967 (float (best_ratio [0 ]) / float (best_ratio [1 ]))
5068 ):
5169 best_ratio = ratio
5270 encoder = base_to
71+ # we now have the best output base and ratio for it
5372 return encoder , (int (best_ratio [0 ]), int (best_ratio [1 ]))
0 commit comments