Skip to content

Commit a9de114

Browse files
authored
best_ratio() comments and minor changes
Added a number of comments to best_ratio(), which was largely devoid of them even though what it does is fundamentally important to basest. Also made some minor stylistic changes, e.g. switched from float('infinity') to float('inf').
1 parent f2c9c2a commit a9de114

File tree

1 file changed

+20
-1
lines changed

1 file changed

+20
-1
lines changed

basest/core/best_ratio.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,36 @@
77
from math import ceil, log
88

99

10-
INF = float('infinity')
10+
# an easy way to store positive infinity in a manner compatible with Python 2.x
11+
INF = float('inf')
1112

1213

1314
def _encoding_ratio(base_from, base_to, chunk_sizes):
1415
"""
1516
An algorithm for finding the most efficient encoding ratio
1617
from one base to another within a range limit.
1718
"""
19+
# a ratio of 1:Infinity is the theoretical worst possible ratio
1820
best_ratio = (1.0, INF)
1921
for s in chunk_sizes:
2022
# validate each chunk size here
2123
if not isinstance(s, int):
2224
raise TypeError('chunk sizes must be list of ints')
25+
'''
26+
base_from ** s is the total number of values represented by the input
27+
base and chunk size
28+
29+
base_to logarithm of this number, rounded to ceiling is the minimum
30+
number of symbols required in the output ratio to store this number of
31+
values (it might be able to store more than needed, but that doesn't
32+
matter)
33+
'''
2334
match = ceil(log(base_from ** s, base_to))
35+
# the efficiency ratio is input:output
2436
ratio = (float(s), match)
37+
# ratio efficiences can be compared by dividing them like fractions
2538
if (ratio[0] / ratio[1]) > (best_ratio[0] / best_ratio[1]):
39+
# this is the new best ratio found so far
2640
best_ratio = ratio
2741
return (int(best_ratio[0]), int(best_ratio[1]))
2842

@@ -37,17 +51,22 @@ def best_ratio(input_base, output_bases, chunk_sizes):
3751
if not isinstance(input_base, int):
3852
raise TypeError('input base must be of int type')
3953

54+
# we will store the most efficient output base here
4055
encoder = 0
56+
# a ratio of 1:Infinity is the theoretical worst possible ratio
4157
best_ratio = (1.0, INF)
4258
for base_to in output_bases:
4359
# validate each output base here
4460
if not isinstance(base_to, int):
4561
raise TypeError('output bases must be list of ints')
62+
# get the best encoding ratio for this base out of all chunk sizes
4663
ratio = _encoding_ratio(input_base, base_to, chunk_sizes)
64+
# if it's more efficient, then set it as the most efficient one yet
4765
if (
4866
(float(ratio[0]) / float(ratio[1])) >
4967
(float(best_ratio[0]) / float(best_ratio[1]))
5068
):
5169
best_ratio = ratio
5270
encoder = base_to
71+
# we now have the best output base and ratio for it
5372
return encoder, (int(best_ratio[0]), int(best_ratio[1]))

0 commit comments

Comments
 (0)