Skip to content

Commit f2c9c2a

Browse files
authored
Merge pull request #28 from saxbophone/josh/22-validation
Add validation to best_ratio() and encode_raw()
2 parents dc40df6 + 67a5248 commit f2c9c2a

File tree

4 files changed

+97
-1
lines changed

4 files changed

+97
-1
lines changed

basest/core/best_ratio.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ def _encoding_ratio(base_from, base_to, chunk_sizes):
1717
"""
1818
best_ratio = (1.0, INF)
1919
for s in chunk_sizes:
20+
# validate each chunk size here
21+
if not isinstance(s, int):
22+
raise TypeError('chunk sizes must be list of ints')
2023
match = ceil(log(base_from ** s, base_to))
2124
ratio = (float(s), match)
2225
if (ratio[0] / ratio[1]) > (best_ratio[0] / best_ratio[1]):
@@ -30,9 +33,16 @@ def best_ratio(input_base, output_bases, chunk_sizes):
3033
sizes, find the most efficient encoding ratio.
3134
Returns the chosen output base, and the chosen encoding ratio.
3235
"""
36+
# validate input base type
37+
if not isinstance(input_base, int):
38+
raise TypeError('input base must be of int type')
39+
3340
encoder = 0
3441
best_ratio = (1.0, INF)
3542
for base_to in output_bases:
43+
# validate each output base here
44+
if not isinstance(base_to, int):
45+
raise TypeError('output bases must be list of ints')
3646
ratio = _encoding_ratio(input_base, base_to, chunk_sizes)
3747
if (
3848
(float(ratio[0]) / float(ratio[1])) >

basest/core/encode.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ def encode_raw(input_base, output_base, input_ratio, output_ratio, input_data):
3434
input_workon = list(input_data)
3535
# store length of input data for future reference
3636
input_length = len(input_workon)
37+
'''
38+
Special validation: if the output base is larger than the input base, then
39+
the length of the input data MUST be an exact multiple of the input ratio.
40+
Otherwise, the data will be corrupted if we continue, so we will raise
41+
ValueError instead.
42+
'''
43+
if input_base < output_base and input_length % input_ratio != 0:
44+
raise ValueError(
45+
'Input data length must be exact multiple of input ratio when '
46+
'output base is larger than input base'
47+
)
3748
# get nearest data length that the input data fits in
3849
input_nearest_length = _nearest_length(input_length, input_ratio)
3950
# calculate the amount of padding needed

tests/core/test_best_ratio.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,18 @@ def test_best_ratio(self, input_base, output_bases, chunk_sizes, expected):
3333
self.assertEqual(
3434
best_ratio(input_base, output_bases, chunk_sizes), expected
3535
)
36+
37+
@data(str, float, bytes)
38+
def test_invalid_inputs(self, data_type):
39+
"""
40+
Any non-integer types (or lists of non-integers) passed to the function
41+
should raise TypeError.
42+
"""
43+
with self.assertRaises(TypeError):
44+
best_ratio(data_type(), [2], [2])
45+
46+
with self.assertRaises(TypeError):
47+
best_ratio(2, [data_type()], [2])
48+
49+
with self.assertRaises(TypeError):
50+
best_ratio(2, [0, 1], [data_type()])

tests/core/test_encode_decode_raw.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,47 @@ def test_encode_raw(
4848

4949
self.assertEqual(output_data, expected_output_data)
5050

51+
@data(str, bool, float, bytes)
52+
def test_encode_raw_invalid_inputs(self, data_type):
53+
"""
54+
Any non-integer types (or lists of non-integers) passed to the function
55+
should raise TypeError.
56+
"""
57+
with self.assertRaises(TypeError):
58+
encode_raw(
59+
input_base=data_type(), output_base=data_type(),
60+
input_ratio=data_type(), output_ratio=data_type(),
61+
input_data=data_type()
62+
)
63+
64+
@data(
65+
(94, 256, 10, 9, [1, 2, 3, 4, 5]),
66+
(94, 256, 10, 9, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
67+
(78, 256, 20, 16, [70]),
68+
(78, 256, 20, 16, [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60])
69+
)
70+
@unpack
71+
def test_encode_raw_invalid_input_ratio(
72+
self,
73+
input_base, output_base,
74+
input_ratio, output_ratio,
75+
input_data
76+
):
77+
"""
78+
When encoding from a smaller base to a larger one, it is impossible to
79+
encode input if the number of symbols is not an exact multiple of the
80+
input ratio. This is because such an action normally can be solved with
81+
padding; however, padding can only be used successfully on the 'smaller'
82+
side of the transformation, in any other case data corruption occurs.
83+
If this is attempted, then ValueError should be raised.
84+
"""
85+
with self.assertRaises(ValueError):
86+
encode_raw(
87+
input_base=input_base, output_base=output_base,
88+
input_ratio=input_ratio, output_ratio=output_ratio,
89+
input_data=input_data
90+
)
91+
5192
@data(
5293
# Base-85 - no padding
5394
(
@@ -81,11 +122,30 @@ def test_decode_raw(
81122

82123
self.assertEqual(output_data, expected_output_data)
83124

125+
@data(str, bool, float, bytes)
126+
def test_decode_raw_invalid_inputs(self, data_type):
127+
"""
128+
Any non-integer types (or lists of non-integers) passed to the function
129+
should raise TypeError.
130+
"""
131+
with self.assertRaises(TypeError):
132+
decode_raw(
133+
input_base=data_type(), output_base=data_type(),
134+
input_ratio=data_type(), output_ratio=data_type(),
135+
input_data=data_type()
136+
)
137+
84138
@data(
85139
# Base-85 - no padding required
86140
(256, 85, 4, 5, [99, 97, 98, 98, 97, 103, 101, 115]),
87141
# Base-85 - padding is required
88-
(256, 85, 4, 5, [43, 42, 41, 40, 39])
142+
(256, 85, 4, 5, [43, 42, 41, 40, 39]),
143+
# Base-94 to Base-256 -- the 'wrong' way round, but it is valid as long
144+
# as the input data is a multiple of the input_ratio size.
145+
# Otherwise, padding-related errors occur (because padding happens on
146+
# the 'smaller' side of the transformation only).
147+
(94, 256, 10, 9, [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]),
148+
(94, 256, 10, 9, [93, 88, 77, 66, 55, 44, 33, 22, 11, 0])
89149
)
90150
@unpack
91151
def test_encode_decode_raw(

0 commit comments

Comments
 (0)