Merge pull request #28 from saxbophone/josh/22-validation

saxbophone · web-flow · commit f2c9c2afb51f · 2018-10-18T22:00:57.000+01:00
Add validation to best_ratio() and encode_raw()
diff --git a/basest/core/best_ratio.py b/basest/core/best_ratio.py
@@ -17,6 +17,9 @@ def _encoding_ratio(base_from, base_to, chunk_sizes):
     """
     best_ratio = (1.0, INF)
     for s in chunk_sizes:
+        # validate each chunk size here
+        if not isinstance(s, int):
+            raise TypeError('chunk sizes must be list of ints')
         match = ceil(log(base_from ** s, base_to))
         ratio = (float(s), match)
         if (ratio[0] / ratio[1]) > (best_ratio[0] / best_ratio[1]):
@@ -30,9 +33,16 @@ def best_ratio(input_base, output_bases, chunk_sizes):
     sizes, find the most efficient encoding ratio.
     Returns the chosen output base, and the chosen encoding ratio.
     """
+    # validate input base type
+    if not isinstance(input_base, int):
+        raise TypeError('input base must be of int type')
+
     encoder = 0
     best_ratio = (1.0, INF)
     for base_to in output_bases:
+        # validate each output base here
+        if not isinstance(base_to, int):
+            raise TypeError('output bases must be list of ints')
         ratio = _encoding_ratio(input_base, base_to, chunk_sizes)
         if (
             (float(ratio[0]) / float(ratio[1])) >
diff --git a/basest/core/encode.py b/basest/core/encode.py
@@ -34,6 +34,17 @@ def encode_raw(input_base, output_base, input_ratio, output_ratio, input_data):
     input_workon = list(input_data)
     # store length of input data for future reference
     input_length = len(input_workon)
+    '''
+    Special validation: if the output base is larger than the input base, then
+    the length of the input data MUST be an exact multiple of the input ratio.
+    Otherwise, the data will be corrupted if we continue, so we will raise
+    ValueError instead.
+    '''
+    if input_base < output_base and input_length % input_ratio != 0:
+        raise ValueError(
+            'Input data length must be exact multiple of input ratio when '
+            'output base is larger than input base'
+        )
     # get nearest data length that the input data fits in
     input_nearest_length = _nearest_length(input_length, input_ratio)
     # calculate the amount of padding needed
diff --git a/tests/core/test_best_ratio.py b/tests/core/test_best_ratio.py
@@ -33,3 +33,18 @@ def test_best_ratio(self, input_base, output_bases, chunk_sizes, expected):
         self.assertEqual(
             best_ratio(input_base, output_bases, chunk_sizes), expected
         )
+
+    @data(str, float, bytes)
+    def test_invalid_inputs(self, data_type):
+        """
+        Any non-integer types (or lists of non-integers) passed to the function
+        should raise TypeError.
+        """
+        with self.assertRaises(TypeError):
+            best_ratio(data_type(), [2], [2])
+
+        with self.assertRaises(TypeError):
+            best_ratio(2, [data_type()], [2])
+
+        with self.assertRaises(TypeError):
+            best_ratio(2, [0, 1], [data_type()])
diff --git a/tests/core/test_encode_decode_raw.py b/tests/core/test_encode_decode_raw.py
@@ -48,6 +48,47 @@ def test_encode_raw(
 
         self.assertEqual(output_data, expected_output_data)
 
+    @data(str, bool, float, bytes)
+    def test_encode_raw_invalid_inputs(self, data_type):
+        """
+        Any non-integer types (or lists of non-integers) passed to the function
+        should raise TypeError.
+        """
+        with self.assertRaises(TypeError):
+            encode_raw(
+                input_base=data_type(), output_base=data_type(),
+                input_ratio=data_type(), output_ratio=data_type(),
+                input_data=data_type()
+            )
+
+    @data(
+        (94, 256, 10, 9, [1, 2, 3, 4, 5]),
+        (94, 256, 10, 9, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        (78, 256, 20, 16, [70]),
+        (78, 256, 20, 16, [0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60])
+    )
+    @unpack
+    def test_encode_raw_invalid_input_ratio(
+        self,
+        input_base, output_base,
+        input_ratio, output_ratio,
+        input_data
+    ):
+        """
+        When encoding from a smaller base to a larger one, it is impossible to
+        encode input if the number of symbols is not an exact multiple of the
+        input ratio. Such a mismatch can normally be solved with padding;
+        however, padding can only be used successfully on the 'smaller' side
+        of the transformation, and in any other case data corruption occurs.
+        If this is attempted, then ValueError should be raised.
+        """
+        with self.assertRaises(ValueError):
+            encode_raw(
+                input_base=input_base, output_base=output_base,
+                input_ratio=input_ratio, output_ratio=output_ratio,
+                input_data=input_data
+            )
+
     @data(
         # Base-85 - no padding
         (
@@ -81,11 +122,30 @@ def test_decode_raw(
 
         self.assertEqual(output_data, expected_output_data)
 
+    @data(str, bool, float, bytes)
+    def test_decode_raw_invalid_inputs(self, data_type):
+        """
+        Any non-integer types (or lists of non-integers) passed to the function
+        should raise TypeError.
+        """
+        with self.assertRaises(TypeError):
+            decode_raw(
+                input_base=data_type(), output_base=data_type(),
+                input_ratio=data_type(), output_ratio=data_type(),
+                input_data=data_type()
+            )
+
     @data(
         # Base-85 - no padding required
         (256, 85, 4, 5, [99, 97, 98, 98, 97, 103, 101, 115]),
         # Base-85 - padding is required
-        (256, 85, 4, 5, [43, 42, 41, 40, 39])
+        (256, 85, 4, 5, [43, 42, 41, 40, 39]),
+        # Base-94 to Base-256 -- the 'wrong' way round, but it is valid as long
+        # as the input data is a multiple of the input_ratio size.
+        # Otherwise, padding-related errors occur (because padding happens on
+        # the 'smaller' side of the transformation only).
+        (94, 256, 10, 9, [0, 10, 20, 30, 40, 50, 60, 70, 80, 90]),
+        (94, 256, 10, 9, [93, 88, 77, 66, 55, 44, 33, 22, 11, 0])
     )
     @unpack
     def test_encode_decode_raw(