saxbophone
diff --git a/‎.travis.yml‎
Lines changed: 13 additions & 0 deletions b/‎.travis.yml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 373 additions & 1 deletion b/‎LICENSE‎
Lines changed: 373 additions & 1 deletion
diff --git a/‎Makefile‎
Lines changed: 5 additions & 2 deletions b/‎Makefile‎
Lines changed: 5 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 80 additions & 78 deletions b/‎README.md‎
Lines changed: 80 additions & 78 deletions
diff --git a/‎basest/__init__.py‎
Lines changed: 9 additions & 3 deletions b/‎basest/__init__.py‎
Lines changed: 9 additions & 3 deletions
diff --git a/‎basest/core/__init__.py‎
Lines changed: 7 additions & 1 deletion b/‎basest/core/__init__.py‎
Lines changed: 7 additions & 1 deletion
diff --git a/‎basest/core/best_ratio.py‎
Lines changed: 45 additions & 3 deletions b/‎basest/core/best_ratio.py‎
Lines changed: 45 additions & 3 deletions
@@ -0,0 +1,13 @@
+language: python
+python:
+  - "2.7"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+  - "3.6"
+  - "pypy3.5"
+install:
+  - make install-deps
+script:
+  - make
+  - make stress-test
@@ -13,9 +13,9 @@ clean:
 	rm -rf basest.egg-info build dist
 
 lint:
-	flake8 basest tests setup.py
+	flake8 basest tests setup.py stress_test.py
 	isort -rc -c basest tests
-	isort -c setup.py
+	isort -c setup.py stress_test.py
 
 test:
 	coverage run --source='basest' tests/__main__.py
@@ -25,5 +25,8 @@ cover:
 
 tests: clean lint test cover
 
+stress-test:
+	python stress_test.py
+
 package:
 	python setup.py sdist bdist_wheel
@@ -12,6 +12,8 @@ It is also not just 8-bit binary data that could be serialised. Any collection o
 
 This library is my implementation of a generic, base-to-base converter which addresses this last point. An encoder and decoder for every binary-to-text format currently existing can be created and used with this library, requiring only for the details of the desired format to be given. Due to its flexibility, the library also makes it trivial to invent new wonderful and interesting base-to-base serialisation/conversion formats (I myself plan to work on and release one that translates binary files into a purely emoji-based format!).
 
+One limitation of the library is that it cannot encode data from a smaller input base to a larger output base with padding on the input (e.g. if you're encoding from base 2 to base 1000, you need to ensure that the number of input symbols exactly matches the encoding ratio you're using). This is an accepted limitation due to the complexities of implementing a padding system that works in the same manner as base-64 and others but which can be extended to any arbitrary base.
+
 So, I hope you find this library fun, useful or both!
 
 ## Installation
@@ -43,20 +45,20 @@ There is a functional interface and a class-based interface (the class-based one
 To use the class-based interface, you will need to create a subclass of `basest.encoders.Encoder` and override attributes of the class, as shown below (using base64 as an example):
 
 ```py
->>> from basest.encoders import Encoder
->>> 
->>> class CustomEncoder(Encoder):
-...     input_base = 256
-...     output_base = 64
-...     input_ratio = 3
-...     output_ratio = 4
-...     # these attributes are only required if using decode() and encode()
-...     input_symbol_table = [chr(c) for c in range(256)]
-...     output_symbol_table = [
-...         s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-...     ]
-...     padding_symbol = '='
->>> 
+from basest.encoders import Encoder
+
+class CustomEncoder(Encoder):
+    input_base = 256
+    output_base = 64
+    input_ratio = 3
+    output_ratio = 4
+    # these attributes are only required if using decode() and encode()
+    input_symbol_table = [chr(c) for c in range(256)]
+    output_symbol_table = [
+        s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+    ]
+    padding_symbol = '='
+
 ```
 
 > **Note:** You must subclass `Encoder`, you cannot use it directly!
@@ -67,36 +69,36 @@ Subclasses of `Encoder` have the following public methods available:
 `encode()` will encode an iterable of symbols in the class' **input symbol table** into an iterable of symbols in the class' **output symbol table**, observing the chosen encoding ratios and padding symbol.
 
 ```py
->>> encoder = CustomEncoder()
->>> encoder.encode(['c', 'a', 'b', 'b', 'a', 'g', 'e', 's'])
-['Y', '2', 'F', 'i', 'Y', 'm', 'F', 'n', 'Z', 'X', 'M', '=']
+encoder = CustomEncoder()
+encoder.encode(['c', 'a', 'b', 'b', 'a', 'g', 'e', 's'])
+# -> ['Y', '2', 'F', 'i', 'Y', 'm', 'F', 'n', 'Z', 'X', 'M', '=']
 ```
 
 #### Encode Raw
 `encode_raw()` works just like `encode()`, except that symbols are not interpreted. Instead, plain integers within range 0->(base - 1) should be used. The value of the base is used as the padding symbol.
 
 ```py
->>> encoder = CustomEncoder()
->>> encoder.encode_raw([1, 2, 3, 4, 5, 6, 7])
-[0, 16, 8, 3, 1, 0, 20, 6, 1, 48, 64, 64]
+encoder = CustomEncoder()
+encoder.encode_raw([1, 2, 3, 4, 5, 6, 7])
+# -> [0, 16, 8, 3, 1, 0, 20, 6, 1, 48, 64, 64]
 ```
 
 #### Decode from one base to another
 `decode()` works in the exact same way as `encode()`, but in the inverse.
 
 ```py
->>> encoder = CustomEncoder()
->>> encoder.decode(['Y', '2', 'F', 'i', 'Y', 'm', 'F', 'n', 'Z', 'X', 'M', '='])
-['c', 'a', 'b', 'b', 'a', 'g', 'e', 's']
+encoder = CustomEncoder()
+encoder.decode(['Y', '2', 'F', 'i', 'Y', 'm', 'F', 'n', 'Z', 'X', 'M', '='])
+# -> ['c', 'a', 'b', 'b', 'a', 'g', 'e', 's']
 ```
 
 #### Decode Raw
 `decode_raw()` works just like `decode()`, except that symbols are not interpreted. Instead, plain integers within range 0->(base - 1) should be used. The value of the base is used as the padding symbol.
 
 ```py
->>> encoder = CustomEncoder()
->>> encoder.decode_raw([0, 16, 8, 3, 1, 0, 20, 6, 1, 48, 64, 64])
-[1, 2, 3, 4, 5, 6, 7]
+encoder = CustomEncoder()
+encoder.decode_raw([0, 16, 8, 3, 1, 0, 20, 6, 1, 48, 64, 64])
+# -> [1, 2, 3, 4, 5, 6, 7]
 ```
 
 ### Functional Interface
@@ -107,33 +109,33 @@ Return the input data, encoded into the specified base using the specified encod
 Returns the output data as a list of items that are guaranteed to be in the **output symbol table**, or the **output padding** symbol.
 
 ```py
->>> import basest
->>>
->>> basest.core.encode(
-...     input_base=256,
-...     input_symbol_table=[chr(c) for c in range(256)],
-...     output_base=64,
-...     output_symbol_table=[
-...         s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-...     ],
-...     output_padding='=', input_ratio=3, output_ratio=4,
-...     input_data='falafel'
-... )
-['Z', 'm', 'F', 's', 'Y', 'W', 'Z', 'l', 'b', 'A', '=', '=']
+import basest
+
+basest.core.encode(
+    input_base=256,
+    input_symbol_table=[chr(c) for c in range(256)],
+    output_base=64,
+    output_symbol_table=[
+        s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+    ],
+    output_padding='=', input_ratio=3, output_ratio=4,
+    input_data='falafel'
+)
+# -> ['Z', 'm', 'F', 's', 'Y', 'W', 'Z', 'l', 'b', 'A', '=', '=']
 ```
 
 #### Encode Raw
 Similar to the function above, `basest.core.encode_raw` will encode one base into another, but only accepts and returns arrays of integers (e.g. bytes would be passed as integers between 0-255, not as `byte` objects). As such, it omits the **padding** and **symbol table** arguments, but is otherwise identical in function and form to `encode`.
 
 ```py
->>> import basest
->>>
->>> basest.core.encode_raw(
-...     input_base=256, output_base=85,
-...     input_ratio=4, output_ratio=5,
-...     input_data=[99, 97, 98, 98, 97, 103, 101, 115]
-... )
-[31, 79, 81, 71, 52, 31, 25, 82, 13, 76]
+import basest
+
+basest.core.encode_raw(
+    input_base=256, output_base=85,
+    input_ratio=4, output_ratio=5,
+    input_data=[99, 97, 98, 98, 97, 103, 101, 115]
+)
+# -> [31, 79, 81, 71, 52, 31, 25, 82, 13, 76]
 ```
 
 #### Decode from one encoded base to another.
@@ -143,33 +145,33 @@ Returns the output data as a list of items that are guaranteed to be in the **ou
 > This is essentially the inverse of `encode()`
 
 ```py
->>> import basest
->>>
->>> basest.core.decode(
-...     input_base=64,
-...     input_symbol_table=[
-...         s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
-...     ],
-...     input_padding='=',
-...     output_base=256, output_symbol_table=[chr(c) for c in range(256)],
-...     input_ratio=4, output_ratio=3,
-...     input_data='YWJhY3VzIFpaWg=='
-... )
-['a', 'b', 'a', 'c', 'u', 's', ' ', 'Z', 'Z', 'Z']
+import basest
+
+basest.core.decode(
+    input_base=64,
+    input_symbol_table=[
+        s for s in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
+    ],
+    input_padding='=',
+    output_base=256, output_symbol_table=[chr(c) for c in range(256)],
+    input_ratio=4, output_ratio=3,
+    input_data='YWJhY3VzIFpaWg=='
+)
+# -> ['a', 'b', 'a', 'c', 'u', 's', ' ', 'Z', 'Z', 'Z']
 ```
 
 #### Decode Raw
 Similar to the function above, `basest.core.decode_raw` will decode from one base to another, but only accepts and returns arrays of integers (e.g. base64 would be passed as integers between 0-64 (64 is for the padding symbol), not as `str` objects). As such, it omits the **padding** and **symbol table** arguments, but is otherwise identical in function and form to `decode`.
 
 ```py
->>> import basest
->>>
->>> basest.core.decode_raw(
-...     input_base=85, output_base=256,
-...     input_ratio=5, output_ratio=4,
-...     input_data=[31, 79, 81, 71, 52, 31, 25, 82, 13, 76]
-... )
-[99, 97, 98, 98, 97, 103, 101, 115]
+import basest
+
+basest.core.decode_raw(
+    input_base=85, output_base=256,
+    input_ratio=5, output_ratio=4,
+    input_data=[31, 79, 81, 71, 52, 31, 25, 82, 13, 76]
+)
+# -> [99, 97, 98, 98, 97, 103, 101, 115]
 ```
 
 #### Finding the best encoding ratio from one base to any base within a given range
@@ -178,14 +180,14 @@ For a given **input base** (e.g. base-256 / 8-bit Bytes), a given desired **outp
 Returns tuples containing an integer as the first item (representing the output base that is most efficient) and a tuple as the second, containing two integers representing the ratio of **input base** symbols to **output base** symbols.
 
 ```py
->>> import basest
->>>
->>> basest.core.best_ratio(input_base=256, output_bases=[94], chunk_sizes=range(1, 256))
-(94, (68, 83))
->>> basest.core.best_ratio(input_base=256, output_bases=[94], chunk_sizes=range(1, 512))
-(94, (458, 559))
->>> basest.core.best_ratio(input_base=256, output_bases=range(2, 95), chunk_sizes=range(1, 256))
-(94, (68, 83))
->>> basest.core.best_ratio(input_base=256, output_bases=range(2, 334), chunk_sizes=range(1, 256))
-(333, (243, 232))
+import basest
+
+basest.core.best_ratio(input_base=256, output_bases=[94], chunk_sizes=range(1, 256))
+# -> (94, (68, 83))
+basest.core.best_ratio(input_base=256, output_bases=[94], chunk_sizes=range(1, 512))
+# -> (94, (458, 559))
+basest.core.best_ratio(input_base=256, output_bases=range(2, 95), chunk_sizes=range(1, 256))
+# -> (94, (68, 83))
+basest.core.best_ratio(input_base=256, output_bases=range(2, 334), chunk_sizes=range(1, 256))
+# -> (333, (243, 232))
 ```
@@ -1,10 +1,16 @@
-#!/usr/bin/python
 # -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016, 2018, Joshua Saxby <joshua.a.saxby@gmail.com>
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
 from __future__ import (
     absolute_import, division, print_function, unicode_literals
 )
 
-from . import core, encoders
+from . import core, encoders, exceptions
 
 
-__all__ = ['core', 'encoders']
+__all__ = ['core', 'encoders', 'exceptions']
@@ -1,5 +1,11 @@
-#!/usr/bin/python
 # -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016, 2018, Joshua Saxby <joshua.a.saxby@gmail.com>
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
 from __future__ import (
     absolute_import, division, print_function, unicode_literals
 )
 
@@ -1,25 +1,55 @@
-#!/usr/bin/python
 # -*- coding: utf-8 -*-
+#
+# Copyright (C) 2016, 2018, Joshua Saxby <joshua.a.saxby@gmail.com>
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
 from __future__ import (
     absolute_import, division, print_function, unicode_literals
 )
 
 from math import ceil, log
 
 
-INF = float('infinity')
+# an easy way to store positive infinity in a manner compatible with Python 2.x
+INF = float('inf')
 
 
 def _encoding_ratio(base_from, base_to, chunk_sizes):
     """
     An algorithm for finding the most efficient encoding ratio
     from one base to another within a range limit.
     """
+    # a ratio of 1:Infinity is the theoretical worst possible ratio
     best_ratio = (1.0, INF)
     for s in chunk_sizes:
-        match = ceil(log(base_from ** s, base_to))
+        # reject any chunk size that is not an int (raises TypeError)
+        if not isinstance(s, int):
+            raise TypeError('chunk sizes must be list of ints')
+        '''
+        We need to work out how many digits in the output base are needed to
+        represent a number s digits long in the input base.
+
+        The number of values represented by an s digit long number in the input
+        base is `base_from ** s`
+
+        The number of digits in base x needed to represent n values is
+        `ceil(logx(n))`
+
+        Altogether this is `ceil(logx(base_from ** s))`
+
+        This can be simplified using the law `n log(x) = log(x ** n)`
+
+        To become the following:
+        '''
+        match = ceil(s * log(base_from, base_to))
+        # the ratio is input digits : output digits, i.e. s : match
         ratio = (float(s), match)
+        # ratio efficiencies can be compared by dividing them like fractions
         if (ratio[0] / ratio[1]) > (best_ratio[0] / best_ratio[1]):
+            # this is the new best ratio found so far
             best_ratio = ratio
     return (int(best_ratio[0]), int(best_ratio[1]))
 
@@ -30,14 +60,26 @@ def best_ratio(input_base, output_bases, chunk_sizes):
     sizes, find the most efficient encoding ratio.
     Returns the chosen output base, and the chosen encoding ratio.
     """
+    # validate input base type
+    if not isinstance(input_base, int):
+        raise TypeError('input base must be of int type')
+
+    # we will store the most efficient output base here
     encoder = 0
+    # a ratio of 1:Infinity is the theoretical worst possible ratio
     best_ratio = (1.0, INF)
     for base_to in output_bases:
+        # validate each output base here
+        if not isinstance(base_to, int):
+            raise TypeError('output bases must be list of ints')
+        # get the best encoding ratio for this base out of all chunk sizes
         ratio = _encoding_ratio(input_base, base_to, chunk_sizes)
+        # if it's more efficient, then set it as the most efficient one yet
         if (
             (float(ratio[0]) / float(ratio[1])) >
             (float(best_ratio[0]) / float(best_ratio[1]))
         ):
             best_ratio = ratio
             encoder = base_to
+    # we now have the best output base and ratio for it
     return encoder, (int(best_ratio[0]), int(best_ratio[1]))