Skip to content

Commit 1629399

Browse files
committed
Fine-tuned checksum codecs
1 parent 8903aea commit 1629399

File tree

5 files changed

+41
-113
lines changed

5 files changed

+41
-113
lines changed

src/codext/__common__.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# -*- coding: UTF-8 -*-
22
import _codecs
3+
import builtins
34
import codecs
45
import hashlib
56
import json
@@ -20,22 +21,6 @@
2021
from random import randint
2122
from string import *
2223
from types import FunctionType, ModuleType
23-
try: # Python2
24-
import __builtin__ as builtins
25-
except ImportError:
26-
import builtins
27-
try: # Python2
28-
from inspect import getfullargspec
29-
except ImportError:
30-
from inspect import getargspec as getfullargspec
31-
try: # Python2
32-
from string import maketrans
33-
except ImportError:
34-
maketrans = str.maketrans
35-
try: # Python3
36-
from importlib import reload
37-
except ImportError:
38-
pass
3924
try:
4025
import re._parser as sre_parse
4126
except ImportError:
@@ -44,6 +29,8 @@
4429
# from Python 3.11, 'sre_parse' is bound as '_parser' ; monkey-patch it for backward-compatibility
4530
re.sre_parse = sre_parse
4631

32+
maketrans = str.maketrans
33+
4734

4835
__all__ = ["add", "add_macro", "add_map", "b", "clear", "codecs", "decode", "encode", "ensure_str", "examples", "guess",
4936
"isb", "generate_strings_from_regex", "get_alphabet_from_mask", "handle_error", "hashlib", "i2s",
@@ -277,6 +264,7 @@ def getregentry(encoding):
277264
# this occurs while m is not None, but possibly no capture group that gives at least 1 group index ;
278265
# in this case, if fenc/fdec is a decorated function, execute it with no arg
279266
if len(args) == 0:
267+
from inspect import getfullargspec
280268
if fenc and len(getfullargspec(fenc).args) == 1:
281269
fenc = fenc()
282270
if fdec and len(getfullargspec(fdec).args) == 1:
@@ -767,6 +755,7 @@ def remove(name):
767755

768756
def reset():
769757
""" Reset codext's local registry of search functions and macros. """
758+
from importlib import reload
770759
global __codecs_registry, CODECS_REGISTRY, MACROS, PERS_MACROS # noqa: F824
771760
clear()
772761
d = os.path.dirname(__file__)
@@ -1142,9 +1131,8 @@ def generate_string_from_regex(regex):
11421131

11431132
def generate_strings_from_regex(regex, star_plus_max=STAR_PLUS_MAX, repeat_max=REPEAT_MAX, yield_max=YIELD_MAX):
    """ Public generator yielding, one by one, the strings produced for the given regex pattern.

    The pattern and the three limits are forwarded unchanged to the private generator.
    """
    yield from __gen_str_from_re(regex, star_plus_max, repeat_max, yield_max)
11481136

11491137

11501138
# guess feature objects

src/codext/checksums/adler.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
44
This is a codec for computing checksums, for use with other codecs in encoding chains.
55
6-
These codecs:
7-
- transform strings from str to str
8-
- transform strings from bytes to bytes
9-
- transform file content from str to bytes (write)
6+
This codec:
7+
- transforms strings from str to str
8+
- transforms strings from bytes to bytes
9+
- transforms file content from str to bytes (write)
1010
"""
1111
from zlib import adler32
1212

src/codext/checksums/crc.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,10 @@
33
44
This is a codec for computing checksums, for use with other codecs in encoding chains.
55
6-
These codecs:
7-
- transform strings from str to str
8-
- transform strings from bytes to bytes
9-
- transform file content from str to bytes (write)
6+
This codec:
7+
- transforms strings from str to str
8+
- transforms strings from bytes to bytes
9+
- transforms file content from str to bytes (write)
1010
"""
1111
from ..__common__ import add
1212

@@ -212,7 +212,7 @@
212212
},
213213
}
214214

215-
_pattern = lambda n="": r"^crc" + str(n) + r"(|[-_]?(?:%s))$" % "|".join(x for x in CRC[n].keys() if len(x) > 0)
215+
def _pattern(n=""):
    """ Build the codec name regex for the CRC family stored under key n of the CRC table. """
    # only the non-empty variant names are part of the alternation ; the empty alternative in the capture group
    #  is what allows the bare name with no variant suffix
    variants = "|".join(x for x in CRC[n].keys() if len(x) > 0)
    return rf"^crc(?:[-_]?){n}(|[-_]?(?:{variants}))$"
216216
def _rev_int(i, l=None):
    """ Reverse the bit order of integer i, first left-padding its binary form with zeros up to l bits if l is set. """
    bits = bin(i)[2:]
    if l:
        # zfill never truncates, hence a width smaller than the current length leaves the bits untouched
        bits = bits.zfill(l)
    return int(bits[::-1], 2)
217217

218218

src/codext/checksums/luhn.py

Lines changed: 14 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,96 +1,33 @@
11
# -*- coding: UTF-8 -*-
22
"""Luhn Codec - Luhn Mod N checksum algorithm.
33
4-
The Luhn algorithm, also known as the "modulus 10" algorithm, is a simple checksum
5-
formula used to validate identification numbers (e.g. credit card numbers, IMEI
6-
numbers). Encoding appends a check character; decoding verifies the check character
7-
and strips it.
8-
9-
The Luhn Mod N generalization extends the algorithm to alphabets of arbitrary size N.
10-
When called as 'luhn' or 'luhn-10', the standard decimal alphabet (0-9, N=10) is
11-
used. When called as 'luhn-<N>' for 2 ≤ N ≤ 36, the first N characters of
12-
'0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ' form the alphabet.
4+
This is a codec for computing checksums, for use with other codecs in encoding chains.
135
146
This codec:
15-
- en/decodes strings from str to str
16-
- en/decodes strings from bytes to bytes
17-
- decodes file content to str (read)
18-
- encodes file content from str to bytes (write)
19-
20-
Reference: https://en.wikipedia.org/wiki/Luhn_algorithm
21-
https://bitcoinwiki.org/wiki/luhn-mod-n-algorithm
7+
- transforms strings from str to str
8+
- transforms strings from bytes to bytes
9+
- transforms file content from str to bytes (write)
2210
"""
2311
from ..__common__ import *
2412

2513

26-
__examples__ = {
27-
'enc(luhn|luhn-10|luhn10)': {
28-
'7992739871': '79927398713',
29-
'': '',
30-
'0': '00',
31-
'1': '18',
32-
},
33-
'dec(luhn|luhn-10|luhn10)': {
34-
'79927398713': '7992739871',
35-
'': '',
36-
'00': '0',
37-
'18': '1',
38-
},
39-
'enc-dec(luhn)': ['123456789', '0' * 10, '9999999999999999'],
40-
'enc-dec(luhn-16)': ['0123456789ABCDEF', 'DEADBEEF'],
41-
'enc-dec(luhn-36)': ['HELLO', 'WORLD123'],
42-
}
43-
44-
_FULL_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
45-
46-
47-
def _luhn_encode(n=""):
48-
mod = n if isinstance(n, int) else 10
49-
alphabet = _FULL_ALPHABET[:mod]
50-
51-
def _encode(text, errors="strict"):
52-
text = ensure_str(text).upper() if mod > 10 else ensure_str(text)
53-
if not text:
14+
def luhn(n=""):
    """ Build the encoding function computing the Luhn mod N check character.

    :param n: modulus, i.e. the size of the alphabet ; any value that is not an int selects the classic mod 10
    :return:  function encode(data, errors="strict") returning the tuple (check character, len(b(data))), or
               ("", 0) when no input character is left after normalization and error handling
    """
    mod = n if isinstance(n, int) else 10
    # the alphabet is made of the first `mod` symbols among the digits followed by the uppercase letters
    alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"[:mod]

    def encode(data, errors="strict"):
        # normalize BEFORE validating:
        # - iterating a bytes object yields ints and `int in str` raises TypeError, hence ensure_str() comes first
        # - alphabets beyond the digits only hold uppercase letters, hence lowercase input must be upper-cased
        #   before being checked against the alphabet, otherwise it is wrongly reported as invalid
        data = ensure_str(data)
        if mod > 10:
            data = data.upper()
        # characters outside the alphabet are delegated to the error handler, whose return value takes their place
        #  (presumably an empty string with errors="ignore" ; confirm in handle_error)
        data = "".join(c if c in alphabet else handle_error("luhn", errors, kind="character")(c, i, data)
                       for i, c in enumerate(data))
        if not data:
            return "", 0
        total = 0
        for i, c in enumerate(reversed(data)):
            code = alphabet.index(c)
            # every second symbol, starting with the rightmost one, is doubled, then reduced to the sum of the two
            #  base-`mod` digits of the doubled value
            if i % 2 == 0:
                d = code * 2
                code = d % mod + d // mod
            total += code
        # the check symbol is the one bringing the total to a multiple of `mod`
        check = (mod - total % mod) % mod
        return alphabet[check], len(b(data))

    return encode
9230

93-
return _decode
9431

32+
# register the codec with `luhn` as the encoder factory only (no decoder is supplied) ; the pattern accepts "luhn",
#  optionally followed by "-" or "_" and by one or two digits (presumably handed to luhn() as n ; confirm in add())
# NOTE(review): \d{1,2} also matches 0, 1 and 37-99, for which the 36-symbol alphabet used by luhn() cannot supply
#  `mod` symbols ; consider tightening the pattern
add("luhn", luhn, pattern=r"^luhn[-_]?(\d{1,2})?$", guess=None)
9533

96-
add("luhn", _luhn_encode, _luhn_decode, pattern=r"^luhn[-_]?(\d{1,2})?$", guess=None)

tests/test_manual.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,17 @@ def test_codec_case_related_manips(self):
9696
self.assertRaises(NotImplementedError, codecs.decode, STR, "slug")
9797
self.assertRaises(NotImplementedError, codecs.decode, STR, "snake")
9898

99+
def test_codec_checksum_functions(self):
    """ Check every registered CRC variant and the Luhn check character computation. """
    from codext.checksums.crc import CRC
    for n, variants in CRC.items():
        for name, params in variants.items():
            enc = ("crc%d-%s" % (n, name) if isinstance(n, int) else "crc-%s" % name).rstrip("-")
            # one sub-test per variant, so that a failure names the offending codec and the remaining variants
            #  still get checked
            with self.subTest(codec=enc):
                # the expected digest is the value at index 5 of the variant's parameters (presumably the
                #  reference check value for "123456789"), rendered as zero-padded lowercase hexadecimal
                self.assertEqual(codecs.encode("123456789", enc), "%0{}x".format(round((n or 16)/4+.5)) % params[5])
    # not used below ; this import fails with ImportError if the module stops exposing the factory under this name
    from codext.checksums.luhn import luhn  # noqa: F401
    for s, r in [("", ""), ("0", "0"), ("1", "8"), ("7992739871", "3")]:
        with self.subTest(data=s):
            self.assertEqual(codecs.encode(s, "luhn"), r)
    # with errors="ignore", a character outside the alphabet is dropped, leaving nothing to compute a checksum on
    self.assertEqual(codecs.encode("-", "luhn", errors="ignore"), "")
109+
99110
def test_codec_dummy_str_manips(self):
100111
STR = "this is a test"
101112
self.assertEqual(codecs.decode(STR, "reverse"), "tset a si siht")
@@ -109,7 +120,6 @@ def test_codec_dummy_str_manips(self):
109120
self.assertRaises(LookupError, codecs.encode, STR, "tokenize-200")
110121

111122
def test_codec_hash_functions(self):
112-
from codext.checksums.crc import CRC
113123
STR = b"This is a test string!"
114124
for h in ["adler32", "md2", "md5", "sha1", "sha224", "sha256", "sha384", "sha512"]:
115125
self.assertIsNotNone(codecs.encode(STR, h))
@@ -145,13 +155,6 @@ def test_codec_hash_functions(self):
145155
h = "crypt-" + m
146156
self.assertIsNotNone(codecs.encode(STR, h))
147157
self.assertRaises(NotImplementedError, codecs.decode, STR, h)
148-
# CRC checks
149-
STR = "123456789"
150-
for n, variants in CRC.items():
151-
for name, params in variants.items():
152-
enc = ("crc%d-%s" % (n, name) if isinstance(n, int) else "crc-%s" % name).rstrip("-")
153-
print(enc)
154-
self.assertEqual(codecs.encode(STR, enc), "%0{}x".format(round((n or 16)/4+.5)) % params[5])
155158

156159
def test_codec_markdown(self):
157160
HTM = "<h1>Test title</h1>\n\n<p>Test paragraph</p>\n"

0 commit comments

Comments
 (0)