diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml new file mode 100644 index 0000000..3a34bfa --- /dev/null +++ b/.github/workflows/ruff.yml @@ -0,0 +1,15 @@ +name: Ruff +on: [workflow_dispatch, pull_request] +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Python + uses: actions/setup-python@v5 + with: + python-version: "3.13" + - uses: astral-sh/ruff-action@v3 + with: + args: "check --fix" + continue-on-error: false \ No newline at end of file diff --git a/probables/__init__.py b/probables/__init__.py index c3173b1..532cb8e 100644 --- a/probables/__init__.py +++ b/probables/__init__.py @@ -1,6 +1,4 @@ -""" pyprobables module """ - -from typing import List +"""pyprobables module""" from probables.blooms import ( BloomFilter, @@ -9,13 +7,7 @@ ExpandingBloomFilter, RotatingBloomFilter, ) -from probables.countminsketch import ( - CountMeanMinSketch, - CountMeanSketch, - CountMinSketch, - HeavyHitters, - StreamThreshold, -) +from probables.countminsketch import CountMeanMinSketch, CountMeanSketch, CountMinSketch, HeavyHitters, StreamThreshold from probables.cuckoo import CountingCuckooFilter, CuckooFilter from probables.exceptions import ( CuckooFilterFullError, diff --git a/probables/blooms/__init__.py b/probables/blooms/__init__.py index a66cbd2..02b8787 100644 --- a/probables/blooms/__init__.py +++ b/probables/blooms/__init__.py @@ -1,4 +1,4 @@ -""" Bloom Filters """ +"""Bloom Filters""" from probables.blooms.bloom import BloomFilter, BloomFilterOnDisk from probables.blooms.countingbloom import CountingBloomFilter diff --git a/probables/blooms/bloom.py b/probables/blooms/bloom.py index c4c30e5..888f895 100644 --- a/probables/blooms/bloom.py +++ b/probables/blooms/bloom.py @@ -1,14 +1,15 @@ -""" BloomFilter and BloomFiter on Disk, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) - URL: https://github.com/barrust/bloom +"""BloomFilter and BloomFiter on Disk, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) +URL: https://github.com/barrust/bloom """ + import math import os from array import array from binascii import hexlify, unhexlify from collections.abc import ByteString -from io import BytesIO, IOBase +from io import BufferedRandom, BytesIO, IOBase from mmap import mmap from numbers import Number from pathlib import Path @@ -308,10 +309,7 @@ def export_c_header(self, filename: Union[str, Path]) -> None: Args: filename (str): The filename to which the Bloom Filter will be written.""" data = (" " + line for line in wrap(", ".join(f"0x{e:02x}" for e in bytearray.fromhex(self.export_hex())), 80)) - if self._type in ["regular", "regular-on-disk"]: - bloom_type = "standard BloomFilter" - else: - bloom_type = "CountingBloomFilter" + bloom_type = "standard BloomFilter" if self._type in ["regular", "regular-on-disk"] else "CountingBloomFilter" with open(filename, "w", encoding="utf-8") as file: print(f"/* BloomFilter Export of a {bloom_type} */", file=file) @@ -570,9 +568,7 @@ def _verify_bloom_similarity(self, second: SimpleBloomT) -> bool: hash_match = self.number_hashes != second.number_hashes same_bits = self.number_bits != second.number_bits next_hash = self.hashes("test") != second.hashes("test") - if hash_match or same_bits or next_hash: - return False - return True + return not (hash_match or same_bits or next_hash) class BloomFilterOnDisk(BloomFilter): @@ -609,7 +605,7 @@ def __init__( ) -> None: # set some things up self._filepath = resolve_path(filepath) - self.__file_pointer = None + self.__file_pointer: Union[BufferedRandom, None] = None super().__init__(est_elements, false_positive_rate, filepath, hex_string, hash_function) def _load_init(self, filepath, hash_function, hex_string, est_elements, false_positive_rate): @@ -644,7 +640,7 @@ def close(self) -> None: """Clean up the BloomFilterOnDisk object""" if self.__file_pointer is not None and not self.__file_pointer.closed: self.__update() - self._bloom.close() + self._bloom.close() # type: ignore self.__file_pointer.close() self.__file_pointer = None @@ -673,7 +669,7 @@ def _load(self, file: Union[str, Path], hash_function: Union[HashFuncT, None] = fpr, n_hashes, n_bits = self._get_optimized_params(est_els, fpr) self._set_values(est_els, fpr, n_hashes, n_bits, hash_function) # setup a few additional items - self.__file_pointer = open(file, "r+b") # type: ignore + self.__file_pointer = open(file, "r+b") # noqa: SIM115 self._bloom = mmap(self.__file_pointer.fileno(), 0) # type: ignore self._on_disk = True diff --git a/probables/blooms/countingbloom.py b/probables/blooms/countingbloom.py index aa9a478..99015cb 100644 --- a/probables/blooms/countingbloom.py +++ b/probables/blooms/countingbloom.py @@ -1,8 +1,9 @@ -""" CountingBloomFilter, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) - URL: https://github.com/barrust/counting_bloom +"""CountingBloomFilter, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) +URL: https://github.com/barrust/counting_bloom """ + from array import array from collections.abc import ByteString from pathlib import Path diff --git a/probables/blooms/expandingbloom.py b/probables/blooms/expandingbloom.py index 2a89a73..ef9e765 100644 --- a/probables/blooms/expandingbloom.py +++ b/probables/blooms/expandingbloom.py @@ -1,7 +1,7 @@ -""" Expanding and Rotating BloomFilter, python implementations - License: MIT - Author: Tyler Barrus (barrust@gmail.com) - URL: https://github.com/barrust/pyprobables +"""Expanding and Rotating BloomFilter, python implementations +License: MIT +Author: Tyler Barrus (barrust@gmail.com) +URL: https://github.com/barrust/pyprobables """ from array import array @@ -145,10 +145,7 @@ def check_alt(self, hashes: HashResultsT) -> bool: hashes (list): The hash representation to check for in the Bloom Filter Returns: bool: `True` if the element is likely present; `False` if definately not present""" - for blm in self._blooms: - if blm.check_alt(hashes): - return True - return False + return any(blm.check_alt(hashes) for blm in self._blooms) def add(self, key: KeyT, force: bool = False) -> None: """Add the key to the Bloom Filter diff --git a/probables/constants.py b/probables/constants.py index 96cf5e5..41c7719 100644 --- a/probables/constants.py +++ b/probables/constants.py @@ -1,4 +1,5 @@ -""" Project Constants (or basic numerical constants...) """ +"""Project Constants (or basic numerical constants...)""" + INT32_T_MIN = -2147483648 INT32_T_MAX = 2147483647 INT64_T_MIN = -9223372036854775808 diff --git a/probables/countminsketch/__init__.py b/probables/countminsketch/__init__.py index e7c08c1..03cff2c 100644 --- a/probables/countminsketch/__init__.py +++ b/probables/countminsketch/__init__.py @@ -1,4 +1,4 @@ -""" Count-Min Sketchs """ +"""Count-Min Sketchs""" from probables.countminsketch.countminsketch import ( CountMeanMinSketch, diff --git a/probables/countminsketch/countminsketch.py b/probables/countminsketch/countminsketch.py index 9fa12fa..98a5611 100644 --- a/probables/countminsketch/countminsketch.py +++ b/probables/countminsketch/countminsketch.py @@ -1,7 +1,7 @@ -""" Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations - License: MIT - Author: Tyler Barrus (barrust@gmail.com) - URL: https://github.com/barrust/count-min-sketch +"""Count-Min Sketch, Heavy Hitters, and Stream Threshold, python implementations +License: MIT +Author: Tyler Barrus (barrust@gmail.com) +URL: https://github.com/barrust/count-min-sketch """ import math diff --git a/probables/cuckoo/__init__.py b/probables/cuckoo/__init__.py index cf6f548..9918484 100644 --- a/probables/cuckoo/__init__.py +++ b/probables/cuckoo/__init__.py @@ -1,4 +1,4 @@ -""" Cuckoo Filters """ +"""Cuckoo Filters""" from probables.cuckoo.countingcuckoo import CountingCuckooFilter from probables.cuckoo.cuckoo import CuckooFilter diff --git a/probables/cuckoo/countingcuckoo.py b/probables/cuckoo/countingcuckoo.py index 471fa75..e073757 100644 --- a/probables/cuckoo/countingcuckoo.py +++ b/probables/cuckoo/countingcuckoo.py @@ -1,6 +1,6 @@ -""" Counting Cuckoo Filter, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) +"""Counting Cuckoo Filter, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) """ import random @@ -136,9 +136,7 @@ def frombytes( def __contains__(self, val: KeyT) -> bool: """setup the `in` keyword""" - if self.check(val) > 0: - return True - return False + return self.check(val) > 0 @property def unique_elements(self) -> int: diff --git a/probables/cuckoo/cuckoo.py b/probables/cuckoo/cuckoo.py index e7a4749..cbc8136 100644 --- a/probables/cuckoo/cuckoo.py +++ b/probables/cuckoo/cuckoo.py @@ -1,6 +1,6 @@ -""" Cuckoo Filter, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) +"""Cuckoo Filter, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) """ import math @@ -313,9 +313,7 @@ def check(self, key: KeyT) -> bool: bool: True if likely present, False if definately not""" idx_1, idx_2, fingerprint = self._generate_fingerprint_info(key) is_present = self._check_if_present(idx_1, idx_2, fingerprint) - if is_present is not None: - return True - return False + return is_present is not None def remove(self, key: KeyT) -> bool: """Remove an element from the filter diff --git a/probables/exceptions.py b/probables/exceptions.py index b76eb37..b0e3bf1 100644 --- a/probables/exceptions.py +++ b/probables/exceptions.py @@ -1,4 +1,4 @@ -""" PyProbables Exceptions """ +"""PyProbables Exceptions""" class ProbablesBaseException(Exception): diff --git a/probables/hashes.py b/probables/hashes.py index b380696..8875869 100644 --- a/probables/hashes.py +++ b/probables/hashes.py @@ -1,4 +1,4 @@ -""" Probables Hashing Utilities """ +"""Probables Hashing Utilities""" from functools import wraps from hashlib import md5, sha256 diff --git a/probables/quotientfilter/__init__.py b/probables/quotientfilter/__init__.py index 72edc3a..ada749a 100644 --- a/probables/quotientfilter/__init__.py +++ b/probables/quotientfilter/__init__.py @@ -1,5 +1,4 @@ -""" Quotient Filters """ - +"""Quotient Filters""" from probables.quotientfilter.quotientfilter import QuotientFilter diff --git a/probables/quotientfilter/quotientfilter.py b/probables/quotientfilter/quotientfilter.py index b4635a9..d8e1113 100644 --- a/probables/quotientfilter/quotientfilter.py +++ b/probables/quotientfilter/quotientfilter.py @@ -1,6 +1,6 @@ -""" Quotient Filter, python implementation - License: MIT - Author: Tyler Barrus (barrust@gmail.com) +"""Quotient Filter, python implementation +License: MIT +Author: Tyler Barrus (barrust@gmail.com) """ import sys @@ -496,9 +496,7 @@ def _is_run_start(self, elt: int) -> bool: def _is_run_or_cluster_start(self, elt: int) -> bool: if self._is_cluster_start(elt): return True - if self._is_run_start(elt): - return True - return False + return bool(self._is_run_start(elt)) def _is_empty_element(self, elt: int) -> bool: """Is this an empty element?""" diff --git a/probables/utilities.py b/probables/utilities.py index 3029ae3..0553d1f 100644 --- a/probables/utilities.py +++ b/probables/utilities.py @@ -1,4 +1,4 @@ -""" Utility Functions """ +"""Utility Functions""" import math import mmap @@ -41,7 +41,7 @@ class MMap: def __init__(self, path: Union[Path, str]): self.__p = Path(path) - self.__f = self.path.open("rb") + self.__f = self.path.open("rb") # noqa: SIM115 self.__m = mmap.mmap(self.__f.fileno(), 0, access=mmap.ACCESS_READ) self._closed = False diff --git a/tests/bloom_test.py b/tests/bloom_test.py index 13e3733..21f8788 100755 --- a/tests/bloom_test.py +++ b/tests/bloom_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,11 +12,11 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import BloomFilter, BloomFilterOnDisk -from probables.constants import UINT64_T_MAX -from probables.exceptions import InitializationError, NotSupportedError -from probables.hashes import hash_with_depth_int -from tests.utilities import calc_file_md5, different_hash +from probables import BloomFilter, BloomFilterOnDisk # noqa: E402 +from probables.constants import UINT64_T_MAX # noqa: E402 +from probables.exceptions import InitializationError, NotSupportedError # noqa: E402 +from probables.hashes import hash_with_depth_int # noqa: E402 +from tests.utilities import calc_file_md5, different_hash # noqa: E402 DELETE_TEMP_FILES = True @@ -252,7 +251,7 @@ def test_bf_stats(self): ) blm = BloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) stats = str(blm) self.assertEqual(stats, msg) @@ -262,7 +261,7 @@ def test_bf_export_hex(self): hex_val = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd" blm = BloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) hex_out = blm.export_hex() @@ -295,23 +294,23 @@ def test_bf_export_c_header(self): hex_val = "6da491461a6bba4d000000000000000a000000000000000a3d4ccccd" blm = BloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: blm.export_c_header(fobj.name) # now load the file, parse it and do some tests! - with open(fobj.name, "r") as fobj: + with open(fobj.name) as fobj: data = fobj.readlines() data = [x.strip() for x in data] self.assertEqual("/* BloomFilter Export of a standard BloomFilter */", data[0]) self.assertEqual("#include ", data[1]) - self.assertEqual("const uint64_t estimated_elements = {};".format(blm.estimated_elements), data[2]) - self.assertEqual("const uint64_t elements_added = {};".format(blm.elements_added), data[3]) - self.assertEqual("const float false_positive_rate = {};".format(blm.false_positive_rate), data[4]) - self.assertEqual("const uint64_t number_bits = {};".format(blm.number_bits), data[5]) - self.assertEqual("const unsigned int number_hashes = {};".format(blm.number_hashes), data[6]) + self.assertEqual(f"const uint64_t estimated_elements = {blm.estimated_elements};", data[2]) + self.assertEqual(f"const uint64_t elements_added = {blm.elements_added};", data[3]) + self.assertEqual(f"const float false_positive_rate = {blm.false_positive_rate};", data[4]) + self.assertEqual(f"const uint64_t number_bits = {blm.number_bits};", data[5]) + self.assertEqual(f"const unsigned int number_hashes = {blm.number_hashes};", data[6]) self.assertEqual("const unsigned char bloom[] = {", data[7]) self.assertEqual("};", data[-1]) @@ -481,7 +480,7 @@ def test_bf_clear(self): blm = BloomFilter(est_elements=10, false_positive_rate=0.05) self.assertEqual(blm.elements_added, 0) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) self.assertEqual(blm.elements_added, 10) @@ -518,13 +517,13 @@ def my_hash(key, depth=1, encoding="utf-8"): self.assertNotEqual(md5_out, md5_val) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) self.assertEqual(blm.elements_added, 11) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" self.assertTrue(blm.check(tmp)) self.assertEqual(blm.hashes("this is a test", 5), results) @@ -563,13 +562,13 @@ def my_hash(key, depth, encoding="utf-8"): self.assertNotEqual(md5_out, md5_val) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) self.assertEqual(blm.elements_added, 11) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" self.assertTrue(blm.check(tmp)) self.assertEqual(blm.hashes("this is a test", 5), results) @@ -714,7 +713,7 @@ def test_bfod_close_del(self): blm.add("this is a test") del blm try: - self.assertEqual(True, blm) + self.assertEqual(True, blm) # noqa: F821 except UnboundLocalError as ex: msg1 = "local variable 'blm' referenced before assignment" msg2 = "cannot access local variable 'blm' where it is not associated with a value" @@ -731,7 +730,7 @@ def test_bfod_close_del(self): # export to new file def test_bfod_export(self): """export to on disk to new file""" - with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: + with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: # noqa: SIM117 with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj1: blm = BloomFilterOnDisk(fobj.name, 10, 0.05) blm.add("this is a test") @@ -781,7 +780,7 @@ def test_bfod_export_hex(self): with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: blm = BloomFilterOnDisk(fobj.name, est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) hex_out = blm.export_hex() self.assertEqual(hex_out, hex_val) @@ -813,24 +812,24 @@ def test_bfod_export_c_header(self): with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: blm = BloomFilterOnDisk(fobj.name, est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: blm.export_c_header(fobj.name) # now load the file, parse it and do some tests! - with open(fobj.name, "r") as fobj: + with open(fobj.name) as fobj: data = fobj.readlines() data = [x.strip() for x in data] self.assertEqual("/* BloomFilter Export of a standard BloomFilter */", data[0]) self.assertEqual("#include ", data[1]) - self.assertEqual("const uint64_t estimated_elements = {};".format(blm.estimated_elements), data[2]) - self.assertEqual("const uint64_t elements_added = {};".format(blm.elements_added), data[3]) - self.assertEqual("const float false_positive_rate = {};".format(blm.false_positive_rate), data[4]) - self.assertEqual("const uint64_t number_bits = {};".format(blm.number_bits), data[5]) - self.assertEqual("const unsigned int number_hashes = {};".format(blm.number_hashes), data[6]) + self.assertEqual(f"const uint64_t estimated_elements = {blm.estimated_elements};", data[2]) + self.assertEqual(f"const uint64_t elements_added = {blm.elements_added};", data[3]) + self.assertEqual(f"const float false_positive_rate = {blm.false_positive_rate};", data[4]) + self.assertEqual(f"const uint64_t number_bits = {blm.number_bits};", data[5]) + self.assertEqual(f"const unsigned int number_hashes = {blm.number_hashes};", data[6]) self.assertEqual("const unsigned char bloom[] = {", data[7]) self.assertEqual("};", data[-1]) @@ -844,7 +843,7 @@ def test_bfod_clear(self): blm = BloomFilterOnDisk(filepath=fobj.name, est_elements=10, false_positive_rate=0.05) self.assertEqual(blm.elements_added, 0) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) self.assertEqual(blm.elements_added, 10) diff --git a/tests/countingbloom_test.py b/tests/countingbloom_test.py index aab6c6f..6527945 100755 --- a/tests/countingbloom_test.py +++ b/tests/countingbloom_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,9 +12,9 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import CountingBloomFilter -from probables.exceptions import InitializationError -from tests.utilities import calc_file_md5, different_hash +from probables import CountingBloomFilter # noqa: E402 +from probables.exceptions import InitializationError # noqa: E402 +from tests.utilities import calc_file_md5, different_hash # noqa: E402 DELETE_TEMP_FILES = True @@ -87,7 +86,7 @@ def test_cbf_stats(self): ) blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - blm.add("this is a test {0}".format(i)) + blm.add(f"this is a test {i}") stats = str(blm) self.assertEqual(stats, msg) @@ -96,7 +95,7 @@ def test_cbf_clear(self): blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05) self.assertEqual(blm.elements_added, 0) for i in range(0, 10): - blm.add("this is a test {0}".format(i)) + blm.add(f"this is a test {i}") self.assertEqual(blm.elements_added, 10) blm.clear() @@ -215,7 +214,7 @@ def test_cbf_export_hex(self): blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) hex_out = blm.export_hex() @@ -277,25 +276,25 @@ def test_cbf_export_c_header(self): ) blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(0, 10): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) with NamedTemporaryFile(dir=os.getcwd(), suffix=".blm", delete=DELETE_TEMP_FILES) as fobj: blm.export_c_header(fobj.name) # now load the file, parse it and do some tests! - with open(fobj.name, "r") as fobj: + with open(fobj.name) as fobj: data = fobj.readlines() data = [x.strip() for x in data] self.assertEqual("/* BloomFilter Export of a CountingBloomFilter */", data[0]) self.assertEqual("#include ", data[1]) - self.assertEqual("const uint64_t estimated_elements = {};".format(blm.estimated_elements), data[2]) - self.assertEqual("const uint64_t elements_added = {};".format(blm.elements_added), data[3]) - self.assertEqual("const float false_positive_rate = {};".format(blm.false_positive_rate), data[4]) - self.assertEqual("const uint64_t number_bits = {};".format(blm.number_bits), data[5]) - self.assertEqual("const unsigned int number_hashes = {};".format(blm.number_hashes), data[6]) + self.assertEqual(f"const uint64_t estimated_elements = {blm.estimated_elements};", data[2]) + self.assertEqual(f"const uint64_t elements_added = {blm.elements_added};", data[3]) + self.assertEqual(f"const float false_positive_rate = {blm.false_positive_rate};", data[4]) + self.assertEqual(f"const uint64_t number_bits = {blm.number_bits};", data[5]) + self.assertEqual(f"const unsigned int number_hashes = {blm.number_hashes};", data[6]) self.assertEqual("const unsigned char bloom[] = {", data[7]) self.assertEqual("};", data[-1]) @@ -410,7 +409,7 @@ def test_cbf_remove(self): blm = CountingBloomFilter(est_elements=10, false_positive_rate=0.05) self.assertEqual(blm.elements_added, 0) for i in range(0, 5): - tmp = "this is a test {0}".format(i) + tmp = f"this is a test {i}" blm.add(tmp) self.assertEqual(blm.elements_added, 5) res = blm.remove("this is a test 0") diff --git a/tests/countingcuckoo_test.py b/tests/countingcuckoo_test.py index bb7fcf6..da4e4ae 100755 --- a/tests/countingcuckoo_test.py +++ b/tests/countingcuckoo_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,8 +12,8 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import CountingCuckooFilter, CuckooFilterFullError -from tests.utilities import calc_file_md5 +from probables import CountingCuckooFilter, CuckooFilterFullError # noqa: E402 +from tests.utilities import calc_file_md5 # noqa: E402 DELETE_TEMP_FILES = True diff --git a/tests/countminsketch_test.py b/tests/countminsketch_test.py index 085484d..910e14d 100755 --- a/tests/countminsketch_test.py +++ b/tests/countminsketch_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,10 +12,10 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import CountMeanMinSketch, CountMeanSketch, CountMinSketch, HeavyHitters, StreamThreshold -from probables.constants import INT32_T_MAX, INT32_T_MIN, INT64_T_MAX, INT64_T_MIN -from probables.exceptions import CountMinSketchError, InitializationError, NotSupportedError -from tests.utilities import calc_file_md5, different_hash +from probables import CountMeanMinSketch, CountMeanSketch, CountMinSketch, HeavyHitters, StreamThreshold # noqa: E402 +from probables.constants import INT32_T_MAX, INT32_T_MIN, INT64_T_MAX, INT64_T_MIN # noqa: E402 +from probables.exceptions import CountMinSketchError, InitializationError, NotSupportedError # noqa: E402 +from tests.utilities import calc_file_md5, different_hash # noqa: E402 DELETE_TEMP_FILES = True diff --git a/tests/cuckoo_test.py b/tests/cuckoo_test.py index edfffa5..c49e23b 100755 --- a/tests/cuckoo_test.py +++ b/tests/cuckoo_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,8 +12,8 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import CuckooFilter, CuckooFilterFullError, InitializationError -from tests.utilities import calc_file_md5 +from probables import CuckooFilter, CuckooFilterFullError, InitializationError # noqa: E402 +from tests.utilities import calc_file_md5 # noqa: E402 DELETE_TEMP_FILES = True @@ -77,10 +76,10 @@ def my_hash(key): hash_function=my_hash, ) for i in range(50): - cko.add("this is a test - {}".format(i)) + cko.add(f"this is a test - {i}") for i in range(50): - self.assertTrue("this is a test - {}".format(i) in cko) + self.assertTrue(f"this is a test - {i}" in cko) def test_cuckoo_filter_remove(self): """test removing from the cuckoo filter""" diff --git a/tests/expandingbloom_test.py b/tests/expandingbloom_test.py index e0ce310..468aefa 100755 --- a/tests/expandingbloom_test.py +++ b/tests/expandingbloom_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import os @@ -13,9 +12,9 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import ExpandingBloomFilter, RotatingBloomFilter -from probables.exceptions import RotatingBloomFilterError -from tests.utilities import calc_file_md5, different_hash +from probables import ExpandingBloomFilter, RotatingBloomFilter # noqa: E402 +from probables.exceptions import RotatingBloomFilterError # noqa: E402 +from tests.utilities import calc_file_md5, different_hash # noqa: E402 DELETE_TEMP_FILES = True @@ -35,14 +34,14 @@ def test_ebf_add_lots(self): """test adding "lots" of elements to force the expansion""" blm = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05) for i in range(100): - blm.add("{}".format(i), True) + blm.add(f"{i}", True) self.assertEqual(blm.expansions, 9) def test_ebf_add_lots_diff_hash(self): """test adding "lots" of elements to force the expansion using a different hash""" blm = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05, hash_function=different_hash) for i in range(100): - blm.add("{}".format(i), True) + blm.add(f"{i}", True) self.assertEqual(blm.expansions, 9) def test_ebf_add_lots_without_force(self): @@ -50,7 +49,7 @@ def test_ebf_add_lots_without_force(self): blm = ExpandingBloomFilter(est_elements=10, false_positive_rate=0.05) # simulate false positives... notice it didn't grow a few... for i in range(120): - blm.add("{}".format(i)) + blm.add(f"{i}") self.assertEqual(blm.expansions, 8) self.assertEqual(blm.elements_added, 120) @@ -59,7 +58,7 @@ def test_ebf_check(self): blm = ExpandingBloomFilter(est_elements=30, false_positive_rate=0.05) # expand it out some first! for i in range(100): - blm.add("{}".format(i)) + blm.add(f"{i}") blm.add("this is a test") blm.add("this is another test") self.assertGreater(blm.expansions, 1) @@ -74,7 +73,7 @@ def test_ebf_contains(self): blm = ExpandingBloomFilter(est_elements=30, false_positive_rate=0.05) # expand it out some first! for i in range(100): - blm.add("{}".format(i)) + blm.add(f"{i}") blm.add("this is a test") blm.add("this is another test") self.assertGreater(blm.expansions, 1) @@ -142,7 +141,7 @@ def test_ebf_import_non_empty(self): with NamedTemporaryFile(dir=os.getcwd(), suffix=".ebf", delete=DELETE_TEMP_FILES) as fobj: blm = ExpandingBloomFilter(est_elements=25, false_positive_rate=0.05) for i in range(15): - blm.add("{}".format(i)) + blm.add(f"{i}") blm.push() blm.export(fobj.name) @@ -150,11 +149,11 @@ def test_ebf_import_non_empty(self): blm2 = ExpandingBloomFilter(filepath=fobj.name) self.assertEqual(blm2.expansions, 15) for i in range(15): - self.assertEqual("{}".format(i) in blm2, True) + self.assertEqual(f"{i}" in blm2, True) # check for things that are not there! for i in range(99, 125): - self.assertEqual("{}".format(i) in blm2, False) + self.assertEqual(f"{i}" in blm2, False) class TestRotatingBloomFilter(unittest.TestCase): @@ -173,28 +172,28 @@ def test_rbf_rotate(self): blm.add("test") self.assertEqual(blm.expansions, 0) for i in range(10): - blm.add("{}".format(i), force=True) + blm.add(f"{i}", force=True) self.assertEqual(blm.expansions, 1) self.assertEqual(blm.current_queue_size, 2) self.assertEqual(blm.check("test"), True) for i in range(10, 20): - blm.add("{}".format(i), force=True) + blm.add(f"{i}", force=True) self.assertEqual(blm.check("test"), True) self.assertEqual(blm.current_queue_size, 3) for i in range(20, 30): - blm.add("{}".format(i), force=True) + blm.add(f"{i}", force=True) self.assertEqual(blm.check("test"), True) self.assertEqual(blm.current_queue_size, 4) for i in range(30, 40): - blm.add("{}".format(i), force=True) + blm.add(f"{i}", force=True) self.assertEqual(blm.check("test"), True) self.assertEqual(blm.current_queue_size, 5) for i in range(40, 50): - blm.add("{}".format(i), force=True) + blm.add(f"{i}", force=True) self.assertEqual(blm.check("test"), False) # it should roll off self.assertEqual(blm.current_queue_size, 5) @@ -249,7 +248,7 @@ def test_rbf_pop_exception_msg(self): except RotatingBloomFilterError as ex: msg = "Popping a Bloom Filter will result in an unusable system!" self.assertEqual(str(ex), msg) - except: + except: # noqa: E722 self.assertEqual(True, False) def test_rfb_basic_export(self): @@ -297,17 +296,17 @@ def test_rbf_non_basic_import(self): with NamedTemporaryFile(dir=os.getcwd(), suffix=".rbf", delete=DELETE_TEMP_FILES) as fobj: blm = RotatingBloomFilter(est_elements=25, false_positive_rate=0.05) for i in range(15): - blm.add("{}".format(i)) + blm.add(f"{i}") blm.push() blm.export(fobj.name) blm2 = RotatingBloomFilter(filepath=fobj.name) # test those that should be popped off... for i in range(5): - self.assertEqual("{}".format(i) in blm2, False) + self.assertEqual(f"{i}" in blm2, False) # test things that would not be popped for i in range(6, 15): - self.assertEqual("{}".format(i) in blm2, True) + self.assertEqual(f"{i}" in blm2, True) self.assertEqual(blm2.current_queue_size, 10) self.assertEqual(blm2.expansions, 9) self.assertEqual(blm2.elements_added, 15) diff --git a/tests/hashes_test.py b/tests/hashes_test.py index 0fa7cb7..2322325 100755 --- a/tests/hashes_test.py +++ b/tests/hashes_test.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" import hashlib import sys @@ -11,8 +10,8 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables.constants import UINT64_T_MAX -from probables.hashes import ( +from probables.constants import UINT64_T_MAX # noqa: E402 +from probables.hashes import ( # noqa: E402 default_fnv_1a, default_md5, default_sha256, diff --git a/tests/quotientfilter_test.py b/tests/quotientfilter_test.py index 0171a54..773c998 100644 --- a/tests/quotientfilter_test.py +++ b/tests/quotientfilter_test.py @@ -1,8 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" Unittest class """ +"""Unittest class""" -import hashlib import os import random import sys @@ -15,8 +13,7 @@ this_dir = Path(__file__).parent sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables import QuotientFilter -from tests.utilities import calc_file_md5, different_hash +from probables import QuotientFilter # noqa: E402 DELETE_TEMP_FILES = True @@ -242,8 +239,8 @@ def test_qf_remove_missing_elm(self): """test removing a missing element""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("~") @@ -258,8 +255,8 @@ def test_qf_remove_cluster_start(self): """test removing a cluster start followed by empty""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove(".") @@ -274,8 +271,8 @@ def test_qf_remove_cluster_start_cluster(self): """test removing a cluster start followed by cluster start""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("-") @@ -290,8 +287,8 @@ def test_qf_remove_shifted_run_start_followed_by_empty(self): """test removing a shifted run start followed by empty""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("z") @@ -306,8 +303,8 @@ def test_qf_remove_shifted_run_start_followed_continuation(self): """test removing a shifted run start followed by continuation""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("y") @@ -322,8 +319,8 @@ def test_qf_remove_shifted_continuation_followed_run_start(self): """test removing a shifted continuation followed by run start""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("x") @@ -338,8 +335,8 @@ def test_qf_remove_shifted_run_start_followed_run_start(self): """test removing a shifted run start followed by run start""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("a") @@ -354,8 +351,8 @@ def test_qf_remove_cluster_start_followed_continuation_follow_run_start(self): """test removing a cluster start followed by continuation putting a run start into a cluster start position""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) qf.remove("d") @@ -370,13 +367,13 @@ def test_qf_remove_full(self): """Test removing all elements, but find each one after each removal""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - _hash = qf._hash_func(l, 0) - print(l, _hash >> qf._r, _hash & ((1 << qf._r) - 1)) - qf.add(l) + for a in alpha: + _hash = qf._hash_func(a, 0) + print(a, _hash >> qf._r, _hash & ((1 << qf._r) - 1)) + qf.add(a) - for l in alpha: - self.assertTrue(qf.check(l), "failed to insert") + for a in alpha: + self.assertTrue(qf.check(a), "failed to insert") while alpha: missing_vals = [] @@ -393,11 +390,11 @@ def test_qf_remove_full_random(self): """Test removing all elements, but in a random order""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) - for l in alpha: - self.assertTrue(qf.check(l), "failed to insert") + for a in alpha: + self.assertTrue(qf.check(a), "failed to insert") self.assertTrue(qf.validate_metadata()) while alpha: @@ -416,11 +413,11 @@ def test_qf_remove_full_random_take_2(self): """Test removing all elements, but in a random order - take 2""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) - for l in alpha: - self.assertTrue(qf.check(l), "failed to insert") + for a in alpha: + self.assertTrue(qf.check(a), "failed to insert") while alpha: missing_vals = [] @@ -441,7 +438,7 @@ def test_quotient_filter_print_empty(self): qf.print(file=fobj.file) fobj.flush() - with open(fobj.name, "r") as fobj: + with open(fobj.name) as fobj: data = fobj.readlines() data = [x.strip() for x in data] self.assertEqual(data[0], "idx\t--\tO-C-S\tStatus") @@ -452,14 +449,14 @@ def test_quotient_filter_print(self): """Test printing the data of a quotient filter in a manner to be read through not empty""" alpha = [a for a in "abcd.efghij;klm-nopqrs=tuvwxyz"] qf = QuotientFilter(quotient=7) - for l in alpha: - qf.add(l) + for a in alpha: + qf.add(a) with NamedTemporaryFile(dir=os.getcwd(), suffix=".txt", delete=DELETE_TEMP_FILES, mode="wt") as fobj: qf.print(file=fobj.file) fobj.flush() - with open(fobj.name, "r") as fobj: + with open(fobj.name) as fobj: data = fobj.readlines() data = [x.strip() for x in data] self.assertEqual(data[0], "idx\t--\tO-C-S\tStatus") diff --git a/tests/test_utilities.py b/tests/test_utilities.py index 11bcc3d..10fc721 100755 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -""" probables utilitites tests """ +"""probables utilitites tests""" import os import sys @@ -11,8 +11,8 @@ sys.path.insert(0, str(this_dir)) sys.path.insert(0, str(this_dir.parent)) -from probables.utilities import Bitarray, MMap, get_x_bits, is_hex_string, is_valid_file, resolve_path -from tests.utilities import different_hash +from probables.utilities import Bitarray, MMap, get_x_bits, is_hex_string, is_valid_file, resolve_path # noqa: E402 +from tests.utilities import different_hash # noqa: E402 DELETE_TEMP_FILES = True @@ -105,7 +105,7 @@ def test_resolve_path(self): with NamedTemporaryFile(dir=os.getcwd(), suffix=".rbf", delete=DELETE_TEMP_FILES) as fobj: with open(fobj.name, "w"): pass - p2 = resolve_path("./{}".format(fobj.name)) + p2 = resolve_path(f"./{fobj.name}") self.assertTrue(p2.is_absolute()) def test_bitarray(self): diff --git a/tests/utilities.py b/tests/utilities.py index 39e6fea..9d0e875 100644 --- a/tests/utilities.py +++ b/tests/utilities.py @@ -1,7 +1,8 @@ -""" utility functions """ +"""utility functions""" + from hashlib import md5 from pathlib import Path -from typing import List, Union +from typing import Union from probables.constants import UINT64_T_MAX from probables.hashes import KeyT @@ -14,7 +15,7 @@ def calc_file_md5(filename: Union[str, Path]) -> str: return md5(res).hexdigest() -def different_hash(key: KeyT, depth: int) -> List[int]: +def different_hash(key: KeyT, depth: int) -> list[int]: """the default fnv-1a hashing routine, but different""" def __fnv_1a(key: KeyT) -> int: