Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ repos:
# Flake8: complexity and style checking
# https://flake8.pycqa.org/en/latest/user/using-hooks.html
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 7.3.0
hooks:
- id: flake8
additional_dependencies: [flake8-docstrings]
Expand Down Expand Up @@ -51,14 +51,14 @@ repos:
h5py>=2.10.0,
wheel>=0.33.1,
numpy<2.0.0,
pandas>=1.1.2,
'pandas>=1.1.2,<3.0.0',
python-dateutil>=2.7.5,
pytz>=2020.1,
pyarrow>=1.0.1,
chardet>=3.0.4,
fastavro>=1.0.0.post1,
python-snappy>=0.7.1,
charset-normalizer>=1.3.6,
'charset-normalizer>=1.3.6,<7.0.0',
psutil>=4.0.0,
scipy>=1.4.1,
requests>=2.28.1,
Expand All @@ -82,11 +82,9 @@ repos:

# requirements-ml.txt
scikit-learn>=0.23.2,
'keras>=2.4.3,<=3.4.0',
'keras>=3.11.0',
rapidfuzz>=2.6.1,
"tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'",
"tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
"tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
"tensorflow>=2.16.0",
tqdm>=4.0.0,

# requirements-reports.txt
Expand All @@ -101,7 +99,7 @@ repos:
pytest-xdist>=2.1.0,
pytest-forked>=1.3.0,
toolz>=0.10.0,
'memray>=1.7.0,<1.12.0',
'memray>=1.18.0',
]
# Check-manifest: ensures required non-Python files are included in MANIFEST.in
# https://github.com/mgedmin/check-manifest/blob/master/.pre-commit-hooks.yaml
Expand Down
7 changes: 4 additions & 3 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ recursive-include dataprofiler *.parquet
recursive-include dataprofiler *.py
recursive-include dataprofiler *.txt

recursive-include resources *.json
recursive-include resources *.pb
recursive-include resources *.py
recursive-include dataprofiler/resources *.json
recursive-include dataprofiler/resources *.pb
recursive-include dataprofiler/resources *.py
recursive-include dataprofiler/resources *.keras

recursive-include dataprofiler/labelers/embeddings *.txt
include versioneer.py
Expand Down
19 changes: 12 additions & 7 deletions dataprofiler/labelers/base_data_labeler.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains abstract classes from which labeler classes will inherit."""

from __future__ import annotations

import json
Expand All @@ -9,15 +10,14 @@

import numpy as np
import pandas as pd
import pkg_resources

from dataprofiler._typing import DataArray

from .. import data_readers
from . import data_processing
from . import data_processing, utils
from .base_model import BaseModel

default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
default_labeler_dir = utils.find_resources_dir("labelers")


class BaseDataLabeler:
Expand Down Expand Up @@ -246,7 +246,8 @@ def set_params(self, params: dict) -> None:
self._postprocessor.set_params(**params["postprocessor"])

self.check_pipeline(
skip_postprocessor=self._postprocessor is None, error_on_mismatch=False
skip_postprocessor=self._postprocessor is None,
error_on_mismatch=False,
)

def add_label(self, label: str, same_as: str = None) -> None:
Expand Down Expand Up @@ -438,7 +439,9 @@ def get_parameter_overlap_mismatches(
messages.append(
"Preprocessor and postprocessor value for `{}` do not "
"match. {} != {}".format(
param, preprocessor_params[param], postprocessor_params[param]
param,
preprocessor_params[param],
postprocessor_params[param],
)
)
if messages:
Expand Down Expand Up @@ -490,7 +493,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict]
"The load_options preprocessor class does not "
"match the required DataLabeler preprocessor."
"\n {} != {}".format(
processor_class.__class__.__name__, param_processor_class
processor_class.__class__.__name__,
param_processor_class,
)
)
params["preprocessor"]["class"] = load_options.get("preprocessor_class")
Expand All @@ -505,7 +509,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict]
raise ValueError(
"The load_options postprocessor class does not match "
"the required DataLabeler postprocessor.\n {} != {}".format(
processor_class.__class__.__name__, param_processor_class
processor_class.__class__.__name__,
param_processor_class,
)
)
params["postprocessor"]["class"] = load_options.get("postprocessor_class")
Expand Down
2 changes: 1 addition & 1 deletion dataprofiler/labelers/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def __eq__(self, other: object) -> bool:
:rtype: bool
"""
if (
type(self) != type(other)
type(self) is not type(other)
or not isinstance(other, BaseModel)
or self._parameters != other._parameters
or self._label_mapping != other._label_mapping
Expand Down
5 changes: 2 additions & 3 deletions dataprofiler/labelers/char_load_tf_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,7 @@ def _construct_model(self) -> None:

# Compile the model w/ metrics
softmax_output_layer_name = self._model.output_names[0]
losses = {softmax_output_layer_name: "categorical_crossentropy"}

losses = ["categorical_crossentropy", None, None]
# use f1 score metric
f1_score_training = labeler_utils.F1Score(
num_classes=num_labels, average="micro"
Expand Down Expand Up @@ -316,7 +315,7 @@ def _reconstruct_model(self) -> None:

# Compile the model
softmax_output_layer_name = self._model.output_names[0]
losses = {softmax_output_layer_name: "categorical_crossentropy"}
losses = ["categorical_crossentropy", None, None]

# use f1 score metric
f1_score_training = labeler_utils.F1Score(
Expand Down
66 changes: 26 additions & 40 deletions dataprofiler/labelers/character_level_cnn_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ def load_from_disk(cls, dirpath: str) -> CharacterLevelCnnModel:
loaded_model._model_default_ind = loaded_model.label_mapping[
loaded_model._parameters["default_label"]
]
loaded_model._compile_loss(loaded_model._model, loaded_model.num_labels)
return loaded_model

@staticmethod
Expand All @@ -475,6 +476,28 @@ def _argmax_threshold_layer(
# matrix.
return ThreshArgMaxLayer(threshold, num_labels, default_ind)

@staticmethod
def _compile_loss(model: tf.keras.Model, num_labels: int) -> None:
    """Compile the model in place with loss, optimizer, and metrics.

    Only the first model output (the softmax layer) is trained; the
    remaining outputs appear to be inference-only, hence their ``None``
    loss entries — NOTE(review): confirm the model always exposes
    exactly three outputs in softmax-first order.

    :param model: keras model to compile (modified in place)
    :param num_labels: number of labels, used to size the F1 metric
    :return: None
    """
    softmax_output_layer_name = model.output_names[0]
    # Keras expects one loss per output; non-trained outputs get None.
    losses = ["categorical_crossentropy", None, None]

    # Track micro-averaged F1 alongside accuracy and raw loss on the
    # softmax output.
    f1_score_training = labeler_utils.F1Score(
        num_classes=num_labels, average="micro"
    )
    metrics = {
        softmax_output_layer_name: [
            "categorical_crossentropy",
            "acc",
            f1_score_training,
        ]
    }

    model.compile(loss=losses, optimizer="adam", metrics=metrics)

def _construct_model(self) -> None:
"""
Construct model for the data labeler.
Expand Down Expand Up @@ -570,24 +593,7 @@ def _construct_model(self) -> None:
final_predicted_layer(argmax_layer, self._model.outputs[0]),
]
self._model = tf.keras.Model(self._model.inputs, argmax_outputs)

# Compile the model
softmax_output_layer_name = self._model.output_names[0]
losses = {softmax_output_layer_name: "categorical_crossentropy"}

# use f1 score metric
f1_score_training = labeler_utils.F1Score(
num_classes=num_labels, average="micro"
)
metrics = {
softmax_output_layer_name: [
"categorical_crossentropy",
"acc",
f1_score_training,
]
}

self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
self._compile_loss(self._model, num_labels)

self._epoch_id = 0
self._model_num_labels = num_labels
Expand Down Expand Up @@ -632,24 +638,7 @@ def _reconstruct_model(self) -> None:
final_predicted_layer(argmax_layer, final_softmax_layer),
]
self._model = tf.keras.Model(self._model.inputs, argmax_outputs)

# Compile the model
softmax_output_layer_name = self._model.output_names[0]
losses = {softmax_output_layer_name: "categorical_crossentropy"}

# use f1 score metric
f1_score_training = labeler_utils.F1Score(
num_classes=num_labels, average="micro"
)
metrics = {
softmax_output_layer_name: [
"categorical_crossentropy",
"acc",
f1_score_training,
]
}

self._model.compile(loss=losses, optimizer="adam", metrics=metrics)
self._compile_loss(self._model, num_labels)
self._epoch_id = 0
self._model_num_labels = num_labels
self._model_default_ind = default_ind
Expand Down Expand Up @@ -699,14 +688,11 @@ def fit(
f1_report: dict = {}

self._model.reset_metrics()
softmax_output_layer_name = self._model.output_names[0]

start_time = time.time()
batch_id = 0
for x_train, y_train in train_data:
model_results = self._model.train_on_batch(
x_train, {softmax_output_layer_name: y_train}
)
model_results = self._model.train_on_batch(x_train, y_train)
sys.stdout.flush()
if verbose:
sys.stdout.write(
Expand Down
5 changes: 3 additions & 2 deletions dataprofiler/labelers/data_labelers.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
"""Module to train and choose between structured and unstructured data labelers."""

from __future__ import annotations

import os

import pandas as pd
import pkg_resources

from .. import data_readers
from . import utils
from .base_data_labeler import BaseDataLabeler, TrainableDataLabeler
from .base_model import BaseModel
from .data_processing import BaseDataPostprocessor, BaseDataPreprocessor

default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
default_labeler_dir = utils.find_resources_dir("labelers")


def train_structured_labeler(
Expand Down
11 changes: 7 additions & 4 deletions dataprofiler/labelers/data_processing.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains pre-built processors for data labeling/processing."""

from __future__ import annotations

import abc
Expand All @@ -15,9 +16,11 @@

import numpy as np
import numpy.typing as npt
import pkg_resources

default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
from . import utils

default_labeler_dir = utils.find_resources_dir("labelers")


Processor = TypeVar("Processor", bound="BaseDataProcessor")

Expand Down Expand Up @@ -70,7 +73,7 @@ def __eq__(self, other: object) -> bool:
:rtype: bool
"""
if (
type(self) != type(other)
type(self) is not type(other)
or not isinstance(other, BaseDataProcessor)
or self._parameters != other._parameters
):
Expand Down Expand Up @@ -1586,7 +1589,7 @@ def __eq__(self, other: object) -> bool:
:rtype: bool
"""
if (
type(self) != type(other)
type(self) is not type(other)
or not isinstance(other, StructCharPostprocessor)
or self._parameters["default_label"] != other._parameters["default_label"]
or self._parameters["pad_label"] != other._parameters["pad_label"]
Expand Down
22 changes: 22 additions & 0 deletions dataprofiler/labelers/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
"""Contains functions for checking for installations/dependencies."""

import importlib.resources
import sys
import warnings
from pathlib import Path
from typing import Any, Callable, List

try:
# Newer Pythons / newer typeshed
from importlib.resources.abc import Traversable
except ModuleNotFoundError:
# Older Pythons
from importlib.abc import Traversable


def warn_missing_module(labeler_function: str, module_name: str) -> None:
"""
Expand Down Expand Up @@ -50,3 +60,15 @@ def new_f(*args: Any, **kwds: Any) -> Any:
return new_f

return check_module


def find_resources_dir(resource_path: str | Path | None = None) -> Traversable:
"""Return the path to the package resources."""
resource = importlib.resources.files("dataprofiler") / "resources"
if resource_path:
resource /= resource_path

if not (resource.is_file() or resource.is_dir()):
raise FileNotFoundError(f"Resource not found: {resource_path}")

return resource
2 changes: 1 addition & 1 deletion dataprofiler/plugins/decorators.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Contains function for generating plugins data."""

from collections import defaultdict
from typing import Any, DefaultDict, Dict

Expand All @@ -21,7 +22,6 @@ def __inner_factory_function(fn):
:param fn: Plugin function
:return: function
"""
global plugins_dict
plugins_dict[typ][name] = fn
return fn

Expand Down
9 changes: 5 additions & 4 deletions dataprofiler/tests/labelers/test_char_tf_load_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,13 @@

import numpy as np
import pandas as pd
import pkg_resources
import tensorflow as tf

from dataprofiler.labelers import utils as labeler_utils
from dataprofiler.labelers.char_load_tf_model import CharLoadTFModel

_file_dir = os.path.dirname(os.path.abspath(__file__))
_resource_labeler_dir = pkg_resources.resource_filename("resources", "labelers")

default_labeler_dir = labeler_utils.find_resources_dir("labelers")

mock_model_parameters = {
"model_path": "project/example/path/fake_model.h5",
Expand Down Expand Up @@ -303,7 +302,9 @@ def test_param_validation(self, *mocks):
"fake_extra_param": "fails",
}
model = CharLoadTFModel(
self.model_path, label_mapping=self.label_mapping, parameters=parameters
self.model_path,
label_mapping=self.label_mapping,
parameters=parameters,
)
model._construct_model()
self.assertDictEqual(parameters, model._parameters)
Expand Down
Loading
Loading