Skip to content

Commit 75f0ef8

Browse files
authored
Update keras, tf and new model usage, numpy 2.0 updates (#1206)
* refactor: move from deprecated pkg_resources * fix: to use func * fix: add missing change * refactor: resources to be in package * fix: tests bc of almost * feat: refactor to pass in a path or string or None * fix: import for older versions * fix: Traversable must be done at runtime * refactor: keras reqs and others * refactor: losses for keras and tests * fix: remove unneeded global * fix: accidentally duplicated test on rebase * fix: rebase duplicates * fix: keras reqs * refactor: update to be more than 3.4.0 for keras * refactor: numpy2 and mypy * fix: mypy 3.10 * fix: bugs * fix: float * refactor: for hist fix too * fix: issue with none in hist * fix: remove comment * refactor: to still utilize dict mapping for losses * fix: int pre-commit * fix: train labeling * refactor: notes, reqs, and change log * fix: pre-commit * refactor: add unit tests validating usage of the old load format
1 parent 52b5275 commit 75f0ef8

59 files changed

Lines changed: 641 additions & 307 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ repos:
22
# Black: format Python code
33
# https://github.com/psf/black/blob/master/.pre-commit-hooks.yaml
44
- repo: https://github.com/psf/black
5-
rev: 22.3.0
5+
rev: 24.3.0
66
hooks:
77
- id: black
88
exclude: (versioneer.py|dataprofiler/_version.py|_docs/)
@@ -21,7 +21,7 @@ repos:
2121
# Flake8: complexity and style checking
2222
# https://flake8.pycqa.org/en/latest/user/using-hooks.html
2323
- repo: https://github.com/pycqa/flake8
24-
rev: 4.0.1
24+
rev: 7.3.0
2525
hooks:
2626
- id: flake8
2727
additional_dependencies: [flake8-docstrings]
@@ -50,29 +50,31 @@ repos:
5050
# requirements.txt
5151
h5py>=2.10.0,
5252
wheel>=0.33.1,
53-
numpy<2.0.0,
53+
'numpy>=1.22.0,<3.0.0',
5454
'pandas>=1.1.2,<3.0.0',
5555
python-dateutil>=2.7.5,
5656
pytz>=2020.1,
5757
pyarrow>=1.0.1,
5858
'chardet>=3.0.4,<7.0.0',
59-
fastavro>=1.0.0.post1,
59+
fastavro>=1.1.0,
6060
python-snappy>=0.7.1,
6161
charset-normalizer>=1.3.6,
6262
psutil>=4.0.0,
63-
scipy>=1.4.1,
64-
requests>=2.28.1,
63+
scipy>=1.10.0,
64+
requests>=2.32.4,
6565
networkx>=2.5.1,
6666
typing-extensions>=3.10.0.2,
6767
HLL>=2.0.3,
6868
datasketches>=4.1.0,
69-
boto3>=1.28.61,
69+
packaging>=23.0,
70+
boto3>=1.37.15,
71+
urllib3>=2.5.0,
7072

7173
# requirements-dev.txt
72-
check-manifest>=0.48,
73-
black==22.3.0,
74+
check-manifest>=0.50,
75+
black>=24.3.0,
7476
isort==5.12.0,
75-
pre-commit==2.19.0,
77+
pre-commit==4.3.0,
7678
tox==3.25.1,
7779
types-setuptools==67.7.0.1,
7880
types-python-dateutil==2.8.19.12,
@@ -82,11 +84,9 @@ repos:
8284

8385
# requirements-ml.txt
8486
scikit-learn>=0.23.2,
85-
'keras>=2.4.3,<=3.4.0',
87+
'keras>3.4.0,<4.0.0',
8688
rapidfuzz>=2.6.1,
87-
"tensorflow>=2.6.4,<2.15.0; sys.platform != 'darwin'",
88-
"tensorflow>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine != 'arm64'",
89-
"tensorflow-macos>=2.6.4,<2.15.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
89+
"tensorflow>=2.16.0",
9090
tqdm>=4.0.0,
9191

9292
# requirements-reports.txt
@@ -101,18 +101,20 @@ repos:
101101
pytest-xdist>=2.1.0,
102102
pytest-forked>=1.3.0,
103103
toolz>=0.10.0,
104-
'memray>=1.7.0,<1.12.0',
104+
'memray>=1.18.0',
105105
]
106106
# Check-manifest: ensures required non-Python files are included in MANIFEST.in
107107
# https://github.com/mgedmin/check-manifest/blob/master/.pre-commit-hooks.yaml
108108
- repo: https://github.com/mgedmin/check-manifest
109-
rev: "0.48"
109+
rev: "0.50"
110110
hooks:
111111
- id: check-manifest
112-
additional_dependencies: ['h5py', 'wheel', 'future', 'numpy<2.0.0', 'pandas',
113-
'python-dateutil', 'pytz', 'pyarrow', 'chardet', 'fastavro',
114-
'python-snappy', 'charset-normalizer', 'psutil', 'scipy', 'requests',
115-
'networkx','typing-extensions', 'HLL', 'datasketches', 'boto3']
112+
additional_dependencies: ['h5py', 'wheel', 'future', 'numpy>=1.22.0,<3.0.0',
113+
'pandas', 'python-dateutil', 'pytz', 'pyarrow', 'chardet',
114+
'fastavro>=1.1.0', 'python-snappy', 'charset-normalizer', 'psutil',
115+
'scipy>=1.10.0', 'requests>=2.32.4', 'networkx', 'typing-extensions',
116+
'HLL', 'datasketches', 'packaging>=23.0', 'boto3>=1.37.15',
117+
'urllib3>=2.5.0']
116118
# Pyupgrade - standardize and modernize Python syntax for newer versions of the language
117119
- repo: https://github.com/asottile/pyupgrade
118120
rev: v3.3.0

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Changelog
2+
3+
## Unreleased
4+
5+
- Added compatibility support for NumPy 2.0 while constraining `numpy` to
6+
`>=1.22.0,<3.0.0` to avoid future breakage from NumPy 3.
7+
- Added compatibility support for Keras versions newer than 3.4.0 while
8+
constraining `keras` to `>3.4.0,<4.0.0` to avoid future breakage from Keras 4.
9+
- Updated the pre-commit configuration to align hook versions and hook
10+
dependencies with the current project requirements.

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ global-exclude .DS_Store
22
global-exclude */__pycache__/*
33

44
include *.txt
5+
include CHANGELOG.md
56
include CODEOWNERS
67
recursive-include dataprofiler *.avro
78
recursive-include dataprofiler *.csv

dataprofiler/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Package for dataprofiler."""
2+
23
from . import settings
34
from ._version import get_versions
45
from .data_readers.data import Data

dataprofiler/_typing.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains typing aliases."""
2+
23
from typing import Dict, List, NewType, Union
34

45
import numpy as np

dataprofiler/data_readers/avro_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains class for saving and loading spreadsheet data."""
2+
23
from io import BytesIO, StringIO
34
from typing import Any, Dict, List, Optional, Union
45

dataprofiler/data_readers/base_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains abstract class for data loading and saving."""
2+
23
import locale
34
import sys
45
from collections import OrderedDict

dataprofiler/data_readers/csv_data.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains class that saves and loads spreadsheet data."""
2+
23
import csv
34
import random
45
import re

dataprofiler/data_readers/data_utils.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains functions for data readers."""
2+
23
import json
34
import logging
45
import os
@@ -334,7 +335,7 @@ def reservoir(file: TextIOWrapper, sample_nrows: int) -> list:
334335
except StopIteration:
335336
break
336337
# Append new, replace old with dummy, and keep track of order
337-
remove_index = rng.integers(0, sample_nrows)
338+
remove_index = int(rng.integers(0, sample_nrows))
338339
values[indices[remove_index]] = str(None)
339340
indices[remove_index] = len(values)
340341
values.append(newval)

dataprofiler/data_readers/filepath_or_buffer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains functions and classes for handling filepaths and buffers."""
2+
23
from io import BytesIO, StringIO, TextIOWrapper
34
from typing import IO, Any, Optional, Type, Union, cast
45

0 commit comments

Comments
 (0)