Skip to content

Commit 4e4450a

Browse files
Add Python 3.11 to GHA (#1090)
* add downloads tile (#1085)
* Add Python 3.11 to GHA
* Replace snappy with cramjam (#1091)
* add downloads tile (#1085)
* Replace snappy with cramjam
* Delete test_no_snappy
---------
Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
* Update dask modules
* Install dask dataframe
* Update dask modules in precommit
* Correct copy/paste error
* Try again to clear Unicode
* Rolled back pre-commit dask version
* Add py311 to tox
* Bump dask to 2024.4.1
* Bump python-snappy 0.7.1
* Rewrite labeler test
* Correct isort
* Satisfy black
* And flake8
* Synced with requirements
---------
Co-authored-by: Taylor Turner <taylorfturner@gmail.com>
1 parent 1af22bb commit 4e4450a

9 files changed

Lines changed: 25 additions & 86 deletions

File tree

.github/workflows/publish-python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ jobs:
2020
- name: Set up Python
2121
uses: actions/setup-python@v5
2222
with:
23-
python-version: '3.10'
23+
python-version: '3.11'
2424
- name: Install dependencies
2525
run: |
2626
python -m pip install --upgrade pip

.github/workflows/test-python-package.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ jobs:
1616
runs-on: ubuntu-latest
1717
strategy:
1818
matrix:
19-
python-version: [3.9, "3.10"]
19+
python-version: [3.9, "3.10", "3.11"]
2020

2121
steps:
2222
- uses: actions/checkout@v4

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ repos:
5555
pyarrow>=1.0.1,
5656
chardet>=3.0.4,
5757
fastavro>=1.0.0.post1,
58-
python-snappy>=0.5.4,
58+
python-snappy>=0.7.1,
5959
charset-normalizer>=1.3.6,
6060
psutil>=4.0.0,
6161
scipy>=1.4.1,

dataprofiler/__init__.py

Lines changed: 0 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -20,22 +20,6 @@
2020
from .validators.base_validators import Validator
2121
from .version import __version__
2222

23-
try:
24-
import snappy
25-
except ImportError:
26-
import warnings
27-
28-
warnings.warn(
29-
"Snappy must be installed to use parquet/avro datasets."
30-
"\n\n"
31-
"For macOS use Homebrew:\n"
32-
"\t`brew install snappy`"
33-
"\n\n"
34-
"For linux use apt-get:\n`"
35-
"\tsudo apt-get -y install libsnappy-dev`\n",
36-
ImportWarning,
37-
)
38-
3923

4024
def set_seed(seed=None):
4125
# also check it's an integer

dataprofiler/tests/labelers/test_labeler_utils.py

Lines changed: 19 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import logging
2+
import tempfile
23
import unittest
3-
from unittest import mock
44

55
import numpy as np
66
import pandas as pd
@@ -235,9 +235,7 @@ def test_verbose(self):
235235
self.assertIn("f1-score ", log_output)
236236
self.assertIn("F1 Score: ", log_output)
237237

238-
@mock.patch("dataprofiler.labelers.labeler_utils.classification_report")
239-
@mock.patch("pandas.DataFrame")
240-
def test_save_conf_mat(self, mock_dataframe, mock_report):
238+
def test_save_conf_mat(self):
241239

242240
# ideally mock out the actual contents written to file, but
243241
# would be difficult to get this completely worked out.
@@ -248,28 +246,25 @@ def test_save_conf_mat(self, mock_dataframe, mock_report):
248246
[0, 1, 2],
249247
]
250248
)
251-
expected_row_col_names = dict(
252-
columns=["pred:PAD", "pred:UNKNOWN", "pred:OTHER"],
253-
index=["true:PAD", "true:UNKNOWN", "true:OTHER"],
254-
)
255-
mock_instance_df = mock.Mock(spec=pd.DataFrame)()
256-
mock_dataframe.return_value = mock_instance_df
257-
258-
# still omit bc confusion mat should include all despite omit
259-
f1, f1_report = labeler_utils.evaluate_accuracy(
260-
self.y_pred,
261-
self.y_true,
262-
self.num_labels,
263-
self.reverse_label_mapping,
264-
omitted_labels=["PAD"],
265-
verbose=False,
266-
confusion_matrix_file="test.csv",
267-
)
249+
expected_columns = ["pred:PAD", "pred:UNKNOWN", "pred:OTHER"]
250+
expected_index = ["true:PAD", "true:UNKNOWN", "true:OTHER"]
268251

269-
self.assertTrue((mock_dataframe.call_args[0][0] == expected_conf_mat).all())
270-
self.assertDictEqual(expected_row_col_names, mock_dataframe.call_args[1])
252+
with tempfile.NamedTemporaryFile() as tmpFile:
253+
# still omit bc confusion mat should include all despite omit
254+
f1, f1_report = labeler_utils.evaluate_accuracy(
255+
self.y_pred,
256+
self.y_true,
257+
self.num_labels,
258+
self.reverse_label_mapping,
259+
omitted_labels=["PAD"],
260+
verbose=False,
261+
confusion_matrix_file=tmpFile.name,
262+
)
271263

272-
mock_instance_df.to_csv.assert_called()
264+
df1 = pd.read_csv(tmpFile.name, index_col=0)
265+
self.assertListEqual(list(df1.columns), expected_columns)
266+
self.assertListEqual(list(df1.index), expected_index)
267+
np.testing.assert_array_equal(df1.values, expected_conf_mat)
273268

274269

275270
class TestTFFunctions(unittest.TestCase):

dataprofiler/tests/test_data_profiler.py

Lines changed: 0 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -56,46 +56,6 @@ def test_data_profiling(self):
5656
self.assertIsNotNone(profile.profile)
5757
self.assertIsNotNone(profile.report())
5858

59-
def test_no_snappy(self):
60-
import importlib
61-
import sys
62-
import types
63-
64-
orig_import = __import__
65-
# necessary for any wrapper around the library to test if snappy caught
66-
# as an issue
67-
68-
def reload_data_profiler():
69-
"""Recursively reload modules."""
70-
sys_modules = sys.modules.copy()
71-
for module_name, module in sys_modules.items():
72-
# Only reload top level of the dataprofiler
73-
if "dataprofiler" in module_name and len(module_name.split(".")) < 3:
74-
if isinstance(module, types.ModuleType):
75-
importlib.reload(module)
76-
77-
def import_mock(name, *args, **kwargs):
78-
if name == "snappy":
79-
raise ImportError("test")
80-
return orig_import(name, *args, **kwargs)
81-
82-
with mock.patch("builtins.__import__", side_effect=import_mock):
83-
with self.assertWarns(ImportWarning) as w:
84-
import dataprofiler
85-
86-
reload_data_profiler()
87-
88-
self.assertEqual(
89-
str(w.warning),
90-
"Snappy must be installed to use parquet/avro datasets."
91-
"\n\n"
92-
"For macOS use Homebrew:\n"
93-
"\t`brew install snappy`"
94-
"\n\n"
95-
"For linux use apt-get:\n`"
96-
"\tsudo apt-get -y install libsnappy-dev`\n",
97-
)
98-
9959
def test_no_tensorflow(self):
10060
import sys
10161

requirements-test.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
coverage>=5.0.1
2-
dask>=2.29.0,<2024.2.0
2+
dask[dask-expr,dataframe]>=2024.4.1
33
fsspec>=0.3.3
44
pytest>=6.0.1
55
pytest-cov>=2.8.1

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ pytz>=2020.1
77
pyarrow>=1.0.1
88
chardet>=3.0.4
99
fastavro>=1.1.0
10-
python-snappy>=0.5.4
10+
python-snappy>=0.7.1
1111
charset-normalizer>=1.3.6
1212
psutil>=4.0.0
1313
scipy>=1.10.0

tox.ini

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
[tox]
2-
envlist = py39, py310, pypi-description, manifest, precom
2+
envlist = py39, py310, py311, pypi-description, manifest, precom
33

44

55
[testenv]

0 commit comments

Comments
 (0)