Skip to content

Commit 52b5275

Browse files
authored
refactor: move from deprecated pkg_resources (#1202)
* refactor: move from deprecated pkg_resources * fix: base str traversal * fix: to use func * fix: add missing change * refactor: move from data_files to package_files * refactor: resources to be in package * fix: utils * fix: path error * fix: str required * fix: tests bc of almost * feat: refactor to pass in a path or string or None * fix: import for older versions * fix: Traversable must be done at runtime * fix: add test * fix: accidentally duplicated test on rebase * fix: remove pass
1 parent 07298a2 commit 52b5275

45 files changed

Lines changed: 312 additions & 90 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ repos:
5555
python-dateutil>=2.7.5,
5656
pytz>=2020.1,
5757
pyarrow>=1.0.1,
58-
chardet>=3.0.4,
58+
'chardet>=3.0.4,<7.0.0',
5959
fastavro>=1.0.0.post1,
6060
python-snappy>=0.7.1,
6161
charset-normalizer>=1.3.6,

MANIFEST.in

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@ recursive-include dataprofiler *.parquet
1313
recursive-include dataprofiler *.py
1414
recursive-include dataprofiler *.txt
1515

16-
recursive-include resources *.json
17-
recursive-include resources *.pb
18-
recursive-include resources *.py
16+
recursive-include dataprofiler/resources *.json
17+
recursive-include dataprofiler/resources *.pb
18+
recursive-include dataprofiler/resources *.py
19+
recursive-include dataprofiler/resources *.keras
1920

2021
recursive-include dataprofiler/labelers/embeddings *.txt
2122
include versioneer.py

dataprofiler/labelers/base_data_labeler.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains abstract classes from which labeler classes will inherit."""
2+
23
from __future__ import annotations
34

45
import json
@@ -9,15 +10,14 @@
910

1011
import numpy as np
1112
import pandas as pd
12-
import pkg_resources
1313

1414
from dataprofiler._typing import DataArray
1515

1616
from .. import data_readers
17-
from . import data_processing
17+
from . import data_processing, utils
1818
from .base_model import BaseModel
1919

20-
default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
20+
default_labeler_dir = utils.find_resources_dir("labelers")
2121

2222

2323
class BaseDataLabeler:
@@ -246,7 +246,8 @@ def set_params(self, params: dict) -> None:
246246
self._postprocessor.set_params(**params["postprocessor"])
247247

248248
self.check_pipeline(
249-
skip_postprocessor=self._postprocessor is None, error_on_mismatch=False
249+
skip_postprocessor=self._postprocessor is None,
250+
error_on_mismatch=False,
250251
)
251252

252253
def add_label(self, label: str, same_as: str = None) -> None:
@@ -438,7 +439,9 @@ def get_parameter_overlap_mismatches(
438439
messages.append(
439440
"Preprocessor and postprocessor value for `{}` do not "
440441
"match. {} != {}".format(
441-
param, preprocessor_params[param], postprocessor_params[param]
442+
param,
443+
preprocessor_params[param],
444+
postprocessor_params[param],
442445
)
443446
)
444447
if messages:
@@ -490,7 +493,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict]
490493
"The load_options preprocessor class does not "
491494
"match the required DataLabeler preprocessor."
492495
"\n {} != {}".format(
493-
processor_class.__class__.__name__, param_processor_class
496+
processor_class.__class__.__name__,
497+
param_processor_class,
494498
)
495499
)
496500
params["preprocessor"]["class"] = load_options.get("preprocessor_class")
@@ -505,7 +509,8 @@ def _load_parameters(dirpath: str, load_options: dict = None) -> dict[str, dict]
505509
raise ValueError(
506510
"The load_options postprocessor class does not match "
507511
"the required DataLabeler postprocessor.\n {} != {}".format(
508-
processor_class.__class__.__name__, param_processor_class
512+
processor_class.__class__.__name__,
513+
param_processor_class,
509514
)
510515
)
511516
params["postprocessor"]["class"] = load_options.get("postprocessor_class")

dataprofiler/labelers/data_labelers.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
"""Module to train and choose between structured and unstructured data labelers."""
2+
23
from __future__ import annotations
34

45
import os
56

67
import pandas as pd
7-
import pkg_resources
88

99
from .. import data_readers
10+
from . import utils
1011
from .base_data_labeler import BaseDataLabeler, TrainableDataLabeler
1112
from .base_model import BaseModel
1213
from .data_processing import BaseDataPostprocessor, BaseDataPreprocessor
1314

14-
default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
15+
default_labeler_dir = utils.find_resources_dir("labelers")
1516

1617

1718
def train_structured_labeler(

dataprofiler/labelers/data_processing.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
"""Contains pre-built processors for data labeling/processing."""
2+
23
from __future__ import annotations
34

45
import abc
@@ -15,9 +16,11 @@
1516

1617
import numpy as np
1718
import numpy.typing as npt
18-
import pkg_resources
1919

20-
default_labeler_dir = pkg_resources.resource_filename("resources", "labelers")
20+
from . import utils
21+
22+
default_labeler_dir = utils.find_resources_dir("labelers")
23+
2124

2225
Processor = TypeVar("Processor", bound="BaseDataProcessor")
2326

dataprofiler/labelers/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
"""Contains functions for checking for installations/dependencies."""
2+
3+
import importlib.resources
24
import sys
35
import warnings
6+
from pathlib import Path
47
from typing import Any, Callable, List
58

9+
try:
10+
# Newer Pythons / newer typeshed
11+
from importlib.resources.abc import Traversable
12+
except ModuleNotFoundError:
13+
# Older Pythons
14+
from importlib.abc import Traversable
15+
616

717
def warn_missing_module(labeler_function: str, module_name: str) -> None:
818
"""
@@ -50,3 +60,15 @@ def new_f(*args: Any, **kwds: Any) -> Any:
5060
return new_f
5161

5262
return check_module
63+
64+
65+
def find_resources_dir(resource_path: str | Path | None = None) -> Traversable:
66+
"""Return the path to the package resources."""
67+
resource = importlib.resources.files("dataprofiler") / "resources"
68+
if resource_path:
69+
resource /= resource_path
70+
71+
if not (resource.is_file() or resource.is_dir()):
72+
raise FileNotFoundError(f"Resource not found: {resource_path}")
73+
74+
return resource

resources/labelers/column_name_labeler/data_labeler_parameters.json renamed to dataprofiler/resources/labelers/column_name_labeler/data_labeler_parameters.json

File renamed without changes.

resources/labelers/column_name_labeler/label_mapping.json renamed to dataprofiler/resources/labelers/column_name_labeler/label_mapping.json

File renamed without changes.

resources/labelers/column_name_labeler/model_parameters.json renamed to dataprofiler/resources/labelers/column_name_labeler/model_parameters.json

File renamed without changes.

resources/labelers/column_name_labeler/postprocessor_parameters.json renamed to dataprofiler/resources/labelers/column_name_labeler/postprocessor_parameters.json

File renamed without changes.

0 commit comments

Comments
 (0)