Skip to content

Commit 21f7b3a

Browse files
Relax dependency constraints (#1713)
* Define dependencies in pyproject.toml Moves dependency constraints to pyproject.toml. Makes requirements.txt a lockfile. * Support click 8.3.0 Fixes an incompatibility caused by click 8.3.0, which passes the default value as-is. * Support pyreadstat 1.2.9 Fixes an incompatibility caused by pyreadstat 1.2.9, which changed original_variable_type from 'NULL' to None. * Support jsonpath-ng 1.8.0 Works around a behavior change in jsonpath-ng 1.8.0 where Child.__str__ gets wrapped in parentheses. * Support dask 2024.8.1 Fixes tokenization errors when using dask 2024.8.1+. Starting with this version, dask enforces that tokens remain stable across pickle round-trips (dask/dask#11320). Capturing self in a lambda fails this check because instance objects can have non-deterministic pickle representations. Since calculate_variable_value_length is already a static method, replacing self with the class name is enough to remove the capture. * Support dask 2024.12.1 Fixes an import error caused by dask 2024.12.1, which removed the legacy dask.dataframe.dd submodule (dask/dask#11604). Changes the import to `import dask.dataframe as dd`, consistent with every other file in the codebase. * Support dask 2025.4.0 Dask 2025.4.0 optimizes multiple DataFrames together, which exposes division mismatches when assigning a pandas Series to a dask DataFrame column. The old reset_index/set_index workaround no longer avoids this. This commit replaces it with compute-assign-rewrap via dd.from_pandas, which builds a clean expression graph. This is safe because __getitem__ already computes the DataFrame to produce the Series being assigned. * Support pandas 2.2.0 Fixes a unit test to support pandas 2.2.0+. The pandas release fixes a sorting bug with pandas-dev/pandas#54611. This commit changes the expected results accordingly. Also fixes a merge type mismatch introduced by upstream #1709: the codelist metadata side was cast to StringDtype but the evaluation dataset side was not. 
With pandas 2.2.0, empty columns infer as float64, and merging float64 with string is rejected. Casting both sides to string before the merge resolves this. --------- Co-authored-by: Samuel Johnson <96841389+SFJohnson24@users.noreply.github.com>
1 parent 4afe6e2 commit 21f7b3a

16 files changed

Lines changed: 74 additions & 26 deletions

File tree

.github/workflows/lint-format.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
python-version: "3.12"
5050
- name: Install linters
5151
run: |
52-
pip install black flake8 -c requirements-dev.txt
52+
pip install black flake8
5353
- name: Run flake8
5454
run: |
5555
flake8 ${{needs.get_changed_files.outputs.py}} --count --select=E9,F63,F7,F82 --show-source --statistics

.github/workflows/test-unit.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ jobs:
1616
python-version: "3.12"
1717
- name: Install requirements
1818
run: |
19-
pip install -r requirements-dev.txt
20-
pip install -e .
19+
pip install -r requirements.txt
20+
pip install --group dev -e .
2121
- name: Running Tests
2222
env:
2323
CDISC_LIBRARY_API_KEY: fakekey12341234

cdisc_rules_engine/dataset_builders/contents_define_vlm_dataset_builder.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def build(self):
6666
data_contents_with_vlm["variable_value_length"] = data_contents_with_vlm.data[
6767
["variable_value", "define_vlm_data_type"]
6868
].apply(
69-
lambda row: self.calculate_variable_value_length(
69+
lambda row: ValuesDatasetBuilder.calculate_variable_value_length(
7070
row["variable_value"], row["define_vlm_data_type"]
7171
),
7272
axis=1,

cdisc_rules_engine/models/dataset/dask_dataset.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,9 @@ def __setitem__(self, key, value):
8181
array_values = da.from_array(value, chunks=tuple(chunks))
8282
self._data[key] = array_values
8383
elif isinstance(value, pd.Series):
84-
self._data = self._data.reset_index()
85-
self._data = self._data.set_index("index")
86-
self._data[key] = value
84+
pdf = self._data.compute()
85+
pdf[key] = value.values
86+
self._data = dd.from_pandas(pdf, npartitions=1)
8787
elif isinstance(value, dd.DataFrame):
8888
for column in value:
8989
self._data[column] = value[column]

cdisc_rules_engine/operations/codelist_extensible.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ def _handle_multiple_versions(self) -> pd.Series:
3232
"codelist_code": "string",
3333
}
3434
)
35+
cast_cols = {self.params.ct_version: "string"}
36+
if self.params.codelist_code in self.evaluation_dataset.columns:
37+
cast_cols[self.params.codelist_code] = "string"
38+
self.evaluation_dataset = self.evaluation_dataset.astype(cast_cols)
3539
if self.params.codelist_code in self.evaluation_dataset.columns:
3640
is_extensible = self.evaluation_dataset.merge(
3741
ct_df.data,

cdisc_rules_engine/operations/codelist_terms.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ def _handle_multiple_versions(self) -> pd.Series:
6464
"codelist_code": "string",
6565
}
6666
)
67+
cast_cols = {self.params.ct_version: "string"}
68+
if self.params.codelist_code in self.evaluation_dataset.columns:
69+
cast_cols[self.params.codelist_code] = "string"
70+
self.evaluation_dataset = self.evaluation_dataset.astype(cast_cols)
6771
if self.params.codelist_code in self.evaluation_dataset.columns:
6872
result = self.evaluation_dataset.merge(
6973
ct_df.data,

cdisc_rules_engine/services/data_readers/csv_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import tempfile
22

3-
from dask.dataframe import dd
3+
import dask.dataframe as dd
44

55
from cdisc_rules_engine.exceptions.custom_exceptions import InvalidCSVFile
66
from cdisc_rules_engine.interfaces import DataReaderInterface

cdisc_rules_engine/services/data_services/usdm_data_service.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,19 @@ def __read_node_metadata(
399399
}
400400

401401
@staticmethod
402-
def __get_full_path(node: DatumInContext):
403-
return f"{node.full_path}".replace(".[", "[")
402+
def __get_full_path(node: DatumInContext) -> str:
403+
parts = []
404+
current = node
405+
while current is not None and current.context is not None:
406+
parts.append(str(current.path))
407+
current = current.context
408+
result = ""
409+
for part in reversed(parts):
410+
if part.startswith("["):
411+
result += part
412+
else:
413+
result = (result + "." if result else "") + part
414+
return result
404415

405416
def __get_datasets_content_index(self) -> List[dict]:
406417
"""

cdisc_rules_engine/services/datasetxpt_metadata_reader.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def read(self) -> dict:
6161
"variable_labels": list(metadata.column_labels),
6262
"variable_names": list(metadata.column_names),
6363
"variable_formats": [
64-
"" if data_type == "NULL" else data_type
64+
"" if (data_type == "NULL" or data_type is None) else data_type
6565
for data_type in metadata.original_variable_types.values()
6666
],
6767
"variable_name_to_label_map": metadata.column_names_to_labels,

core.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -357,15 +357,13 @@ def load_custom_dotenv_from_data_options(_ctx, _param, value):
357357
"-s",
358358
"--standard",
359359
required=True,
360-
default=None,
361360
help="CDISC standard to validate against",
362361
envvar="PRODUCT",
363362
)
364363
@click.option(
365364
"-v",
366365
"--version",
367366
required=True,
368-
default=None,
369367
help="Standard version to validate against",
370368
envvar="VERSION",
371369
)

0 commit comments

Comments
 (0)