Skip to content

Commit 77d3ca3

Browse files
Compatibility for cuML deprecation warnings (rapidsai#20884)
This fixes some deprecated usage of cuML. It also updates our pytest invocation to treat warnings as errors (`-W error`), which will help us catch such warnings earlier. Doing so revealed a resource warning from an unclosed file handle, which is now closed explicitly.
Authors: - Tom Augspurger (https://github.com/TomAugspurger)
Approvers: - Bradley Dice (https://github.com/bdice)
URL: rapidsai#20884
1 parent 058e832 commit 77d3ca3

10 files changed

Lines changed: 84 additions & 36 deletions

File tree

ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ runtest() {
2121
pytest \
2222
"${plugin[@]}" \
2323
-v \
24+
-W error \
2425
--continue-on-collection-errors \
2526
--cache-clear \
2627
--numprocesses="${NUM_PROCESSES}" \
@@ -40,6 +41,7 @@ main() {
4041
--compare \
4142
-p cudf.pandas \
4243
-v \
44+
-W error \
4345
--continue-on-collection-errors \
4446
--cache-clear \
4547
--numprocesses="${NUM_PROCESSES}" \

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/conftest.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,24 @@ def read_all_results(pattern):
9696

9797

9898
def pytest_configure(config: _pytest.config.Config):
99+
# Register custom markers to avoid PytestUnknownMarkWarning
100+
config.addinivalue_line(
101+
"markers",
102+
"assert_eq(fn): custom assertion function for comparing results",
103+
)
104+
config.addinivalue_line(
105+
"markers",
106+
"xfail_gold: mark test as expected to fail in gold (pandas) pass",
107+
)
108+
config.addinivalue_line(
109+
"markers",
110+
"xfail_cudf_pandas: mark test as expected to fail in cudf.pandas pass",
111+
)
112+
config.addinivalue_line(
113+
"markers",
114+
"xfail_compare: mark test as expected to fail in compare pass",
115+
)
116+
99117
gold_basename = "results-gold"
100118
cudf_basename = "results-cudf-pandas"
101119
test_folder = os.path.join(os.path.dirname(__file__))
@@ -189,4 +207,5 @@ def pytest_unconfigure(config):
189207
config.stash[file_handle_key].write(f.read())
190208
os.remove(worker_result)
191209
# Close our file
210+
config.stash[file_handle_key].close()
192211
del config.stash[file_handle_key]

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cugraph.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
"betweenness_centrality",
1212
"degree_centrality",
1313
"katz_centrality",
14-
"sorensen_coefficient",
15-
"jaccard_coefficient",
14+
"sorensen",
15+
"jaccard",
1616
]
1717

1818
nx_algos = [
@@ -33,7 +33,13 @@ def assert_cugraph_equal(expect, got):
3333
assert expect == got
3434

3535

36-
pytestmark = pytest.mark.assert_eq(fn=assert_cugraph_equal)
36+
pytestmark = [
37+
pytest.mark.assert_eq(fn=assert_cugraph_equal),
38+
# We can't pass a valid value here to avoid the warning, so we ignore it.
39+
pytest.mark.filterwarnings(
40+
"ignore:This parameter is deprecated:PendingDeprecationWarning"
41+
),
42+
]
3743

3844

3945
@pytest.fixture(scope="session")
@@ -57,13 +63,15 @@ def adjacency_matrix():
5763
def test_cugraph_from_pandas_edgelist(df, algo):
5864
G = cugraph.Graph()
5965
G.from_pandas_edgelist(df)
66+
G.store_transposed = algo == "katz_centrality"
6067
return getattr(cugraph, algo)(G).to_pandas().values
6168

6269

6370
@pytest.mark.parametrize("algo", cugraph_algos)
6471
def test_cugraph_from_pandas_adjacency(adjacency_matrix, algo):
6572
G = cugraph.Graph()
6673
G.from_pandas_adjacency(adjacency_matrix)
74+
G.store_transposed = algo == "katz_centrality"
6775
res = getattr(cugraph, algo)(G).to_pandas()
6876
return res.sort_values(list(res.columns)).values
6977

@@ -72,6 +80,7 @@ def test_cugraph_from_pandas_adjacency(adjacency_matrix, algo):
7280
def test_cugraph_from_numpy_array(df, algo):
7381
G = cugraph.Graph()
7482
G.from_numpy_array(df.values)
83+
G.store_transposed = algo == "katz_centrality"
7584
return getattr(cugraph, algo)(G).to_pandas().values
7685

7786

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_cuml.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ def test_random_forest(binary_classification_data):
9494
X_train, X_test, y_train, y_test = train_test_split(
9595
X, y, test_size=0.2, random_state=42
9696
)
97-
model = RandomForestClassifier(n_estimators=100)
97+
model = RandomForestClassifier(n_estimators=100, n_bins=len(X_train))
9898
model.fit(X_train, y_train)
9999
preds = model.predict(X_test)
100100
return preds.values

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def df():
3131

3232
@pytest.mark.parametrize("op", reductions)
3333
def test_numpy_dataframe_reductions(df, op):
34-
return getattr(np, op)(df)
34+
return getattr(np, op)(df, axis=0)
3535

3636

3737
def test_numpy_dot(df):

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,31 +3,11 @@
33
import pandas as pd
44
import pytest
55
import seaborn as sns
6-
from matplotlib.axes import Axes
7-
from matplotlib.collections import PathCollection
8-
from matplotlib.lines import Line2D
9-
from matplotlib.patches import Rectangle
10-
from pandas._testing import assert_equal
116

127

138
def assert_plots_equal(expect, got):
14-
if isinstance(expect, Axes) and isinstance(got, Axes):
15-
for expect_ch, got_ch in zip(
16-
expect.get_children(), got.get_children(), strict=True
17-
):
18-
assert type(expect_ch) is type(got_ch)
19-
if isinstance(expect_ch, Line2D):
20-
assert_equal(expect_ch.get_xdata(), got_ch.get_xdata())
21-
assert_equal(expect_ch.get_ydata(), got_ch.get_ydata())
22-
elif isinstance(expect_ch, Rectangle):
23-
assert expect_ch.get_height() == got_ch.get_height()
24-
elif isinstance(expect, PathCollection) and isinstance(
25-
got, PathCollection
26-
):
27-
assert_equal(expect.get_offsets()[:, 0], got.get_offsets()[:, 0])
28-
assert_equal(expect.get_offsets()[:, 1], got.get_offsets()[:, 1])
29-
else:
30-
assert_equal(expect, got)
9+
# these are the coordinates of the matplotlib objects.
10+
assert expect == got
3111

3212

3313
pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal)
@@ -47,15 +27,20 @@ def df():
4727

4828
def test_bar(df):
4929
ax = sns.barplot(data=df, x="x", y="y")
50-
return ax
30+
return [x.get_height().item() for x in ax.patches]
5131

5232

5333
def test_scatter(df):
5434
ax = sns.scatterplot(data=df, x="x", y="y", hue="hue")
55-
return ax
35+
assert len(ax.collections) == 1
36+
paths = ax.collections[0].get_paths()
37+
assert len(paths) == 1
38+
return paths[0].vertices.tolist()
5639

5740

5841
def test_lineplot_with_sns_data():
5942
df = sns.load_dataset("flights")
60-
ax = sns.lineplot(data=df, x="month", y="passengers")
61-
return ax
43+
ax = sns.lineplot(data=df, x="month", y="passengers", seed=0)
44+
paths = ax.collections[0].get_paths()
45+
assert len(paths) == 1
46+
return paths[0].vertices.tolist()

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ def test_1d_time_series():
3030
return stumpy.stump(ts, m)
3131

3232

33+
@pytest.mark.filterwarnings(
34+
"ignore::numba.core.errors.NumbaPerformanceWarning"
35+
)
3336
def test_1d_gpu():
3437
rng = np.random.default_rng(42)
3538
your_time_series = rng.random(10000)

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
import os
5+
import warnings
6+
47
import numpy as np
58
import pandas as pd
69
import pytest
@@ -25,12 +28,30 @@ def as_float64(x):
2528

2629
# Shared dask client for all tests in this module
2730
@pytest.fixture(scope="module")
28-
def dask_client():
29-
with LocalCluster(n_workers=4, threads_per_worker=1) as cluster:
30-
with Client(cluster) as dask_client:
31-
yield dask_client
31+
def dask_client(worker_id: str):
32+
worker_count = int(os.environ.get("PYTEST_XDIST_WORKER_COUNT", "0"))
33+
if worker_count > 0:
34+
# Avoid port conflicts with multiple test runners
35+
worker_index = int(worker_id.removeprefix("gw"))
36+
scheduler_port = 8800 + worker_index
37+
dashboard_address = 8900 + worker_index
38+
else:
39+
scheduler_port = None
40+
dashboard_address = None
41+
42+
with warnings.catch_warnings():
43+
warnings.filterwarnings("ignore", category=Warning, message="Port")
44+
45+
with LocalCluster(
46+
n_workers=1,
47+
scheduler_port=scheduler_port,
48+
dashboard_address=dashboard_address,
49+
) as cluster:
50+
with Client(cluster) as client:
51+
yield client
3252

3353

54+
@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
3455
def test_1d_distributed(dask_client):
3556
rng = np.random.default_rng(seed=42)
3657
ts = pd.Series(rng.random(100))

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
11
# SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
33

4+
import warnings
5+
46
import numpy as np
57
import pandas as pd
68
import pytest
7-
import tensorflow as tf
9+
10+
with warnings.catch_warnings():
11+
# Silence a warning from numpy on keras import
12+
warnings.filterwarnings("ignore", category=FutureWarning)
13+
import tensorflow as tf
14+
815

916
SHUFFLE_BUFFER = 500
1017
BATCH_SIZE = 2

python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ def test_with_iter_quantile_dmatrix(
102102

103103

104104
@pytest.mark.parametrize("device", ["cpu", "cuda"])
105+
@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
105106
def test_with_external_memory(
106107
device: str,
107108
reg_batches_data: tuple[list[pd.DataFrame], list[pd.DataFrame]],
@@ -115,6 +116,7 @@ def test_with_external_memory(
115116

116117

117118
@pytest.mark.parametrize("device", ["cpu", "cuda"])
119+
@pytest.mark.filterwarnings("ignore::pytest.PytestUnraisableExceptionWarning")
118120
def test_predict(device: str) -> np.ndarray:
119121
reg = xgb.XGBRegressor(n_estimators=2, device=device)
120122
X, y = make_regression(n_samples, n_features, random_state=11)

0 commit comments

Comments
 (0)