Skip to content

Commit 2e9eab8

Browse files
committed
Make tools.download_datasets consistent with the runner
closes #454
1 parent 60b9ba6 commit 2e9eab8

File tree

3 files changed

+150
-6
lines changed

3 files changed

+150
-6
lines changed

khiops/core/internals/runner.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1032,8 +1032,9 @@ def _initialize_default_samples_dir(self):
10321032
# Set the fallback value for the samples directory
10331033
home_samples_dir = Path.home() / "khiops_data" / "samples"
10341034

1035-
# Take the value of an environment variable in priority
1036-
if "KHIOPS_SAMPLES_DIR" in os.environ:
1035+
# Take the value of an environment variable in priority, if set to
1036+
# non-empty string
1037+
if "KHIOPS_SAMPLES_DIR" in os.environ and os.environ["KHIOPS_SAMPLES_DIR"]:
10371038
self._samples_dir = os.environ["KHIOPS_SAMPLES_DIR"]
10381039

10391040
# The samples location of Windows systems is:

khiops/tools.py

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import argparse
1414
import os
1515
import pathlib
16+
import platform
1617
import shutil
1718
import sys
1819
import tempfile
@@ -115,7 +116,10 @@ def download_datasets(
115116
"""Downloads the Khiops sample datasets for a given version
116117
117118
The datasets are downloaded to:
118-
- Windows: ``%USERPROFILE%\\khiops_data\\samples``
119+
- Windows:
120+
- ``%PUBLIC%\\khiops_data\\samples`` if ``%PUBLIC%`` is defined and
121+
points to a directory
122+
- ``%USERPROFILE%\\khiops_data\\samples`` otherwise
119123
- Linux/macOS: ``$HOME/khiops_data/samples``
120124
121125
Parameters
@@ -128,8 +132,22 @@ def download_datasets(
128132
# Note: The hidden parameter _called_from_shell is just to change the user messages.
129133

130134
# Check if the home sample dataset location is available and build it if necessary
131-
samples_dir = pathlib.Path.home() / "khiops_data" / "samples"
132-
if samples_dir.exists() and not force_overwrite:
135+
home_samples_dir = pathlib.Path.home() / "khiops_data" / "samples"
136+
137+
# Take the value of an environment variable in priority, if set to non-empty string
138+
# If the environment variable is not set, samples location is:
139+
# - on Windows systems:
140+
# - %PUBLIC%\khiops_data\samples if %PUBLIC% exists
141+
# - %USERPROFILE%\khiops_data\samples otherwise
142+
# - on Linux / macOS systems:
143+
# - $HOME/khiops_data/samples
144+
if "KHIOPS_SAMPLES_DIR" in os.environ and os.environ["KHIOPS_SAMPLES_DIR"]:
145+
samples_dir = os.environ["KHIOPS_SAMPLES_DIR"]
146+
elif platform.system() == "Windows" and "PUBLIC" in os.environ:
147+
samples_dir = os.path.join(os.environ["PUBLIC"], "khiops_data", "samples")
148+
else:
149+
samples_dir = str(home_samples_dir)
150+
if os.path.exists(samples_dir) and not force_overwrite:
133151
if _called_from_shell:
134152
instructions = "Execute with '--force-overwrite' to overwrite it"
135153
else:
@@ -140,7 +158,7 @@ def download_datasets(
140158
)
141159
else:
142160
# Create the samples dataset directory
143-
if samples_dir.exists():
161+
if os.path.exists(samples_dir):
144162
shutil.rmtree(samples_dir)
145163
os.makedirs(samples_dir, exist_ok=True)
146164

tests/test_khiops_integrations.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import khiops.core as kh
1919
import khiops.core.internals.filesystems as fs
20+
from khiops import tools
2021
from khiops.core.exceptions import KhiopsEnvironmentError
2122
from khiops.core.internals.runner import KhiopsLocalRunner
2223
from khiops.extras.docker import KhiopsDockerRunner
@@ -30,6 +31,130 @@
3031
class KhiopsRunnerEnvironmentTests(unittest.TestCase):
3132
"""Test that runners in different environments work"""
3233

34+
@unittest.skipIf(
35+
os.environ.get("SKIP_EXPENSIVE_TESTS", "false").lower() == "true",
36+
"Skipping expensive test",
37+
)
38+
def test_samples_are_downloaded_according_to_the_runner_setting(self):
39+
"""Test that samples are downloaded to the runner samples directory"""
40+
41+
# Save initial state
42+
initial_runner = kh.get_runner()
43+
initial_khiops_samples_dir = os.environ.get("KHIOPS_SAMPLES_DIR")
44+
45+
# Test that default samples download location is consistent with the
46+
# KhiopsLocalRunner samples directory
47+
with tempfile.TemporaryDirectory() as tmp_samples_dir:
48+
49+
# Set environment variable to the temporary samples dir
50+
os.environ["KHIOPS_SAMPLES_DIR"] = tmp_samples_dir
51+
52+
# Create test runner to update samples dir to tmp_samples_dir,
53+
# according to the newly-set environment variable
54+
test_runner = KhiopsLocalRunner()
55+
56+
# Set current runner to the test runner
57+
kh.set_runner(test_runner)
58+
59+
# Check that samples are not in tmp_samples_dir
60+
try:
61+
self.assert_samples_dir_integrity(tmp_samples_dir)
62+
except AssertionError:
63+
pass
64+
65+
# Fail if the samples directory is populated (it should be empty)
66+
else:
67+
return False
68+
69+
# Download samples into existing, but empty, tmp_samples_dir
70+
# Check that samples have been downloaded to tmp_samples_dir
71+
tools.download_datasets(force_overwrite=True)
72+
self.assert_samples_dir_integrity(tmp_samples_dir)
73+
74+
# Remove KHIOPS_SAMPLES_DIR
75+
# Create test runner to update samples dir to the default runner samples
76+
# dir, following the deletion of the KHIOPS_SAMPLES_DIR environment
77+
# variable
78+
# Set current runner to the test runner
79+
del os.environ["KHIOPS_SAMPLES_DIR"]
80+
test_runner = KhiopsLocalRunner()
81+
kh.set_runner(test_runner)
82+
83+
# Get the default runner samples dir
84+
default_runner_samples_dir = kh.get_samples_dir()
85+
86+
# Move existing default runner samples dir contents to temporary directory
87+
if os.path.isdir(default_runner_samples_dir):
88+
tmp_initial_samples_dir = tempfile.mkdtemp()
89+
shutil.copytree(
90+
default_runner_samples_dir,
91+
tmp_initial_samples_dir,
92+
dirs_exist_ok=True,
93+
)
94+
shutil.rmtree(default_runner_samples_dir)
95+
else:
96+
tmp_initial_samples_dir = None
97+
98+
# Check that the samples are not present in the default runner
99+
# samples dir
100+
try:
101+
self.assert_samples_dir_integrity(default_runner_samples_dir)
102+
except AssertionError:
103+
pass
104+
105+
# Fail if the samples directory is populated (it should not exist)
106+
else:
107+
return False
108+
109+
# Download datasets to the default runner samples dir (which
110+
# should be created on this occasion)
111+
# Default samples dir does not exist anymore
112+
# Check that the default samples dir is populated
113+
tools.download_datasets()
114+
self.assert_samples_dir_integrity(default_runner_samples_dir)
115+
116+
# Clean-up default samples dir
117+
shutil.rmtree(default_runner_samples_dir)
118+
119+
# Restore initial state:
120+
# - initial samples dir contents if previously present
121+
# - initial KHIOPS_SAMPLES_DIR if set
122+
# - initial runner
123+
if tmp_initial_samples_dir is not None and os.path.isdir(
124+
tmp_initial_samples_dir
125+
):
126+
shutil.copytree(
127+
tmp_initial_samples_dir,
128+
default_runner_samples_dir,
129+
dirs_exist_ok=True,
130+
)
131+
132+
# Remove temporary directory
133+
shutil.rmtree(tmp_initial_samples_dir)
134+
if initial_khiops_samples_dir is not None:
135+
os.environ["KHIOPS_SAMPLES_DIR"] = initial_khiops_samples_dir
136+
kh.set_runner(initial_runner)
137+
138+
def assert_samples_dir_integrity(self, samples_dir):
139+
"""Checks that the samples dir has the expected structure"""
140+
expected_dataset_names = [
141+
"Accidents",
142+
"AccidentsSummary",
143+
"Adult",
144+
"Customer",
145+
"CustomerExtended",
146+
"Iris",
147+
"Letter",
148+
"Mushroom",
149+
"NegativeAirlineTweets",
150+
"SpliceJunction",
151+
"Vehicle",
152+
"WineReviews",
153+
]
154+
self.assertTrue(os.path.isdir(samples_dir))
155+
for ds_name in expected_dataset_names:
156+
self.assertIn(ds_name, os.listdir(samples_dir))
157+
33158
@unittest.skipIf(
34159
platform.system() != "Linux", "Skipping test for non-Linux platform"
35160
)

0 commit comments

Comments
 (0)