Skip to content

Commit da7a4c9

Browse files
committed
Fix local Windows path computation errors
- simplify URI analysis - fix URI usage in the runner
1 parent 2e9eab8 commit da7a4c9

File tree

3 files changed

+68
-103
lines changed

3 files changed

+68
-103
lines changed

khiops/core/internals/filesystems.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
import json
1010
import os
11-
import platform
1211
import shutil
1312
import warnings
1413
from abc import ABC, abstractmethod
@@ -59,8 +58,11 @@ def is_local_resource(uri_or_path):
5958
`bool`
6059
`True` if a URI refers to a local path
6160
"""
62-
uri_info = urlparse(uri_or_path, allow_fragments=False)
63-
return len(uri_info.scheme) <= 1 or uri_info.scheme == "file"
61+
if (index := uri_or_path.find("://")) > 0:
62+
scheme = uri_or_path[:index]
63+
return len(scheme) == 1 or scheme == "file"
64+
else:
65+
return True
6466

6567

6668
def create_resource(uri_or_path):
@@ -80,15 +82,34 @@ def create_resource(uri_or_path):
8082
`FilesystemResource`
8183
The URI resource object, its class depends on the URI.
8284
"""
83-
uri_info = urlparse(uri_or_path, allow_fragments=False)
84-
if uri_info.scheme == "s3":
85-
return AmazonS3Resource(uri_or_path)
86-
elif uri_info.scheme == "gs":
87-
return GoogleCloudStorageResource(uri_or_path)
88-
elif is_local_resource(uri_or_path):
89-
return LocalFilesystemResource(uri_or_path)
85+
# Case where the URI scheme separator `://` is contained in the uri/path
86+
if (index := uri_or_path.find("://")) > 0:
87+
scheme = uri_or_path[:index]
88+
89+
# Case of normal schemes (those whose scheme is not a single char)
90+
# Note: Any 1-char scheme is considered a Windows path
91+
if len(scheme) > 1:
92+
uri_info = urlparse(uri_or_path, allow_fragments=False)
93+
if uri_info.scheme == "s3":
94+
return AmazonS3Resource(uri_or_path)
95+
elif uri_info.scheme == "gs":
96+
return GoogleCloudStorageResource(uri_or_path)
97+
elif scheme == "file":
98+
# Reject URI if authority is not empty
99+
if uri_info.netloc:
100+
raise ValueError(
101+
f"Non-empty 'authority' in local-path URI '{uri_or_path}': "
102+
f"'{uri_info.netloc}'"
103+
)
104+
return LocalFilesystemResource(uri_or_path)
105+
else:
106+
raise ValueError(f"Unsupported URI scheme {uri_info.scheme}")
107+
else:
108+
return LocalFilesystemResource(uri_or_path)
109+
110+
# No scheme separator `://` found: Build a local resource
90111
else:
91-
raise ValueError(f"Unsupported URI scheme {uri_info.scheme}")
112+
return LocalFilesystemResource(uri_or_path)
92113

93114

94115
def parent_path(path):
@@ -411,12 +432,8 @@ def __init__(self, uri):
411432
# Obtain the local from the URI
412433
# Case where the scheme is in fact a windows drive
413434
# => Build the proper path with drive
414-
if (
415-
len(self.uri_info.scheme) == 1
416-
and self.uri_info.scheme.isalpha()
417-
and platform.system() == "Windows"
418-
):
419-
self.path = os.path.join(f"{self.uri_info.scheme}:\\", self.uri_info.path)
435+
if len(self.uri_info.scheme) == 1 and self.uri_info.scheme.isalpha():
436+
self.path = f"{self.uri_info.scheme}:{self.uri_info.path}"
420437
# Case of the "file" scheme
421438
elif self.uri_info.scheme == "file":
422439
# If invalid second colon in path (eg. "/C:/Users"):

khiops/core/internals/runner.py

Lines changed: 29 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import io
1616
import os
17+
import pathlib
1718
import platform
1819
import shlex
1920
import shutil
@@ -49,6 +50,25 @@ def _isdir_without_all_perms(dir_path):
4950
)
5051

5152

53+
def get_default_samples_dir():
54+
"""Get default samples directory
55+
56+
The default samples directory is computed according to the following priorities:
57+
- all systems: ``KHIOPS_SAMPLES_DIR/khiops_data/samples`` if set
58+
- Windows:
59+
- ``%PUBLIC%\\khiops_data\\samples`` if ``%PUBLIC%`` is defined
60+
- ``%USERPROFILE%\\khiops_data\\samples`` otherwise
61+
- Linux/macOS: ``$HOME/khiops_data/samples``
62+
"""
63+
if "KHIOPS_SAMPLES_DIR" in os.environ and os.environ["KHIOPS_SAMPLES_DIR"]:
64+
samples_dir = os.environ["KHIOPS_SAMPLES_DIR"]
65+
elif platform.system() == "Windows" and "PUBLIC" in os.environ:
66+
samples_dir = os.path.join(os.environ["PUBLIC"], "khiops_data", "samples")
67+
else:
68+
samples_dir = str(pathlib.Path.home() / "khiops_data" / "samples")
69+
return samples_dir
70+
71+
5272
def _get_dir_status(a_dir):
5373
"""Returns the status of a local or remote directory
5474
@@ -59,11 +79,9 @@ def _get_dir_status(a_dir):
5979
# Remove initial slash on windows systems
6080
# urllib's url2pathname does not work properly
6181
a_dir_res = fs.create_resource(os.path.normpath(a_dir))
62-
a_dir_path = a_dir_res.uri_info.path
63-
if platform.system() == "Windows":
64-
if a_dir_path.startswith("/"):
65-
a_dir_path = a_dir_path[1:]
6682

83+
# a_dir_res is a LocalFilesystemResource already
84+
a_dir_path = a_dir_res.path
6785
if not os.path.exists(a_dir_path):
6886
status = "non-existent"
6987
elif not os.path.isdir(a_dir_path):
@@ -98,31 +116,6 @@ def _check_samples_dir(samples_dir):
98116
)
99117

100118

101-
def _extract_path_from_uri(uri):
102-
res = fs.create_resource(uri)
103-
if platform.system() == "Windows":
104-
# Case of file:///<LETTER>:/<REST_OF_PATH>:
105-
# Eliminate first slash ("/") from path if the first component
106-
if (
107-
res.uri_info.scheme == ""
108-
and res.uri_info.path[0] == "/"
109-
and res.uri_info.path[1].isalpha()
110-
and res.uri_info.path[2] == ":"
111-
):
112-
path = res.uri_info.path[1:]
113-
# Case of C:/<REST_OF_PATH>:
114-
# Just use the original path
115-
elif len(res.uri_info.scheme) == 1:
116-
path = uri
117-
# Otherwise return URI path as-is
118-
else:
119-
path = res.uri_info.path
120-
121-
else:
122-
path = res.uri_info.path
123-
return path
124-
125-
126119
def _khiops_env_file_exists(env_dir):
127120
"""Check ``khiops_env`` exists relative to the specified environment dir"""
128121
khiops_env_path = os.path.join(env_dir, "khiops_env")
@@ -399,7 +392,7 @@ def root_temp_dir(self):
399392
def root_temp_dir(self, dir_path):
400393
# Check existence, directory status and permissions for local paths
401394
if fs.is_local_resource(dir_path):
402-
real_dir_path = _extract_path_from_uri(dir_path)
395+
real_dir_path = fs.create_resource(dir_path).path
403396
if os.path.exists(real_dir_path):
404397
if os.path.isfile(real_dir_path):
405398
raise KhiopsEnvironmentError(
@@ -439,7 +432,7 @@ def create_temp_file(self, prefix, suffix):
439432
# Local resource: Effectively create the file with the python file API
440433
if fs.is_local_resource(self.root_temp_dir):
441434
# Extract the path from the potential URI
442-
root_temp_dir_path = _extract_path_from_uri(self.root_temp_dir)
435+
root_temp_dir_path = fs.create_resource(self.root_temp_dir).path
443436

444437
# Create the temporary file
445438
tmp_file_fd, tmp_file_path = tempfile.mkstemp(
@@ -470,7 +463,7 @@ def create_temp_dir(self, prefix):
470463
"""
471464
# Local resource: Effectively create the directory with the python file API
472465
if fs.is_local_resource(self.root_temp_dir):
473-
root_temp_dir_path = _extract_path_from_uri(self.root_temp_dir)
466+
root_temp_dir_path = fs.create_resource(self.root_temp_dir).path
474467
temp_dir = tempfile.mkdtemp(prefix=prefix, dir=root_temp_dir_path)
475468
# Remote resource: Just return a highly probable unique path
476469
else:
@@ -919,7 +912,7 @@ class KhiopsLocalRunner(KhiopsRunner):
919912
920913
- Windows:
921914
922-
- ``%PUBLIC%\khiops_data\samples%`` if it exists and is a directory
915+
- ``%PUBLIC%\khiops_data\samples%`` if ``%PUBLIC%`` is defined
923916
- ``%USERPROFILE%\khiops_data\samples%`` otherwise
924917
925918
- Linux and macOS:
@@ -1029,39 +1022,9 @@ def _initialize_khiops_environment(self):
10291022

10301023
def _initialize_default_samples_dir(self):
10311024
"""See class docstring"""
1032-
# Set the fallback value for the samples directory
1033-
home_samples_dir = Path.home() / "khiops_data" / "samples"
1034-
1035-
# Take the value of an environment variable in priority, if set to
1036-
# non-empty string
1037-
if "KHIOPS_SAMPLES_DIR" in os.environ and os.environ["KHIOPS_SAMPLES_DIR"]:
1038-
self._samples_dir = os.environ["KHIOPS_SAMPLES_DIR"]
1039-
1040-
# The samples location of Windows systems is:
1041-
# - %PUBLIC%\khiops_data\samples if %PUBLIC% exists
1042-
# - %USERPROFILE%\khiops_data\samples otherwise
1043-
elif platform.system() == "Windows":
1044-
if "PUBLIC" in os.environ:
1045-
public_samples_dir = os.path.join(
1046-
os.environ["PUBLIC"], "khiops_data", "samples"
1047-
)
1048-
else:
1049-
public_samples_dir = None
1050-
1051-
ok_statuses = ["ok", "remote"]
1052-
if (
1053-
public_samples_dir is not None
1054-
and _get_dir_status(public_samples_dir) in ok_statuses
1055-
):
1056-
self._samples_dir = public_samples_dir
1057-
else:
1058-
self._samples_dir = str(home_samples_dir)
1059-
1060-
# The default samples location on Unix systems is:
1061-
# $HOME/khiops/samples on Linux and Mac OS
1062-
else:
1063-
self._samples_dir = str(home_samples_dir)
1064-
1025+
samples_dir = get_default_samples_dir()
1026+
_check_samples_dir(samples_dir)
1027+
self._samples_dir = samples_dir
10651028
assert self._samples_dir is not None
10661029

10671030
def _check_tools(self):

khiops/tools.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import argparse
1414
import os
1515
import pathlib
16-
import platform
1716
import shutil
1817
import sys
1918
import tempfile
@@ -22,6 +21,7 @@
2221
import zipfile
2322

2423
import khiops.core as kh
24+
from khiops.core.internals.runner import get_default_samples_dir
2525
from khiops.samples import samples as samples_core
2626

2727
# We deactivate the warnings to not show a deprecation warning from sklearn
@@ -116,9 +116,10 @@ def download_datasets(
116116
"""Downloads the Khiops sample datasets for a given version
117117
118118
The datasets are downloaded to:
119+
- all systems: ``KHIOPS_SAMPLES_DIR/khiops_data/samples`` if
120+
``KHIOPS_SAMPLES_DIR`` is defined and non-empty
119121
- Windows:
120-
- ``%PUBLIC%\\khiops_data\\samples`` if ``%PUBLIC%`` is defined and
121-
points to a directory
122+
- ``%PUBLIC%\\khiops_data\\samples`` if ``%PUBLIC%`` is defined
122123
- ``%USERPROFILE%\\khiops_data\\samples`` otherwise
123124
- Linux/macOS: ``$HOME/khiops_data/samples``
124125
@@ -130,23 +131,7 @@ def download_datasets(
130131
The version of the samples datasets.
131132
"""
132133
# Note: The hidden parameter _called_from_shell is just to change the user messages.
133-
134-
# Check if the home sample dataset location is available and build it if necessary
135-
home_samples_dir = pathlib.Path.home() / "khiops_data" / "samples"
136-
137-
# Take the value of an environment variable in priority, if set to non-empty string
138-
# If the environment variable is not set, samples location is:
139-
# - on Windows systems:
140-
# - %PUBLIC%\khiops_data\samples if %PUBLIC% exists
141-
# - %USERPROFILE%\khiops_data\samples otherwise
142-
# - on Linux / macOS systems:
143-
# - $HOME/khiops_data/samples
144-
if "KHIOPS_SAMPLES_DIR" in os.environ and os.environ["KHIOPS_SAMPLES_DIR"]:
145-
samples_dir = os.environ["KHIOPS_SAMPLES_DIR"]
146-
elif platform.system() == "Windows" and "PUBLIC" in os.environ:
147-
samples_dir = os.path.join(os.environ["PUBLIC"], "khiops_data", "samples")
148-
else:
149-
samples_dir = str(home_samples_dir)
134+
samples_dir = get_default_samples_dir()
150135
if os.path.exists(samples_dir) and not force_overwrite:
151136
if _called_from_shell:
152137
instructions = "Execute with '--force-overwrite' to overwrite it"

0 commit comments

Comments
 (0)