Skip to content

Commit ab5b9a3

Browse files
authored
Merge pull request #571 from KhiopsML/244-support-azure-storage
Support for Azure storage
2 parents bcb7a08 + fa62037 commit ab5b9a3

9 files changed

Lines changed: 571 additions & 144 deletions

File tree

.github/workflows/dev-docker.yml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@ env:
55
DEFAULT_IMAGE_INCREMENT: 0
66
DEFAULT_SERVER_REVISION: main
77
DEFAULT_PYTHON_VERSIONS: 3.10 3.11 3.12 3.13 3.14
8-
DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.14
9-
DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.14
8+
DEFAULT_KHIOPS_GCS_DRIVER_REVISION: 0.0.16
9+
DEFAULT_KHIOPS_S3_DRIVER_REVISION: 0.0.15
10+
DEFAULT_KHIOPS_AZURE_DRIVER_REVISION: 0.0.6 # XXX : to modify soon
1011
on:
1112
pull_request:
1213
paths: [packaging/docker/khiopspydev/Dockerfile.*, .github/workflows/dev-docker.yml]
@@ -27,7 +28,7 @@ on:
2728
set-latest:
2829
type: boolean
2930
default: false
30-
description: Set as 'latest'
31+
description: Set as 'latest' (if the current branch is 'main')
3132
python-versions:
3233
type: string
3334
default: 3.10 3.11 3.12 3.13 3.14
@@ -38,12 +39,16 @@ on:
3839
description: Khiops Server Revision
3940
khiops-gcs-driver-revision:
4041
type: string
41-
default: 0.0.14
42+
default: 0.0.16
4243
description: Driver version for Google Cloud Storage remote files
4344
khiops-s3-driver-revision:
4445
type: string
45-
default: 0.0.14
46+
default: 0.0.15
4647
description: Driver version for AWS-S3 remote files
48+
khiops-azure-driver-revision:
49+
type: string
50+
default: 0.0.6 # XXX : to modify soon
51+
description: Driver version for Azure remote files and blobs
4752
concurrency:
4853
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
4954
cancel-in-progress: true
@@ -67,6 +72,7 @@ jobs:
6772
echo "IMAGE_URL=ghcr.io/khiopsml/khiops-python/khiopspydev-${{ matrix.khiopsdev-os }}" >> "$GITHUB_ENV"
6873
echo "KHIOPS_GCS_DRIVER_REVISION=${{ inputs.khiops-gcs-driver-revision || env.DEFAULT_KHIOPS_GCS_DRIVER_REVISION }}" >> "$GITHUB_ENV"
6974
echo "KHIOPS_S3_DRIVER_REVISION=${{ inputs.khiops-s3-driver-revision || env.DEFAULT_KHIOPS_S3_DRIVER_REVISION }}" >> "$GITHUB_ENV"
75+
echo "KHIOPS_AZURE_DRIVER_REVISION=${{ inputs.khiops-azure-driver-revision || env.DEFAULT_KHIOPS_AZURE_DRIVER_REVISION }}" >> "$GITHUB_ENV"
7076
- name: Checkout khiops-python sources
7177
uses: actions/checkout@v4
7278
- name: Set up Docker Buildx
@@ -105,6 +111,7 @@ jobs:
105111
"PYTHON_VERSIONS=${{ inputs.python-versions || env.DEFAULT_PYTHON_VERSIONS }}"
106112
"KHIOPS_GCS_DRIVER_REVISION=${{ env.KHIOPS_GCS_DRIVER_REVISION }}"
107113
"KHIOPS_S3_DRIVER_REVISION=${{ env.KHIOPS_S3_DRIVER_REVISION }}"
114+
"KHIOPS_AZURE_DRIVER_REVISION=${{ env.KHIOPS_AZURE_DRIVER_REVISION }}"
108115
tags: ${{ env.DOCKER_IMAGE_TAGS }}
109116
# Push only on manual request
110117
push: ${{ inputs.push || false }}

.github/workflows/tests.yml

Lines changed: 32 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -78,39 +78,27 @@ jobs:
7878
run: |
7979
CONDA="/root/miniforge3/bin/conda"
8080
81-
# Native Khiops-based Conda environment, and
82-
# `khiops-core`-based Conda environment
83-
CONDA_ENVS="py${{ matrix.python-version }} py${{ matrix.python-version }}_conda"
84-
for CONDA_ENV in $CONDA_ENVS
85-
do
86-
mkdir -p -m u+rwx reports/"$CONDA_ENV"
81+
# Native Khiops-based Conda environment (to test in a specific Python version)
82+
CONDA_ENV=py${{ matrix.python-version }}
83+
mkdir -p -m u+rwx reports/"$CONDA_ENV"
8784
88-
# install within the conda environments without activating them
89-
$CONDA install -y -n "$CONDA_ENV" unittest-xml-reporting
90-
$CONDA install -y -n "$CONDA_ENV" --file test-requirements.txt
91-
done
85+
# Install into the Conda environment without activating it
86+
$CONDA install -y -n "$CONDA_ENV" unittest-xml-reporting
87+
$CONDA install -y -n "$CONDA_ENV" --file test-requirements.txt
9288
- name: Install khiops-python dependencies
9389
if: success() || failure()
9490
run: |
95-
# The following git command is required,
96-
# as the Git repository is in a directory the current user does not own,
97-
# Python versioneer fails to compute the current version correctly otherwise
98-
git config --global --add safe.directory $(realpath .)
9991
CONDA="/root/miniforge3/bin/conda"
100-
# Native Khiops-based Conda environment, and
101-
# `khiops-core`-based Conda environment
102-
CONDA_ENVS="py${{ matrix.python-version }} py${{ matrix.python-version }}_conda"
103-
for CONDA_ENV in $CONDA_ENVS
104-
do
105-
# Since Python 3.13, setuptools is not installed automatically anymore
106-
$CONDA install -y -n "$CONDA_ENV" setuptools
92+
# Native Khiops-based Conda environment (to test in a specific Python version)
93+
CONDA_ENV=py${{ matrix.python-version }}
94+
# Since Python 3.13, setuptools is not installed automatically anymore
95+
$CONDA install -y -n "$CONDA_ENV" setuptools
10796
108-
# Add homogeneous TOML support (Python >= 3.12 has standard tomllib)
109-
$CONDA install -y -n "$CONDA_ENV" tomli
110-
$CONDA run --no-capture-output -n "$CONDA_ENV" python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
111-
$CONDA install -y -n "$CONDA_ENV" `cat requires.txt`
112-
rm -f requires.txt
113-
done
97+
# Add homogeneous TOML support (Python >= 3.12 has standard tomllib)
98+
$CONDA install -y -n "$CONDA_ENV" tomli
99+
$CONDA run --no-capture-output -n "$CONDA_ENV" python scripts/extract_dependencies_from_pyproject_toml.py -f "pyproject.toml" > requires.txt
100+
$CONDA install -y -n "$CONDA_ENV" `cat requires.txt`
101+
rm -f requires.txt
114102
- name: Configure Expensive Tests Setting
115103
# Skip expensive tests by default, unless on the `main-v10` or `main` branches
116104
if: github.ref != 'main-v10' && github.ref != 'main' && ! inputs.run-expensive-tests
@@ -138,21 +126,32 @@ jobs:
138126
echo "Generated AWS configuration..."
139127
cat ${GITHUB_WORKSPACE}/.aws/configuration
140128
/scripts/run_fake_remote_file_servers.sh . # launch the servers in the background
129+
141130
# Set environment variables for the tests with GCS
142131
GCS_BUCKET_NAME=data-test-khiops-driver-gcs/khiops_data
143132
GCS_DRIVER_LOGLEVEL=info # set to debug for diagnosis
133+
144134
# Set environment variables for the tests with S3
145135
S3_DRIVER_LOGLEVEL=info # set to debug for diagnosis
146136
S3_BUCKET_NAME=s3-bucket
147137
AWS_SHARED_CREDENTIALS_FILE=${{ github.workspace }}/.aws/credentials
148138
AWS_CONFIG_FILE=${{ github.workspace }}/.aws/configuration
139+
140+
# Set environment variables for the tests with Azure
141+
AZURE_STORAGE_CONNECTION_STRING='${{ secrets.AZURE_CONNECTION_STRING }}'
142+
CLOUD_BLOB_URI_PREFIX=${{ vars.CLOUD_BLOB_URI_PREFIX }}
143+
CLOUD_FILE_URI_PREFIX=${{ vars.CLOUD_FILE_URI_PREFIX }}
144+
149145
# Persist environment variables for subsequent steps
150146
echo "GCS_BUCKET_NAME=${GCS_BUCKET_NAME}" >> "$GITHUB_ENV"
151147
echo "GCS_DRIVER_LOGLEVEL=${GCS_DRIVER_LOGLEVEL}" >> "$GITHUB_ENV"
152148
echo "S3_DRIVER_LOGLEVEL=${S3_DRIVER_LOGLEVEL}" >> "$GITHUB_ENV"
153149
echo "S3_BUCKET_NAME=${S3_BUCKET_NAME}" >> "$GITHUB_ENV"
154150
echo "AWS_SHARED_CREDENTIALS_FILE=${AWS_SHARED_CREDENTIALS_FILE}" >> "$GITHUB_ENV"
155151
echo "AWS_CONFIG_FILE=${AWS_CONFIG_FILE}" >> "$GITHUB_ENV"
152+
echo "AZURE_STORAGE_CONNECTION_STRING=${AZURE_STORAGE_CONNECTION_STRING}" >> "$GITHUB_ENV"
153+
echo "CLOUD_BLOB_URI_PREFIX=${CLOUD_BLOB_URI_PREFIX}" >> "$GITHUB_ENV"
154+
echo "CLOUD_FILE_URI_PREFIX=${CLOUD_FILE_URI_PREFIX}" >> "$GITHUB_ENV"
156155
- name: Authenticate to GCP using "Workload Identity Federation"
157156
if: env.SKIP_EXPENSIVE_TESTS != 'true'
158157
# For integration tests on GCS we use a real Google account
@@ -186,23 +185,17 @@ jobs:
186185
# version is retrieved
187186
git config --global --add safe.directory $(realpath .)
188187
CONDA="/root/miniforge3/bin/conda"
189-
# Native Khiops-based Conda environment, and
190-
# `khiops-core`-based Conda environment
191-
CONDA_ENVS="py${{ matrix.python-version }} py${{ matrix.python-version }}_conda"
192-
for CONDA_ENV in $CONDA_ENVS
193-
do
194-
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage run -m xmlrunner -o "reports/$CONDA_ENV" -v
195-
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage report -m
196-
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage xml -o "reports/$CONDA_ENV/py-coverage.xml"
197-
done
188+
# Native Khiops-based Conda environment (to test in a specific Python version)
189+
CONDA_ENV=py${{ matrix.python-version }}
190+
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage run -m xmlrunner -o "reports/$CONDA_ENV" -v
191+
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage report -m
192+
$CONDA run --no-capture-output -n "$CONDA_ENV" coverage xml -o "reports/$CONDA_ENV/py-coverage.xml"
198193
- name: Display Test Reports
199194
if: success() || failure()
200195
uses: dorny/test-reporter@v1
201196
with:
202197
name: Run Tests ${{ matrix.python-version }}
203-
path: >-
204-
reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml,
205-
reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
198+
path: reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml
206199
reporter: java-junit
207200
path-replace-backslashes: 'true' # Necessary for windows paths
208201
fail-on-error: 'false'
@@ -214,8 +207,6 @@ jobs:
214207
path: |-
215208
reports/py${{ matrix.python-version }}/TEST-tests.*.*.xml
216209
reports/py${{ matrix.python-version }}/py-coverage.xml
217-
reports/py${{ matrix.python-version }}_conda/TEST-tests.*.*.xml
218-
reports/py${{ matrix.python-version }}_conda/py-coverage.xml
219210
tests/resources/scenario_generation/*/ref/*._kh
220211
tests/resources/scenario_generation/*/output/*._kh
221212
tests/resources/*/output_reports/*.txt

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
### Added
1212
- (`sklearn`) `keep_selected_variables_only` parameter to the predictors (`KhiopsClassifier` and `KhiopsRegressor`)
13+
- (General) Support for Azure storage
1314

1415
### Changed
1516
- (`core`) Rename `variable_part_dimensions` to `inner_variable_dimensions` in Coclustering results.

0 commit comments

Comments
 (0)