Skip to content

Commit 19b6b04

Browse files
imddanish and Md Danish authored
[ManagedCleanroom] Add --subdirectory parameter for dataset publish (Azure#9852)
Adds support for mounting a subdirectory/prefix inside the storage container when publishing analytics datasets, providing parity with the upstream cleanroom extension's blobfuse subdirectory feature.

Changes:
* Added --subdirectory parameter to `az managedcleanroom frontend analytics dataset publish`.
* The new field is propagated into the request body's `store.subdirectory` property only when set.
* No client-side validation is added: the restriction on combining --subdirectory with --encryption-mode CPK is enforced by the service (as noted in HISTORY.rst), consistent with the cleanroom-side schema and reference CLI.
* Updated the auto-generated `analytics_frontend_api` SDK JSON dict templates (sync + async operations) to include `subdirectory` after `awsCgsSecretId`. The change matches what a fresh autorest regen produces from the updated frontend.yaml; only the substantive subdirectory delta is included to avoid unrelated cosmetic churn from the team's SDK post-processing.
* Added unit tests covering: subdirectory propagation in SSE mode and default omission when not provided.
* Updated _help.py with the new parameter doc entry and an example showing SSE publish with --subdirectory.
* Bumped extension version 1.0.0b6 -> 1.0.0b7 and added HISTORY.rst entry.

Validation:
* azdev style managedcleanroom: PASSED (pylint + flake8)
* python scripts/ci/test_index.py -q: OK
* All unit tests pass (2 new subdirectory tests included).

Co-authored-by: Md Danish <mddanish@microsoft.com>
1 parent b660406 commit 19b6b04

8 files changed

Lines changed: 113 additions & 7 deletions

File tree

src/managedcleanroom/HISTORY.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,11 @@ Release History
7373
- Fixed schema_file parameter handling in dataset publish to support Azure CLI auto-loading (dict, string, and @file formats)
7474
- Fixed runhistory API endpoint method name
7575
- Fixed runresult API endpoint method name
76+
77+
1.0.0b7
78+
+++++++
79+
* Added: ``--subdirectory`` parameter to ``az managedcleanroom frontend analytics dataset publish``
80+
to mount a specific subdirectory/prefix inside the storage container. Validation of
81+
encryption-mode compatibility is enforced by the service.
82+
* SDK: ``analytics_frontend_api`` updated with the new ``store.subdirectory`` field
83+
in the JSON dict templates (sync + async) to match the regenerated autorest output.

src/managedcleanroom/azext_managedcleanroom/_frontend_custom.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,7 @@ def frontend_collaboration_dataset_publish(
253253
kek_keyvault_url=None,
254254
kek_secret_id=None,
255255
kek_maa_url=None,
256+
subdirectory=None,
256257
api_version=None,
257258
):
258259
"""Publish a dataset
@@ -278,6 +279,7 @@ def frontend_collaboration_dataset_publish(
278279
:param kek_keyvault_url: KEK Key Vault URL (CPK mode)
279280
:param kek_secret_id: KEK secret ID (CPK mode)
280281
:param kek_maa_url: KEK MAA URL (CPK mode)
282+
:param subdirectory: Optional subdirectory/prefix inside the storage container
281283
:param api_version: API version to use for this request
282284
:return: Publish result
283285
"""
@@ -402,6 +404,8 @@ def frontend_collaboration_dataset_publish(
402404
"storageAccountType": storage_account_type,
403405
"encryptionMode": encryption_mode,
404406
}
407+
if subdirectory:
408+
store["subdirectory"] = subdirectory
405409

406410
# Build identity
407411
identity = {

src/managedcleanroom/azext_managedcleanroom/_help.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,9 @@
411411
- name: --kek-maa-url
412412
type: string
413413
short-summary: MAA URL for KEK (CPK mode only)
414+
- name: --subdirectory
415+
type: string
416+
short-summary: Optional subdirectory/prefix inside the storage container to mount
414417
examples:
415418
- name: Publish a dataset using SSE encryption with individual parameters
416419
text: |
@@ -454,6 +457,22 @@
454457
--collaboration-id my-collab-123 \
455458
--document-id my-dataset \
456459
--body @dataset-config.json
460+
- name: Publish a dataset mounted at a subdirectory
461+
text: |
462+
az managedcleanroom frontend analytics dataset publish \
463+
--collaboration-id my-collab-123 \
464+
--document-id my-dataset \
465+
--storage-account-url https://mystorageaccount.blob.core.windows.net \
466+
--container-name datasets \
467+
--storage-account-type AzureStorageAccount \
468+
--encryption-mode SSE \
469+
--schema-file @schema.json \
470+
--access-mode ReadWrite \
471+
--identity-name northwind-identity \
472+
--identity-client-id fb907136-1234-5678-9abc-def012345678 \
473+
--identity-tenant-id 72f988bf-1234-5678-9abc-def012345678 \
474+
--identity-issuer-url https://oidc.example.com/issuer \
475+
--subdirectory year=2026/month=05
457476
"""
458477

459478

src/managedcleanroom/azext_managedcleanroom/_params.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ def load_arguments(self, _): # pylint: disable=unused-argument
204204
'kek_maa_url',
205205
options_list=['--kek-maa-url'],
206206
help='MAA URL for KEK (CPK mode only)')
207+
c.argument(
208+
'subdirectory',
209+
options_list=['--subdirectory'],
210+
help='Optional subdirectory/prefix inside the storage container to mount.')
207211

208212
# Dataset queries context
209213
with self.argument_context('managedcleanroom frontend analytics dataset queries') as c:

src/managedcleanroom/azext_managedcleanroom/analytics_frontend_api/aio/operations/_operations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1304,7 +1304,8 @@ async def analytics_datasets_document_id_get(
13041304
"encryptionMode": "str",
13051305
"storageAccountType": "str",
13061306
"storageAccountUrl": "str",
1307-
"awsCgsSecretId": "str"
1307+
"awsCgsSecretId": "str",
1308+
"subdirectory": ""
13081309
},
13091310
"dek": {
13101311
"keyVaultUrl": "str",
@@ -1441,7 +1442,8 @@ async def analytics_datasets_document_id_publish_post( # pylint: disable=name-t
14411442
"encryptionMode": "str",
14421443
"storageAccountType": "str",
14431444
"storageAccountUrl": "str",
1444-
"awsCgsSecretId": "str"
1445+
"awsCgsSecretId": "str",
1446+
"subdirectory": ""
14451447
},
14461448
"dek": {
14471449
"keyVaultUrl": "str",
@@ -1555,7 +1557,8 @@ async def analytics_datasets_document_id_publish_post( # pylint: disable=name-t
15551557
"encryptionMode": "str",
15561558
"storageAccountType": "str",
15571559
"storageAccountUrl": "str",
1558-
"awsCgsSecretId": "str"
1560+
"awsCgsSecretId": "str",
1561+
"subdirectory": ""
15591562
},
15601563
"dek": {
15611564
"keyVaultUrl": "str",

src/managedcleanroom/azext_managedcleanroom/analytics_frontend_api/operations/_operations.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2247,7 +2247,8 @@ def analytics_datasets_document_id_get(
22472247
"encryptionMode": "str",
22482248
"storageAccountType": "str",
22492249
"storageAccountUrl": "str",
2250-
"awsCgsSecretId": "str"
2250+
"awsCgsSecretId": "str",
2251+
"subdirectory": ""
22512252
},
22522253
"dek": {
22532254
"keyVaultUrl": "str",
@@ -2384,7 +2385,8 @@ def analytics_datasets_document_id_publish_post( # pylint: disable=name-too-lon
23842385
"encryptionMode": "str",
23852386
"storageAccountType": "str",
23862387
"storageAccountUrl": "str",
2387-
"awsCgsSecretId": "str"
2388+
"awsCgsSecretId": "str",
2389+
"subdirectory": ""
23882390
},
23892391
"dek": {
23902392
"keyVaultUrl": "str",
@@ -2498,7 +2500,8 @@ def analytics_datasets_document_id_publish_post( # pylint: disable=name-too-lon
24982500
"encryptionMode": "str",
24992501
"storageAccountType": "str",
25002502
"storageAccountUrl": "str",
2501-
"awsCgsSecretId": "str"
2503+
"awsCgsSecretId": "str",
2504+
"subdirectory": ""
25022505
},
25032506
"dek": {
25042507
"keyVaultUrl": "str",

src/managedcleanroom/azext_managedcleanroom/tests/latest/test_frontend_dataset.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,71 @@ def test_publish_dataset_cpk_missing_keys(self, mock_get_client):
336336
"CPK encryption mode requires", str(
337337
context.exception))
338338

339+
@patch("azext_managedcleanroom._frontend_custom.get_frontend_client")
340+
def test_publish_dataset_with_subdirectory(self, mock_get_client):
341+
"""Test that --subdirectory propagates into the request body's store"""
342+
mock_publish_response = {"datasetId": "test-dataset-sub", "status": "published"}
343+
mock_client = Mock()
344+
mock_client.collaboration.analytics_datasets_document_id_publish_post.return_value = mock_publish_response
345+
mock_get_client.return_value = mock_client
346+
347+
test_schema = {"fields": [], "format": "Delta"}
348+
349+
with patch("builtins.open", unittest.mock.mock_open(read_data=json.dumps(test_schema))):
350+
frontend_collaboration_dataset_publish(
351+
cmd=Mock(),
352+
collaboration_id="test-collab-123",
353+
document_id="test-dataset-sub",
354+
body=None,
355+
storage_account_url="https://mystorageaccount.blob.core.windows.net",
356+
container_name="datasets",
357+
storage_account_type="AzureStorageAccount",
358+
encryption_mode="SSE",
359+
schema_file="@schema.json",
360+
access_mode="ReadWrite",
361+
identity_name="northwind-identity",
362+
identity_client_id="fb907136-1234-5678-9abc-def012345678",
363+
identity_tenant_id="72f988bf-1234-5678-9abc-def012345678",
364+
identity_issuer_url="https://oidc.example.com/issuer",
365+
subdirectory="year=2026/month=05",
366+
)
367+
368+
call_args = mock_client.collaboration.analytics_datasets_document_id_publish_post.call_args
369+
body = call_args[0][2]
370+
self.assertEqual(body["store"]["subdirectory"], "year=2026/month=05")
371+
372+
@patch("azext_managedcleanroom._frontend_custom.get_frontend_client")
373+
def test_publish_dataset_subdirectory_default_omitted(self, mock_get_client):
374+
"""Test that subdirectory key is omitted from store when not provided"""
375+
mock_publish_response = {"datasetId": "test-dataset-nosub", "status": "published"}
376+
mock_client = Mock()
377+
mock_client.collaboration.analytics_datasets_document_id_publish_post.return_value = mock_publish_response
378+
mock_get_client.return_value = mock_client
379+
380+
test_schema = {"fields": [], "format": "Delta"}
381+
382+
with patch("builtins.open", unittest.mock.mock_open(read_data=json.dumps(test_schema))):
383+
frontend_collaboration_dataset_publish(
384+
cmd=Mock(),
385+
collaboration_id="test-collab-123",
386+
document_id="test-dataset-nosub",
387+
body=None,
388+
storage_account_url="https://mystorageaccount.blob.core.windows.net",
389+
container_name="datasets",
390+
storage_account_type="AzureStorageAccount",
391+
encryption_mode="SSE",
392+
schema_file="@schema.json",
393+
access_mode="ReadWrite",
394+
identity_name="northwind-identity",
395+
identity_client_id="fb907136-1234-5678-9abc-def012345678",
396+
identity_tenant_id="72f988bf-1234-5678-9abc-def012345678",
397+
identity_issuer_url="https://oidc.example.com/issuer",
398+
)
399+
400+
call_args = mock_client.collaboration.analytics_datasets_document_id_publish_post.call_args
401+
body = call_args[0][2]
402+
self.assertNotIn("subdirectory", body["store"])
403+
339404

340405
if __name__ == '__main__':
341406
unittest.main()

src/managedcleanroom/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111

1212
# HISTORY.rst entry.
13-
VERSION = '1.0.0b6'
13+
VERSION = '1.0.0b7'
1414

1515
# The full list of classifiers is available at
1616
# https://pypi.python.org/pypi?%3Aaction=list_classifiers

0 commit comments

Comments
 (0)