diff --git a/.github/workflows/deploy-prod-bulk.yml b/.github/workflows/deploy-prod-bulk.yml new file mode 100644 index 0000000..6f459dd --- /dev/null +++ b/.github/workflows/deploy-prod-bulk.yml @@ -0,0 +1,32 @@ +# Orchestrator workflow for deploying Fabric items to the Prod workspace via the Bulk Import API. +# +# Alternative to deploy-prod.yml. Both fire on push to main; only one runs based +# on the DEPLOY_METHOD repository variable: +# - DEPLOY_METHOD = 'bulk' → this workflow runs +# - DEPLOY_METHOD = 'fabric-cicd' or '' → deploy-prod.yml runs +# - any other value → both skip (safe default) +# +# The `Prod` GitHub Environment should have protection rules configured +# (e.g., required reviewers, branch policy restricting to main only). +# +# The ETL workflow (etl-prod.yml) triggers automatically via workflow_run +# once deployment completes successfully. + +name: Deploy to Prod (Bulk API) + +on: + push: + branches: [main] + paths: ["data/fabric/**", ".github/workflows/**"] + +permissions: + contents: read + +jobs: + deploy-bulk: + name: Deploy via Bulk Import API + if: vars.DEPLOY_METHOD == 'bulk' + uses: ./.github/workflows/reusable-deploy-bulk.yml + with: + environment: Prod + secrets: inherit diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 4e25baa..ba7b082 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -10,7 +10,7 @@ # The ETL workflow (etl-prod.yml) triggers automatically via workflow_run # once deployment completes successfully. -name: Deploy to Prod +name: Deploy to Prod (fabric-cicd) on: push: @@ -23,6 +23,10 @@ permissions: jobs: deploy-supported: name: Deploy supported items + # Gated by the DEPLOY_METHOD repository variable. Runs when unset or set to + # 'fabric-cicd'. Set DEPLOY_METHOD='bulk' to route deployments through + # deploy-prod-bulk.yml instead. Any other value disables both workflows. 
+ if: vars.DEPLOY_METHOD == '' || vars.DEPLOY_METHOD == 'fabric-cicd' uses: ./.github/workflows/reusable-deploy-supported.yml with: environment: Prod diff --git a/.github/workflows/deploy-test-bulk.yml b/.github/workflows/deploy-test-bulk.yml new file mode 100644 index 0000000..f5ad058 --- /dev/null +++ b/.github/workflows/deploy-test-bulk.yml @@ -0,0 +1,29 @@ +# Orchestrator workflow for deploying Fabric items to the Test workspace via the Bulk Import API. +# +# Alternative to deploy-test.yml. Both fire on push to test; only one runs based +# on the DEPLOY_METHOD repository variable: +# - DEPLOY_METHOD = 'bulk' → this workflow runs +# - DEPLOY_METHOD = 'fabric-cicd' or '' → deploy-test.yml runs +# - any other value → both skip (safe default) +# +# The ETL workflow (etl-test.yml) triggers automatically via workflow_run +# once deployment completes successfully. + +name: Deploy to Test (Bulk API) + +on: + push: + branches: [test] + paths: ["data/fabric/**", ".github/workflows/**"] + +permissions: + contents: read + +jobs: + deploy-bulk: + name: Deploy via Bulk Import API + if: vars.DEPLOY_METHOD == 'bulk' + uses: ./.github/workflows/reusable-deploy-bulk.yml + with: + environment: Test + secrets: inherit diff --git a/.github/workflows/deploy-test.yml b/.github/workflows/deploy-test.yml index 328c80d..c9aecbd 100644 --- a/.github/workflows/deploy-test.yml +++ b/.github/workflows/deploy-test.yml @@ -7,7 +7,7 @@ # The ETL workflow (etl-test.yml) triggers automatically via workflow_run # once deployment completes successfully. -name: Deploy to Test +name: Deploy to Test (fabric-cicd) on: push: @@ -20,6 +20,10 @@ permissions: jobs: deploy-supported: name: Deploy supported items + # Gated by the DEPLOY_METHOD repository variable. Runs when unset or set to + # 'fabric-cicd'. Set DEPLOY_METHOD='bulk' to route deployments through + # deploy-test-bulk.yml instead. Any other value disables both workflows. 
+ if: vars.DEPLOY_METHOD == '' || vars.DEPLOY_METHOD == 'fabric-cicd' uses: ./.github/workflows/reusable-deploy-supported.yml with: environment: Test diff --git a/.github/workflows/etl-prod.yml b/.github/workflows/etl-prod.yml index 74855b2..5f57dfd 100644 --- a/.github/workflows/etl-prod.yml +++ b/.github/workflows/etl-prod.yml @@ -10,7 +10,10 @@ name: ETL - Prod on: workflow_run: - workflows: ["Deploy to Prod"] + # Triggered by either deployment workflow. The DEPLOY_METHOD repository + # variable ensures only one of them actually runs per push, so ETL fires once. + # The success conclusion gate below skips ETL when a deploy was skipped. + workflows: ["Deploy to Prod (fabric-cicd)", "Deploy to Prod (Bulk API)"] types: [completed] permissions: diff --git a/.github/workflows/etl-test.yml b/.github/workflows/etl-test.yml index ce86064..aac7159 100644 --- a/.github/workflows/etl-test.yml +++ b/.github/workflows/etl-test.yml @@ -10,7 +10,10 @@ name: ETL - Test on: workflow_run: - workflows: ["Deploy to Test"] + # Triggered by either deployment workflow. The DEPLOY_METHOD repository + # variable ensures only one of them actually runs per push, so ETL fires once. + # The success conclusion gate below skips ETL when a deploy was skipped. + workflows: ["Deploy to Test (fabric-cicd)", "Deploy to Test (Bulk API)"] types: [completed] permissions: diff --git a/.github/workflows/reusable-deploy-bulk.yml b/.github/workflows/reusable-deploy-bulk.yml new file mode 100644 index 0000000..a1e1f7d --- /dev/null +++ b/.github/workflows/reusable-deploy-bulk.yml @@ -0,0 +1,265 @@ +# Reusable workflow: Deploy supported Fabric items via the Bulk Import Item Definitions API (Preview). +# +# Alternative deployment path to reusable-deploy-supported.yml. Uses the Fabric +# REST API's bulk import endpoint instead of the fabric-cicd Python library. +# Selected at orchestrator level via the DEPLOY_METHOD repository variable. 
+# +# Called by: deploy-test-bulk.yml, deploy-prod-bulk.yml +# +# Prerequisites: +# - GitHub Environment secrets: AZURE_TENANT_ID, AZURE_CLIENT_ID, +# AZURE_CLIENT_SECRET, FABRIC_WORKSPACE_ID +# - Service principal must have Contributor (or higher) role on the target workspace +# - Fabric Admin must enable "Service principals can use Fabric APIs" +# - Every item type in the request payload must support service principals +# (the bulk API requires SPN support for ALL items in the request, not just some) +# +# Known gaps vs. reusable-deploy-supported.yml (fabric-cicd): +# - No parameter.yml find_replace / key_value_replace substitution +# - No orphan cleanup (Bulk Import API only supports Create/Update, not Delete) +# - No item_type_in_scope filter (deploys everything in repository_directory) +# +# API references: +# - Bulk import: https://learn.microsoft.com/en-us/rest/api/fabric/core/items/bulk-import-item-definitions(beta) +# - Long running ops: https://learn.microsoft.com/en-us/rest/api/fabric/articles/long-running-operation +# +# TODO: When the Bulk Import API graduates from Preview, drop the ?beta=true +# query parameter and re-verify the endpoint URL. 
+
+name: "Reusable: Deploy via Bulk Import API"
+
+on:
+  workflow_call:
+    inputs:
+      environment:
+        description: "Target environment (Test, Prod)"
+        required: true
+        type: string
+      repository_directory:
+        description: "Path to the Fabric item definitions"
+        required: false
+        type: string
+        default: "data/fabric"
+
+permissions:
+  contents: read
+
+jobs:
+  deploy:
+    name: Deploy via Bulk Import API (${{ inputs.environment }})
+    runs-on: ubuntu-latest
+    timeout-minutes: 30
+    environment: ${{ inputs.environment }}
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
+
+      - name: Set up Python
+        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: pip install requests
+
+      - name: Bulk import item definitions
+        env:
+          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
+          FABRIC_WORKSPACE_ID: ${{ secrets.FABRIC_WORKSPACE_ID }}
+          REPOSITORY_DIRECTORY: ${{ inputs.repository_directory }}
+        run: |
+          python -c "
+          # NOTE: this whole script is a double-quoted shell argument. Inside it,
+          # use single quotes for Python strings, escape any double quote with a
+          # backslash, and avoid backticks and dollar signs (the shell would
+          # expand them before Python ever sees the text).
+          import base64
+          import json
+          import os
+          import pathlib
+          import sys
+          import time
+          import requests
+
+          # Polling configuration (decisions #15, #16)
+          POLL_FALLBACK_SECONDS = 30
+          POLL_FLOOR_SECONDS = 5
+          POLL_TIMEOUT_SECONDS = 20 * 60
+          TOKEN_REFRESH_EVERY_N_POLLS = 20
+
+          # Files to skip when building definitionParts[]. Two layers of exclusion:
+          #   1. Named files: known files that should never be sent (parameter.yml is
+          #      fabric-cicd config; .gitkeep is a Git placeholder).
+          #   2. Structural rule: item definitions always live inside
+          #      <name>.<ItemType>/ folders, so any file directly under
+          #      repository_directory is excluded by construction (handled in
+          #      build_definition_parts).
+          EXCLUDED_FILES = {'parameter.yml', '.gitkeep'}
+
+          tenant_id = os.environ['AZURE_TENANT_ID']
+          client_id = os.environ['AZURE_CLIENT_ID']
+          client_secret = os.environ['AZURE_CLIENT_SECRET']
+          workspace_id = os.environ['FABRIC_WORKSPACE_ID']
+          repo_dir = pathlib.Path(os.environ['REPOSITORY_DIRECTORY']).resolve()
+
+
+          # Interpret a Retry-After header value as a whole number of seconds.
+          # Returns POLL_FALLBACK_SECONDS when the header is absent (None) or is
+          # not a plain integer (for example an HTTP-date), so a surprising header
+          # value cannot crash the deployment with an unhandled ValueError.
+          # Callers apply POLL_FLOOR_SECONDS themselves.
+          def parse_retry_after(value) -> int:
+              try:
+                  return int(value)
+              except (TypeError, ValueError):
+                  return POLL_FALLBACK_SECONDS
+
+
+          # Request an access token for the Fabric API scope using the
+          # client-credentials grant and the service principal read from the
+          # environment. Exits the script with an ::error:: message on any
+          # non-200 response; otherwise returns the bearer token string.
+          def acquire_token() -> str:
+              token_url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token'
+              resp = requests.post(
+                  token_url,
+                  data={
+                      'grant_type': 'client_credentials',
+                      'client_id': client_id,
+                      'client_secret': client_secret,
+                      'scope': 'https://api.fabric.microsoft.com/.default',
+                  },
+                  timeout=30,
+              )
+              if resp.status_code != 200:
+                  sys.exit(f'::error::Token acquisition failed: HTTP {resp.status_code} {resp.text}')
+              token = resp.json()['access_token']
+              # Mask the token in workflow logs (decision #10)
+              print(f'::add-mask::{token}')
+              return token
+
+
+          # Build the definitionParts list: one entry per eligible file under
+          # repo_dir, base64-encoded, with a path relative to repo_dir (leading
+          # slash, forward slashes). Files are visited in sorted order so the
+          # request body is deterministic. Exits the script if repo_dir is
+          # missing or if no eligible file is found.
+          def build_definition_parts() -> list:
+              if not repo_dir.is_dir():
+                  sys.exit(f'::error::Repository directory not found: {repo_dir}')
+              parts = []
+              for f in sorted(repo_dir.rglob('*')):
+                  if not f.is_file():
+                      continue
+                  if f.name in EXCLUDED_FILES:
+                      continue
+                  # Item definitions live inside <name>.<ItemType>/ subfolders;
+                  # anything at the root of repository_directory cannot belong
+                  # to an item.
+                  if f.parent == repo_dir:
+                      continue
+                  rel = '/' + f.relative_to(repo_dir).as_posix()
+                  parts.append({
+                      'path': rel,
+                      'payload': base64.b64encode(f.read_bytes()).decode('ascii'),
+                      'payloadType': 'InlineBase64',
+                  })
+              if not parts:
+                  sys.exit(f'::error::No item definition files found under {repo_dir}')
+              return parts
+
+
+          # Poll the long running operation until it succeeds.
+          #   operation_id        - id taken from the x-ms-operation-id header
+          #   headers             - request headers; the Authorization entry is
+          #                         replaced in place whenever the token is
+          #                         refreshed, so the caller sees the new token
+          #   initial_retry_after - seconds to wait before the first poll
+          #                         (0/None falls back to POLL_FALLBACK_SECONDS)
+          # Returns None on Succeeded. Exits the script on Failed/Undefined, on a
+          # non-200 poll response, or once POLL_TIMEOUT_SECONDS has elapsed.
+          def poll_lro(operation_id: str, headers: dict, initial_retry_after: int) -> None:
+              base = 'https://api.fabric.microsoft.com/v1/operations'
+              retry_after = max(initial_retry_after or POLL_FALLBACK_SECONDS, POLL_FLOOR_SECONDS)
+              started = time.monotonic()
+              poll_count = 0
+
+              while True:
+                  # The deadline is checked before sleeping, so the final poll
+                  # may overshoot the timeout by at most one polling interval.
+                  if time.monotonic() - started > POLL_TIMEOUT_SECONDS:
+                      sys.exit(
+                          f'::error::LRO polling timed out after {POLL_TIMEOUT_SECONDS}s '
+                          f'(operation {operation_id})'
+                      )
+
+                  time.sleep(retry_after)
+                  poll_count += 1
+
+                  # Refresh token periodically for long-running operations
+                  # (mirrors the pattern in reusable-fabric-etl.yml).
+                  if poll_count % TOKEN_REFRESH_EVERY_N_POLLS == 0:
+                      headers['Authorization'] = f'Bearer {acquire_token()}'
+
+                  resp = requests.get(f'{base}/{operation_id}', headers=headers, timeout=30)
+                  if resp.status_code != 200:
+                      sys.exit(f'::error::Poll request failed: HTTP {resp.status_code} {resp.text}')
+
+                  body = resp.json()
+                  status = body.get('status', 'Unknown')
+                  # Measure elapsed time after the sleep and the request so the
+                  # log reflects when this poll actually completed.
+                  elapsed = int(time.monotonic() - started)
+                  print(f'Poll {poll_count} (t+{elapsed}s): status={status}')
+
+                  if status == 'Succeeded':
+                      return
+                  if status in ('Failed', 'Undefined'):
+                      print(json.dumps(body, indent=2))
+                      sys.exit(f'::error::LRO ended with status: {status}')
+
+                  # NotStarted or Running — keep polling. Honor Retry-After if
+                  # present and parseable, never polling faster than the floor.
+                  retry_after = max(
+                      parse_retry_after(resp.headers.get('Retry-After')),
+                      POLL_FLOOR_SECONDS,
+                  )
+
+
+          # Print the result body, then exit with an error if it carries no
+          # importItemDefinitionsDetails or if any item reports Failed or
+          # SucceededDespiteFailures. Returns None when every item succeeded.
+          def check_per_item_status(result: dict) -> None:
+              details = result.get('importItemDefinitionsDetails', [])
+              print(json.dumps(result, indent=2))
+              if not details:
+                  sys.exit('::error::Result body has no importItemDefinitionsDetails')
+
+              failures = [
+                  d for d in details
+                  if d.get('operationStatus') in ('Failed', 'SucceededDespiteFailures')
+              ]
+              if failures:
+                  summary = '\n'.join(
+                      f\"  - {d.get('itemDisplayName')} ({d.get('itemType')}): \"
+                      f\"{d.get('operationStatus')}\"
+                      for d in failures
+                  )
+                  sys.exit(f'::error::{len(failures)} item(s) failed:\n{summary}')
+
+              print(f'All {len(details)} items deployed successfully.')
+
+
+          # ---------- main flow ----------
+          token = acquire_token()
+          headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}
+
+          parts = build_definition_parts()
+          print(f'Built request body with {len(parts)} definition parts from {repo_dir}')
+
+          request_body = {
+              'definitionParts': parts,
+              'options': {'allowPairingByName': False},
+          }
+
+          # Endpoint URL per the API reference page (the tutorial's URL is wrong).
+          # https://learn.microsoft.com/en-us/rest/api/fabric/core/items/bulk-import-item-definitions(beta)
+          api_url = (
+              f'https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}'
+              f'/items/bulkImportDefinitions?beta=true'
+          )
+          print(f'POST {api_url}')
+
+          post_resp = requests.post(api_url, headers=headers, json=request_body, timeout=120)
+
+          if post_resp.status_code == 200:
+              # Sync path — result is in the response body directly.
+              check_per_item_status(post_resp.json())
+              sys.exit(0)
+
+          if post_resp.status_code == 202:
+              # Async path — poll the LRO, then fetch the result.
+              operation_id = post_resp.headers.get('x-ms-operation-id')
+              if not operation_id:
+                  sys.exit('::error::202 response missing x-ms-operation-id header')
+
+              # A missing or non-integer Retry-After falls back to the default
+              # interval instead of raising.
+              initial_retry = parse_retry_after(post_resp.headers.get('Retry-After'))
+              print(f'202 Accepted, operation_id={operation_id}, initial Retry-After={initial_retry}s')
+
+              # poll_lro may swap the Authorization header in place, so the
+              # result request below automatically uses the freshest token.
+              poll_lro(operation_id, headers, initial_retry)
+
+              result_resp = requests.get(
+                  f'https://api.fabric.microsoft.com/v1/operations/{operation_id}/result',
+                  headers=headers,
+                  timeout=30,
+              )
+              if result_resp.status_code != 200:
+                  sys.exit(
+                      f'::error::Failed to fetch operation result: '
+                      f'HTTP {result_resp.status_code} {result_resp.text}'
+                  )
+              check_per_item_status(result_resp.json())
+              sys.exit(0)
+
+          # Any status other than 200/202 is a hard failure.
+          sys.exit(
+              f'::error::Bulk import POST failed: HTTP {post_resp.status_code} {post_resp.text}'
+          )
+          "