Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions .github/workflows/deploy-prod-bulk.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Orchestrator workflow for deploying Fabric items to the Prod workspace via the Bulk Import API.
#
# Alternative to deploy-prod.yml. Both fire on push to main; only one runs based
# on the DEPLOY_METHOD repository variable:
# - DEPLOY_METHOD = 'bulk' → this workflow runs
# - DEPLOY_METHOD = 'fabric-cicd' or '' → deploy-prod.yml runs
# - any other value → both skip (safe default)
#
# The `Prod` GitHub Environment should have protection rules configured
# (e.g., required reviewers, branch policy restricting to main only).
#
# The ETL workflow (etl-prod.yml) triggers automatically via workflow_run
# once deployment completes successfully.

name: Deploy to Prod (Bulk API)

# Fires for pushes to main that touch Fabric item definitions or any workflow file.
on:
  push:
    branches:
      - main
    paths:
      - 'data/fabric/**'
      - '.github/workflows/**'

permissions:
  contents: read

jobs:
  deploy-bulk:
    name: Deploy via Bulk Import API
    # The job is skipped unless the DEPLOY_METHOD repository variable equals 'bulk'.
    if: ${{ vars.DEPLOY_METHOD == 'bulk' }}
    uses: ./.github/workflows/reusable-deploy-bulk.yml
    with:
      environment: Prod
    # Hand the caller's secrets through to the reusable workflow.
    secrets: inherit
6 changes: 5 additions & 1 deletion .github/workflows/deploy-prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# The ETL workflow (etl-prod.yml) triggers automatically via workflow_run
# once deployment completes successfully.

name: Deploy to Prod
name: Deploy to Prod (fabric-cicd)

on:
push:
Expand All @@ -23,6 +23,10 @@ permissions:
jobs:
deploy-supported:
name: Deploy supported items
# Gated by the DEPLOY_METHOD repository variable. Runs when unset or set to
# 'fabric-cicd'. Set DEPLOY_METHOD='bulk' to route deployments through
# deploy-prod-bulk.yml instead. Any other value disables both workflows.
if: vars.DEPLOY_METHOD == '' || vars.DEPLOY_METHOD == 'fabric-cicd'
uses: ./.github/workflows/reusable-deploy-supported.yml
with:
environment: Prod
Expand Down
29 changes: 29 additions & 0 deletions .github/workflows/deploy-test-bulk.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Orchestrator workflow for deploying Fabric items to the Test workspace via the Bulk Import API.
#
# Alternative to deploy-test.yml. Both fire on push to test; only one runs based
# on the DEPLOY_METHOD repository variable:
# - DEPLOY_METHOD = 'bulk' → this workflow runs
# - DEPLOY_METHOD = 'fabric-cicd' or '' → deploy-test.yml runs
# - any other value → both skip (safe default)
#
# The ETL workflow (etl-test.yml) triggers automatically via workflow_run
# once deployment completes successfully.

name: Deploy to Test (Bulk API)

# Fires for pushes to test that touch Fabric item definitions or any workflow file.
on:
  push:
    branches:
      - test
    paths:
      - 'data/fabric/**'
      - '.github/workflows/**'

permissions:
  contents: read

jobs:
  deploy-bulk:
    name: Deploy via Bulk Import API
    # The job is skipped unless the DEPLOY_METHOD repository variable equals 'bulk'.
    if: ${{ vars.DEPLOY_METHOD == 'bulk' }}
    uses: ./.github/workflows/reusable-deploy-bulk.yml
    with:
      environment: Test
    # Hand the caller's secrets through to the reusable workflow.
    secrets: inherit
6 changes: 5 additions & 1 deletion .github/workflows/deploy-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# The ETL workflow (etl-test.yml) triggers automatically via workflow_run
# once deployment completes successfully.

name: Deploy to Test
name: Deploy to Test (fabric-cicd)

on:
push:
Expand All @@ -20,6 +20,10 @@ permissions:
jobs:
deploy-supported:
name: Deploy supported items
# Gated by the DEPLOY_METHOD repository variable. Runs when unset or set to
# 'fabric-cicd'. Set DEPLOY_METHOD='bulk' to route deployments through
# deploy-test-bulk.yml instead. Any other value disables both workflows.
if: vars.DEPLOY_METHOD == '' || vars.DEPLOY_METHOD == 'fabric-cicd'
uses: ./.github/workflows/reusable-deploy-supported.yml
with:
environment: Test
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/etl-prod.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ name: ETL - Prod

on:
workflow_run:
workflows: ["Deploy to Prod"]
# Listens to both deployment workflows. Both are triggered by the same push,
# but the DEPLOY_METHOD repository variable lets at most one of them actually
# deploy; the other finishes with its job skipped.
# The success conclusion gate below skips ETL when a deploy was skipped.
workflows: ["Deploy to Prod (fabric-cicd)", "Deploy to Prod (Bulk API)"]
types: [completed]

permissions:
Expand Down
5 changes: 4 additions & 1 deletion .github/workflows/etl-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ name: ETL - Test

on:
workflow_run:
workflows: ["Deploy to Test"]
# Listens to both deployment workflows. Both are triggered by the same push,
# but the DEPLOY_METHOD repository variable lets at most one of them actually
# deploy; the other finishes with its job skipped.
# The success conclusion gate below skips ETL when a deploy was skipped.
workflows: ["Deploy to Test (fabric-cicd)", "Deploy to Test (Bulk API)"]
types: [completed]

permissions:
Expand Down
265 changes: 265 additions & 0 deletions .github/workflows/reusable-deploy-bulk.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
# Reusable workflow: Deploy supported Fabric items via the Bulk Import Item Definitions API (Preview).
#
# Alternative deployment path to reusable-deploy-supported.yml. Uses the Fabric
# REST API's bulk import endpoint instead of the fabric-cicd Python library.
# Selected at orchestrator level via the DEPLOY_METHOD repository variable.
#
# Called by: deploy-test-bulk.yml, deploy-prod-bulk.yml
#
# Prerequisites:
# - GitHub Environment secrets: AZURE_TENANT_ID, AZURE_CLIENT_ID,
# AZURE_CLIENT_SECRET, FABRIC_WORKSPACE_ID
# - Service principal must have Contributor (or higher) role on the target workspace
# - Fabric Admin must enable "Service principals can use Fabric APIs"
# - Every item type in the request payload must support service principals
# (the bulk API requires SPN support for ALL items in the request, not just some)
#
# Known gaps vs. reusable-deploy-supported.yml (fabric-cicd):
# - No parameter.yml find_replace / key_value_replace substitution
# - No orphan cleanup (Bulk Import API only supports Create/Update, not Delete)
# - No item_type_in_scope filter (deploys everything in repository_directory)
#
# API references:
# - Bulk import: https://learn.microsoft.com/en-us/rest/api/fabric/core/items/bulk-import-item-definitions(beta)
# - Long running ops: https://learn.microsoft.com/en-us/rest/api/fabric/articles/long-running-operation
#
# TODO: When the Bulk Import API graduates from Preview, drop the ?beta=true
# query parameter and re-verify the endpoint URL.

name: 'Reusable: Deploy via Bulk Import API'

on:
  workflow_call:
    inputs:
      # Selects the GitHub Environment the deploy job binds to.
      environment:
        description: 'Target environment (Test, Prod)'
        required: true
        type: string
      # Exported to the deploy script as REPOSITORY_DIRECTORY.
      repository_directory:
        description: 'Path to the Fabric item definitions'
        required: false
        type: string
        default: 'data/fabric'

permissions:
  contents: read

jobs:
  deploy:
    name: Deploy via Bulk Import API (${{ inputs.environment }})
    runs-on: ubuntu-latest
    # Job-level ceiling in minutes.
    timeout-minutes: 30
    environment: ${{ inputs.environment }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6
        with:
          python-version: '3.12'

      # requests is the only third-party package the inline deploy script imports.
      - name: Install dependencies
        run: pip install requests

# Runs the inline Python deploy script. Everything the script needs is passed
# through the environment variables below and read via os.environ.
- name: Bulk import item definitions
env:
# Service principal credentials used for the client-credentials token request.
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
# Target workspace; becomes part of the bulk import endpoint URL.
FABRIC_WORKSPACE_ID: ${{ secrets.FABRIC_WORKSPACE_ID }}
# Directory scanned for item definition files.
REPOSITORY_DIRECTORY: ${{ inputs.repository_directory }}
run: |
python -c "
import base64
import json
import os
import pathlib
import sys
import time
import requests

# Polling configuration (decisions #15, #16)
# Wait, in seconds, used when no usable Retry-After value is available.
POLL_FALLBACK_SECONDS = 30
# Lower bound, in seconds, applied to every wait between polls.
POLL_FLOOR_SECONDS = 5
# Overall polling budget in seconds (20 minutes) before poll_lro gives up.
POLL_TIMEOUT_SECONDS = 20 * 60
# The bearer token is re-acquired on every Nth poll.
TOKEN_REFRESH_EVERY_N_POLLS = 20

# Files to skip when building definitionParts[]. Two layers of exclusion:
# 1. Named files: known files that should never be sent (parameter.yml is
# fabric-cicd config; .gitkeep is a Git placeholder).
# 2. Structural rule: item definitions always live inside *.<Type>/ folders,
# so any file directly under repository_directory is excluded by
# construction (handled in build_definition_parts).
# Matching is done on the bare file name, at any depth.
EXCLUDED_FILES = {'parameter.yml', '.gitkeep'}

def _require_env(name: str) -> str:
    # Returns the value of environment variable *name*, exiting with an
    # ::error:: message when it is unset or blank.
    #
    # GitHub Actions expands an unset secret or input to an empty string, so a
    # plain os.environ lookup succeeds and the problem only surfaces later as
    # an opaque HTTP failure. Failing here names the offending variable.
    value = os.environ.get(name, '')
    if not value.strip():
        sys.exit(f'::error::Required environment variable {name} is missing or empty')
    return value


tenant_id = _require_env('AZURE_TENANT_ID')
client_id = _require_env('AZURE_CLIENT_ID')
client_secret = _require_env('AZURE_CLIENT_SECRET')
workspace_id = _require_env('FABRIC_WORKSPACE_ID')
# An empty REPOSITORY_DIRECTORY would resolve to the current working directory
# and sweep the whole checkout into the request, so it is validated as well.
repo_dir = pathlib.Path(_require_env('REPOSITORY_DIRECTORY')).resolve()


def acquire_token() -> str:
    # Obtains an access token for the Fabric API scope with the OAuth2
    # client-credentials grant and returns it as a string.
    # Exits the process with an ::error:: message on any non-200 response.
    form = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret,
        'scope': 'https://api.fabric.microsoft.com/.default',
    }
    endpoint = f'https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token'
    response = requests.post(endpoint, data=form, timeout=30)
    if response.status_code != 200:
        sys.exit(f'::error::Token acquisition failed: HTTP {response.status_code} {response.text}')

    access_token = response.json()['access_token']
    # Register the token as a masked value so it is hidden in workflow logs
    # (decision #10).
    print(f'::add-mask::{access_token}')
    return access_token


def build_definition_parts() -> list:
    # Collects every item definition file under repo_dir and returns the
    # definitionParts list: one dict per file holding its repo-relative path
    # (with a leading slash) and its base64-encoded content.
    # Exits with an ::error:: message when repo_dir is not a directory or when
    # no file qualifies.
    if not repo_dir.is_dir():
        sys.exit(f'::error::Repository directory not found: {repo_dir}')

    def belongs_to_item(candidate) -> bool:
        # Regular files only, minus the named exclusions, minus anything
        # sitting directly in repo_dir (item definitions live inside
        # *.<Type>/ subfolders, so root-level files cannot belong to an item).
        return (
            candidate.is_file()
            and candidate.name not in EXCLUDED_FILES
            and candidate.parent != repo_dir
        )

    # sorted() keeps the part order stable from run to run.
    collected = [
        {
            'path': '/' + item_file.relative_to(repo_dir).as_posix(),
            'payload': base64.b64encode(item_file.read_bytes()).decode('ascii'),
            'payloadType': 'InlineBase64',
        }
        for item_file in sorted(repo_dir.rglob('*'))
        if belongs_to_item(item_file)
    ]
    if not collected:
        sys.exit(f'::error::No item definition files found under {repo_dir}')
    return collected


def _retry_after_seconds(raw_value) -> int:
    # Converts a Retry-After header value into a wait in seconds.
    # RFC 9110 allows the header to carry either delta-seconds or an HTTP-date;
    # only the integer form is understood here. A missing, date-formatted or
    # otherwise unparsable value falls back to POLL_FALLBACK_SECONDS instead of
    # raising in the middle of a deployment. The result is never below
    # POLL_FLOOR_SECONDS.
    try:
        seconds = int(raw_value)
    except (TypeError, ValueError):
        seconds = POLL_FALLBACK_SECONDS
    return max(seconds, POLL_FLOOR_SECONDS)


def poll_lro(operation_id: str, headers: dict, initial_retry_after: int) -> None:
    # Polls the long-running-operation endpoint until the operation finishes.
    #
    # Args:
    #   operation_id: ID taken from the x-ms-operation-id response header.
    #   headers: Request headers. The Authorization entry is replaced in place
    #     whenever the token is refreshed, so the caller sees the new token.
    #   initial_retry_after: Seconds to wait before the first poll; 0 or None
    #     selects POLL_FALLBACK_SECONDS.
    #
    # Returns normally once the status is Succeeded. Exits the process with an
    # ::error:: message on a Failed or Undefined status, on an unexpected HTTP
    # status, or when POLL_TIMEOUT_SECONDS has elapsed.
    status_url = f'https://api.fabric.microsoft.com/v1/operations/{operation_id}'
    retry_after = max(initial_retry_after or POLL_FALLBACK_SECONDS, POLL_FLOOR_SECONDS)
    started = time.monotonic()
    poll_count = 0

    while True:
        remaining = POLL_TIMEOUT_SECONDS - (time.monotonic() - started)
        if remaining <= 0:
            sys.exit(
                f'::error::LRO polling timed out after {POLL_TIMEOUT_SECONDS}s '
                f'(operation {operation_id})'
            )

        # Never sleep past the deadline: the last poll happens at the timeout
        # rather than up to one full interval after it.
        time.sleep(min(retry_after, remaining))
        poll_count += 1
        # Measured after the sleep so the logged time matches the request.
        elapsed = time.monotonic() - started

        # Refresh token periodically for long-running operations
        # (mirrors the pattern in reusable-fabric-etl.yml).
        if poll_count % TOKEN_REFRESH_EVERY_N_POLLS == 0:
            headers['Authorization'] = f'Bearer {acquire_token()}'

        resp = requests.get(status_url, headers=headers, timeout=30)

        if resp.status_code == 429:
            # Throttled: the operation itself is still in progress, so back off
            # and poll again instead of failing the job. The loop stays bounded
            # by POLL_TIMEOUT_SECONDS.
            retry_after = _retry_after_seconds(resp.headers.get('Retry-After'))
            print(
                f'Poll {poll_count} (t+{int(elapsed)}s): throttled (HTTP 429), '
                f'next poll in {retry_after}s'
            )
            continue

        if resp.status_code != 200:
            sys.exit(f'::error::Poll request failed: HTTP {resp.status_code} {resp.text}')

        body = resp.json()
        status = body.get('status', 'Unknown')
        print(f'Poll {poll_count} (t+{int(elapsed)}s): status={status}')

        if status == 'Succeeded':
            return
        if status in ('Failed', 'Undefined'):
            print(json.dumps(body, indent=2))
            sys.exit(f'::error::LRO ended with status: {status}')

        # NotStarted or Running: keep polling. Honor Retry-After if present.
        retry_after = _retry_after_seconds(resp.headers.get('Retry-After'))


def check_per_item_status(result: dict) -> None:
    # Inspects the per-item outcome of a bulk import and fails the job when any
    # item did not import cleanly.
    #
    # Args:
    #   result: Parsed JSON result body; per-item entries are read from its
    #     importItemDefinitionsDetails list.
    #
    # Prints the full result for the log. Exits with an ::error:: message when
    # the list is missing or empty, or when any entry has operationStatus
    # Failed or SucceededDespiteFailures. Returns normally otherwise.
    details = result.get('importItemDefinitionsDetails', [])
    print(json.dumps(result, indent=2))
    if not details:
        sys.exit('::error::Result body has no importItemDefinitionsDetails')

    failures = [
        d for d in details
        if d.get('operationStatus') in ('Failed', 'SucceededDespiteFailures')
    ]
    if failures:
        # Workflow commands are parsed one line at a time, so text after a
        # newline never reaches the annotation. Emit one ::error:: line per
        # failed item so each failure is surfaced on its own.
        for d in failures:
            print('::error::{} ({}): {}'.format(
                d.get('itemDisplayName'),
                d.get('itemType'),
                d.get('operationStatus'),
            ))
        sys.exit(f'::error::{len(failures)} item(s) failed')

    print(f'All {len(details)} items deployed successfully.')


# ---------- main flow ----------
# Authenticate, build the request from the files on disk, submit it, and then
# evaluate the per-item result, either straight from a 200 response or after
# polling the long-running operation started by a 202 response.
token = acquire_token()
headers = {'Authorization': f'Bearer {token}', 'Content-Type': 'application/json'}

parts = build_definition_parts()
print(f'Built request body with {len(parts)} definition parts from {repo_dir}')

request_body = {
    'definitionParts': parts,
    'options': {'allowPairingByName': False},
}

# Endpoint URL per the API reference page (the tutorial's URL is wrong).
# https://learn.microsoft.com/en-us/rest/api/fabric/core/items/bulk-import-item-definitions(beta)
api_url = (
    f'https://api.fabric.microsoft.com/v1/workspaces/{workspace_id}'
    f'/items/bulkImportDefinitions?beta=true'
)
print(f'POST {api_url}')

post_resp = requests.post(api_url, headers=headers, json=request_body, timeout=120)

if post_resp.status_code == 200:
    # Sync path: result is in the response body directly.
    check_per_item_status(post_resp.json())
    sys.exit(0)

if post_resp.status_code == 202:
    # Async path: poll the LRO, then fetch the result.
    operation_id = post_resp.headers.get('x-ms-operation-id')
    if not operation_id:
        sys.exit('::error::202 response missing x-ms-operation-id header')

    # Retry-After may be an HTTP-date rather than a number of seconds. The
    # import has already been accepted at this point, so fall back to the
    # default interval instead of crashing on an unparsable value.
    try:
        initial_retry = int(post_resp.headers.get('Retry-After', POLL_FALLBACK_SECONDS))
    except ValueError:
        initial_retry = POLL_FALLBACK_SECONDS
    print(f'202 Accepted, operation_id={operation_id}, initial Retry-After={initial_retry}s')

    # poll_lro may replace headers['Authorization'] in place; the result
    # request below therefore uses the most recent token.
    poll_lro(operation_id, headers, initial_retry)

    result_resp = requests.get(
        f'https://api.fabric.microsoft.com/v1/operations/{operation_id}/result',
        headers=headers,
        timeout=30,
    )
    if result_resp.status_code != 200:
        sys.exit(
            f'::error::Failed to fetch operation result: '
            f'HTTP {result_resp.status_code} {result_resp.text}'
        )
    check_per_item_status(result_resp.json())
    sys.exit(0)

# Any other status code is a rejected request.
sys.exit(
    f'::error::Bulk import POST failed: HTTP {post_resp.status_code} {post_resp.text}'
)
"