Skip to content

Commit 79f6852

Browse files
Vamshi-Microsoft and Copilot authored
fix: fixed ServiceUnavailable errors in Post Deployment Script when executed through Service Principal (#2221)
Co-authored-by: Copilot <copilot@github.com>
1 parent 327ea35 commit 79f6852

2 files changed

Lines changed: 59 additions & 38 deletions

File tree

.github/workflows/job-post-deployment-setup.yml

Lines changed: 1 addition & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -43,40 +43,7 @@ jobs:
4343
run: |
4444
pip install psycopg2-binary azure-identity
4545
46-
- name: Run Post-Deployment Setup (Attempt 1)
47-
id: setup1
48-
shell: bash
49-
env:
50-
RESOURCE_GROUP: ${{ inputs.RESOURCE_GROUP_NAME }}
51-
run: |
52-
chmod +x scripts/post_deployment_setup.sh
53-
bash scripts/post_deployment_setup.sh "$RESOURCE_GROUP"
54-
continue-on-error: true
55-
56-
- name: Wait 20 seconds before retry
57-
if: ${{ steps.setup1.outcome == 'failure' }}
58-
shell: bash
59-
run: sleep 20s
60-
61-
- name: Run Post-Deployment Setup (Attempt 2)
62-
id: setup2
63-
if: ${{ steps.setup1.outcome == 'failure' }}
64-
shell: bash
65-
env:
66-
RESOURCE_GROUP: ${{ inputs.RESOURCE_GROUP_NAME }}
67-
run: |
68-
chmod +x scripts/post_deployment_setup.sh
69-
bash scripts/post_deployment_setup.sh "$RESOURCE_GROUP"
70-
continue-on-error: true
71-
72-
- name: Wait 40 seconds before final retry
73-
if: ${{ steps.setup2.outcome == 'failure' }}
74-
shell: bash
75-
run: sleep 40s
76-
77-
- name: Run Post-Deployment Setup (Attempt 3)
78-
id: setup3
79-
if: ${{ steps.setup2.outcome == 'failure' }}
46+
- name: Run Post-Deployment Setup
8047
shell: bash
8148
env:
8249
RESOURCE_GROUP: ${{ inputs.RESOURCE_GROUP_NAME }}

scripts/post_deployment_setup.sh

Lines changed: 58 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -196,30 +196,84 @@ else
196196
sleep $RETRY_INTERVAL
197197
done
198198

199+
# Force host re-init: identity-based AzureWebJobsStorage role assignments
200+
# can land after the host's first boot, leaving the ARM-proxied keystore
201+
# endpoint stuck on InternalServerError/ServiceUnavailable (most common
202+
# under service-principal deployments).
203+
SUBSCRIPTION_ID=$(az account show --query "id" -o tsv | tr -d '\r')
204+
205+
echo "✓ Restarting function app to ensure host runtime is in a clean state..."
206+
az functionapp restart --name "$FUNCTION_APP_NAME" --resource-group "$RESOURCE_GROUP" >/dev/null 2>&1 || true
207+
sleep 20
208+
209+
# Wait for site to report Running again
210+
for i in $(seq 1 20); do
211+
STATE=$(az functionapp show --name "$FUNCTION_APP_NAME" --resource-group "$RESOURCE_GROUP" --query "state" -o tsv 2>/dev/null || true)
212+
[ "$STATE" = "Running" ] && break
213+
echo " [${i}/20] Function app not Running after restart. Retrying in 15s..."
214+
sleep 15
215+
done
216+
217+
echo "Waiting for Functions host runtime to be ready..."
218+
HOST_STATUS_URI="/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Web/sites/${FUNCTION_APP_NAME}/host/default/properties/status?api-version=2023-01-01"
219+
HOST_MAX_RETRIES=30
220+
HOST_RETRY_INTERVAL=20
221+
for i in $(seq 1 $HOST_MAX_RETRIES); do
222+
HOST_STATE=$(az rest --method get --uri "$HOST_STATUS_URI" --query "properties.state" -o tsv 2>/dev/null || true)
223+
if [ "$HOST_STATE" = "Running" ]; then
224+
echo "Functions host runtime is Running."
225+
break
226+
fi
227+
echo " [${i}/${HOST_MAX_RETRIES}] Host runtime state: '${HOST_STATE:-unknown}'. Retrying in ${HOST_RETRY_INTERVAL}s..."
228+
sleep $HOST_RETRY_INTERVAL
229+
done
230+
231+
# Warm up the host (best-effort).
232+
curl -fsS -o /dev/null -m 30 "https://${FUNCTION_APP_NAME}.azurewebsites.net/" >/dev/null 2>&1 || true
233+
199234
# Set the function key via REST API (with retries — the host runtime may not be ready yet)
200235
echo "✓ Setting function key 'ClientKey' on '${FUNCTION_APP_NAME}'..."
201-
SUBSCRIPTION_ID=$(az account show --query "id" -o tsv | tr -d '\r')
202236
URI="/subscriptions/${SUBSCRIPTION_ID}/resourceGroups/${RESOURCE_GROUP}/providers/Microsoft.Web/sites/${FUNCTION_APP_NAME}/host/default/functionKeys/clientKey?api-version=2023-01-01"
203237
BODY="{\"properties\":{\"name\":\"ClientKey\",\"value\":\"${FUNCTION_KEY}\"}}"
204238

205239
KEY_SET=false
206-
KEY_MAX_RETRIES=5
240+
KEY_MAX_RETRIES=20
207241
KEY_RETRY_INTERVAL=30
208242
for attempt in $(seq 1 $KEY_MAX_RETRIES); do
209243
REST_ERR=$(az rest --method put --uri "$URI" --body "$BODY" 2>&1 > /dev/null) || true
210244
if [ -z "$REST_ERR" ]; then
211245
KEY_SET=true
212246
break
213247
fi
214-
echo " [${attempt}/${KEY_MAX_RETRIES}] Host runtime not ready yet. Retrying in ${KEY_RETRY_INTERVAL}s..."
215-
echo " $REST_ERR"
248+
# Treat ServiceUnavailable / InternalServerError from the host runtime as transient.
249+
if echo "$REST_ERR" | grep -qiE "ServiceUnavailable|InternalServerError"; then
250+
echo " [${attempt}/${KEY_MAX_RETRIES}] Host runtime transient error. Retrying in ${KEY_RETRY_INTERVAL}s..."
251+
# Every 5 attempts, restart to nudge a stuck host.
252+
if [ $((attempt % 5)) -eq 0 ] && [ $attempt -lt $KEY_MAX_RETRIES ]; then
253+
echo " → Re-restarting function app to clear stuck host state..."
254+
az functionapp restart --name "$FUNCTION_APP_NAME" --resource-group "$RESOURCE_GROUP" >/dev/null 2>&1 || true
255+
sleep 30
256+
fi
257+
else
258+
echo " [${attempt}/${KEY_MAX_RETRIES}] Key set failed. Retrying in ${KEY_RETRY_INTERVAL}s..."
259+
echo " $REST_ERR"
260+
fi
216261
sleep $KEY_RETRY_INTERVAL
217262
done
218263

219264
if [ "$KEY_SET" = "true" ]; then
220265
echo "✓ Function key set successfully."
221266
else
222267
echo "✗ ERROR: Failed to set function key on '${FUNCTION_APP_NAME}' after ${KEY_MAX_RETRIES} attempts." >&2
268+
echo " Last error: $REST_ERR" >&2
269+
echo "" >&2
270+
echo " Manual workaround:" >&2
271+
echo " 1. In the Azure Portal, open Function App '${FUNCTION_APP_NAME}' → Functions → App keys." >&2
272+
echo " 2. Add a Host key named 'ClientKey' with the value of the 'FUNCTION-KEY' secret" >&2
273+
echo " in Key Vault '${KEY_VAULT_NAME}'." >&2
274+
echo " 3. Or run:" >&2
275+
echo " az functionapp keys set --name ${FUNCTION_APP_NAME} --resource-group ${RESOURCE_GROUP} \\" >&2
276+
echo " --key-type functionKeys --key-name ClientKey --key-value <FUNCTION-KEY value>" >&2
223277
restore_network_access
224278
exit 1
225279
fi

0 commit comments

Comments
 (0)