Reduce model capacity to 50k

chaudhariniraj · chaudhariniraj · commit 157780b9f21c · 2026-06-16T15:07:47.000+05:30
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -17,7 +17,7 @@ on:
     - cron: "0 11,23 * * *" # Runs at 11:00 AM and 11:00 PM GMT
   workflow_dispatch: #Allow manual triggering
 env:
-  GPT_MIN_CAPACITY: 150
+  GPT_MIN_CAPACITY: 50
   O4_MINI_MIN_CAPACITY: 50
   GPT41_MINI_MIN_CAPACITY: 50
   BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
diff --git a/.github/workflows/job-deploy.yml b/.github/workflows/job-deploy.yml
@@ -98,7 +98,7 @@ on:
         value: ${{ jobs.azure-setup.outputs.QUOTA_FAILED }}
 
 env:
-  GPT_MIN_CAPACITY: 150
+  GPT_MIN_CAPACITY: 50
   O4_MINI_MIN_CAPACITY: 50
   GPT41_MINI_MIN_CAPACITY: 50
   BRANCH_NAME: ${{ github.event.workflow_run.head_branch || github.head_ref || github.ref_name }}
diff --git a/docs/CustomizingAzdParameters.md b/docs/CustomizingAzdParameters.md
@@ -18,7 +18,7 @@ By default this template will use the environment name as the prefix to prevent
 | `AZURE_ENV_MODEL_4_1_DEPLOYMENT_TYPE` | string | `GlobalStandard` | Defines the deployment type for the AI model (e.g., Standard, GlobalStandard).                     |
 | `AZURE_ENV_MODEL_4_1_NAME`          | string | `gpt-4.1`          | Specifies the name of the GPT model to be deployed.                                                |
 | `AZURE_ENV_MODEL_4_1_VERSION`       | string | `2025-04-14`      | Version of the GPT model to be used for deployment.                                                |
-| `AZURE_ENV_MODEL_4_1_CAPACITY`       | int | `150`      | Sets the GPT model capacity.                                                |
+| `AZURE_ENV_MODEL_4_1_CAPACITY`       | int | `50`      | Sets the GPT model capacity.                                                |
 | `AZURE_ENV_REASONING_MODEL_DEPLOYMENT_TYPE` | string | `GlobalStandard` | Defines the deployment type for the AI model (e.g., Standard, GlobalStandard).                     |
 | `AZURE_ENV_REASONING_MODEL_NAME`          | string | `o4-mini`          | Specifies the name of the reasoning GPT model to be deployed.                                                |
 | `AZURE_ENV_REASONING_MODEL_VERSION`       | string | `2025-04-16`      | Version of the reasoning GPT model to be used for deployment.                                                |
diff --git a/docs/DeploymentGuide.md b/docs/DeploymentGuide.md
@@ -68,7 +68,7 @@ Ensure you have access to an [Azure subscription](https://azure.microsoft.com/fr
 📖 **Follow:** [Quota Check Instructions](./quota_check.md) to ensure sufficient capacity.
 
 **Default Quota Configuration:**
-- **GPT-4.1:** 150k tokens
+- **GPT-4.1:** 50k tokens
 - **o4-mini:** 50k tokens
 - **GPT-4.1-mini:** 50k tokens
 
@@ -246,7 +246,7 @@ You can customize various deployment settings before running `azd up`, including
 <details>
   <summary><b>[Optional] Quota Recommendations</b></summary>
 
-By default, the **GPT model capacity** in deployment is set to **150k tokens**.
+By default, the **GPT model capacity** in deployment is set to **50k tokens**.
 
 To adjust quota settings, follow these [steps](./AzureGPTQuotaSettings.md).
 
diff --git a/docs/quota_check.md b/docs/quota_check.md
@@ -1,7 +1,7 @@
 ## Check Quota Availability Before Deployment
 
 Before deploying the accelerator, **ensure sufficient quota availability** for the required model.
-> **For Global Standard | GPT-4o - the capacity to at least 150k tokens for optimal performance.**
+> **For Global Standard | GPT-4.1 - ensure the capacity is at least 50k tokens for optimal performance.**
 
 ### Login if you have not done so already
 ```
@@ -16,7 +16,7 @@ az login --use-device-code
 
 ### 📌 Default Models & Capacities:
 ```
-gpt4.1:150,o4-mini:50,gpt4.1-mini:50
+gpt4.1:50,o4-mini:50,gpt4.1-mini:50
 ```
 ### 📌 Default Regions:
 ```
@@ -42,23 +42,23 @@ australiaeast, eastus2, francecentral, japaneast, norwayeast, swedencentral, uks
    ```
 ✔️ Check specific model(s) in default regions:
   ```
-  ./quota_check_params.sh --models gpt4.1:150
+  ./quota_check_params.sh --models gpt4.1:50
   ```
 ✔️ Check default models in specific region(s):
   ```
 ./quota_check_params.sh --regions eastus2,westus
   ```
 ✔️ Passing Both models and regions:  
   ```
-  ./quota_check_params.sh --models gpt4.1:150 --regions eastus2,westus
+  ./quota_check_params.sh --models gpt4.1:50 --regions eastus2,westus
   ```
 ✔️ All parameters combined:
   ```
- ./quota_check_params.sh --models gpt4.1:150 --regions eastus2,westus --verbose
+ ./quota_check_params.sh --models gpt4.1:50 --regions eastus2,westus --verbose
   ```
 ✔️ Multiple models with single region:
   ```
- ./quota_check_params.sh --models gpt4.1:150,gpt4.1-mini:50 --regions eastus2 --verbose
+ ./quota_check_params.sh --models gpt4.1:50,gpt4.1-mini:50 --regions eastus2 --verbose
   ```
 
 ### **Sample Output**
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -37,7 +37,7 @@ var deployingUserPrincipalId = deployerInfo.objectId
   azd: {
     type: 'location'
     usageName: [
-      'OpenAI.GlobalStandard.gpt4.1, 150'
+      'OpenAI.GlobalStandard.gpt4.1, 50'
       'OpenAI.GlobalStandard.o4-mini, 50'
       'OpenAI.GlobalStandard.gpt4.1-mini, 50'
     ]
@@ -100,8 +100,8 @@ param gptReasoningModelDeploymentType string = 'GlobalStandard'
 @description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
 param gptDeploymentCapacity int = 50
 
-@description('Optional. AI model deployment token capacity. Defaults to 150 for optimal performance.')
-param gpt4_1ModelCapacity int = 150
+@description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
+param gpt4_1ModelCapacity int = 50
 
 @description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
 param gptReasoningModelCapacity int = 50
diff --git a/infra/main.json b/infra/main.json
@@ -6,7 +6,7 @@
     "_generator": {
       "name": "bicep",
       "version": "0.43.8.12551",
-      "templateHash": "6587818059632090787"
+      "templateHash": "17441022390921143507"
     },
     "name": "Multi-Agent Custom Automation Engine",
     "description": "This module contains the resources required to deploy the [Multi-Agent Custom Automation Engine solution accelerator](https://github.com/microsoft/Multi-Agent-Custom-Automation-Engine-Solution-Accelerator) for both Sandbox environments and WAF aligned environments.\n\n> **Note:** This module is not intended for broad, generic use, as it was designed by the Commercial Solution Areas CTO team, as a Microsoft Solution Accelerator. Feature requests and bug fix requests are welcome if they support the needs of this organization but may not be incorporated if they aim to make this module more generic than what it needs to be for its primary use case. This module will likely be updated to leverage AVM resource modules in the future. This may result in breaking changes in upcoming versions when these features are implemented.\n"
@@ -64,7 +64,7 @@
         "azd": {
           "type": "location",
           "usageName": [
-            "OpenAI.GlobalStandard.gpt4.1, 150",
+            "OpenAI.GlobalStandard.gpt4.1, 50",
             "OpenAI.GlobalStandard.o4-mini, 50",
             "OpenAI.GlobalStandard.gpt4.1-mini, 50"
           ]
@@ -176,9 +176,9 @@
     },
     "gpt4_1ModelCapacity": {
       "type": "int",
-      "defaultValue": 150,
+      "defaultValue": 50,
       "metadata": {
-        "description": "Optional. AI model deployment token capacity. Defaults to 150 for optimal performance."
+        "description": "Optional. AI model deployment token capacity. Defaults to 50 for optimal performance."
       }
     },
     "gptReasoningModelCapacity": {
@@ -27975,9 +27975,9 @@
       },
       "dependsOn": [
         "aiFoundryAiServices",
-        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').openAI)]",
-        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').cognitiveServices)]",
         "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').aiServices)]",
+        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').cognitiveServices)]",
+        "[format('avmPrivateDnsZones[{0}]', variables('dnsZoneIndex').openAI)]",
         "virtualNetwork"
       ]
     },
diff --git a/infra/main_custom.bicep b/infra/main_custom.bicep
@@ -37,7 +37,7 @@ var deployingUserPrincipalId = deployerInfo.objectId
   azd: {
     type: 'location'
     usageName: [
-      'OpenAI.GlobalStandard.gpt4.1, 150'
+      'OpenAI.GlobalStandard.gpt4.1, 50'
       'OpenAI.GlobalStandard.o4-mini, 50'
       'OpenAI.GlobalStandard.gpt4.1-mini, 50'
     ]
@@ -100,8 +100,8 @@ param gptReasoningModelDeploymentType string = 'GlobalStandard'
 @description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
 param gptDeploymentCapacity int = 50
 
-@description('Optional. AI model deployment token capacity. Defaults to 150 for optimal performance.')
-param gpt4_1ModelCapacity int = 150
+@description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
+param gpt4_1ModelCapacity int = 50
 
 @description('Optional. AI model deployment token capacity. Defaults to 50 for optimal performance.')
 param gptReasoningModelCapacity int = 50
diff --git a/infra/scripts/quota_check_params.sh b/infra/scripts/quota_check_params.sh
@@ -47,7 +47,7 @@ log_verbose() {
 }
 
 # Default Models and Capacities (Comma-separated in "model:capacity" format)
-DEFAULT_MODEL_CAPACITY="gpt4.1:150,o4-mini:50,gpt4.1-mini:50"
+DEFAULT_MODEL_CAPACITY="gpt4.1:50,o4-mini:50,gpt4.1-mini:50"
 # Convert the comma-separated string into an array
 IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"
 

Original file line number	Diff line number	Diff line change
`@@ -47,7 +47,7 @@ log_verbose() {`
`47`	`47`	`}`
`48`	`48`
`49`	`49`	`# Default Models and Capacities (Comma-separated in "model:capacity" format)`
`50`		`-DEFAULT_MODEL_CAPACITY="gpt4.1:150,o4-mini:50,gpt4.1-mini:50"`
	`50`	`+DEFAULT_MODEL_CAPACITY="gpt4.1:50,o4-mini:50,gpt4.1-mini:50"`
`51`	`51`	`# Convert the comma-separated string into an array`
`52`	`52`	`IFS=',' read -r -a MODEL_CAPACITY_PAIRS <<< "$DEFAULT_MODEL_CAPACITY"`
`53`	`53`