chore: upgrade Azure OpenAI embedding model to text-embedding-3-small (#2128)

Ayaz-Microsoft · web-flow · commit d5f93d0ff8fa · 2026-04-06T11:35:27.000+05:30
diff --git a/.env.sample b/.env.sample
@@ -27,7 +27,7 @@ AZURE_OPENAI_RESOURCE=
 AZURE_OPENAI_API_KEY=
 AZURE_OPENAI_MODEL=gpt-4o
 AZURE_OPENAI_MODEL_NAME=gpt-4o
-AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
+AZURE_OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 AZURE_OPENAI_TEMPERATURE=0
 AZURE_OPENAI_TOP_P=1.0
 AZURE_OPENAI_MAX_TOKENS=1000
diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py
@@ -212,7 +212,7 @@ def __load_config(self, **kwargs) -> None:
         else:
             # Otherwise, fallback to individual environment variable
             self.AZURE_OPENAI_EMBEDDING_MODEL = os.getenv(
-                "AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002"
+                "AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"
             )
 
         self.SHOULD_STREAM = (
diff --git a/code/backend/batch/utilities/helpers/llm_helper.py b/code/backend/batch/utilities/helpers/llm_helper.py
@@ -103,13 +103,15 @@ def get_embedding_model(self):
                 azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
                 api_key=self.env_helper.OPENAI_API_KEY,
                 azure_deployment=self.embedding_model,
+                model=self.embedding_model,
                 dimensions=dimensions,
                 chunk_size=1,
             )
         else:
             return AzureOpenAIEmbeddings(
                 azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
                 azure_deployment=self.embedding_model,
+                model=self.embedding_model,
                 dimensions=dimensions,
                 chunk_size=1,
                 azure_ad_token_provider=self.token_provider,
diff --git a/code/tests/functional/app_config.py b/code/tests/functional/app_config.py
@@ -23,7 +23,7 @@ class AppConfig:
         "AZURE_FORM_RECOGNIZER_INFO": '{"endpoint":"some-key-vault-endpoint","key":"some-key-vault-endpoint"}',
         "AZURE_OPENAI_API_KEY": "some-azure-openai-api-key",
         "AZURE_OPENAI_API_VERSION": "2024-02-01",
-        "AZURE_OPENAI_EMBEDDING_MODEL_INFO": '{"model":"some-embedding-model","modelName":"some-embedding-model-name","modelVersion":"some-embedding-model-version"}',
+        "AZURE_OPENAI_EMBEDDING_MODEL_INFO": '{"model":"text-embedding-3-small","modelName":"text-embedding-3-small","modelVersion":"1"}',
         "AZURE_OPENAI_ENDPOINT": "some-openai-endpoint",
         "AZURE_OPENAI_MAX_TOKENS": "1000",
         "AZURE_OPENAI_MODEL_INFO": '{"model":"some-openai-model","modelName":"some-openai-model-name","modelVersion":"some-openai-model-version"}',
diff --git a/code/tests/functional/conftest.py b/code/tests/functional/conftest.py
@@ -32,7 +32,7 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig):
                     "index": 0,
                 }
             ],
-            "model": "text-embedding-ada-002",
+            "model": "text-embedding-3-small",
         }
     )
 
diff --git a/code/tests/functional/tests/backend_api/default/test_conversation.py b/code/tests/functional/tests/backend_api/default/test_conversation.py
@@ -131,8 +131,9 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_get_vector_dimensions(
             method="POST",
             json={
                 "input": [[1199]],
-                "model": "text-embedding-ada-002",
+                "model": "text-embedding-3-small",
                 "encoding_format": "base64",
+                "dimensions": 1536,
             },
             headers={
                 "Accept": "application/json",
@@ -162,10 +163,9 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_sear
                 "input": [
                     [3923, 374, 279, 7438, 315, 2324, 30]
                 ],  # Embedding of "What is the meaning of life?"
-                "model": app_config.get_from_json(
-                    "AZURE_OPENAI_EMBEDDING_MODEL_INFO", "model"
-                ),
+                "model": "text-embedding-3-small",
                 "encoding_format": "base64",
+                "dimensions": 1536,
             },
             headers={
                 "Accept": "application/json",
@@ -174,7 +174,7 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_sear
                 "Api-Key": app_config.get("AZURE_OPENAI_API_KEY"),
             },
             query_string="api-version=2024-02-01",
-            times=1,
+            times=2,
         ),
     )
 
@@ -197,8 +197,9 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_stor
                 "input": [
                     [3923, 374, 279, 7438, 315, 2324, 30]
                 ],  # Embedding of "What is the meaning of life?"
-                "model": "text-embedding-ada-002",  # this is hard coded in the langchain code base
+                "model": "text-embedding-3-small",
                 "encoding_format": "base64",
+                "dimensions": 1536,
             },
             headers={
                 "Accept": "application/json",
@@ -207,7 +208,7 @@ def test_post_makes_correct_calls_to_openai_embeddings_to_embed_question_to_stor
                 "Api-Key": app_config.get("AZURE_OPENAI_API_KEY"),
             },
             query_string="api-version=2024-02-01",
-            times=1,
+            times=2,
         ),
     )
 
diff --git a/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py b/code/tests/functional/tests/backend_api/sk_orchestrator/test_response_without_tool_call.py
@@ -71,8 +71,9 @@ def test_post_makes_correct_call_to_openai_embeddings(
                 "input": [
                     [3923, 374, 279, 7438, 315, 2324, 30]
                 ],  # Embedding of "What is the meaning of life?"
-                "model": "text-embedding-ada-002",
+                "model": "text-embedding-3-small",
                 "encoding_format": "base64",
+                "dimensions": 1536,
             },
             headers={
                 "Accept": "application/json",
diff --git a/code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py b/code/tests/functional/tests/functions/advanced_image_processing/test_advanced_image_processing.py
@@ -221,6 +221,7 @@ def test_embeddings_generated_for_caption(
                     "AZURE_OPENAI_EMBEDDING_MODEL_INFO", "model"
                 ),
                 "encoding_format": "base64",
+                "dimensions": 1536,
             },
             headers={
                 "Accept": "application/json",
diff --git a/docs/LOCAL_DEPLOYMENT.md b/docs/LOCAL_DEPLOYMENT.md
@@ -71,7 +71,7 @@ Ensure you have access to an [Azure subscription](https://azure.microsoft.com/fr
 | **Model** | **Minimum Capacity** | **Recommended Capacity** |
 |-----------|---------------------|--------------------------|
 | **gpt-4.1** | 150k tokens | 200k tokens (for best performance) |
-| **text-embedding-ada-002** | 100k tokens | 150k tokens (for best performance) |
+| **text-embedding-3-small** | 100k tokens | 150k tokens (for best performance) |
 
 > **Note:** When you run `azd up`, the deployment will automatically show you regions with available quota, so this pre-check is optional but helpful for planning purposes. You can customize these settings later in [Step 3.3: Advanced Configuration](#33-advanced-configuration-optional).
 
diff --git a/docs/QuotaCheck.md b/docs/QuotaCheck.md
@@ -12,7 +12,7 @@ azd auth login
 
 ### 📌 Default Models & Capacities:
 ```
-gpt4.1:150, text-embedding-ada-002:100
+gpt4.1:150, text-embedding-3-small:100
 ```
 ### 📌 Default Regions:
 ```
@@ -38,7 +38,7 @@ australiaeast, eastus2, japaneast, uksouth
    ```
 ✔️ Check specific model(s) in default regions:
   ```
-  ./quota_check_params.sh --models gpt4.1:150,text-embedding-ada-002:100
+  ./quota_check_params.sh --models gpt4.1:150,text-embedding-3-small:100
   ```
 ✔️ Check default models in specific region(s):
 ```
@@ -50,7 +50,7 @@ australiaeast, eastus2, japaneast, uksouth
 ```
 ✔️ All parameters combined:
 ```
-./quota_check_params.sh --models gpt4.1:150,text-embedding-ada-002:100 --regions eastus2,japaneast --verbose
+./quota_check_params.sh --models gpt4.1:150,text-embedding-3-small:100 --regions eastus2,japaneast --verbose
 ```
 
 ### **Sample Output**
diff --git a/docs/TEAMS_LOCAL_DEPLOYMENT.md b/docs/TEAMS_LOCAL_DEPLOYMENT.md
@@ -60,7 +60,7 @@ Or use the [Azure Functions VS Code extension](https://marketplace.visualstudio.
 |AZURE_SEARCH_ENABLE_IN_DOMAIN|True|Limits responses to only queries relating to your data.|
 |AZURE_SEARCH_CONTENT_COLUMN||List of fields in your Azure AI Search index that contains the text content of your documents to use when formulating a bot response. Represent these as a string joined with "|", e.g. `"product_description|product_manual"`|
 |AZURE_SEARCH_CONTENT_VECTOR_COLUMN||Field from your Azure AI Search index for storing the content's Vector embeddings|
-|AZURE_SEARCH_DIMENSIONS|1536| Azure OpenAI Embeddings dimensions. 1536 for `text-embedding-ada-002`. A full list of dimensions can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#embeddings-models). |
+|AZURE_SEARCH_DIMENSIONS|1536| Azure OpenAI Embeddings dimensions. 1536 for `text-embedding-3-small`. A full list of dimensions can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#embeddings-models). |
 |AZURE_SEARCH_FIELDS_ID|id|`AZURE_SEARCH_FIELDS_ID`: Field from your Azure AI Search index that gives a unique idenitfier of the document chunk. `id` if you don't have a specific requirement.|
 |AZURE_SEARCH_FILENAME_COLUMN||`AZURE_SEARCH_FILENAME_COLUMN`: Field from your Azure AI Search index that gives a unique idenitfier of the source of your data to display in the UI.|
 |AZURE_SEARCH_TITLE_COLUMN||Field from your Azure AI Search index that gives a relevant title or header for your data content to display in the UI.|
@@ -75,7 +75,7 @@ Or use the [Azure Functions VS Code extension](https://marketplace.visualstudio.
 |AZURE_OPENAI_MODEL||The name of your model deployment|
 |AZURE_OPENAI_MODEL_NAME|gpt-4.1|The name of the model|
 |AZURE_OPENAI_API_KEY||One of the API keys of your Azure OpenAI resource|
-|AZURE_OPENAI_EMBEDDING_MODEL|text-embedding-ada-002|The name of you Azure OpenAI embeddings model deployment|
+|AZURE_OPENAI_EMBEDDING_MODEL|text-embedding-3-small|The name of your Azure OpenAI embeddings model deployment|
 |AZURE_OPENAI_TEMPERATURE|0|What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. A value of 0 is recommended when using your data.|
 |AZURE_OPENAI_TOP_P|1.0|An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. We recommend setting this to 1.0 when using your data.|
 |AZURE_OPENAI_MAX_TOKENS|1000|The maximum number of tokens allowed for the generated answer.|
diff --git a/docs/azure_openai_model_quota_settings.md b/docs/azure_openai_model_quota_settings.md
@@ -7,6 +7,6 @@ Please follow [quota check instructions guide](./QuotaCheck.md) to check quota a
 3. **Go to** the `Shared Resources` section in the bottom-left navigation menu.
 4. Select `Quota`
    - Click on the `GlobalStandard` dropdown.
-   - Select the required **GPT model** (`gpt-4.1` or `text-embedding-ada-002`).
+   - Select the required **model** (`gpt-4.1` or `text-embedding-3-small`).
    - Choose the **region** where the deployment is hosted.
 5. Request More Quota or delete any unused model deployments as needed.
diff --git a/docs/customizing_azd_parameters.md b/docs/customizing_azd_parameters.md
@@ -38,9 +38,9 @@ By default this template will use the environment name as the prefix to prevent
 | `AZURE_OPENAI_MODEL_CAPACITY` | integer | `150` | Model capacity (TPM in thousands) |
 | `AZURE_OPENAI_API_VERSION` | string | `2024-02-01` | Azure OpenAI API version |
 | `AZURE_OPENAI_STREAM` | boolean | `true` | Enable streaming responses |
-| `AZURE_OPENAI_EMBEDDING_MODEL` | string | `text-embedding-ada-002` | Embedding model deployment name |
-| `AZURE_OPENAI_EMBEDDING_MODEL_NAME` | string | `text-embedding-ada-002` | Actual embedding model name |
-| `AZURE_OPENAI_EMBEDDING_MODEL_VERSION` | string | `2` | Embedding model version |
+| `AZURE_OPENAI_EMBEDDING_MODEL` | string | `text-embedding-3-small` | Embedding model deployment name |
+| `AZURE_OPENAI_EMBEDDING_MODEL_NAME` | string | `text-embedding-3-small` | Actual embedding model name |
+| `AZURE_OPENAI_EMBEDDING_MODEL_VERSION` | string | `1` | Embedding model version |
 | `AZURE_OPENAI_EMBEDDING_MODEL_CAPACITY` | integer | `100` | Embedding model capacity (TPM in thousands) |
 | `AZURE_SEARCH_DIMENSIONS` | integer | `1536` | Azure Search vector dimensions(Update dimensions for CosmosDB) |
 | `USE_ADVANCED_IMAGE_PROCESSING` | boolean | `false` | Enable vision LLM and Computer Vision for images (must be false for PostgreSQL) |
diff --git a/docs/model_configuration.md b/docs/model_configuration.md
@@ -35,11 +35,11 @@ This document outlines the necessary steps and configurations required for setti
 
 ### EMBEDDINGS
 - `AZURE_OPENAI_EMBEDDING_MODEL`: The Azure OpenAI Model Deployment Name
-    - example: `my-text-embedding-ada-002`
+    - example: `my-text-embedding-3-small`
 - `AZURE_OPENAI_EMBEDDING_MODEL_NAME`: The Azure OpenAI Model Name
-    - example: `text-embedding-ada-002`
+    - example: `text-embedding-3-small`
 - `AZURE_OPENAI_EMBEDDING_MODEL_VERSION`: The Azure OpenAI Model Version
-    - example: `2`
+    - example: `1`
 - `AZURE_OPENAI_EMBEDDING_MODEL_CAPACITY`: The Tokens per Minute Rate Limit (thousands)
     - example: `30`
 - `AZURE_SEARCH_DIMENSIONS`: Azure OpenAI Embeddings dimensions. A full list of dimensions can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#embeddings-models).
diff --git a/docs/spikes/using-image-data/ai-vision.ipynb b/docs/spikes/using-image-data/ai-vision.ipynb
@@ -16,7 +16,7 @@
     "- Azure AI Search\n",
     "- Azure Storage Account\n",
     "- Azure OpenAI - Check the supported regions here https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models#standard-deployment-model-availability\n",
-    "    - A `text-embeddings-ada-002` model deployment\n",
+    "    - A `text-embedding-3-small` model deployment\n",
     "    - A `gpt-4-vision` model deployment\n",
     "- Azure Computer Vision\n",
     "\n",
@@ -41,15 +41,15 @@
     "2. Upload image to blob storage\n",
     "3. Generate embeddings using computer vision `vectorizeImage` API\n",
     "4. Generate a caption of the image using `gpt-4-vision`\n",
-    "5. Generate embeddings of the caption using `text-embeddings-002-ada`\n",
+    "5. Generate embeddings of the caption using `text-embedding-3-small`\n",
     "6. Store data in search index\n",
     "\n",
     "\n",
     "### Question\n",
     "\n",
     "To ask a question using this data, the following steps are performed:\n",
     "1. Generate embeddings for the question using computer vision `vectorizeText` API\n",
-    "2. Generate embeddings for the question using `text-embeddings-002-ada`\n",
+    "2. Generate embeddings for the question using `text-embedding-3-small`\n",
     "3. Search index using both embeddings\n",
     "4. Generate blob sas url from returned search results\n",
     "5. Pass question, along with blob sas url to `gpt-4-vision` chat completions end point\n",
@@ -58,7 +58,7 @@
     "## Why do we need two different embedding models?\n",
     "\n",
     "It is not required to use two different embedding models, however, using both Azure Computer Vision to embed the image\n",
-    "and `gpt-4-vision` to generate a description that is then embedded by `text-embeddings-002-ada` provides richer data and\n",
+    "and `gpt-4-vision` to generate a description that is then embedded by `text-embedding-3-small` provides richer data and\n",
     "provides better search results. This is useful in particular for diagrams and flow charts which show relationships and\n",
     "decision points.\n",
     "\n",
@@ -84,7 +84,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "a2fc464c",
    "metadata": {},
    "outputs": [],
@@ -123,7 +123,7 @@
     "openai_service = \"\"\n",
     "openai_endpoint = endpoint = f\"https://{openai_service}.openai.azure.com/openai/\"\n",
     "gpt4v_deployment_name = \"gpt-4v\"\n",
-    "embeddings_deployment_name = \"text-embedding-ada-002\"\n"
+    "embeddings_deployment_name = \"text-embedding-3-small\"\n"
    ]
   },
   {
diff --git a/infra/main.bicep b/infra/main.bicep
@@ -187,18 +187,18 @@ param azureOpenAIApiVersion string = '2024-02-01'
 param azureOpenAIStream string = 'true'
 
 @description('Optional. Azure OpenAI Embedding Model Deployment Name.')
-param azureOpenAIEmbeddingModel string = 'text-embedding-ada-002'
+param azureOpenAIEmbeddingModel string = 'text-embedding-3-small'
 
 @description('Optional. Azure OpenAI Embedding Model Name.')
-param azureOpenAIEmbeddingModelName string = 'text-embedding-ada-002'
+param azureOpenAIEmbeddingModelName string = 'text-embedding-3-small'
 
 @description('Optional. Azure OpenAI Embedding Model Version.')
-param azureOpenAIEmbeddingModelVersion string = '2'
+param azureOpenAIEmbeddingModelVersion string = '1'
 
 @description('Optional. Azure OpenAI Embedding Model Capacity - See here for more info https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/quota .')
 param azureOpenAIEmbeddingModelCapacity int = 100
 
-@description('Optional. Azure Search vector field dimensions. Must match the embedding model dimensions. 1536 for text-embedding-ada-002, 3072 for text-embedding-3-large. See https://learn.microsoft.com/en-us/azure/search/cognitive-search-skill-azure-openai-embedding#supported-dimensions-by-modelname.(Only for databaseType=CosmosDB)')
+@description('Optional. Azure Search vector field dimensions. Must match the embedding model dimensions. 1536 for text-embedding-3-small, 3072 for text-embedding-3-large. See https://learn.microsoft.com/en-us/azure/search/cognitive-search-skill-azure-openai-embedding#supported-dimensions-by-modelname.(Only for databaseType=CosmosDB)')
 param azureSearchDimensions string = '1536'
 
 @description('Optional. Name of Computer Vision Resource (if useAdvancedImageProcessing=true).')
@@ -381,14 +381,10 @@ param createdBy string = contains(deployer(), 'userPrincipalName')
 resource resourceGroupTags 'Microsoft.Resources/tags@2025-04-01' = {
   name: 'default'
   properties: {
-    tags: union(
-      existingTags,
-      allTags,
-      {
-        TemplateName: 'CWYD'
-        CreatedBy: createdBy
-      }
-    )
+    tags: union(existingTags, allTags, {
+      TemplateName: 'CWYD'
+      CreatedBy: createdBy
+    })
   }
 }
 
diff --git a/infra/main.json b/infra/main.json
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
diff --git a/infra/main.waf.parameters.json b/infra/main.waf.parameters.json
diff --git a/infra/modules/app/function.bicep b/infra/modules/app/function.bicep
diff --git a/infra/modules/core/ai/cognitiveservices.bicep b/infra/modules/core/ai/cognitiveservices.bicep
diff --git a/scripts/checkquota.sh b/scripts/checkquota.sh
diff --git a/scripts/quota_check_params.sh b/scripts/quota_check_params.sh

Original file line number	Diff line number	Diff line change
`@@ -212,7 +212,7 @@ def __load_config(self, **kwargs) -> None:`
`212`	`212`	`else:`
`213`	`213`	`# Otherwise, fallback to individual environment variable`
`214`	`214`	`self.AZURE_OPENAI_EMBEDDING_MODEL = os.getenv(`
`215`		`- "AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002"`
	`215`	`+ "AZURE_OPENAI_EMBEDDING_MODEL", "text-embedding-3-small"`
`216`	`216`	`)`
`217`	`217`
`218`	`218`	`self.SHOULD_STREAM = (`
Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,7 @@ def setup_default_mocking(httpserver: HTTPServer, app_config: AppConfig):`
`32`	`32`	`"index": 0,`
`33`	`33`	`}`
`34`	`34`	`],`
`35`		`- "model": "text-embedding-ada-002",`
	`35`	`+ "model": "text-embedding-3-small",`
`36`	`36`	`}`
`37`	`37`	`)`
`38`	`38`