From 2446c6b24c0f59239133ea5d630e0428a6540dee Mon Sep 17 00:00:00 2001
From: Josh Bradley <joshbradley@microsoft.com>
Date: Wed, 18 Mar 2026 16:56:16 -0400
Subject: [PATCH 1/4] add initial support for azure openai models and managed
 identity auth

---
 examples/eval_api_azure_openai_demo.py |  57 +++++++++++
 opencompass/models/openai_api.py       | 132 ++++++++++++++++++-------
 requirements/api.txt                   |   1 +
 setup.py                               |   2 +-
 4 files changed, 157 insertions(+), 35 deletions(-)
 create mode 100644 examples/eval_api_azure_openai_demo.py
diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py
new file mode 100644
index 000000000..b1410ecdb
--- /dev/null
+++ b/examples/eval_api_azure_openai_demo.py
@@ -0,0 +1,57 @@
+"""
+Example configuration of using Azure OpenAI models.
+
+This demonstrates how to use Azure Managed Identity if API keys are not available for authentication.
+"""
+
+from mmengine.config import read_base
+
+from opencompass.models import OpenAI, OpenAISDK
+
+with read_base():
+    from opencompass.configs.datasets.demo.demo_gsm8k_chat_gen import \
+        gsm8k_datasets
+
+# API template for chat models
+api_meta_template = dict(
+    round=[
+        dict(role='HUMAN', api_role='HUMAN'),
+        dict(role='BOT', api_role='BOT', generate=True),
+    ],
+)
+
+models = [
+    dict(
+        abbr='Azure-GPT-5.1',
+        type=OpenAI,
+        path='gpt-5.1',
+        use_azure_identity=True,  # Enable Azure identity authentication
+        tokenizer_path='gpt-5',
+        # Azure OpenAI endpoint format:
+        openai_api_base='https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version=2024-12-01-preview',
+        meta_template=api_meta_template,
+        query_per_second=1,
+        max_out_len=2048,
+        max_seq_len=4096,
+        batch_size=8,
+        retry=2,
+    ),
+    dict(
+        abbr='Azure-GPT-5.1-SDK',
+        type=OpenAISDK,
+        path='gpt-5.1',
+        use_azure_identity=True,  # Enable Azure identity authentication
+        tokenizer_path='gpt-5',
+        # Azure OpenAI endpoint format:
+        openai_api_base='https://{resource-name}.openai.azure.com/openai/v1/',
+        meta_template=api_meta_template,
+        query_per_second=1,
+        max_out_len=2048,
+        max_seq_len=4096,
+        batch_size=8,
+        retry=2,
+    ),
+]
+
+# Datasets to evaluate
+datasets = gsm8k_datasets
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index 91229cea5..a31cb6225 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -12,6 +12,7 @@
 import requests
 from tqdm import tqdm
 
+from azure.identity import DefaultAzureCredential
 from opencompass.registry import MODELS
 from opencompass.utils.prompt import PromptList
 
@@ -76,6 +77,10 @@ class OpenAI(BaseAPIModel):
         max_workers (int, optional): Maximum number of worker threads for
             concurrent API requests. For I/O-intensive API calls, recommended
             value is 10-20. Defaults to None (uses CPU count * 2).
+        use_azure_identity (bool, optional): Use Azure DefaultAzureCredential
+            for authentication instead of API key. When enabled, tokens are
+            obtained from Azure identity. Requires azure-identity package.
+            Defaults to False.
     """
 
     is_api: bool = True
@@ -101,6 +106,7 @@ def __init__(
         verbose: bool = False,
         think_tag: str = '</think>',
         max_workers: Optional[int] = None,
+        use_azure_identity: bool = False,
     ):
 
         super().__init__(
@@ -124,6 +130,8 @@ def __init__(
         self.hf_tokenizer = None
         self.extra_body = extra_body
         self.think_tag = think_tag
+        self.use_azure_identity = use_azure_identity
+        self.azure_credential = None
 
         if max_workers is None:
             cpu_count = os.cpu_count() or 1
@@ -131,7 +139,12 @@ def __init__(
         else:
             self.max_workers = max_workers
 
-        if isinstance(key, str):
+        # Handle Azure Identity authentication
+        if use_azure_identity:
+            self.azure_credential = DefaultAzureCredential()
+            # Use dummy key for compatibility, actual token will be generated
+            self.keys = ['AZURE_TOKEN']
+        elif isinstance(key, str):
             if key == 'ENV':
                 if 'OPENAI_API_KEY' not in os.environ:
                     raise ValueError('OpenAI API key is not set.')
@@ -228,20 +241,28 @@ def _generate(self, input: PromptType, max_out_len: int,
         while max_num_retries < self.retry:
             self.wait()
 
-            with Lock():
-                if len(self.invalid_keys) == len(self.keys):
-                    raise RuntimeError('All keys have insufficient quota.')
+            # Get authentication token
+            if self.use_azure_identity:
+                # Get fresh token from Azure
+                token = self.azure_credential.get_token(
+                    'https://cognitiveservices.azure.com/.default'
+                )
+                key = token.token
+            else:
+                with Lock():
+                    if len(self.invalid_keys) == len(self.keys):
+                        raise RuntimeError('All keys have insufficient quota.')
 
-                # find the next valid key
-                while True:
-                    self.key_ctr += 1
-                    if self.key_ctr == len(self.keys):
-                        self.key_ctr = 0
+                    # find the next valid key
+                    while True:
+                        self.key_ctr += 1
+                        if self.key_ctr == len(self.keys):
+                            self.key_ctr = 0
 
-                    if self.keys[self.key_ctr] not in self.invalid_keys:
-                        break
+                        if self.keys[self.key_ctr] not in self.invalid_keys:
+                            break
 
-                key = self.keys[self.key_ctr]
+                    key = self.keys[self.key_ctr]
 
             header = {
                 'Authorization': f'Bearer {key}',
@@ -327,9 +348,17 @@ def _generate(self, input: PromptType, max_out_len: int,
                     continue
                 response = raw_response.json()
             except requests.JSONDecodeError:
-                self.logger.error(f'JsonDecode error, got status code '
-                                  f'{raw_response.status_code}, response: '
-                                  f'{raw_response.content.decode()}')
+                response_text = raw_response.content.decode()
+                self.logger.error(
+                    f'JsonDecode error, got status code {raw_response.status_code}. '
+                    f'URL: {url}')
+                self.logger.error(
+                    f'Response preview (first 500 chars): {response_text[:500]}')
+                if self.use_azure_identity:
+                    self.logger.error(
+                        'Azure OpenAI requires a specific URL format: '
+                        'https://{resource}.openai.azure.com/openai/deployments/'
+                        '{deployment-name}/chat/completions?api-version=2024-02-15-preview')
                 continue
             self.logger.debug(str(response))
             try:
@@ -609,6 +638,7 @@ def __init__(
         max_workers: Optional[int] = None,
         openai_extra_kwargs: Dict | None = None,
         timeout: int = 3600,
+        use_azure_identity: bool = False,
     ):
         super().__init__(
             path,
@@ -629,6 +659,7 @@ def __init__(
             extra_body,
             verbose=verbose,
             max_workers=max_workers,
+            use_azure_identity=use_azure_identity,
         )
         from openai import OpenAI
 
@@ -645,12 +676,24 @@ def __init__(
                     'https://': self.proxy_url,
                 }
 
-        self.openai_client = OpenAI(
-            base_url=self.openai_api_base,
-            api_key=key,
-            http_client=httpx.Client(
-                **http_client_cfg) if http_client_cfg else None,
-        )
+        # Initialize OpenAI client with appropriate authentication
+        if use_azure_identity:
+            # When using Azure identity, get token dynamically
+            # Note: The OpenAI SDK client will be updated with fresh tokens
+            # in the _generate method for each request
+            self.openai_client = OpenAI(
+                base_url=self.openai_api_base,
+                api_key='placeholder',  # Will be replaced with Azure token
+                http_client=httpx.Client(
+                    **http_client_cfg) if http_client_cfg else None,
+            )
+        else:
+            self.openai_client = OpenAI(
+                base_url=self.openai_api_base,
+                api_key=key,
+                http_client=httpx.Client(
+                    **http_client_cfg) if http_client_cfg else None,
+            )
         self.timeout = timeout
         if self.verbose:
             self.logger.info(f'Used openai_client: {self.openai_client}')
@@ -713,6 +756,13 @@ def _generate(
                 query_data.update(self.openai_extra_kwargs)
 
             try:
+                # Update API key with fresh Azure token if using Azure identity
+                if self.use_azure_identity:
+                    token = self.azure_credential.get_token(
+                        'https://cognitiveservices.azure.com/.default'
+                    )
+                    self.openai_client.api_key = token.token
+
                 if self.verbose:
                     self.logger.info('Start calling OpenAI API')
 
@@ -836,6 +886,7 @@ def __init__(
         think_tag: str = '</think>',
         max_workers: Optional[int] = None,
         openai_extra_kwargs: Dict | None = None,
+        use_azure_identity: bool = False,
     ):
         super().__init__(
             path,
@@ -856,6 +907,7 @@ def __init__(
             extra_body,
             verbose=verbose,
             max_workers=max_workers,
+            use_azure_identity=use_azure_identity,
         )
         from openai import OpenAI
 
@@ -872,19 +924,24 @@ def __init__(
                     'https://': self.proxy_url,
                 }
 
-        self.openai_client = OpenAI(
-            base_url=self.openai_api_base,
-            api_key=key,
-            http_client=httpx.Client(
-                **http_client_cfg) if http_client_cfg else None,
-        )
-
-        if self.verbose:
-            self.logger.info(f'Used openai_client: {self.openai_client}')
-        self.status_code_mappings = status_code_mappings
-        self.think_tag = think_tag
-        self.openai_extra_kwargs = openai_extra_kwargs
-
+        # Initialize OpenAI client with appropriate authentication
+        if use_azure_identity:
+            # When using Azure identity, get token dynamically
+            # Note: The OpenAI SDK client will be updated with fresh tokens
+            # in the _generate method for each request
+            self.openai_client = OpenAI(
+                base_url=self.openai_api_base,
+                api_key='placeholder',  # Will be replaced with Azure token
+                http_client=httpx.Client(
+                    **http_client_cfg) if http_client_cfg else None,
+            )
+        else:
+            self.openai_client = OpenAI(
+                base_url=self.openai_api_base,
+                api_key=key,
+                http_client=httpx.Client(
+                    **http_client_cfg) if http_client_cfg else None,
+            )
     def _generate(
         self,
         input: PromptList | str,
@@ -941,6 +998,13 @@ def _generate(
                 query_data.update(self.openai_extra_kwargs)
 
             try:
+                # Update API key with fresh Azure token if using Azure identity
+                if self.use_azure_identity:
+                    token = self.azure_credential.get_token(
+                        'https://cognitiveservices.azure.com/.default'
+                    )
+                    self.openai_client.api_key = token.token
+
                 if self.verbose:
                     self.logger.info('Start calling OpenAI API')
 
diff --git a/requirements/api.txt b/requirements/api.txt
index e8cd3d156..84d3431e2 100644
--- a/requirements/api.txt
+++ b/requirements/api.txt
@@ -4,6 +4,7 @@ anthropic
 dashscope
 # openai
 openai
+azure-identity
  # xunfei
 spark_ai_python
 sseclient-py==1.7.2
diff --git a/setup.py b/setup.py
index 90c98565e..0ca756456 100644
--- a/setup.py
+++ b/setup.py
@@ -134,7 +134,7 @@ def do_setup():
         },
         license='Apache License 2.0',
         include_package_data=True,
-        packages=find_packages(),
+        packages=find_packages(exclude=['autotest', 'autotest.*']),
         keywords=[
             'AI',
             'NLP',

From 6abaf6485920ca6a08e2c2e2edf32d3f5c2424bd Mon Sep 17 00:00:00 2001
From: Josh Bradley <joshbradley@microsoft.com>
Date: Wed, 18 Mar 2026 17:44:35 -0400
Subject: [PATCH 2/4] update docs

---
 docs/en/user_guides/models.md          |  3 +++
 examples/eval_api_azure_openai_demo.py |  2 +-
 opencompass/models/openai_api.py       | 23 +++++++++++------------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/docs/en/user_guides/models.md b/docs/en/user_guides/models.md
index ad9ee7899..3e9d9d5d3 100644
--- a/docs/en/user_guides/models.md
+++ b/docs/en/user_guides/models.md
@@ -86,6 +86,7 @@ models = [
         # Parameters for `OpenAI` initialization
         path='gpt-4',                            # Specify the model type
         key='YOUR_OPENAI_KEY',                   # OpenAI API Key
+        use_azure_identity=True,                 # Use Azure Managed Identity for authentication instead of OPENAI key
         max_seq_len=2048,                        # The max input number of tokens
         # Common parameters shared by various models, not specific to `OpenAI` initialization.
         abbr='GPT-4',                            # Model abbreviation used for result display.
@@ -100,6 +101,8 @@ We have provided several examples for API-based models. Please refer to
 
 ```bash
 configs
+├── eval_api_demo.py
+├── eval_api_azure_openai_demo.py
 ├── eval_zhipu.py
 ├── eval_xunfei.py
 └── eval_minimax.py
diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py
index b1410ecdb..c25f25f7b 100644
--- a/examples/eval_api_azure_openai_demo.py
+++ b/examples/eval_api_azure_openai_demo.py
@@ -1,7 +1,7 @@
 """
 Example configuration of using Azure OpenAI models.
 
-This demonstrates how to use Azure Managed Identity if API keys are not available for authentication.
+This demonstrates how to use Azure Managed Identity authentication if API keys are not available.
 """
 
 from mmengine.config import read_base
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index a31cb6225..b077a48f4 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -10,9 +10,9 @@
 import httpx
 import jieba
 import requests
+from azure.identity import DefaultAzureCredential
 from tqdm import tqdm
 
-from azure.identity import DefaultAzureCredential
 from opencompass.registry import MODELS
 from opencompass.utils.prompt import PromptList
 
@@ -77,10 +77,9 @@ class OpenAI(BaseAPIModel):
         max_workers (int, optional): Maximum number of worker threads for
             concurrent API requests. For I/O-intensive API calls, recommended
             value is 10-20. Defaults to None (uses CPU count * 2).
-        use_azure_identity (bool, optional): Use Azure DefaultAzureCredential
+        use_azure_identity (bool, optional): Use DefaultAzureCredential
             for authentication instead of API key. When enabled, tokens are
-            obtained from Azure identity. Requires azure-identity package.
-            Defaults to False.
+            obtained from Azure identity. Defaults to False.
     """
 
     is_api: bool = True
@@ -245,8 +244,7 @@ def _generate(self, input: PromptType, max_out_len: int,
             if self.use_azure_identity:
                 # Get fresh token from Azure
                 token = self.azure_credential.get_token(
-                    'https://cognitiveservices.azure.com/.default'
-                )
+                    'https://cognitiveservices.azure.com/.default')
                 key = token.token
             else:
                 with Lock():
@@ -353,12 +351,14 @@ def _generate(self, input: PromptType, max_out_len: int,
                     f'JsonDecode error, got status code {raw_response.status_code}. '
                     f'URL: {url}')
                 self.logger.error(
-                    f'Response preview (first 500 chars): {response_text[:500]}')
+                    f'Response preview (first 500 chars): {response_text[:500]}'
+                )
                 if self.use_azure_identity:
                     self.logger.error(
                         'Azure OpenAI requires a specific URL format: '
                         'https://{resource}.openai.azure.com/openai/deployments/'
-                        '{deployment-name}/chat/completions?api-version=2024-02-15-preview')
+                        '{deployment-name}/chat/completions?api-version=2024-02-15-preview'
+                    )
                 continue
             self.logger.debug(str(response))
             try:
@@ -759,8 +759,7 @@ def _generate(
                 # Update API key with fresh Azure token if using Azure identity
                 if self.use_azure_identity:
                     token = self.azure_credential.get_token(
-                        'https://cognitiveservices.azure.com/.default'
-                    )
+                        'https://cognitiveservices.azure.com/.default')
                     self.openai_client.api_key = token.token
 
                 if self.verbose:
@@ -942,6 +941,7 @@ def __init__(
                 http_client=httpx.Client(
                     **http_client_cfg) if http_client_cfg else None,
             )
+
     def _generate(
         self,
         input: PromptList | str,
@@ -1001,8 +1001,7 @@ def _generate(
                 # Update API key with fresh Azure token if using Azure identity
                 if self.use_azure_identity:
                     token = self.azure_credential.get_token(
-                        'https://cognitiveservices.azure.com/.default'
-                    )
+                        'https://cognitiveservices.azure.com/.default')
                     self.openai_client.api_key = token.token
 
                 if self.verbose:

From 0d63661e161cb54bc634eb7e6dbbf2aaf96fefd6 Mon Sep 17 00:00:00 2001
From: Josh Bradley <joshbradley@microsoft.com>
Date: Mon, 23 Mar 2026 00:05:17 -0400
Subject: [PATCH 3/4] update code to support AzureOpenAI client when azure
 endpoints are used

---
 docs/en/user_guides/models.md          |  63 +++++++++++++-
 docs/zh_cn/user_guides/models.md       |  60 +++++++++++++
 examples/eval_api_azure_openai_demo.py |   8 +-
 opencompass/models/openai_api.py       | 115 +++++++++++++++----------
 opencompass/models/openai_streaming.py |  64 +++++++++-----
 5 files changed, 239 insertions(+), 71 deletions(-)

diff --git a/docs/en/user_guides/models.md b/docs/en/user_guides/models.md
index 3e9d9d5d3..a7636d6ef 100644
--- a/docs/en/user_guides/models.md
+++ b/docs/en/user_guides/models.md
@@ -86,7 +86,6 @@ models = [
         # Parameters for `OpenAI` initialization
         path='gpt-4',                            # Specify the model type
         key='YOUR_OPENAI_KEY',                   # OpenAI API Key
-        use_azure_identity=True,                 # Use Azure Managed Identity for authentication instead of OPENAI key
         max_seq_len=2048,                        # The max input number of tokens
         # Common parameters shared by various models, not specific to `OpenAI` initialization.
         abbr='GPT-4',                            # Model abbreviation used for result display.
@@ -97,6 +96,68 @@ models = [
 ]
 ```
 
+### Authentication
+
+The `key` parameter defaults to `'ENV'`, which reads from the `OPENAI_API_KEY` environment variable.
+If `OPENAI_API_KEY` is not set, the model will attempt to fall back to
+Azure Managed Identity (`DefaultAzureCredential`) — no extra configuration is needed.
+
+You can also pass a key directly:
+
+```python
+key='sk-...',           # Explicit API key
+key='ENV',              # Read from OPENAI_API_KEY env var (default); falls back to Azure Managed Identity
+```
+
+### Azure OpenAI
+
+To use Azure OpenAI endpoints, set `azure_endpoint` and `azure_api_version` to reference your Azure resource.
+Authentication is resolved automatically: if `OPENAI_API_KEY` is set, it is used;
+otherwise, Azure Managed Identity is used as a fallback.
+
+```python
+from opencompass.models import OpenAISDK
+
+models = [
+    dict(
+        type=OpenAISDK,
+        path='gpt-4',
+        azure_endpoint='https://{resource-name}.openai.azure.com',
+        azure_api_version='2024-12-01-preview',
+        tokenizer_path='gpt-4',
+        meta_template=dict(round=[
+            dict(role='HUMAN', api_role='HUMAN'),
+            dict(role='BOT', api_role='BOT', generate=True),
+        ]),
+        query_per_second=1,
+        max_out_len=2048,
+        max_seq_len=4096,
+        batch_size=8,
+    ),
+]
+```
+
+### Reasoning Effort
+
+For OpenAI reasoning models (o1, o3, o4, gpt-5), you can control the amount of reasoning
+with the `reasoning_effort` parameter. Valid values are `'low'`, `'medium'`, and `'high'`
+(case-insensitive). Defaults to `None` (use the model's default behavior).
+
+```python
+from opencompass.models import OpenAISDK
+
+models = [
+    dict(
+        type=OpenAISDK,
+        path='o3',
+        reasoning_effort='high',
+        openai_api_base='https://api.openai.com/v1/',
+        max_out_len=4096,
+        max_seq_len=32768,
+    ),
+]
+```
+
 We have provided several examples for API-based models. Please refer to
 
 ```bash
diff --git a/docs/zh_cn/user_guides/models.md b/docs/zh_cn/user_guides/models.md
index 9a69bf761..c07f6c57e 100644
--- a/docs/zh_cn/user_guides/models.md
+++ b/docs/zh_cn/user_guides/models.md
@@ -88,10 +88,70 @@ models = [
 ]
 ```
 
+### 认证方式
+
+`key` 参数默认为 `'ENV'`，会从环境变量 `OPENAI_API_KEY` 中读取。如果未设置 `OPENAI_API_KEY`，
+模型会自动回退到 Azure 托管身份（`DefaultAzureCredential`）进行认证，无需额外配置。
+
+你也可以直接传入密钥：
+
+```python
+key='sk-...',           # 直接指定 API Key
+key='ENV',              # 从 OPENAI_API_KEY 环境变量读取（默认）；未设置时自动回退到 Azure 托管身份
+```
+
+### Azure OpenAI
+
+使用 Azure OpenAI 时，请设置 `azure_endpoint` 和 `azure_api_version` 以指向你的 Azure 资源。
+认证方式自动处理：如果设置了 `OPENAI_API_KEY` 则使用该密钥，否则自动回退到 Azure 托管身份。
+
+```python
+from opencompass.models import OpenAISDK
+
+models = [
+    dict(
+        type=OpenAISDK,
+        path='gpt-4',
+        azure_endpoint='https://{resource-name}.openai.azure.com',
+        azure_api_version='2024-12-01-preview',
+        tokenizer_path='gpt-4',
+        meta_template=dict(round=[
+            dict(role='HUMAN', api_role='HUMAN'),
+            dict(role='BOT', api_role='BOT', generate=True),
+        ]),
+        query_per_second=1,
+        max_out_len=2048,
+        max_seq_len=4096,
+        batch_size=8,
+    ),
+]
+```
+
+### 推理力度（Reasoning Effort）
+
+对于 OpenAI 推理模型（o1、o3、o4、gpt-5），可以通过 `reasoning_effort` 参数控制推理深度。
+有效值为 `'low'`、`'medium'`、`'high'`（不区分大小写）。默认为 `None`（使用模型的默认行为）。
+
+```python
+from opencompass.models import OpenAISDK
+
+models = [
+    dict(
+        type=OpenAISDK,
+        path='o3',
+        reasoning_effort='high',                 # 控制推理深度
+        openai_api_base='https://api.openai.com/v1/',
+        max_out_len=4096,
+        max_seq_len=32768,
+    ),
+]
+```
+
 我们也提供了API模型的评测示例，请参考
 
 ```bash
 configs
+├── eval_api_azure_openai_demo.py
 ├── eval_zhipu.py
 ├── eval_xunfei.py
 └── eval_minimax.py
diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py
index c25f25f7b..abd886739 100644
--- a/examples/eval_api_azure_openai_demo.py
+++ b/examples/eval_api_azure_openai_demo.py
@@ -1,7 +1,8 @@
 """
 Example configuration of using Azure OpenAI models.
 
-This demonstrates how to use Azure Managed Identity authentication if API keys are not available.
+If OPENAI_API_KEY is not set, Azure Managed Identity (DefaultAzureCredential)
+is used automatically as a fallback.
 """
 
 from mmengine.config import read_base
@@ -25,7 +26,6 @@
         abbr='Azure-GPT-5.1',
         type=OpenAI,
         path='gpt-5.1',
-        use_azure_identity=True,  # Enable Azure identity authentication
         tokenizer_path='gpt-5',
         # Azure OpenAI endpoint format:
         openai_api_base='https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version=2024-12-01-preview',
@@ -40,10 +40,10 @@
         abbr='Azure-GPT-5.1-SDK',
         type=OpenAISDK,
         path='gpt-5.1',
-        use_azure_identity=True,  # Enable Azure identity authentication
         tokenizer_path='gpt-5',
         # Azure OpenAI endpoint format:
-        openai_api_base='https://{resource-name}.openai.azure.com/openai/v1/',
+        azure_endpoint='https://{resource-name}.openai.azure.com',
+        azure_api_version='2024-12-01-preview',
         meta_template=api_meta_template,
         query_per_second=1,
         max_out_len=2048,
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index b077a48f4..e883831a4 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -10,7 +10,7 @@
 import httpx
 import jieba
 import requests
-from azure.identity import DefaultAzureCredential
+from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 from tqdm import tqdm
 
 from opencompass.registry import MODELS
@@ -45,9 +45,10 @@ class OpenAI(BaseAPIModel):
         retry (int): Number of retires if the API call fails. Defaults to 2.
         key (str or List[str]): OpenAI key(s). In particular, when it
             is set to "ENV", the key will be fetched from the environment
-            variable $OPENAI_API_KEY, as how openai defaults to be. If it's a
-            list, the keys will be used in round-robin manner. Defaults to
-            'ENV'.
+            variable $OPENAI_API_KEY. If the variable is not set, Azure
+            Managed Identity (DefaultAzureCredential) will be used as a
+            fallback. If it's a list, the keys will be used in round-robin
+            manner. Defaults to 'ENV'.
         org (str or List[str], optional): OpenAI organization(s). If not
             specified, OpenAI uses the default organization bound to each API
             key. If specified, the orgs will be posted with each request in
@@ -77,9 +78,6 @@ class OpenAI(BaseAPIModel):
         max_workers (int, optional): Maximum number of worker threads for
             concurrent API requests. For I/O-intensive API calls, recommended
             value is 10-20. Defaults to None (uses CPU count * 2).
-        use_azure_identity (bool, optional): Use DefaultAzureCredential
-            for authentication instead of API key. When enabled, tokens are
-            obtained from Azure identity. Defaults to False.
     """
 
     is_api: bool = True
@@ -105,7 +103,6 @@ def __init__(
         verbose: bool = False,
         think_tag: str = '</think>',
         max_workers: Optional[int] = None,
-        use_azure_identity: bool = False,
     ):
 
         super().__init__(
@@ -129,8 +126,8 @@ def __init__(
         self.hf_tokenizer = None
         self.extra_body = extra_body
         self.think_tag = think_tag
-        self.use_azure_identity = use_azure_identity
         self.azure_credential = None
+        self.use_azure_identity = False
 
         if max_workers is None:
             cpu_count = os.cpu_count() or 1
@@ -138,16 +135,24 @@ def __init__(
         else:
             self.max_workers = max_workers
 
-        # Handle Azure Identity authentication
-        if use_azure_identity:
-            self.azure_credential = DefaultAzureCredential()
-            # Use dummy key for compatibility, actual token will be generated
-            self.keys = ['AZURE_TOKEN']
-        elif isinstance(key, str):
+        # Resolve API keys: try explicit key, then env var, then Azure identity
+        if isinstance(key, str):
             if key == 'ENV':
-                if 'OPENAI_API_KEY' not in os.environ:
-                    raise ValueError('OpenAI API key is not set.')
-                self.keys = os.getenv('OPENAI_API_KEY').split(',')
+                if 'OPENAI_API_KEY' in os.environ:
+                    self.keys = os.getenv('OPENAI_API_KEY').split(',')
+                else:
+                    self.logger.warning(
+                        'OPENAI_API_KEY is not set. Will try to use Azure Managed Identity for authentication.'
+                    )
+                    try:
+                        self.azure_credential = DefaultAzureCredential()
+                        self.use_azure_identity = self.azure_credential is not None
+                        self.keys = ['AZURE_TOKEN']  # placeholder to indicate Azure token usage
+                    except Exception as e:
+                        self.logger.warning(
+                            f'Azure Managed Identity is not available: {e}. '
+                            'OPENAI_API_KEY and managed identity are unavailable.')
+                        raise ValueError('OpenAI API key is not set and Azure Managed Identity is not provided.')
             else:
                 self.keys = [key]
         else:
@@ -241,8 +246,7 @@ def _generate(self, input: PromptType, max_out_len: int,
             self.wait()
 
             # Get authentication token
-            if self.use_azure_identity:
-                # Get fresh token from Azure
+            if self.azure_credential:
                 token = self.azure_credential.get_token(
                     'https://cognitiveservices.azure.com/.default')
                 key = token.token
@@ -613,6 +617,8 @@ def bin_trim_wrapper(text):
 @MODELS.register_module()
 class OpenAISDK(OpenAI):
 
+    VALID_REASONING_EFFORTS = {None, 'low', 'medium', 'high'}
+
     def __init__(
         self,
         path: str = 'gpt-3.5-turbo',
@@ -624,6 +630,8 @@ def __init__(
         org: str | List[str] | None = None,
         meta_template: Dict | None = None,
         openai_api_base: str | List[str] = OPENAISDK_API_BASE,
+        azure_endpoint: Optional[str] = None,
+        azure_api_version: Optional[str] = '2024-12-01-preview',
         openai_proxy_url: Optional[str] = None,
         mode: str = 'none',
         logprobs: bool | None = False,
@@ -638,7 +646,7 @@ def __init__(
         max_workers: Optional[int] = None,
         openai_extra_kwargs: Dict | None = None,
         timeout: int = 3600,
-        use_azure_identity: bool = False,
+        reasoning_effort: Optional[str] = None,
     ):
         super().__init__(
             path,
@@ -659,15 +667,17 @@ def __init__(
             extra_body,
             verbose=verbose,
             max_workers=max_workers,
-            use_azure_identity=use_azure_identity,
         )
-        from openai import OpenAI
+        from openai import OpenAI, AzureOpenAI
 
         # support multiple api_base for acceleration
         if isinstance(openai_api_base, List):
             self.openai_api_base = random.choice(openai_api_base)
         else:
             self.openai_api_base = openai_api_base
+        
+        self.azure_endpoint = azure_endpoint
+        self.azure_api_version = azure_api_version
 
         if self.proxy_url or http_client_cfg:
             if self.proxy_url:
@@ -677,13 +687,12 @@ def __init__(
                 }
 
         # Initialize OpenAI client with appropriate authentication
-        if use_azure_identity:
-            # When using Azure identity, get token dynamically
-            # Note: The OpenAI SDK client will be updated with fresh tokens
-            # in the _generate method for each request
-            self.openai_client = OpenAI(
-                base_url=self.openai_api_base,
-                api_key='placeholder',  # Will be replaced with Azure token
+        if azure_endpoint:
+            self.openai_client = AzureOpenAI(
+                azure_endpoint=self.azure_endpoint,
+                api_key=key if not self.azure_credential else None,
+                api_version=self.azure_api_version,
+                azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), 'https://cognitiveservices.azure.com/.default') if self.azure_credential else None,
                 http_client=httpx.Client(
                     **http_client_cfg) if http_client_cfg else None,
             )
@@ -701,6 +710,14 @@ def __init__(
         self.think_tag = think_tag
         self.openai_extra_kwargs = openai_extra_kwargs
 
+        if reasoning_effort:
+            reasoning_effort = reasoning_effort.lower()
+        if reasoning_effort not in self.VALID_REASONING_EFFORTS:
+            raise ValueError(
+                f'Invalid reasoning_effort: {reasoning_effort}. '
+                f'Must be one of {self.VALID_REASONING_EFFORTS}')
+        self.reasoning_effort = reasoning_effort
+
     def _generate(
         self,
         input: PromptList | str,
@@ -742,6 +759,7 @@ def _generate(
                     messages=messages,
                     extra_body=self.extra_body,
                 )
+                query_data['reasoning_effort'] = self.reasoning_effort
             else:
                 query_data = dict(
                     model=self.path,
@@ -756,12 +774,6 @@ def _generate(
                 query_data.update(self.openai_extra_kwargs)
 
             try:
-                # Update API key with fresh Azure token if using Azure identity
-                if self.use_azure_identity:
-                    token = self.azure_credential.get_token(
-                        'https://cognitiveservices.azure.com/.default')
-                    self.openai_client.api_key = token.token
-
                 if self.verbose:
                     self.logger.info('Start calling OpenAI API')
 
@@ -861,6 +873,8 @@ def _generate(
 @MODELS.register_module()
 class OpenAISDKRollout(OpenAI):
 
+    VALID_REASONING_EFFORTS = {None, 'low', 'medium', 'high'}
+
     def __init__(
         self,
         path: str = 'gpt-3.5-turbo',
@@ -872,6 +886,8 @@ def __init__(
         org: str | List[str] | None = None,
         meta_template: Dict | None = None,
         openai_api_base: str | List[str] = OPENAISDK_API_BASE,
+        azure_endpoint: Optional[str] = None,
+        azure_api_version: Optional[str] = '2024-12-01-preview',
         openai_proxy_url: Optional[str] = None,
         mode: str = 'none',
         logprobs: bool | None = False,
@@ -885,7 +901,7 @@ def __init__(
         think_tag: str = '</think>',
         max_workers: Optional[int] = None,
         openai_extra_kwargs: Dict | None = None,
-        use_azure_identity: bool = False,
+        reasoning_effort: Optional[str] = None,
     ):
         super().__init__(
             path,
@@ -906,9 +922,8 @@ def __init__(
             extra_body,
             verbose=verbose,
             max_workers=max_workers,
-            use_azure_identity=use_azure_identity,
         )
-        from openai import OpenAI
+        from openai import AzureOpenAI, OpenAI
 
         # support multiple api_base for acceleration
         if isinstance(openai_api_base, List):
@@ -924,13 +939,12 @@ def __init__(
                 }
 
         # Initialize OpenAI client with appropriate authentication
-        if use_azure_identity:
-            # When using Azure identity, get token dynamically
-            # Note: The OpenAI SDK client will be updated with fresh tokens
-            # in the _generate method for each request
-            self.openai_client = OpenAI(
-                base_url=self.openai_api_base,
-                api_key='placeholder',  # Will be replaced with Azure token
+        if azure_endpoint:
+            self.openai_client = AzureOpenAI(
+                azure_endpoint=azure_endpoint,
+                api_key=key if not self.azure_credential else None,
+                api_version=azure_api_version,
+                azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), 'https://cognitiveservices.azure.com/.default') if self.azure_credential else None,
                 http_client=httpx.Client(
                     **http_client_cfg) if http_client_cfg else None,
             )
@@ -942,6 +956,14 @@ def __init__(
                     **http_client_cfg) if http_client_cfg else None,
             )
 
+        if reasoning_effort is not None:
+            reasoning_effort = reasoning_effort.lower()
+        if reasoning_effort not in self.VALID_REASONING_EFFORTS:
+            raise ValueError(
+                f'Invalid reasoning_effort: {reasoning_effort}. '
+                f'Must be one of {self.VALID_REASONING_EFFORTS}')
+        self.reasoning_effort = reasoning_effort
+
     def _generate(
         self,
         input: PromptList | str,
@@ -984,6 +1006,7 @@ def _generate(
                     messages=messages,
                     extra_body=self.extra_body,
                 )
+                query_data['reasoning_effort'] = self.reasoning_effort
             else:
                 query_data = dict(
                     model=self.path,
diff --git a/opencompass/models/openai_streaming.py b/opencompass/models/openai_streaming.py
index 904c6a6eb..86360827a 100644
--- a/opencompass/models/openai_streaming.py
+++ b/opencompass/models/openai_streaming.py
@@ -39,6 +39,8 @@ def __init__(self,
                  org: str | List[str] | None = None,
                  meta_template: Dict | None = None,
                  openai_api_base: str | List[str] = OPENAISDK_API_BASE,
+                 azure_endpoint: Optional[str] = None,
+                 azure_api_version: Optional[str] = '2024-12-01-preview',
                  openai_proxy_url: Optional[str] = None,
                  mode: str = 'none',
                  logprobs: bool | None = False,
@@ -55,7 +57,8 @@ def __init__(self,
                  stream_chunk_size: int = 1,
                  timeout: int = 3600,
                  finish_reason_confirm: bool = True,
-                 max_workers: Optional[int] = None):
+                 max_workers: Optional[int] = None,
+                 reasoning_effort: Optional[str] = None):
 
         super().__init__(
             path=path,
@@ -67,6 +70,8 @@ def __init__(self,
             org=org,
             meta_template=meta_template,
             openai_api_base=openai_api_base,
+            azure_endpoint=azure_endpoint,
+            azure_api_version=azure_api_version,
             openai_proxy_url=openai_proxy_url,
             mode=mode,
             logprobs=logprobs,
@@ -79,6 +84,7 @@ def __init__(self,
             status_code_mappings=status_code_mappings,
             think_tag=think_tag,
             max_workers=max_workers,
+            reasoning_effort=reasoning_effort,
         )
 
         self.stream = stream
@@ -91,23 +97,29 @@ def _create_fresh_client(self):
         """Create a fresh OpenAI client for each request to avoid
         concurrency issues."""
         import httpx
-        from openai import OpenAI
+        from azure.identity import get_bearer_token_provider
+        from openai import AzureOpenAI, OpenAI
 
         # Get current key (with key rotation)
-        with Lock():
-            if len(self.invalid_keys) == len(self.keys):
-                raise RuntimeError('All keys have insufficient quota.')
+        if self.azure_credential:
+            token = self.azure_credential.get_token(
+                'https://cognitiveservices.azure.com/.default')
+            current_key = token.token
+        else:
+            with Lock():
+                if len(self.invalid_keys) == len(self.keys):
+                    raise RuntimeError('All keys have insufficient quota.')
 
-            # find the next valid key
-            while True:
-                self.key_ctr += 1
-                if self.key_ctr == len(self.keys):
-                    self.key_ctr = 0
+                # find the next valid key
+                while True:
+                    self.key_ctr += 1
+                    if self.key_ctr == len(self.keys):
+                        self.key_ctr = 0
 
-                if self.keys[self.key_ctr] not in self.invalid_keys:
-                    break
+                    if self.keys[self.key_ctr] not in self.invalid_keys:
+                        break
 
-            current_key = self.keys[self.key_ctr]
+                current_key = self.keys[self.key_ctr]
 
         # Create fresh client with current key
         http_client_cfg = {}
@@ -117,13 +129,24 @@ def _create_fresh_client(self):
                 'https://': self.proxy_url,
             }
 
-        return OpenAI(
-            base_url=self.openai_api_base,
-            api_key=current_key,
-            http_client=httpx.Client(**http_client_cfg,
-                                     timeout=httpx.Timeout(self.timeout))
-            if http_client_cfg or True else None,
-        )
+        if self.azure_endpoint:
+            return AzureOpenAI(
+                azure_endpoint=self.azure_endpoint,
+                api_key=current_key if not self.azure_credential else None,
+                api_version=self.azure_api_version,
+                azure_ad_token_provider=get_bearer_token_provider(self.azure_credential, 'https://cognitiveservices.azure.com/.default') if self.azure_credential else None,
+                http_client=httpx.Client(**http_client_cfg,
+                                         timeout=httpx.Timeout(self.timeout))
+                if http_client_cfg or True else None,
+            )
+        else:
+            return OpenAI(
+                base_url=self.openai_api_base,
+                api_key=current_key,
+                http_client=httpx.Client(**http_client_cfg,
+                                         timeout=httpx.Timeout(self.timeout))
+                if http_client_cfg or True else None,
+            )
 
     def _generate(
         self,
@@ -170,6 +193,7 @@ def _generate(
                     extra_body=self.extra_body,
                     stream=self.stream,  # Enable streaming
                 )
+                query_data['reasoning_effort'] = self.reasoning_effort
             else:
                 query_data = dict(
                     model=self.path,

From 8d0cd18acb4f08e7f147799845e03b5c4772a675 Mon Sep 17 00:00:00 2001
From: Josh Bradley <joshbradley@microsoft.com>
Date: Tue, 24 Mar 2026 02:16:10 -0400
Subject: [PATCH 4/4] add support for image input

---
 opencompass/models/base_api.py   |  5 +++++
 opencompass/models/openai_api.py | 18 +++++++++++++++++-
 opencompass/utils/prompt.py      |  5 +++++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/opencompass/models/base_api.py b/opencompass/models/base_api.py
index ab33c609f..4d6951e31 100644
--- a/opencompass/models/base_api.py
+++ b/opencompass/models/base_api.py
@@ -310,6 +310,9 @@ def parse_template(self, prompt_template: PromptType,
             for item in prompt[1:]:
                 if item['role'] == last_role:
                     new_prompt[-1]['prompt'] += '\n' + item['prompt']
+                    if item.get('image'):
+                        existing = new_prompt[-1].get('image', [])
+                        new_prompt[-1]['image'] = existing + item['image']
                 else:
                     last_role = item['role']
                     new_prompt.append(item)
@@ -452,6 +455,8 @@ def _role2api_role(self,
         res['prompt'] = merged_prompt.get('begin', '')
         res['prompt'] += merged_prompt.get('prompt', '')
         res['prompt'] += merged_prompt.get('end', '')
+        if merged_prompt.get('image'):
+            res['image'] = merged_prompt['image']
         return res, True
 
 
diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
index 1ce5c1fd2..488e01eb0 100644
--- a/opencompass/models/openai_api.py
+++ b/opencompass/models/openai_api.py
@@ -586,13 +586,29 @@ def bin_trim_wrapper(text):
                     if mode != 'none':
                         input_content = bin_trim_wrapper(input_content)
                     processed_prompts.append(input_content)
-                    msg = {'content': input_content}
+                    msg = {}
                     if item['role'] == 'HUMAN':
                         msg['role'] = 'user'
                     elif item['role'] == 'BOT':
                         msg['role'] = 'assistant'
                     elif item['role'] == 'SYSTEM':
                         msg['role'] = 'system'
+                    # Build multi-part content when images are present
+                    images = [
+                        img for img in item.get('image', []) if img
+                    ]
+                    if images:
+                        content_parts = [
+                            {'type': 'text', 'text': input_content}
+                        ]
+                        for img_url in images:
+                            content_parts.append({
+                                'type': 'image_url',
+                                'image_url': {'url': img_url},
+                            })
+                        msg['content'] = content_parts
+                    else:
+                        msg['content'] = input_content
                     messages.append(msg)
                 input_len = sum(
                     get_token_len_func(prompt) for prompt in processed_prompts)
diff --git a/opencompass/utils/prompt.py b/opencompass/utils/prompt.py
index cef6a31dd..830aae51a 100644
--- a/opencompass/utils/prompt.py
+++ b/opencompass/utils/prompt.py
@@ -99,6 +99,11 @@ def format(self, **kwargs) -> PromptList:
                 new_item = deepcopy(item)
                 if 'prompt' in item:
                     new_item['prompt'] = safe_format(item['prompt'], **kwargs)
+                if 'image' in item:
+                    new_item['image'] = [
+                        safe_format(img, **kwargs)
+                        for img in item['image']
+                    ]
                 new_list.append(new_item)
             else:
                 new_list.append(safe_format(item, **kwargs))