From 2446c6b24c0f59239133ea5d630e0428a6540dee Mon Sep 17 00:00:00 2001 From: Josh Bradley Date: Wed, 18 Mar 2026 16:56:16 -0400 Subject: [PATCH 1/4] add initial support for azure openai models and managed identity auth --- examples/eval_api_azure_openai_demo.py | 57 +++++++++++ opencompass/models/openai_api.py | 132 ++++++++++++++++++------- requirements/api.txt | 1 + setup.py | 2 +- 4 files changed, 157 insertions(+), 35 deletions(-) create mode 100644 examples/eval_api_azure_openai_demo.py diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py new file mode 100644 index 000000000..b1410ecdb --- /dev/null +++ b/examples/eval_api_azure_openai_demo.py @@ -0,0 +1,57 @@ +""" +Example configuration of using Azure OpenAI models. + +This demonstrates how to use Azure Managed Identity if API keys are not available for authentication. +""" + +from mmengine.config import read_base + +from opencompass.models import OpenAI, OpenAISDK + +with read_base(): + from opencompass.configs.datasets.demo.demo_gsm8k_chat_gen import \ + gsm8k_datasets + +# API template for chat models +api_meta_template = dict( + round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ], +) + +models = [ + dict( + abbr='Azure-GPT-5.1', + type=OpenAI, + path='gpt-5.1', + use_azure_identity=True, # Enable Azure identity authentication + tokenizer_path='gpt-5', + # Azure OpenAI endpoint format: + openai_api_base='https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version=2024-12-01-preview', + meta_template=api_meta_template, + query_per_second=1, + max_out_len=2048, + max_seq_len=4096, + batch_size=8, + retry=2, + ), + dict( + abbr='Azure-GPT-5.1-SDK', + type=OpenAISDK, + path='gpt-5.1', + use_azure_identity=True, # Enable Azure identity authentication + tokenizer_path='gpt-5', + # Azure OpenAI endpoint format: + 
openai_api_base='https://{resource-name}.openai.azure.com/openai/v1/', + meta_template=api_meta_template, + query_per_second=1, + max_out_len=2048, + max_seq_len=4096, + batch_size=8, + retry=2, + ), +] + +# Datasets to evaluate +datasets = gsm8k_datasets diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index 91229cea5..a31cb6225 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -12,6 +12,7 @@ import requests from tqdm import tqdm +from azure.identity import DefaultAzureCredential from opencompass.registry import MODELS from opencompass.utils.prompt import PromptList @@ -76,6 +77,10 @@ class OpenAI(BaseAPIModel): max_workers (int, optional): Maximum number of worker threads for concurrent API requests. For I/O-intensive API calls, recommended value is 10-20. Defaults to None (uses CPU count * 2). + use_azure_identity (bool, optional): Use Azure DefaultAzureCredential + for authentication instead of API key. When enabled, tokens are + obtained from Azure identity. Requires azure-identity package. + Defaults to False. 
""" is_api: bool = True @@ -101,6 +106,7 @@ def __init__( verbose: bool = False, think_tag: str = '', max_workers: Optional[int] = None, + use_azure_identity: bool = False, ): super().__init__( @@ -124,6 +130,8 @@ def __init__( self.hf_tokenizer = None self.extra_body = extra_body self.think_tag = think_tag + self.use_azure_identity = use_azure_identity + self.azure_credential = None if max_workers is None: cpu_count = os.cpu_count() or 1 @@ -131,7 +139,12 @@ def __init__( else: self.max_workers = max_workers - if isinstance(key, str): + # Handle Azure Identity authentication + if use_azure_identity: + self.azure_credential = DefaultAzureCredential() + # Use dummy key for compatibility, actual token will be generated + self.keys = ['AZURE_TOKEN'] + elif isinstance(key, str): if key == 'ENV': if 'OPENAI_API_KEY' not in os.environ: raise ValueError('OpenAI API key is not set.') @@ -228,20 +241,28 @@ def _generate(self, input: PromptType, max_out_len: int, while max_num_retries < self.retry: self.wait() - with Lock(): - if len(self.invalid_keys) == len(self.keys): - raise RuntimeError('All keys have insufficient quota.') + # Get authentication token + if self.use_azure_identity: + # Get fresh token from Azure + token = self.azure_credential.get_token( + 'https://cognitiveservices.azure.com/.default' + ) + key = token.token + else: + with Lock(): + if len(self.invalid_keys) == len(self.keys): + raise RuntimeError('All keys have insufficient quota.') - # find the next valid key - while True: - self.key_ctr += 1 - if self.key_ctr == len(self.keys): - self.key_ctr = 0 + # find the next valid key + while True: + self.key_ctr += 1 + if self.key_ctr == len(self.keys): + self.key_ctr = 0 - if self.keys[self.key_ctr] not in self.invalid_keys: - break + if self.keys[self.key_ctr] not in self.invalid_keys: + break - key = self.keys[self.key_ctr] + key = self.keys[self.key_ctr] header = { 'Authorization': f'Bearer {key}', @@ -327,9 +348,17 @@ def _generate(self, input: 
PromptType, max_out_len: int, continue response = raw_response.json() except requests.JSONDecodeError: - self.logger.error(f'JsonDecode error, got status code ' - f'{raw_response.status_code}, response: ' - f'{raw_response.content.decode()}') + response_text = raw_response.content.decode() + self.logger.error( + f'JsonDecode error, got status code {raw_response.status_code}. ' + f'URL: {url}') + self.logger.error( + f'Response preview (first 500 chars): {response_text[:500]}') + if self.use_azure_identity: + self.logger.error( + 'Azure OpenAI requires a specific URL format: ' + 'https://{resource}.openai.azure.com/openai/deployments/' + '{deployment-name}/chat/completions?api-version=2024-02-15-preview') continue self.logger.debug(str(response)) try: @@ -609,6 +638,7 @@ def __init__( max_workers: Optional[int] = None, openai_extra_kwargs: Dict | None = None, timeout: int = 3600, + use_azure_identity: bool = False, ): super().__init__( path, @@ -629,6 +659,7 @@ def __init__( extra_body, verbose=verbose, max_workers=max_workers, + use_azure_identity=use_azure_identity, ) from openai import OpenAI @@ -645,12 +676,24 @@ def __init__( 'https://': self.proxy_url, } - self.openai_client = OpenAI( - base_url=self.openai_api_base, - api_key=key, - http_client=httpx.Client( - **http_client_cfg) if http_client_cfg else None, - ) + # Initialize OpenAI client with appropriate authentication + if use_azure_identity: + # When using Azure identity, get token dynamically + # Note: The OpenAI SDK client will be updated with fresh tokens + # in the _generate method for each request + self.openai_client = OpenAI( + base_url=self.openai_api_base, + api_key='placeholder', # Will be replaced with Azure token + http_client=httpx.Client( + **http_client_cfg) if http_client_cfg else None, + ) + else: + self.openai_client = OpenAI( + base_url=self.openai_api_base, + api_key=key, + http_client=httpx.Client( + **http_client_cfg) if http_client_cfg else None, + ) self.timeout = timeout if 
self.verbose: self.logger.info(f'Used openai_client: {self.openai_client}') @@ -713,6 +756,13 @@ def _generate( query_data.update(self.openai_extra_kwargs) try: + # Update API key with fresh Azure token if using Azure identity + if self.use_azure_identity: + token = self.azure_credential.get_token( + 'https://cognitiveservices.azure.com/.default' + ) + self.openai_client.api_key = token.token + if self.verbose: self.logger.info('Start calling OpenAI API') @@ -836,6 +886,7 @@ def __init__( think_tag: str = '', max_workers: Optional[int] = None, openai_extra_kwargs: Dict | None = None, + use_azure_identity: bool = False, ): super().__init__( path, @@ -856,6 +907,7 @@ def __init__( extra_body, verbose=verbose, max_workers=max_workers, + use_azure_identity=use_azure_identity, ) from openai import OpenAI @@ -872,19 +924,24 @@ def __init__( 'https://': self.proxy_url, } - self.openai_client = OpenAI( - base_url=self.openai_api_base, - api_key=key, - http_client=httpx.Client( - **http_client_cfg) if http_client_cfg else None, - ) - - if self.verbose: - self.logger.info(f'Used openai_client: {self.openai_client}') - self.status_code_mappings = status_code_mappings - self.think_tag = think_tag - self.openai_extra_kwargs = openai_extra_kwargs - + # Initialize OpenAI client with appropriate authentication + if use_azure_identity: + # When using Azure identity, get token dynamically + # Note: The OpenAI SDK client will be updated with fresh tokens + # in the _generate method for each request + self.openai_client = OpenAI( + base_url=self.openai_api_base, + api_key='placeholder', # Will be replaced with Azure token + http_client=httpx.Client( + **http_client_cfg) if http_client_cfg else None, + ) + else: + self.openai_client = OpenAI( + base_url=self.openai_api_base, + api_key=key, + http_client=httpx.Client( + **http_client_cfg) if http_client_cfg else None, + ) def _generate( self, input: PromptList | str, @@ -941,6 +998,13 @@ def _generate( 
query_data.update(self.openai_extra_kwargs) try: + # Update API key with fresh Azure token if using Azure identity + if self.use_azure_identity: + token = self.azure_credential.get_token( + 'https://cognitiveservices.azure.com/.default' + ) + self.openai_client.api_key = token.token + if self.verbose: self.logger.info('Start calling OpenAI API') diff --git a/requirements/api.txt b/requirements/api.txt index e8cd3d156..84d3431e2 100644 --- a/requirements/api.txt +++ b/requirements/api.txt @@ -4,6 +4,7 @@ anthropic dashscope # openai openai +azure-identity # xunfei spark_ai_python sseclient-py==1.7.2 diff --git a/setup.py b/setup.py index 90c98565e..0ca756456 100644 --- a/setup.py +++ b/setup.py @@ -134,7 +134,7 @@ def do_setup(): }, license='Apache License 2.0', include_package_data=True, - packages=find_packages(), + packages=find_packages(exclude=['autotest', 'autotest.*']), keywords=[ 'AI', 'NLP', From 6abaf6485920ca6a08e2c2e2edf32d3f5c2424bd Mon Sep 17 00:00:00 2001 From: Josh Bradley Date: Wed, 18 Mar 2026 17:44:35 -0400 Subject: [PATCH 2/4] update docs --- docs/en/user_guides/models.md | 3 +++ examples/eval_api_azure_openai_demo.py | 2 +- opencompass/models/openai_api.py | 23 +++++++++++------------ 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/en/user_guides/models.md b/docs/en/user_guides/models.md index ad9ee7899..3e9d9d5d3 100644 --- a/docs/en/user_guides/models.md +++ b/docs/en/user_guides/models.md @@ -86,6 +86,7 @@ models = [ # Parameters for `OpenAI` initialization path='gpt-4', # Specify the model type key='YOUR_OPENAI_KEY', # OpenAI API Key + use_azure_identity=True, # Use Azure Managed Identity for authentication instead of OPENAI key max_seq_len=2048, # The max input number of tokens # Common parameters shared by various models, not specific to `OpenAI` initialization. abbr='GPT-4', # Model abbreviation used for result display. @@ -100,6 +101,8 @@ We have provided several examples for API-based models. 
Please refer to ```bash configs +├── eval_api_demo.py +├── eval_api_azure_openai_demo.py ├── eval_zhipu.py ├── eval_xunfei.py └── eval_minimax.py diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py index b1410ecdb..c25f25f7b 100644 --- a/examples/eval_api_azure_openai_demo.py +++ b/examples/eval_api_azure_openai_demo.py @@ -1,7 +1,7 @@ """ Example configuration of using Azure OpenAI models. -This demonstrates how to use Azure Managed Identity if API keys are not available for authentication. +This demonstrates how to use Azure Managed Identity authentication if API keys are not available. """ from mmengine.config import read_base diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index a31cb6225..b077a48f4 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -10,9 +10,9 @@ import httpx import jieba import requests +from azure.identity import DefaultAzureCredential from tqdm import tqdm -from azure.identity import DefaultAzureCredential from opencompass.registry import MODELS from opencompass.utils.prompt import PromptList @@ -77,10 +77,9 @@ class OpenAI(BaseAPIModel): max_workers (int, optional): Maximum number of worker threads for concurrent API requests. For I/O-intensive API calls, recommended value is 10-20. Defaults to None (uses CPU count * 2). - use_azure_identity (bool, optional): Use Azure DefaultAzureCredential + use_azure_identity (bool, optional): Use DefaultAzureCredential for authentication instead of API key. When enabled, tokens are - obtained from Azure identity. Requires azure-identity package. - Defaults to False. + obtained from Azure identity. Defaults to False. 
""" is_api: bool = True @@ -245,8 +244,7 @@ def _generate(self, input: PromptType, max_out_len: int, if self.use_azure_identity: # Get fresh token from Azure token = self.azure_credential.get_token( - 'https://cognitiveservices.azure.com/.default' - ) + 'https://cognitiveservices.azure.com/.default') key = token.token else: with Lock(): @@ -353,12 +351,14 @@ def _generate(self, input: PromptType, max_out_len: int, f'JsonDecode error, got status code {raw_response.status_code}. ' f'URL: {url}') self.logger.error( - f'Response preview (first 500 chars): {response_text[:500]}') + f'Response preview (first 500 chars): {response_text[:500]}' + ) if self.use_azure_identity: self.logger.error( 'Azure OpenAI requires a specific URL format: ' 'https://{resource}.openai.azure.com/openai/deployments/' - '{deployment-name}/chat/completions?api-version=2024-02-15-preview') + '{deployment-name}/chat/completions?api-version=2024-02-15-preview' + ) continue self.logger.debug(str(response)) try: @@ -759,8 +759,7 @@ def _generate( # Update API key with fresh Azure token if using Azure identity if self.use_azure_identity: token = self.azure_credential.get_token( - 'https://cognitiveservices.azure.com/.default' - ) + 'https://cognitiveservices.azure.com/.default') self.openai_client.api_key = token.token if self.verbose: @@ -942,6 +941,7 @@ def __init__( http_client=httpx.Client( **http_client_cfg) if http_client_cfg else None, ) + def _generate( self, input: PromptList | str, @@ -1001,8 +1001,7 @@ def _generate( # Update API key with fresh Azure token if using Azure identity if self.use_azure_identity: token = self.azure_credential.get_token( - 'https://cognitiveservices.azure.com/.default' - ) + 'https://cognitiveservices.azure.com/.default') self.openai_client.api_key = token.token if self.verbose: From 0d63661e161cb54bc634eb7e6dbbf2aaf96fefd6 Mon Sep 17 00:00:00 2001 From: Josh Bradley Date: Mon, 23 Mar 2026 00:05:17 -0400 Subject: [PATCH 3/4] update code to support AzureOpenAI 
client when azure endpoints are used --- docs/en/user_guides/models.md | 63 +++++++++++++- docs/zh_cn/user_guides/models.md | 60 +++++++++++++ examples/eval_api_azure_openai_demo.py | 8 +- opencompass/models/openai_api.py | 115 +++++++++++++++---------- opencompass/models/openai_streaming.py | 64 +++++++++----- 5 files changed, 239 insertions(+), 71 deletions(-) diff --git a/docs/en/user_guides/models.md b/docs/en/user_guides/models.md index 3e9d9d5d3..a7636d6ef 100644 --- a/docs/en/user_guides/models.md +++ b/docs/en/user_guides/models.md @@ -86,7 +86,6 @@ models = [ # Parameters for `OpenAI` initialization path='gpt-4', # Specify the model type key='YOUR_OPENAI_KEY', # OpenAI API Key - use_azure_identity=True, # Use Azure Managed Identity for authentication instead of OPENAI key max_seq_len=2048, # The max input number of tokens # Common parameters shared by various models, not specific to `OpenAI` initialization. abbr='GPT-4', # Model abbreviation used for result display. @@ -97,6 +96,68 @@ models = [ ] ``` +### Authentication + +The `key` parameter defaults to `'ENV'`, which reads from the `OPENAI_API_KEY` environment variable. +If `OPENAI_API_KEY` is not set, the model will attempt to fall back to +Azure Managed Identity (`DefaultAzureCredential`) — no extra configuration is needed. + +You can also pass a key directly: + +```python +key='sk-...', # Explicit API key +key='ENV', # Read from OPENAI_API_KEY env var (default); falls back to Azure Managed Identity +``` + +### Azure OpenAI + +To use Azure OpenAI endpoints, set `azure_endpoint` and `azure_api_version` to reference your Azure resource. +Authentication: if `OPENAI_API_KEY` is set it will be used, +otherwise Azure Managed Identity is used as a fallback. 
+ +```python +from opencompass.models import OpenAISDK + +models = [ + dict( + type=OpenAISDK, + path='gpt-4', + azure_endpoint='https://{resource-name}.openai.azure.com', + azure_api_version='2024-12-01-preview', + tokenizer_path='gpt-4', + meta_template=dict(round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ]), + query_per_second=1, + max_out_len=2048, + max_seq_len=4096, + batch_size=8, + ), +] +``` + +### Reasoning Effort + +For OpenAI reasoning models (o1, o3, o4, gpt-5), you can control the amount of reasoning +with the `reasoning_effort` parameter. Valid values are `'low'`, `'medium'`, and `'high'` +(case-insensitive). Defaults to `None` (use the model's default behavior). + +```python +from opencompass.models import OpenAISDK + +models = [ + dict( + type=OpenAISDK, + path='o3', + reasoning_effort='high', + openai_api_base='https://api.openai.com/v1/', + max_out_len=4096, + max_seq_len=32768, + ), +] +``` + We have provided several examples for API-based models. 
Please refer to ```bash diff --git a/docs/zh_cn/user_guides/models.md b/docs/zh_cn/user_guides/models.md index 9a69bf761..c07f6c57e 100644 --- a/docs/zh_cn/user_guides/models.md +++ b/docs/zh_cn/user_guides/models.md @@ -88,10 +88,70 @@ models = [ ] ``` +### 认证方式 + +`key` 参数默认为 `'ENV'`,会从环境变量 `OPENAI_API_KEY` 中读取。如果未设置 `OPENAI_API_KEY`, +模型会自动回退到 Azure 托管身份(`DefaultAzureCredential`)进行认证,无需额外配置。 + +你也可以直接传入密钥: + +```python +key='sk-...', # 直接指定 API Key +key='ENV', # 从 OPENAI_API_KEY 环境变量读取(默认);未设置时自动回退到 Azure 托管身份 +``` + +### Azure OpenAI + +使用 Azure OpenAI 时,请设置 `azure_endpoint` 和 `azure_api_version` 以指向你的 Azure 资源。 +认证方式自动处理:如果设置了 `OPENAI_API_KEY` 则使用该密钥,否则自动回退到 Azure 托管身份。 + +```python +from opencompass.models import OpenAISDK + +models = [ + dict( + type=OpenAISDK, + path='gpt-4', + azure_endpoint='https://{resource-name}.openai.azure.com', + azure_api_version='2024-12-01-preview', + tokenizer_path='gpt-4', + meta_template=dict(round=[ + dict(role='HUMAN', api_role='HUMAN'), + dict(role='BOT', api_role='BOT', generate=True), + ]), + query_per_second=1, + max_out_len=2048, + max_seq_len=4096, + batch_size=8, + ), +] +``` + +### 推理力度(Reasoning Effort) + +对于 OpenAI 推理模型(o1、o3、o4、gpt-5),可以通过 `reasoning_effort` 参数控制推理深度。 +有效值为 `'low'`、`'medium'`、`'high'`(不区分大小写)。默认为 `None`(使用模型的默认行为)。 + +```python +from opencompass.models import OpenAISDK + +models = [ + dict( + type=OpenAISDK, + path='o3', + reasoning_effort='high', # 控制推理深度 + openai_api_base='https://api.openai.com/v1/', + max_out_len=4096, + max_seq_len=32768, + ), +] +``` + 我们也提供了API模型的评测示例,请参考 ```bash configs +├── eval_api_azure_openai_demo.py ├── eval_zhipu.py ├── eval_xunfei.py └── eval_minimax.py diff --git a/examples/eval_api_azure_openai_demo.py b/examples/eval_api_azure_openai_demo.py index c25f25f7b..abd886739 100644 --- a/examples/eval_api_azure_openai_demo.py +++ b/examples/eval_api_azure_openai_demo.py @@ -1,7 +1,8 @@ """ Example configuration of using Azure OpenAI models. 
-This demonstrates how to use Azure Managed Identity authentication if API keys are not available. +If OPENAI_API_KEY is not set, Azure Managed Identity (DefaultAzureCredential) +is used automatically as a fallback. """ from mmengine.config import read_base @@ -25,7 +26,6 @@ abbr='Azure-GPT-5.1', type=OpenAI, path='gpt-5.1', - use_azure_identity=True, # Enable Azure identity authentication tokenizer_path='gpt-5', # Azure OpenAI endpoint format: openai_api_base='https://{resource-name}.openai.azure.com/openai/deployments/{deployment-name}/chat/completions?api-version=2024-12-01-preview', @@ -40,10 +40,10 @@ abbr='Azure-GPT-5.1-SDK', type=OpenAISDK, path='gpt-5.1', - use_azure_identity=True, # Enable Azure identity authentication tokenizer_path='gpt-5', # Azure OpenAI endpoint format: - openai_api_base='https://{resource-name}.openai.azure.com/openai/v1/', + azure_endpoint='https://{resource-name}.openai.azure.com', + azure_api_version='2024-12-01-preview', meta_template=api_meta_template, query_per_second=1, max_out_len=2048, diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index b077a48f4..e883831a4 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -10,7 +10,7 @@ import httpx import jieba import requests -from azure.identity import DefaultAzureCredential +from azure.identity import DefaultAzureCredential, get_bearer_token_provider from tqdm import tqdm from opencompass.registry import MODELS @@ -45,9 +45,10 @@ class OpenAI(BaseAPIModel): retry (int): Number of retires if the API call fails. Defaults to 2. key (str or List[str]): OpenAI key(s). In particular, when it is set to "ENV", the key will be fetched from the environment - variable $OPENAI_API_KEY, as how openai defaults to be. If it's a - list, the keys will be used in round-robin manner. Defaults to - 'ENV'. + variable $OPENAI_API_KEY. 
If the variable is not set, Azure + Managed Identity (DefaultAzureCredential) will be used as a + fallback. If it's a list, the keys will be used in round-robin + manner. Defaults to 'ENV'. org (str or List[str], optional): OpenAI organization(s). If not specified, OpenAI uses the default organization bound to each API key. If specified, the orgs will be posted with each request in @@ -77,9 +78,6 @@ class OpenAI(BaseAPIModel): max_workers (int, optional): Maximum number of worker threads for concurrent API requests. For I/O-intensive API calls, recommended value is 10-20. Defaults to None (uses CPU count * 2). - use_azure_identity (bool, optional): Use DefaultAzureCredential - for authentication instead of API key. When enabled, tokens are - obtained from Azure identity. Defaults to False. """ is_api: bool = True @@ -105,7 +103,6 @@ def __init__( verbose: bool = False, think_tag: str = '', max_workers: Optional[int] = None, - use_azure_identity: bool = False, ): super().__init__( @@ -129,8 +126,8 @@ def __init__( self.hf_tokenizer = None self.extra_body = extra_body self.think_tag = think_tag - self.use_azure_identity = use_azure_identity self.azure_credential = None + self.use_azure_identity = False if max_workers is None: cpu_count = os.cpu_count() or 1 @@ -138,16 +135,24 @@ def __init__( else: self.max_workers = max_workers - # Handle Azure Identity authentication - if use_azure_identity: - self.azure_credential = DefaultAzureCredential() - # Use dummy key for compatibility, actual token will be generated - self.keys = ['AZURE_TOKEN'] - elif isinstance(key, str): + # Resolve API keys: try explicit key, then env var, then Azure identity + if isinstance(key, str): if key == 'ENV': - if 'OPENAI_API_KEY' not in os.environ: - raise ValueError('OpenAI API key is not set.') - self.keys = os.getenv('OPENAI_API_KEY').split(',') + if 'OPENAI_API_KEY' in os.environ: + self.keys = os.getenv('OPENAI_API_KEY').split(',') + else: + self.logger.warning( + 'OPENAI_API_KEY is not 
set. Will try to use Azure Managed Identity for authentication.' + ) + try: + self.azure_credential = DefaultAzureCredential() + self.use_azure_identity = self.azure_credential is not None + self.keys = ['AZURE_TOKEN'] # placeholder to indicate Azure token usage + except Exception as e: + self.logger.warning( + f'Azure Managed Identity is not available: {e}. ' + 'OPENAI_API_KEY and managed identity are unavailable.') + raise ValueError('OpenAI API key is not set and Azure Managed Identity is not provided.') else: self.keys = [key] else: @@ -241,8 +246,7 @@ def _generate(self, input: PromptType, max_out_len: int, self.wait() # Get authentication token - if self.use_azure_identity: - # Get fresh token from Azure + if self.azure_credential: token = self.azure_credential.get_token( 'https://cognitiveservices.azure.com/.default') key = token.token @@ -613,6 +617,8 @@ def bin_trim_wrapper(text): @MODELS.register_module() class OpenAISDK(OpenAI): + VALID_REASONING_EFFORTS = {None, 'low', 'medium', 'high'} + def __init__( self, path: str = 'gpt-3.5-turbo', @@ -624,6 +630,8 @@ def __init__( org: str | List[str] | None = None, meta_template: Dict | None = None, openai_api_base: str | List[str] = OPENAISDK_API_BASE, + azure_endpoint: Optional[str] = None, + azure_api_version: Optional[str] = '2024-12-01-preview', openai_proxy_url: Optional[str] = None, mode: str = 'none', logprobs: bool | None = False, @@ -638,7 +646,7 @@ def __init__( max_workers: Optional[int] = None, openai_extra_kwargs: Dict | None = None, timeout: int = 3600, - use_azure_identity: bool = False, + reasoning_effort: Optional[str] = None, ): super().__init__( path, @@ -659,15 +667,17 @@ def __init__( extra_body, verbose=verbose, max_workers=max_workers, - use_azure_identity=use_azure_identity, ) - from openai import OpenAI + from openai import OpenAI, AzureOpenAI # support multiple api_base for acceleration if isinstance(openai_api_base, List): self.openai_api_base = random.choice(openai_api_base) else: 
self.openai_api_base = openai_api_base + + self.azure_endpoint = azure_endpoint + self.azure_api_version = azure_api_version if self.proxy_url or http_client_cfg: if self.proxy_url: @@ -677,13 +687,12 @@ def __init__( } # Initialize OpenAI client with appropriate authentication - if use_azure_identity: - # When using Azure identity, get token dynamically - # Note: The OpenAI SDK client will be updated with fresh tokens - # in the _generate method for each request - self.openai_client = OpenAI( - base_url=self.openai_api_base, - api_key='placeholder', # Will be replaced with Azure token + if azure_endpoint: + self.openai_client = AzureOpenAI( + azure_endpoint=self.azure_endpoint, + api_key=key if not self.azure_credential else None, + api_version=self.azure_api_version, + azure_ad_token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") if self.azure_credential else None, http_client=httpx.Client( **http_client_cfg) if http_client_cfg else None, ) @@ -701,6 +710,14 @@ def __init__( self.think_tag = think_tag self.openai_extra_kwargs = openai_extra_kwargs + if reasoning_effort: + reasoning_effort = reasoning_effort.lower() + if reasoning_effort not in self.VALID_REASONING_EFFORTS: + raise ValueError( + f'Invalid reasoning_effort: {reasoning_effort}. 
' + f'Must be one of {self.VALID_REASONING_EFFORTS}') + self.reasoning_effort = reasoning_effort + def _generate( self, input: PromptList | str, @@ -742,6 +759,7 @@ def _generate( messages=messages, extra_body=self.extra_body, ) + query_data['reasoning_effort'] = self.reasoning_effort else: query_data = dict( model=self.path, @@ -756,12 +774,6 @@ def _generate( query_data.update(self.openai_extra_kwargs) try: - # Update API key with fresh Azure token if using Azure identity - if self.use_azure_identity: - token = self.azure_credential.get_token( - 'https://cognitiveservices.azure.com/.default') - self.openai_client.api_key = token.token - if self.verbose: self.logger.info('Start calling OpenAI API') @@ -861,6 +873,8 @@ def _generate( @MODELS.register_module() class OpenAISDKRollout(OpenAI): + VALID_REASONING_EFFORTS = {None, 'low', 'medium', 'high'} + def __init__( self, path: str = 'gpt-3.5-turbo', @@ -872,6 +886,8 @@ def __init__( org: str | List[str] | None = None, meta_template: Dict | None = None, openai_api_base: str | List[str] = OPENAISDK_API_BASE, + azure_endpoint: Optional[str] = None, + azure_api_version: Optional[str] = '2024-12-01-preview', openai_proxy_url: Optional[str] = None, mode: str = 'none', logprobs: bool | None = False, @@ -885,7 +901,7 @@ def __init__( think_tag: str = '', max_workers: Optional[int] = None, openai_extra_kwargs: Dict | None = None, - use_azure_identity: bool = False, + reasoning_effort: Optional[str] = None, ): super().__init__( path, @@ -906,9 +922,8 @@ def __init__( extra_body, verbose=verbose, max_workers=max_workers, - use_azure_identity=use_azure_identity, ) - from openai import OpenAI + from openai import OpenAI, AzureOpenAI # support multiple api_base for acceleration if isinstance(openai_api_base, List): @@ -924,13 +939,12 @@ def __init__( } # Initialize OpenAI client with appropriate authentication - if use_azure_identity: - # When using Azure identity, get token dynamically - # Note: The OpenAI SDK client will be 
updated with fresh tokens - # in the _generate method for each request - self.openai_client = OpenAI( - base_url=self.openai_api_base, - api_key='placeholder', # Will be replaced with Azure token + if azure_endpoint: + self.openai_client = AzureOpenAI( + azure_endpoint=self.azure_endpoint, + api_key=key if not self.azure_credential else None, + api_version=azure_api_version, + token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") if self.azure_credential else None, http_client=httpx.Client( **http_client_cfg) if http_client_cfg else None, ) @@ -942,6 +956,14 @@ def __init__( **http_client_cfg) if http_client_cfg else None, ) + if reasoning_effort is not None: + reasoning_effort = reasoning_effort.lower() + if reasoning_effort not in self.VALID_REASONING_EFFORTS: + raise ValueError( + f'Invalid reasoning_effort: {reasoning_effort}. ' + f'Must be one of {self.VALID_REASONING_EFFORTS}') + self.reasoning_effort = reasoning_effort + def _generate( self, input: PromptList | str, @@ -984,6 +1006,7 @@ def _generate( messages=messages, extra_body=self.extra_body, ) + query_data['reasoning_effort'] = self.reasoning_effort else: query_data = dict( model=self.path, diff --git a/opencompass/models/openai_streaming.py b/opencompass/models/openai_streaming.py index 904c6a6eb..86360827a 100644 --- a/opencompass/models/openai_streaming.py +++ b/opencompass/models/openai_streaming.py @@ -39,6 +39,8 @@ def __init__(self, org: str | List[str] | None = None, meta_template: Dict | None = None, openai_api_base: str | List[str] = OPENAISDK_API_BASE, + azure_endpoint: Optional[str] = None, + azure_api_version: Optional[str] = '2024-12-01-preview', openai_proxy_url: Optional[str] = None, mode: str = 'none', logprobs: bool | None = False, @@ -55,7 +57,8 @@ def __init__(self, stream_chunk_size: int = 1, timeout: int = 3600, finish_reason_confirm: bool = True, - max_workers: Optional[int] = None): + max_workers: Optional[int] = 
None, + reasoning_effort: Optional[str] = None): super().__init__( path=path, @@ -67,6 +70,8 @@ def __init__(self, org=org, meta_template=meta_template, openai_api_base=openai_api_base, + azure_endpoint=azure_endpoint, + azure_api_version=azure_api_version, openai_proxy_url=openai_proxy_url, mode=mode, logprobs=logprobs, @@ -79,6 +84,7 @@ def __init__(self, status_code_mappings=status_code_mappings, think_tag=think_tag, max_workers=max_workers, + reasoning_effort=reasoning_effort, ) self.stream = stream @@ -91,23 +97,29 @@ def _create_fresh_client(self): """Create a fresh OpenAI client for each request to avoid concurrency issues.""" import httpx - from openai import OpenAI + from openai import OpenAI, AzureOpenAI + from azure.identity import DefaultAzureCredential, get_bearer_token_provider # Get current key (with key rotation) - with Lock(): - if len(self.invalid_keys) == len(self.keys): - raise RuntimeError('All keys have insufficient quota.') + if self.azure_credential: + token = self.azure_credential.get_token( + 'https://cognitiveservices.azure.com/.default') + current_key = token.token + else: + with Lock(): + if len(self.invalid_keys) == len(self.keys): + raise RuntimeError('All keys have insufficient quota.') - # find the next valid key - while True: - self.key_ctr += 1 - if self.key_ctr == len(self.keys): - self.key_ctr = 0 + # find the next valid key + while True: + self.key_ctr += 1 + if self.key_ctr == len(self.keys): + self.key_ctr = 0 - if self.keys[self.key_ctr] not in self.invalid_keys: - break + if self.keys[self.key_ctr] not in self.invalid_keys: + break - current_key = self.keys[self.key_ctr] + current_key = self.keys[self.key_ctr] # Create fresh client with current key http_client_cfg = {} @@ -117,13 +129,24 @@ def _create_fresh_client(self): 'https://': self.proxy_url, } - return OpenAI( - base_url=self.openai_api_base, - api_key=current_key, - http_client=httpx.Client(**http_client_cfg, - timeout=httpx.Timeout(self.timeout)) - if 
http_client_cfg or True else None, - ) + if self.azure_endpoint: + return AzureOpenAI( + azure_endpoint=self.azure_endpoint, + api_key=current_key if not self.azure_credential else None, + api_version=self.azure_api_version, + azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") if self.azure_credential else None, + http_client=httpx.Client(**http_client_cfg, + timeout=httpx.Timeout(self.timeout)) + if http_client_cfg or True else None, + ) + else: + return OpenAI( + base_url=self.openai_api_base, + api_key=current_key, + http_client=httpx.Client(**http_client_cfg, + timeout=httpx.Timeout(self.timeout)) + if http_client_cfg or True else None, + ) def _generate( self, @@ -170,6 +193,7 @@ def _generate( extra_body=self.extra_body, stream=self.stream, # Enable streaming ) + query_data['reasoning_effort'] = self.reasoning_effort else: query_data = dict( model=self.path, From 8d0cd18acb4f08e7f147799845e03b5c4772a675 Mon Sep 17 00:00:00 2001 From: Josh Bradley Date: Tue, 24 Mar 2026 02:16:10 -0400 Subject: [PATCH 4/4] add support for image input --- opencompass/models/base_api.py | 5 +++++ opencompass/models/openai_api.py | 18 +++++++++++++++++- opencompass/utils/prompt.py | 5 +++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/opencompass/models/base_api.py b/opencompass/models/base_api.py index ab33c609f..4d6951e31 100644 --- a/opencompass/models/base_api.py +++ b/opencompass/models/base_api.py @@ -310,6 +310,9 @@ def parse_template(self, prompt_template: PromptType, for item in prompt[1:]: if item['role'] == last_role: new_prompt[-1]['prompt'] += '\n' + item['prompt'] + if item.get('image'): + existing = new_prompt[-1].get('image', []) + new_prompt[-1]['image'] = existing + item['image'] else: last_role = item['role'] new_prompt.append(item) @@ -452,6 +455,8 @@ def _role2api_role(self, res['prompt'] = merged_prompt.get('begin', '') res['prompt'] += merged_prompt.get('prompt', '') 
res['prompt'] += merged_prompt.get('end', '') + if merged_prompt.get('image'): + res['image'] = merged_prompt['image'] return res, True diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py index 1ce5c1fd2..488e01eb0 100644 --- a/opencompass/models/openai_api.py +++ b/opencompass/models/openai_api.py @@ -586,13 +586,29 @@ def bin_trim_wrapper(text): if mode != 'none': input_content = bin_trim_wrapper(input_content) processed_prompts.append(input_content) - msg = {'content': input_content} + msg = {} if item['role'] == 'HUMAN': msg['role'] = 'user' elif item['role'] == 'BOT': msg['role'] = 'assistant' elif item['role'] == 'SYSTEM': msg['role'] = 'system' + # Build multi-part content when images are present + images = [ + img for img in item.get('image', []) if img + ] + if images: + content_parts = [ + {'type': 'text', 'text': input_content} + ] + for img_url in images: + content_parts.append({ + 'type': 'image_url', + 'image_url': {'url': img_url}, + }) + msg['content'] = content_parts + else: + msg['content'] = input_content messages.append(msg) input_len = sum( get_token_len_func(prompt) for prompt in processed_prompts) diff --git a/opencompass/utils/prompt.py b/opencompass/utils/prompt.py index cef6a31dd..830aae51a 100644 --- a/opencompass/utils/prompt.py +++ b/opencompass/utils/prompt.py @@ -99,6 +99,11 @@ def format(self, **kwargs) -> PromptList: new_item = deepcopy(item) if 'prompt' in item: new_item['prompt'] = safe_format(item['prompt'], **kwargs) + if 'image' in item: + new_item['image'] = [ + safe_format(img, **kwargs) + for img in item['image'] + ] new_list.append(new_item) else: new_list.append(safe_format(item, **kwargs))