Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 29 additions & 25 deletions app/i18n.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from typing import Optional

from openai import OpenAI
from openai.lib.azure import AzureOpenAI
from slack_bolt import BoltContext

from .openai_api_utils import (
build_openai_client,
is_search_model,
sampling_kwargs,
token_budget_kwarg,
)
from .openai_constants import GPT_4O_MINI_MODEL

# All the languages supported by the Slack app as of March 2023
Expand Down Expand Up @@ -31,6 +35,9 @@ def from_locale_to_lang(locale: Optional[str]) -> Optional[str]:


_translation_result_cache = {}
TRANSLATION_MODEL = GPT_4O_MINI_MODEL
TRANSLATION_TEMPERATURE = 1
TRANSLATION_TOKEN_BUDGET = 1024


def translate(*, openai_api_key: Optional[str], context: BoltContext, text: str) -> str:
Expand All @@ -44,21 +51,16 @@ def translate(*, openai_api_key: Optional[str], context: BoltContext, text: str)
cached_result = _translation_result_cache.get(f"{lang}:{text}")
if cached_result is not None:
return cached_result
if context.get("OPENAI_API_TYPE") == "azure":
client = AzureOpenAI(
api_key=openai_api_key,
api_version=context.get("OPENAI_API_VERSION"),
azure_endpoint=context.get("OPENAI_API_BASE"),
azure_deployment=context.get("OPENAI_DEPLOYMENT_ID"),
)
else:
client = OpenAI(
api_key=openai_api_key,
base_url=context.get("OPENAI_API_BASE"),
)
response = client.chat.completions.create(
model=GPT_4O_MINI_MODEL,
messages=[
client = build_openai_client(
openai_api_key=openai_api_key,
openai_api_type=context.get("OPENAI_API_TYPE"),
openai_api_base=context.get("OPENAI_API_BASE"),
openai_api_version=context.get("OPENAI_API_VERSION"),
openai_deployment_id=context.get("OPENAI_DEPLOYMENT_ID"),
)
request_kwargs = {
"model": TRANSLATION_MODEL,
"messages": [
{
"role": "system",
"content": "You're the AI model that primarily focuses on the quality of language translation. "
Expand All @@ -77,14 +79,16 @@ def translate(*, openai_api_key: Optional[str], context: BoltContext, text: str)
f"Here is the original sentence you need to translate:\n{text}",
},
],
top_p=1,
n=1,
max_tokens=1024,
temperature=1,
presence_penalty=0,
frequency_penalty=0,
logit_bias={},
user="system",
"user": "system",
}
if not is_search_model(TRANSLATION_MODEL):
request_kwargs["n"] = 1
request_kwargs.update(
token_budget_kwarg(TRANSLATION_MODEL, TRANSLATION_TOKEN_BUDGET)
)
request_kwargs.update(sampling_kwargs(TRANSLATION_MODEL, TRANSLATION_TEMPERATURE))
response = client.chat.completions.create(
**request_kwargs,
)
translated_text = response.model_dump()["choices"][0]["message"].get("content")
_translation_result_cache[f"{lang}:{text}"] = translated_text
Expand Down
90 changes: 90 additions & 0 deletions app/openai_api_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from typing import Optional, Dict, Union

from openai import OpenAI
from openai.lib.azure import AzureOpenAI


def is_reasoning_model(model: Optional[str]) -> bool:
    """Return True if the model is a reasoning model under Chat Completions.

    The check is case-insensitive and safe for ``None`` or empty names.

    Args:
        model: The model name to classify; may be ``None`` or empty.

    Returns:
        ``True`` for names starting with ``o1``, ``o3``, ``o4`` or ``gpt-5``,
        except gpt-5 names containing ``-chat`` or ``-search``; ``False``
        otherwise, including for a missing name.
    """
    if not model:
        return False
    ml = model.lower()
    # Treat any gpt-5 family chat/search variants (including numbered updates)
    # as regular chat models so they keep sampling params.
    if ml.startswith("gpt-5") and ("-chat" in ml or "-search" in ml):
        return False
    # One tuple-prefix call replaces the chained ``or`` of startswith() checks.
    return ml.startswith(("o1", "o3", "o4", "gpt-5"))


def is_search_model(model: Optional[str]) -> bool:
    """Tell whether ``model`` names a search-specific chat model.

    A missing or empty name is never a search model; otherwise the name is
    matched case-insensitively against the ``gpt-5-search`` prefix.
    """
    return bool(model) and model.lower().startswith("gpt-5-search")


def normalize_base_url(value: Optional[str]) -> Optional[str]:
    """Collapse a missing, empty or whitespace-only base URL to ``None``.

    Any other value is returned with surrounding whitespace stripped.
    """
    stripped = "" if value is None else value.strip()
    if not stripped:
        return None
    return stripped


def token_budget_kwarg(model: Optional[str], budget: int) -> Dict[str, int]:
    """Build the single-entry kwarg dict that carries the token budget.

    Names starting with ``gpt-5`` (case-insensitive) and anything
    ``is_reasoning_model`` accepts get ``max_completion_tokens``; every
    other model, including a missing name, gets ``max_tokens``.
    """
    in_gpt5_family = bool(model) and model.lower().startswith("gpt-5")
    if in_gpt5_family or is_reasoning_model(model):
        key = "max_completion_tokens"
    else:
        key = "max_tokens"
    return {key: budget}


def sampling_kwargs(
    model: Optional[str], temperature: float
) -> Dict[str, Union[float, Dict]]:
    """Build the sampling kwargs to send for ``model``.

    Reasoning models, search models, and names starting with ``gpt-5.1``,
    ``gpt-5.2`` or ``gpt-5.3`` get an empty dict. Every other model gets the
    full set: the given temperature, zero presence and frequency penalties,
    an empty ``logit_bias`` and ``top_p`` of 1.
    """
    if is_reasoning_model(model) or is_search_model(model):
        return {}

    lowered = (model or "").lower()
    no_sampling_prefixes = ("gpt-5.1", "gpt-5.2", "gpt-5.3")
    if lowered.startswith(no_sampling_prefixes):
        return {}

    return dict(
        temperature=temperature,
        presence_penalty=0,
        frequency_penalty=0,
        logit_bias={},
        top_p=1,
    )


def build_openai_client(
    *,
    openai_api_key: Optional[str],
    openai_api_type: Optional[str],
    openai_api_base: Optional[str],
    openai_api_version: Optional[str],
    openai_deployment_id: Optional[str],
    openai_organization_id: Optional[str] = None,
) -> Union[OpenAI, AzureOpenAI]:
    """Construct the SDK client matching the configured API type.

    Args:
        openai_api_key: API key handed to the client. Annotated as optional
            because callers forward values that may be ``None``.
            NOTE(review): how the SDK treats a ``None`` key is not visible
            here; confirm against the SDK documentation.
        openai_api_type: ``"azure"`` selects ``AzureOpenAI``; any other
            value, including ``None``, selects ``OpenAI``.
        openai_api_base: Azure endpoint for the Azure client, or the base URL
            for the standard client.
        openai_api_version: API version; used only by the Azure client.
        openai_deployment_id: Deployment name; used only by the Azure client.
        openai_organization_id: Organization; used only by the standard
            client.

    Returns:
        An ``AzureOpenAI`` or ``OpenAI`` client instance.
    """
    if openai_api_type == "azure":
        # The Azure endpoint is passed through exactly as given.
        return AzureOpenAI(
            api_key=openai_api_key,
            api_version=openai_api_version,
            azure_endpoint=openai_api_base,
            azure_deployment=openai_deployment_id,
        )
    return OpenAI(
        api_key=openai_api_key,
        # Blank or whitespace-only base URLs are sent as None.
        base_url=normalize_base_url(openai_api_base),
        organization=openai_organization_id,
    )
105 changes: 34 additions & 71 deletions app/openai_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,22 @@
from typing import List, Dict, Tuple, Optional, Union
from importlib import import_module

from openai import OpenAI, Stream
from openai.lib.azure import AzureOpenAI
from openai import Stream
from openai.types import Completion
import tiktoken

from slack_bolt import BoltContext
from slack_sdk.web import WebClient, SlackResponse

from app.markdown_conversion import slack_to_markdown, markdown_to_slack
from app.openai_api_utils import (
build_openai_client,
is_reasoning_model,
is_search_model,
normalize_base_url,
sampling_kwargs,
token_budget_kwarg,
)
from app.openai_constants import (
MAX_TOKENS,
MODEL_TOKENS,
Expand Down Expand Up @@ -89,34 +96,17 @@ def _is_reasoning(model: str) -> bool:
Excludes chat models like gpt-5-chat-latest, gpt-5.1-chat-latest, gpt-5.2-chat-latest, and gpt-5-search-api.
Matches o3*, o4*, and non-chat gpt-5* families. Case-insensitive and safe with None/empty.
"""
if not model:
return False
ml = model.lower()
# Treat any gpt-5 family chat/search variants (including numbered updates)
# as regular chat models so they keep sampling params.
if ml.startswith("gpt-5") and ("-chat" in ml or "-search" in ml):
return False
return (
ml.startswith("o1")
or ml.startswith("o3")
or ml.startswith("o4")
or ml.startswith("gpt-5")
)
return is_reasoning_model(model)


def _is_search_model(model: str) -> bool:
"""Returns True for search-specific chat models."""
if not model:
return False
return model.lower().startswith("gpt-5-search")
return is_search_model(model)


def _normalize_base_url(value: Optional[str]) -> Optional[str]:
"""Normalizes falsy/empty base URLs to None for SDK compatibility."""
if value is None:
return None
trimmed = value.strip()
return trimmed or None
return normalize_base_url(value)


def _token_budget_kwarg(model: str, budget: int) -> Dict[str, int]:
Expand All @@ -128,36 +118,19 @@ def _token_budget_kwarg(model: str, budget: int) -> Dict[str, int]:
max_completion_tokens.
- Legacy chat models still accept max_tokens.
"""
should_use_completion_tokens = (
(model and model.lower().startswith("gpt-5")) or _is_reasoning(model)
)

return (
{"max_completion_tokens": budget}
if should_use_completion_tokens
else {"max_tokens": budget}
)
return token_budget_kwarg(model, budget)


def _sampling_kwargs(model: Optional[str], temperature: float) -> Dict[str, Union[float, Dict]]:
def _sampling_kwargs(
model: Optional[str], temperature: float
) -> Dict[str, Union[float, Dict]]:
"""Returns sampling-related kwargs supported by the given model.

- GPT-5.1/5.2/5.3 chat variants drop sampling knobs (stay at provider defaults).
- Search and reasoning models drop sampling altogether.
- Legacy chat models retain the full sampling set (temperature/top_p/penalties/logit_bias).
"""
ml = model.lower() if model else ""
if _is_reasoning(model) or _is_search_model(model):
return {}
if ml.startswith(("gpt-5.1", "gpt-5.2", "gpt-5.3")):
return {}
return {
"temperature": temperature,
"presence_penalty": 0,
"frequency_penalty": 0,
"logit_bias": {},
"top_p": 1,
}
return sampling_kwargs(model, temperature)


def _create_chat_completion(
Expand All @@ -182,19 +155,14 @@ def _create_chat_completion(
relevant to their respective behavior (e.g., timeout for sync only,
function calls for streaming only).
"""
if openai_api_type == "azure":
client = AzureOpenAI(
api_key=openai_api_key,
api_version=openai_api_version,
azure_endpoint=openai_api_base,
azure_deployment=openai_deployment_id,
)
else:
client = OpenAI(
api_key=openai_api_key,
base_url=_normalize_base_url(openai_api_base),
organization=openai_organization_id,
)
client = build_openai_client(
openai_api_key=openai_api_key,
openai_api_type=openai_api_type,
openai_api_base=openai_api_base,
openai_api_version=openai_api_version,
openai_deployment_id=openai_deployment_id,
openai_organization_id=openai_organization_id,
)

# Guard against misuse: streaming calls should not pass timeout_seconds
if stream and timeout_seconds is not None:
Expand Down Expand Up @@ -766,17 +734,12 @@ def generate_chatgpt_response(
return content


def create_openai_client(context: BoltContext) -> Union[OpenAI, AzureOpenAI]:
if context.get("OPENAI_API_TYPE") == "azure":
return AzureOpenAI(
api_key=context.get("OPENAI_API_KEY"),
api_version=context.get("OPENAI_API_VERSION"),
azure_endpoint=context.get("OPENAI_API_BASE"),
azure_deployment=context.get("OPENAI_DEPLOYMENT_ID"),
)
else:
return OpenAI(
api_key=context.get("OPENAI_API_KEY"),
base_url=_normalize_base_url(context.get("OPENAI_API_BASE")),
organization=context.get("OPENAI_ORG_ID"),
)
def create_openai_client(context: BoltContext):
    """Build an OpenAI client from the settings stored on ``context``.

    Reads the ``OPENAI_*`` entries from the context and forwards them to
    ``build_openai_client``, returning whatever that helper returns.
    """
    setting = context.get
    return build_openai_client(
        openai_api_key=setting("OPENAI_API_KEY"),
        openai_api_type=setting("OPENAI_API_TYPE"),
        openai_api_base=setting("OPENAI_API_BASE"),
        openai_api_version=setting("OPENAI_API_VERSION"),
        openai_deployment_id=setting("OPENAI_DEPLOYMENT_ID"),
        openai_organization_id=setting("OPENAI_ORG_ID"),
    )
Loading
Loading