diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index fd5f88652..775570f8f 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -279,6 +279,10 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: ENV_RERANKER_SILICONFLOW_MODEL = "HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL" ENV_RERANKER_SILICONFLOW_BASE_URL = "HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL" +# Alibaba Cloud DashScope configuration (reranker only) +ENV_RERANKER_ALIBABA_API_KEY = "HINDSIGHT_API_RERANKER_ALIBABA_API_KEY" +ENV_RERANKER_ALIBABA_MODEL = "HINDSIGHT_API_RERANKER_ALIBABA_MODEL" + # Google Discovery Engine reranker configuration ENV_RERANKER_GOOGLE_MODEL = "HINDSIGHT_API_RERANKER_GOOGLE_MODEL" ENV_RERANKER_GOOGLE_PROJECT_ID = "HINDSIGHT_API_RERANKER_GOOGLE_PROJECT_ID" @@ -547,6 +551,8 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: DEFAULT_RERANKER_SILICONFLOW_MODEL = "BAAI/bge-reranker-v2-m3" DEFAULT_RERANKER_SILICONFLOW_BASE_URL = "https://api.siliconflow.cn/v1" +DEFAULT_RERANKER_ALIBABA_MODEL = "qwen3-rerank" + DEFAULT_RERANKER_GOOGLE_MODEL = "semantic-ranker-default-004" # Vector extension (pgvector, vchord, pgvectorscale, or AlloyDB ScaNN) @@ -1012,6 +1018,8 @@ class HindsightConfig: reranker_siliconflow_api_key: str | None reranker_siliconflow_model: str reranker_siliconflow_base_url: str + reranker_alibaba_api_key: str | None + reranker_alibaba_model: str reranker_google_model: str reranker_google_project_id: str | None reranker_google_service_account_key: str | None @@ -1648,6 +1656,9 @@ def from_env(cls) -> "HindsightConfig": reranker_siliconflow_base_url=os.getenv( ENV_RERANKER_SILICONFLOW_BASE_URL, DEFAULT_RERANKER_SILICONFLOW_BASE_URL ), + # Alibaba Cloud DashScope reranker + reranker_alibaba_api_key=os.getenv(ENV_RERANKER_ALIBABA_API_KEY), + reranker_alibaba_model=os.getenv(ENV_RERANKER_ALIBABA_MODEL, DEFAULT_RERANKER_ALIBABA_MODEL), # 
class AlibabaCloudCrossEncoder(CrossEncoderModel):
    """
    Alibaba Cloud DashScope text reranking API.

    Sends rerank requests to DashScope's Cohere-compatible ``/reranks``
    endpoint (``RERANK_URL``) through ``_CohereCompatibleRerankClient``.

    Authentication uses the ``api_key`` passed to the constructor.
    ``create_cross_encoder_from_env`` supplies it from the
    HINDSIGHT_API_RERANKER_ALIBABA_API_KEY setting and raises if it is unset.

    NOTE(review): neither this class, the factory, nor the visible config
    loading reads DASHSCOPE_API_KEY. If that fallback is wanted, it should be
    added where the configuration is read from the environment.

    See: https://help.aliyun.com/zh/model-studio/text-rerank-api
    """

    RERANK_URL = "https://dashscope.aliyuncs.com/compatible-api/v1/reranks"

    def __init__(
        self,
        api_key: str,
        model: str = DEFAULT_RERANKER_ALIBABA_MODEL,
        timeout: float = 60.0,
    ):
        """
        Build the provider around a shared Cohere-compatible rerank client.

        Args:
            api_key: DashScope API key; must be non-empty.
            model: Rerank model name forwarded to the client.
            timeout: Timeout value forwarded to the client.

        Raises:
            ValueError: If ``api_key`` is empty.
        """
        if not api_key:
            # Fail at construction rather than deferring the problem to the
            # first rerank request. Same exception type the factory raises.
            raise ValueError(f"{ENV_RERANKER_ALIBABA_API_KEY} is required for the 'alibaba' reranker provider")
        self.model = model
        self._client = _CohereCompatibleRerankClient(
            api_key=api_key,
            model=model,
            rerank_url=self.RERANK_URL,
            timeout=timeout,
            # Explicitly keep the client's default for this flag.
            include_return_documents=False,
        )

    @property
    def provider_name(self) -> str:
        """Return the provider identifier, ``"alibaba"``."""
        return "alibaba"

    async def initialize(self) -> None:
        """Initialize the underlying client once; later calls are no-ops."""
        # The client starts with ``_async_client`` set to None; a non-None
        # value is presumably assigned by its initialize() -- verify there.
        if self._client._async_client is not None:
            return
        logger.info(f"Reranker: initializing Alibaba Cloud provider with model {self.model}")
        await self._client.initialize()
        logger.info("Reranker: Alibaba Cloud provider initialized")

    async def predict(self, pairs: list[tuple[str, str]]) -> list[float]:
        """Delegate scoring of ``pairs`` to the shared client and return its result."""
        return await self._client.predict(pairs)
Supported: 'local', 'tei', 'cohere', 'zeroentropy', 'siliconflow', 'alibaba', 'google', 'flashrank', 'litellm', 'litellm-sdk', 'rrf', 'jina-mlx'" ) diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index fe7a7ffcb..cb2fa3c03 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -605,7 +605,7 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c | Variable | Description | Default | |----------|-------------|---------| -| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` | +| `HINDSIGHT_API_RERANKER_PROVIDER` | Provider: `local`, `tei`, `cohere`, `openrouter`, `zeroentropy`, `siliconflow`, `alibaba`, `google`, `flashrank`, `litellm`, `litellm-sdk`, `jina-mlx`, or `rrf` | `local` | | `HINDSIGHT_API_RERANKER_LOCAL_MODEL` | Model for local provider | `cross-encoder/ms-marco-MiniLM-L-6-v2` | | `HINDSIGHT_API_RERANKER_LOCAL_MAX_CONCURRENT` | Max concurrent local reranking (prevents CPU thrashing under load) | `4` | | `HINDSIGHT_API_RERANKER_LOCAL_TRUST_REMOTE_CODE` | Allow loading models with custom code (security risk, disabled by default) | `false` | @@ -635,6 +635,8 @@ Google's `gemini-embedding-001` produces 3072 dimensions natively but supports c | `HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY` | SiliconFlow API key for reranking | - | | `HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL` | SiliconFlow rerank model (e.g., `BAAI/bge-reranker-v2-m3`) | `BAAI/bge-reranker-v2-m3` | | `HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL` | Base URL for the SiliconFlow `/rerank` endpoint | `https://api.siliconflow.cn/v1` | +| `HINDSIGHT_API_RERANKER_ALIBABA_API_KEY` | Alibaba Cloud DashScope API key for reranking | - | +| `HINDSIGHT_API_RERANKER_ALIBABA_MODEL` | DashScope rerank model | 
`qwen3-rerank` | | `HINDSIGHT_API_RERANKER_GOOGLE_PROJECT_ID` | Google Cloud project ID for Discovery Engine reranking (falls back to `HINDSIGHT_API_LLM_VERTEXAI_PROJECT_ID`) | - | | `HINDSIGHT_API_RERANKER_GOOGLE_MODEL` | Google Discovery Engine ranking model | `semantic-ranker-default-004` | | `HINDSIGHT_API_RERANKER_GOOGLE_SERVICE_ACCOUNT_KEY` | Path to service account JSON key (falls back to `HINDSIGHT_API_LLM_VERTEXAI_SERVICE_ACCOUNT_KEY`). If unset, uses ADC. | - | @@ -698,6 +700,11 @@ export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3 # export HINDSIGHT_API_RERANKER_SILICONFLOW_BASE_URL=https://api.siliconflow.cn/v1 # default +# Alibaba Cloud DashScope - qwen3-rerank via Cohere-compatible /reranks endpoint +export HINDSIGHT_API_RERANKER_PROVIDER=alibaba +export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key # required +export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit + # LiteLLM proxy - unified gateway for multiple reranking providers (requires running LiteLLM proxy server) export HINDSIGHT_API_RERANKER_PROVIDER=litellm export HINDSIGHT_API_RERANKER_LITELLM_API_BASE=http://localhost:4000 diff --git a/hindsight-docs/docs/developer/models.mdx b/hindsight-docs/docs/developer/models.mdx index 0c958e714..766d673f8 100644 --- a/hindsight-docs/docs/developer/models.mdx +++ b/hindsight-docs/docs/developer/models.mdx @@ -484,6 +484,7 @@ Reranks initial search results to improve precision. 
| `cohere` | Cohere rerank API | Production, high quality | | `zeroentropy` | ZeroEntropy rerank API (zerank-2) | Production, state-of-the-art accuracy | | `siliconflow` | SiliconFlow rerank API (Cohere-compatible `/rerank` endpoint) | Users in China or anyone on SiliconFlow's platform | +| `alibaba` | Alibaba Cloud DashScope rerank API (qwen3-rerank) | Users on Alibaba Cloud / DashScope | | `tei` | HuggingFace Text Embeddings Inference | Production, self-hosted | | `flashrank` | FlashRank (lightweight, fast) | Resource-constrained environments | | `litellm` | LiteLLM proxy (unified gateway) | Multi-provider setups | @@ -521,6 +522,14 @@ SiliconFlow hosts a range of open-weight rerankers behind a Cohere-compatible `/ | `BAAI/bge-reranker-v2-m3` | Multilingual, strong default | | `Qwen/Qwen3-Reranker-8B` | Larger, higher accuracy | +### Alibaba Cloud Models + +Alibaba Cloud DashScope exposes `qwen3-rerank` via a Cohere-compatible `/reranks` endpoint: + +| Model | Use Case | +|-------|----------| +| `qwen3-rerank` | 100+ languages, default | + ### LiteLLM Supported Providers LiteLLM supports multiple reranking providers via the `/rerank` endpoint: @@ -566,6 +575,11 @@ export HINDSIGHT_API_RERANKER_PROVIDER=siliconflow export HINDSIGHT_API_RERANKER_SILICONFLOW_API_KEY=your-api-key export HINDSIGHT_API_RERANKER_SILICONFLOW_MODEL=BAAI/bge-reranker-v2-m3 # default, can omit +# Alibaba Cloud DashScope (qwen3-rerank) +export HINDSIGHT_API_RERANKER_PROVIDER=alibaba +export HINDSIGHT_API_RERANKER_ALIBABA_API_KEY=your-dashscope-api-key +export HINDSIGHT_API_RERANKER_ALIBABA_MODEL=qwen3-rerank # default, can omit + # TEI (self-hosted) export HINDSIGHT_API_RERANKER_PROVIDER=tei export HINDSIGHT_API_RERANKER_TEI_URL=http://localhost:8081