|
1 | 1 | # pageindex/client.py |
2 | 2 | from __future__ import annotations |
3 | 3 | from pathlib import Path |
| 4 | +from typing import Any, Iterator |
| 5 | + |
| 6 | +from typing_extensions import deprecated |
| 7 | + |
4 | 8 | from .collection import Collection |
5 | 9 | from .config import IndexConfig |
| 10 | +from .errors import PageIndexAPIError |
6 | 11 | from .parser.protocol import DocumentParser |
7 | 12 |
|
# Notice attached to every pageindex 0.2.x compatibility method; it points
# callers at the Collection-based API instead.
_LEGACY_SDK_MSG = (
    "Legacy compatibility — new code should prefer the Collection-based API "
    "(PageIndexClient.collection(...))."
)

# Reusable decorator for the legacy methods. The category is
# PendingDeprecationWarning rather than the decorator's default.
_legacy_sdk = deprecated(
    _LEGACY_SDK_MSG,
    category=PendingDeprecationWarning,
)
| 18 | + |
8 | 19 |
|
9 | 20 | def _normalize_retrieve_model(model: str) -> str: |
10 | 21 | """Preserve supported Agents SDK prefixes and route other provider paths via LiteLLM.""" |
@@ -39,21 +50,34 @@ class PageIndexClient: |
39 | 50 | # Or use LocalClient / CloudClient for explicit mode selection |
40 | 51 | """ |
41 | 52 |
|
42 | | - def __init__(self, api_key: str = None, model: str = None, |
| 53 | + BASE_URL = "https://api.pageindex.ai" |
| 54 | + |
def __init__(self, api_key: str | None = None, model: str | None = None,
             retrieve_model: str | None = None, storage_path: str | None = None,
             storage=None, index_config: IndexConfig | dict | None = None):
    """Create a client in cloud or local mode, chosen by ``api_key``.

    Args:
        api_key: Key for cloud mode. ``None`` selects local mode. An empty
            string is treated like ``None`` after logging a warning.
        model: Forwarded to ``_init_local`` in local mode.
        retrieve_model: Forwarded to ``_init_local`` in local mode.
        storage_path: Forwarded to ``_init_local`` in local mode.
        storage: Forwarded to ``_init_local`` in local mode.
        index_config: Forwarded to ``_init_local`` in local mode.

    In cloud mode only ``api_key`` is passed on; the remaining arguments
    are not used.
    """
    if api_key == "":
        # An empty key is most likely a missing environment variable, so
        # warn before falling back to local mode.
        import logging
        logging.getLogger(__name__).warning(
            "PageIndexClient received an empty api_key; falling back to local mode. "
            "Pass api_key=None to silence this warning, or provide a real key for cloud mode."
        )
        api_key = None

    if api_key is None:
        self._init_local(model, retrieve_model, storage_path, storage, index_config)
        return

    self._init_cloud(api_key)
49 | 69 |
|
def _init_cloud(self, api_key: str):
    """Configure cloud mode for this client.

    Stores a ``CloudBackend`` on ``self._backend`` and a ``LegacyCloudAPI``
    (pointed at ``self.BASE_URL``) on ``self._legacy_cloud_api``, both
    built from ``api_key``.
    """
    # Imported at call time, so these modules load only in cloud mode.
    from .backend.cloud import CloudBackend
    from .cloud_api import LegacyCloudAPI

    self._backend = CloudBackend(api_key=api_key)
    self._legacy_cloud_api = LegacyCloudAPI(
        base_url=self.BASE_URL,
        api_key=api_key,
    )
53 | 75 |
|
54 | 76 | def _init_local(self, model: str = None, retrieve_model: str = None, |
55 | 77 | storage_path: str = None, storage=None, |
56 | 78 | index_config: IndexConfig | dict = None): |
| 79 | + self._legacy_cloud_api = None |
| 80 | + |
57 | 81 | # Build IndexConfig: merge model/retrieve_model with index_config |
58 | 82 | overrides = {} |
59 | 83 | if model: |
@@ -123,6 +147,124 @@ def register_parser(self, parser: DocumentParser) -> None: |
123 | 147 | raise PageIndexError("Custom parsers are not supported in cloud mode") |
124 | 148 | self._backend.register_parser(parser) |
125 | 149 |
|
def _require_cloud_api(self):
    """Return the legacy cloud API object, or fail if it is unavailable.

    Returns:
        The object stored on ``self._legacy_cloud_api``.

    Raises:
        PageIndexAPIError: If ``self._legacy_cloud_api`` is ``None`` or was
            never set on this instance.
    """
    # getattr with a default: an instance whose __init__ bypassed
    # _init_cloud/_init_local gets the same clear error rather than
    # an AttributeError.
    legacy_api = getattr(self, "_legacy_cloud_api", None)
    if legacy_api is None:
        # PageIndexAPIError comes from the module-level import.
        raise PageIndexAPIError(
            "This method is part of the pageindex 0.2.x cloud SDK API. "
            "Initialize with api_key to use it."
        )
    return legacy_api
| 158 | + |
| 159 | + # ── pageindex 0.2.x cloud SDK compatibility (prefer Collection API for new code) ── |
@_legacy_sdk
def submit_document(
    self,
    file_path: str,
    mode: str | None = None,
    beta_headers: list[str] | None = None,
    folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``client.collection(...).add(path)``.

    Passes every argument by keyword to the legacy cloud API's
    ``submit_document`` and returns its result. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the legacy
    cloud API is unavailable.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.submit_document(
        file_path=file_path,
        mode=mode,
        beta_headers=beta_headers,
        folder_id=folder_id,
    )
| 175 | + |
@_legacy_sdk
def get_ocr(self, doc_id: str, format: str = "page") -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.get_page_content(doc_id, pages)``.

    Delegates to the legacy cloud API's ``get_ocr`` with ``doc_id`` and
    ``format`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_ocr(doc_id=doc_id, format=format)
| 180 | + |
@_legacy_sdk
def get_tree(self, doc_id: str, node_summary: bool = False) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.get_document_structure(doc_id)``.

    Delegates to the legacy cloud API's ``get_tree`` with ``doc_id`` and
    ``node_summary`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_tree(doc_id=doc_id, node_summary=node_summary)
| 185 | + |
@_legacy_sdk
def is_retrieval_ready(self, doc_id: str) -> bool:
    """Legacy 0.2.x call; the Collection API handles readiness internally.

    Delegates to the legacy cloud API's ``is_retrieval_ready`` for
    ``doc_id`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.is_retrieval_ready(doc_id=doc_id)
| 190 | + |
@_legacy_sdk
def submit_query(self, doc_id: str, query: str, thinking: bool = False) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.query(question, doc_ids=[doc_id])``.

    Delegates to the legacy cloud API's ``submit_query`` with ``doc_id``,
    ``query`` and ``thinking`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.submit_query(doc_id=doc_id, query=query, thinking=thinking)
| 199 | + |
@_legacy_sdk
def get_retrieval(self, retrieval_id: str) -> dict[str, Any]:
    """Legacy 0.2.x call; the Collection API returns answers synchronously.

    Delegates to the legacy cloud API's ``get_retrieval`` for
    ``retrieval_id`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_retrieval(retrieval_id=retrieval_id)
| 204 | + |
@_legacy_sdk
def chat_completions(
    self,
    messages: list[dict[str, str]],
    stream: bool = False,
    doc_id: str | list[str] | None = None,
    temperature: float | None = None,
    stream_metadata: bool = False,
    enable_citations: bool = False,
) -> dict[str, Any] | Iterator[str] | Iterator[dict[str, Any]]:
    """Legacy 0.2.x call; new code should use ``collection.query(...)``.

    Passes every argument by keyword to the legacy cloud API's
    ``chat_completions`` and returns whatever that call returns.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.chat_completions(
        messages=messages,
        stream=stream,
        doc_id=doc_id,
        temperature=temperature,
        stream_metadata=stream_metadata,
        enable_citations=enable_citations,
    )
| 224 | + |
@_legacy_sdk
def get_document(self, doc_id: str) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.get_document(doc_id)``.

    Delegates to the legacy cloud API's ``get_document`` for ``doc_id``
    and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_document(doc_id=doc_id)
| 229 | + |
@_legacy_sdk
def delete_document(self, doc_id: str) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.delete_document(doc_id)``.

    Delegates to the legacy cloud API's ``delete_document`` for ``doc_id``
    and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.delete_document(doc_id=doc_id)
| 234 | + |
@_legacy_sdk
def list_documents(
    self,
    limit: int = 50,
    offset: int = 0,
    folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``collection.list_documents()``.

    Delegates to the legacy cloud API's ``list_documents`` with ``limit``,
    ``offset`` and ``folder_id`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.list_documents(limit=limit, offset=offset, folder_id=folder_id)
| 248 | + |
@_legacy_sdk
def create_folder(
    self,
    name: str,
    description: str | None = None,
    parent_folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``client.collection(name)`` (auto-creates).

    Delegates to the legacy cloud API's ``create_folder`` with ``name``,
    ``description`` and ``parent_folder_id`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.create_folder(
        name=name,
        description=description,
        parent_folder_id=parent_folder_id,
    )
| 262 | + |
@_legacy_sdk
def list_folders(self, parent_folder_id: str | None = None) -> dict[str, Any]:
    """Legacy 0.2.x call; new code should use ``client.list_collections()``.

    Delegates to the legacy cloud API's ``list_folders`` with
    ``parent_folder_id`` and returns its result.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.list_folders(parent_folder_id=parent_folder_id)
| 267 | + |
126 | 268 |
|
127 | 269 | class LocalClient(PageIndexClient): |
128 | 270 | """Local mode — indexes and queries documents on your machine. |
|
0 commit comments