Skip to content

Commit 595895c

Browse files
saccharin98XinyanZhousaccharin98KylinMountain
authored
feat: compatible with PageIndex SDK (#238)

* feat: compatible with PageIndex SDK
* corner cases fixed
* fix: mock behavior of old SDK
* fix: close streaming response and warn on empty api_key
  - LegacyCloudAPI: close response in `finally` for both _stream_chat_response variants so abandoned iterators no longer leak the TCP connection.
  - PageIndexClient: emit a warning instead of silently falling back to local when api_key is the empty string, surfacing typical env-var-unset misconfig.
  - FakeResponse: add close()/closed to match the real requests.Response API.
  - Add unit coverage for stream close (both paths) and the empty-api_key warning.
  - Add scripts/e2e_legacy_sdk.py to smoke-test the legacy SDK contract end-to-end against api.pageindex.ai.
* chore: mark legacy SDK methods with @deprecated and docstring pointers
  - Decorate the 12 PageIndexClient cloud-SDK compat methods with @typing_extensions.deprecated(..., category=PendingDeprecationWarning):
    - IDE/type-checkers render them with a strikethrough hint
    - runtime warnings stay silent by default (no spam for existing callers), surfaceable via `python -W default::PendingDeprecationWarning`
  - Add a one-line docstring on each pointing to the Collection-based equivalent.
  - Promote typing-extensions to a direct dependency (was transitive via litellm).

---------

Co-authored-by: XinyanZhou <xinyanzhou@XinyanZhoudeMacBook-Pro.local>
Co-authored-by: saccharin98 <xinyanzhou938@gmail.com>
Co-authored-by: mountain <kose2livs@gmail.com>
1 parent 6d29886 commit 595895c

10 files changed

Lines changed: 1030 additions & 20 deletions

File tree

pageindex/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .events import QueryEvent
1414
from .errors import (
1515
PageIndexError,
16+
PageIndexAPIError,
1617
CollectionNotFoundError,
1718
DocumentNotFoundError,
1819
IndexingError,
@@ -32,6 +33,7 @@
3233
"StorageEngine",
3334
"QueryEvent",
3435
"PageIndexError",
36+
"PageIndexAPIError",
3537
"CollectionNotFoundError",
3638
"DocumentNotFoundError",
3739
"IndexingError",

pageindex/client.py

Lines changed: 144 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11
# pageindex/client.py
22
from __future__ import annotations
33
from pathlib import Path
4+
from typing import Any, Iterator
5+
6+
from typing_extensions import deprecated
7+
48
from .collection import Collection
59
from .config import IndexConfig
10+
from .errors import PageIndexAPIError
611
from .parser.protocol import DocumentParser
712

13+
# Notice shared by every pageindex 0.2.x compatibility method on PageIndexClient.
_LEGACY_SDK_MSG = (
    "Legacy compatibility — new code should prefer the Collection-based API"
    " (PageIndexClient.collection(...))."
)

# Decorator applied to the legacy methods. The PendingDeprecationWarning
# category is ignored by Python's default warning filters, so existing callers
# see no runtime noise unless they opt in (e.g. -W default::PendingDeprecationWarning).
_legacy_sdk = deprecated(
    _LEGACY_SDK_MSG,
    category=PendingDeprecationWarning,
)
18+
819

920
def _normalize_retrieve_model(model: str) -> str:
1021
"""Preserve supported Agents SDK prefixes and route other provider paths via LiteLLM."""
@@ -39,21 +50,34 @@ class PageIndexClient:
3950
# Or use LocalClient / CloudClient for explicit mode selection
4051
"""
4152

42-
def __init__(self, api_key: str = None, model: str = None,
53+
BASE_URL = "https://api.pageindex.ai"
54+
55+
def __init__(self, api_key: str | None = None, model: str | None = None,
             retrieve_model: str | None = None, storage_path: str | None = None,
             storage=None, index_config: IndexConfig | dict | None = None):
    """Select cloud or local mode depending on ``api_key``.

    Args:
        api_key: ``None`` or ``""`` selects local mode (the empty string
            additionally logs a warning); any other value is passed to
            ``_init_cloud``.
        model: Forwarded to ``_init_local``; unused in cloud mode.
        retrieve_model: Forwarded to ``_init_local``; unused in cloud mode.
        storage_path: Forwarded to ``_init_local``; unused in cloud mode.
        storage: Forwarded to ``_init_local``; unused in cloud mode.
        index_config: Forwarded to ``_init_local``; unused in cloud mode.
    """
    if api_key == "":
        # An empty string usually means an unset environment variable; warn
        # instead of silently switching to local mode.
        import logging
        logging.getLogger(__name__).warning(
            "PageIndexClient received an empty api_key; falling back to local mode. "
            "Pass api_key=None to silence this warning, or provide a real key for cloud mode."
        )
        api_key = None
    if api_key is not None:
        self._init_cloud(api_key)
    else:
        self._init_local(model, retrieve_model, storage_path, storage, index_config)
4969

5070
def _init_cloud(self, api_key: str):
    """Create the cloud backend and the legacy cloud-SDK adapter for ``api_key``.

    Sets ``self._backend`` to a ``CloudBackend`` and ``self._legacy_cloud_api``
    to a ``LegacyCloudAPI`` pointed at ``self.BASE_URL``.
    """
    from .backend.cloud import CloudBackend
    from .cloud_api import LegacyCloudAPI

    self._backend = CloudBackend(api_key=api_key)
    legacy_api = LegacyCloudAPI(api_key=api_key, base_url=self.BASE_URL)
    self._legacy_cloud_api = legacy_api
5375

5476
def _init_local(self, model: str = None, retrieve_model: str = None,
5577
storage_path: str = None, storage=None,
5678
index_config: IndexConfig | dict = None):
79+
self._legacy_cloud_api = None
80+
5781
# Build IndexConfig: merge model/retrieve_model with index_config
5882
overrides = {}
5983
if model:
@@ -123,6 +147,124 @@ def register_parser(self, parser: DocumentParser) -> None:
123147
raise PageIndexError("Custom parsers are not supported in cloud mode")
124148
self._backend.register_parser(parser)
125149

150+
def _require_cloud_api(self):
    """Return the legacy cloud API adapter stored on this client.

    Returns:
        The object held in ``self._legacy_cloud_api``.

    Raises:
        PageIndexAPIError: If ``self._legacy_cloud_api`` is ``None``, i.e. the
            client was not initialized with an api_key.
    """
    legacy_api = self._legacy_cloud_api
    if legacy_api is None:
        # PageIndexAPIError is imported at module level, so no function-local
        # import is needed here.
        raise PageIndexAPIError(
            "This method is part of the pageindex 0.2.x cloud SDK API. "
            "Initialize with api_key to use it."
        )
    return legacy_api
158+
159+
# ── pageindex 0.2.x cloud SDK compatibility (prefer Collection API for new code) ──
160+
@_legacy_sdk
def submit_document(
    self,
    file_path: str,
    mode: str | None = None,
    beta_headers: list[str] | None = None,
    folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``client.collection(...).add(path)``.

    Forwards every argument by keyword to the legacy cloud API's
    ``submit_document`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    forwarded = {
        "file_path": file_path,
        "mode": mode,
        "beta_headers": beta_headers,
        "folder_id": folder_id,
    }
    return legacy_api.submit_document(**forwarded)
175+
176+
@_legacy_sdk
def get_ocr(self, doc_id: str, format: str = "page") -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.get_page_content(doc_id, pages)``.

    Forwards ``doc_id`` and ``format`` by keyword to the legacy cloud API's
    ``get_ocr`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_ocr(doc_id=doc_id, format=format)
180+
181+
@_legacy_sdk
def get_tree(self, doc_id: str, node_summary: bool = False) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.get_document_structure(doc_id)``.

    Forwards ``doc_id`` and ``node_summary`` by keyword to the legacy cloud
    API's ``get_tree`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_tree(doc_id=doc_id, node_summary=node_summary)
185+
186+
@_legacy_sdk
def is_retrieval_ready(self, doc_id: str) -> bool:
    """Legacy SDK compatibility — Collection API handles readiness internally.

    Forwards ``doc_id`` by keyword to the legacy cloud API's
    ``is_retrieval_ready`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.is_retrieval_ready(doc_id=doc_id)
190+
191+
@_legacy_sdk
def submit_query(self, doc_id: str, query: str, thinking: bool = False) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.query(question, doc_ids=[doc_id])``.

    Forwards ``doc_id``, ``query`` and ``thinking`` by keyword to the legacy
    cloud API's ``submit_query`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.submit_query(doc_id=doc_id, query=query, thinking=thinking)
199+
200+
@_legacy_sdk
def get_retrieval(self, retrieval_id: str) -> dict[str, Any]:
    """Legacy SDK compatibility — Collection API returns answers synchronously.

    Forwards ``retrieval_id`` by keyword to the legacy cloud API's
    ``get_retrieval`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_retrieval(retrieval_id=retrieval_id)
204+
205+
@_legacy_sdk
def chat_completions(
    self,
    messages: list[dict[str, str]],
    stream: bool = False,
    doc_id: str | list[str] | None = None,
    temperature: float | None = None,
    stream_metadata: bool = False,
    enable_citations: bool = False,
) -> dict[str, Any] | Iterator[str] | Iterator[dict[str, Any]]:
    """Legacy SDK compatibility — prefer ``collection.query(...)``.

    Forwards every argument by keyword to the legacy cloud API's
    ``chat_completions`` and returns whatever it returns (a dict or an
    iterator, per the return annotation). Raises ``PageIndexAPIError`` (via
    ``_require_cloud_api``) when the client was not initialized with an
    api_key.
    """
    legacy_api = self._require_cloud_api()
    forwarded = {
        "messages": messages,
        "stream": stream,
        "doc_id": doc_id,
        "temperature": temperature,
        "stream_metadata": stream_metadata,
        "enable_citations": enable_citations,
    }
    return legacy_api.chat_completions(**forwarded)
224+
225+
@_legacy_sdk
def get_document(self, doc_id: str) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.get_document(doc_id)``.

    Forwards ``doc_id`` by keyword to the legacy cloud API's
    ``get_document`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.get_document(doc_id=doc_id)
229+
230+
@_legacy_sdk
def delete_document(self, doc_id: str) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.delete_document(doc_id)``.

    Forwards ``doc_id`` by keyword to the legacy cloud API's
    ``delete_document`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.delete_document(doc_id=doc_id)
234+
235+
@_legacy_sdk
def list_documents(
    self,
    limit: int = 50,
    offset: int = 0,
    folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``collection.list_documents()``.

    Forwards ``limit``, ``offset`` and ``folder_id`` by keyword to the legacy
    cloud API's ``list_documents`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.list_documents(limit=limit, offset=offset, folder_id=folder_id)
248+
249+
@_legacy_sdk
def create_folder(
    self,
    name: str,
    description: str | None = None,
    parent_folder_id: str | None = None,
) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``client.collection(name)`` (auto-creates).

    Forwards ``name``, ``description`` and ``parent_folder_id`` by keyword to
    the legacy cloud API's ``create_folder`` and returns its result
    unchanged. Raises ``PageIndexAPIError`` (via ``_require_cloud_api``) when
    the client was not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    forwarded = {
        "name": name,
        "description": description,
        "parent_folder_id": parent_folder_id,
    }
    return legacy_api.create_folder(**forwarded)
262+
263+
@_legacy_sdk
def list_folders(self, parent_folder_id: str | None = None) -> dict[str, Any]:
    """Legacy SDK compatibility — prefer ``client.list_collections()``.

    Forwards ``parent_folder_id`` by keyword to the legacy cloud API's
    ``list_folders`` and returns its result unchanged. Raises
    ``PageIndexAPIError`` (via ``_require_cloud_api``) when the client was
    not initialized with an api_key.
    """
    legacy_api = self._require_cloud_api()
    return legacy_api.list_folders(parent_folder_id=parent_folder_id)
267+
126268

127269
class LocalClient(PageIndexClient):
128270
"""Local mode — indexes and queries documents on your machine.

0 commit comments

Comments
 (0)