Skip to content

Commit 7eb9463

Browse files
committed
fix: remove cloud dedup, real-time streaming, correct structure return type, legacy API fix
- Remove client-side dedup in CloudBackend (server responsibility)
- Cloud streaming: real-time via asyncio.Queue instead of batch-then-yield
- Fix get_document_structure return type: dict -> list, not-found returns []
- Fix legacy page_index() API: use IndexConfig instead of deleted ConfigLoader
- Add folder upgrade warning (once only)
- Demo: always upload, no client-side caching
1 parent 6a22262 commit 7eb9463

2 files changed

Lines changed: 15 additions & 15 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,8 @@ __pycache__
66
logs/
77
pageindex.egg-info/
88
*.db
9+
examples/workspace/files/
10+
examples/documents/attention.pdf
11+
examples/documents/deepseek-r1.pdf
912
venv/
1013
uv.lock

pageindex/backend/cloud.py

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,19 @@ def __init__(self, api_key: str):
2929
self._api_key = api_key
3030
self._headers = {"api_key": api_key}
3131
self._folder_id_cache: dict[str, str | None] = {}
32+
self._folder_warning_shown = False
3233

3334
# ── HTTP helpers ──────────────────────────────────────────────────────
3435

36+
def _warn_folder_upgrade(self) -> None:
37+
if not self._folder_warning_shown:
38+
logger.warning(
39+
"Folders (collections) require a Max plan. "
40+
"All documents are stored in a single global space — collection names are ignored. "
41+
"Upgrade at https://dash.pageindex.ai/subscription"
42+
)
43+
self._folder_warning_shown = True
44+
3545
def _request(self, method: str, path: str, **kwargs) -> dict:
3646
url = f"{API_BASE}{path}"
3747
for attempt in range(3):
@@ -72,9 +82,7 @@ def create_collection(self, name: str) -> None:
7282
self._folder_id_cache[name] = resp.get("folder", {}).get("id")
7383
except CloudAPIError as e:
7484
if "403" in str(e):
75-
logger.warning(
76-
"Folders require a Max plan. Upgrade at https://dash.pageindex.ai/subscription"
77-
)
85+
self._warn_folder_upgrade()
7886
self._folder_id_cache[name] = None
7987
else:
8088
raise
@@ -91,10 +99,7 @@ def get_or_create_collection(self, name: str) -> None:
9199
self._folder_id_cache[name] = resp.get("folder", {}).get("id")
92100
except CloudAPIError as e:
93101
if "403" in str(e):
94-
logger.warning(
95-
"Folders require a Max plan. Documents will be stored without folder organization. "
96-
"Upgrade at https://dash.pageindex.ai/subscription"
97-
)
102+
self._warn_folder_upgrade()
98103
self._folder_id_cache[name] = None
99104
else:
100105
raise
@@ -126,14 +131,6 @@ def delete_collection(self, name: str) -> None:
126131
# ── Document management ───────────────────────────────────────────────
127132

128133
def add_document(self, collection: str, file_path: str) -> str:
129-
file_name = os.path.basename(file_path)
130-
131-
# Dedup: check if a document with the same name already exists
132-
existing_docs = self.list_documents(collection)
133-
for doc in existing_docs:
134-
if doc.get("doc_name") == file_name:
135-
return doc["doc_id"]
136-
137134
folder_id = self._get_folder_id(collection)
138135
data = {"if_retrieval": "true"}
139136
if folder_id:

0 commit comments

Comments
(0)