Skip to content

Commit aa5585c

Browse files
authored
feat: update document index status model and related components (#985)
1 parent d13b7f9 commit aa5585c

25 files changed

Lines changed: 911 additions & 1348 deletions

aperag/api/components/schemas/document.yaml

Lines changed: 12 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -14,29 +14,34 @@ document:
1414
- FAILED
1515
- DELETING
1616
- DELETED
17-
- WARNING
1817
vector_index_status:
1918
type: string
2019
enum:
2120
- PENDING
22-
- RUNNING
23-
- COMPLETE
21+
- CREATING
22+
- ACTIVE
23+
- DELETING
24+
- DELETION_IN_PROGRESS
2425
- FAILED
2526
- SKIPPED
2627
fulltext_index_status:
2728
type: string
2829
enum:
2930
- PENDING
30-
- RUNNING
31-
- COMPLETE
31+
- CREATING
32+
- ACTIVE
33+
- DELETING
34+
- DELETION_IN_PROGRESS
3235
- FAILED
3336
- SKIPPED
3437
graph_index_status:
3538
type: string
3639
enum:
3740
- PENDING
38-
- RUNNING
39-
- COMPLETE
41+
- CREATING
42+
- ACTIVE
43+
- DELETING
44+
- DELETION_IN_PROGRESS
4045
- FAILED
4146
- SKIPPED
4247
vector_index_updated:
@@ -85,16 +90,6 @@ documentCreate:
8590
collection_id:
8691
type: string
8792

88-
documentUpdate:
89-
type: object
90-
properties:
91-
title:
92-
type: string
93-
description:
94-
type: string
95-
source:
96-
type: string
97-
9893
rebuildIndexesRequest:
9994
type: object
10095
properties:

aperag/api/paths/collections.yaml

Lines changed: 0 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -254,47 +254,6 @@ document:
254254
application/json:
255255
schema:
256256
$ref: '../components/schemas/common.yaml#/failResponse'
257-
put:
258-
summary: Update a document
259-
description: Update a document
260-
security:
261-
- BearerAuth: []
262-
parameters:
263-
- name: collection_id
264-
in: path
265-
required: true
266-
schema:
267-
type: string
268-
- name: document_id
269-
in: path
270-
required: true
271-
schema:
272-
type: string
273-
requestBody:
274-
required: true
275-
content:
276-
application/json:
277-
schema:
278-
$ref: '../components/schemas/document.yaml#/documentUpdate'
279-
responses:
280-
'200':
281-
description: Document updated successfully
282-
content:
283-
application/json:
284-
schema:
285-
$ref: '../components/schemas/document.yaml#/document'
286-
'401':
287-
description: Unauthorized
288-
content:
289-
application/json:
290-
schema:
291-
$ref: '../components/schemas/common.yaml#/failResponse'
292-
'404':
293-
description: Document not found
294-
content:
295-
application/json:
296-
schema:
297-
$ref: '../components/schemas/common.yaml#/failResponse'
298257

299258
rebuild_indexes:
300259
post:

aperag/db/models.py

Lines changed: 24 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,17 @@ class DocumentIndexType(str, Enum):
8383
GRAPH = "GRAPH"
8484

8585

86+
class DocumentIndexStatus(str, Enum):
87+
"""Document index lifecycle status"""
88+
89+
PENDING = "PENDING" # Awaiting processing (create/update)
90+
CREATING = "CREATING" # Task claimed, creation/update in progress
91+
ACTIVE = "ACTIVE" # Index is up-to-date and ready for use
92+
DELETING = "DELETING" # Deletion has been requested
93+
DELETION_IN_PROGRESS = "DELETION_IN_PROGRESS" # Task claimed, deletion in progress
94+
FAILED = "FAILED" # The last operation failed
95+
96+
8697
class BotStatus(str, Enum):
8798
ACTIVE = "ACTIVE"
8899
DELETED = "DELETED"
@@ -159,24 +170,6 @@ class LightRAGDocStatus(str, Enum):
159170
FAILED = "failed"
160171

161172

162-
# Add new enums for K8s-inspired design
163-
class IndexDesiredState(str, Enum):
164-
"""Desired state for index - what we want"""
165-
166-
PRESENT = "present"
167-
ABSENT = "absent"
168-
169-
170-
class IndexActualState(str, Enum):
171-
"""Actual state for index - what currently exists"""
172-
173-
ABSENT = "absent"
174-
CREATING = "creating"
175-
PRESENT = "present"
176-
DELETING = "deleting"
177-
FAILED = "failed"
178-
179-
180173
# Models
181174
class Collection(Base):
182175
__tablename__ = "collection"
@@ -243,13 +236,15 @@ def get_overall_index_status(self, session) -> "DocumentStatus":
243236
if not document_indexes:
244237
return DocumentStatus.PENDING
245238

246-
states = [idx.actual_state for idx in document_indexes]
239+
statuses = [idx.status for idx in document_indexes]
247240

248-
if any(state == IndexActualState.FAILED for state in states):
241+
if any(status == DocumentIndexStatus.FAILED for status in statuses):
249242
return DocumentStatus.FAILED
250-
elif any(state == IndexActualState.CREATING for state in states):
243+
elif any(
244+
status in [DocumentIndexStatus.CREATING, DocumentIndexStatus.DELETION_IN_PROGRESS] for status in statuses
245+
):
251246
return DocumentStatus.RUNNING
252-
elif all(state == IndexActualState.PRESENT for state in states):
247+
elif all(status == DocumentIndexStatus.ACTIVE for status in statuses):
253248
return DocumentStatus.COMPLETE
254249
else:
255250
return DocumentStatus.PENDING
@@ -700,7 +695,7 @@ class LightRAGLLMCacheModel(Base):
700695

701696

702697
class DocumentIndex(Base):
703-
"""Document index - combines spec and status into single table"""
698+
"""Document index - single status model"""
704699

705700
__tablename__ = "document_index"
706701
__table_args__ = (UniqueConstraint("document_id", "index_type", name="uq_document_index"),)
@@ -709,14 +704,11 @@ class DocumentIndex(Base):
709704
document_id = Column(String(24), nullable=False, index=True)
710705
index_type = Column(EnumColumn(DocumentIndexType), nullable=False, index=True)
711706

712-
# Desired state (spec) fields
713-
desired_state = Column(EnumColumn(IndexDesiredState), nullable=False, default=IndexDesiredState.PRESENT, index=True)
707+
status = Column(EnumColumn(DocumentIndexStatus), nullable=False, default=DocumentIndexStatus.PENDING, index=True)
714708
version = Column(Integer, nullable=False, default=1) # Incremented on each spec change
715-
created_by = Column(String(256), nullable=False) # User who created this spec
716-
717-
# Actual state (status) fields
718-
actual_state = Column(EnumColumn(IndexActualState), nullable=False, default=IndexActualState.ABSENT, index=True)
719709
observed_version = Column(Integer, nullable=False, default=0) # Last processed spec version
710+
711+
# Index data and task tracking
720712
index_data = Column(Text, nullable=True) # JSON string for index-specific data
721713
error_message = Column(Text, nullable=True)
722714

@@ -726,25 +718,10 @@ class DocumentIndex(Base):
726718
gmt_last_reconciled = Column(DateTime(timezone=True), nullable=True) # Last reconciliation attempt
727719

728720
def __repr__(self):
729-
return f"<DocumentIndex(id={self.id}, document_id={self.document_id}, type={self.index_type}, desired={self.desired_state}, actual={self.actual_state})>"
730-
731-
def is_in_sync(self) -> bool:
732-
"""Check if desired and actual states are in sync"""
733-
if self.observed_version < self.version:
734-
return False
735-
736-
if self.desired_state == IndexDesiredState.PRESENT:
737-
return self.actual_state == IndexActualState.PRESENT
738-
elif self.desired_state == IndexDesiredState.ABSENT:
739-
return self.actual_state == IndexActualState.ABSENT
740-
return False
721+
return f"<DocumentIndex(id={self.id}, document_id={self.document_id}, type={self.index_type}, status={self.status}, version={self.version})>"
741722

742-
def update_spec(self, desired_state: IndexDesiredState = None, created_by: str = None):
743-
"""Update the spec (desired state) part"""
744-
if desired_state is not None:
745-
self.desired_state = desired_state
746-
if created_by is not None:
747-
self.created_by = created_by
723+
def update_version(self):
724+
"""Update the version to trigger reconciliation"""
748725
self.version += 1
749726
self.gmt_updated = utc_now()
750727

aperag/index/manager.py

Lines changed: 17 additions & 116 deletions
Original file line number | Diff line number | Diff line change
@@ -18,24 +18,23 @@
1818
from sqlalchemy import and_, select
1919
from sqlalchemy.ext.asyncio import AsyncSession
2020

21-
from aperag.db.models import DocumentIndex, DocumentIndexType, IndexActualState, IndexDesiredState, utc_now
21+
from aperag.db.models import DocumentIndex, DocumentIndexStatus, DocumentIndexType, utc_now
2222

2323
logger = logging.getLogger(__name__)
2424

2525

26-
class FrontendIndexManager:
26+
class DocumentIndexManager:
2727
"""Simple manager for document index specs (frontend chain)"""
2828

29-
async def create_document_indexes(
30-
self, session: AsyncSession, document_id: str, user: str, index_types: Optional[List[DocumentIndexType]] = None
29+
async def create_or_update_document_indexes(
30+
self, session: AsyncSession, document_id: str, index_types: Optional[List[DocumentIndexType]] = None
3131
):
3232
"""
33-
Create index specs for a document (called when document is created)
33+
Create or update index records for a document (called when document is created or index is updated)
3434
3535
Args:
3636
session: Database session
3737
document_id: Document ID
38-
user: User creating the document
3938
index_types: List of index types to create (defaults to all)
4039
"""
4140
if index_types is None:
@@ -50,38 +49,21 @@ async def create_document_indexes(
5049
existing_index = result.scalar_one_or_none()
5150

5251
if existing_index:
53-
# Update existing index
54-
existing_index.update_spec(IndexDesiredState.PRESENT, user)
55-
logger.debug(f"Updated index for {document_id}:{index_type} to version {existing_index.version}")
52+
# Update existing index to pending and increment version
53+
existing_index.status = DocumentIndexStatus.PENDING
54+
existing_index.update_version()
55+
logger.debug(f"Updated index for {document_id}:{index_type.value} to version {existing_index.version}")
5656
else:
5757
# Create new index
5858
doc_index = DocumentIndex(
5959
document_id=document_id,
6060
index_type=index_type,
61-
desired_state=IndexDesiredState.PRESENT,
61+
status=DocumentIndexStatus.PENDING,
6262
version=1,
63-
created_by=user,
63+
observed_version=0,
6464
)
6565
session.add(doc_index)
66-
67-
async def update_document_indexes(self, session: AsyncSession, document_id: str):
68-
"""
69-
Update document indexes (called when document content is updated)
70-
71-
This increments the version of all indexes to trigger reconciliation.
72-
73-
Args:
74-
session: Database session
75-
document_id: Document ID
76-
"""
77-
stmt = select(DocumentIndex).where(DocumentIndex.document_id == document_id)
78-
result = await session.execute(stmt)
79-
indexes = result.scalars().all()
80-
81-
for index in indexes:
82-
if index.desired_state == IndexDesiredState.PRESENT:
83-
index.version += 1 # Increment version to trigger re-indexing
84-
index.gmt_updated = utc_now()
66+
logger.debug(f"Created new index for {document_id}:{index_type.value}")
8567

8668
async def delete_document_indexes(
8769
self, session: AsyncSession, document_id: str, index_types: Optional[List[DocumentIndexType]] = None
@@ -105,92 +87,11 @@ async def delete_document_indexes(
10587
doc_index = result.scalar_one_or_none()
10688

10789
if doc_index:
108-
doc_index.update_spec(IndexDesiredState.ABSENT)
109-
110-
async def rebuild_document_indexes(
111-
self, session: AsyncSession, document_id: str, index_types: List[DocumentIndexType]
112-
):
113-
"""
114-
Rebuild specified document indexes (called when user requests index rebuild)
115-
116-
This increments the version of specified indexes to trigger reconciliation.
117-
118-
Args:
119-
session: Database session
120-
document_id: Document ID
121-
index_types: List of index types to rebuild
122-
"""
123-
if len(set(index_types)) != len(index_types):
124-
raise Exception("Duplicate index types are not allowed")
125-
126-
for index_type in index_types:
127-
stmt = select(DocumentIndex).where(
128-
and_(DocumentIndex.document_id == document_id, DocumentIndex.index_type == index_type)
129-
)
130-
result = await session.execute(stmt)
131-
doc_index = result.scalar_one_or_none()
132-
133-
if doc_index:
134-
# Only rebuild if the index is present or failed
135-
if doc_index.desired_state == IndexDesiredState.PRESENT:
136-
doc_index.version += 1 # Increment version to trigger re-indexing
137-
doc_index.gmt_updated = utc_now()
138-
logger.info(f"Triggered rebuild for {index_type.value} index of document {document_id}")
139-
else:
140-
logger.warning(
141-
f"Cannot rebuild {index_type.value} index for document {document_id}: index not present"
142-
)
143-
else:
144-
logger.warning(f"No {index_type.value} index found for document {document_id}")
145-
146-
async def get_document_index_status(self, session: AsyncSession, document_id: str) -> dict:
147-
"""
148-
Get current index status for a document
149-
150-
Args:
151-
session: Database session
152-
document_id: Document ID
153-
154-
Returns:
155-
Dictionary with index status information
156-
"""
157-
# Get all indexes for the document
158-
stmt = select(DocumentIndex).where(DocumentIndex.document_id == document_id)
159-
result = await session.execute(stmt)
160-
indexes = result.scalars().all()
161-
162-
# Build result
163-
result = {"document_id": document_id, "indexes": {}, "overall_status": "complete"}
164-
165-
has_creating = False
166-
has_failed = False
167-
168-
for index in indexes:
169-
index_info = {
170-
"type": index.index_type,
171-
"desired_state": index.desired_state,
172-
"actual_state": index.actual_state,
173-
"in_sync": index.is_in_sync(),
174-
}
175-
176-
if index.actual_state == IndexActualState.CREATING:
177-
has_creating = True
178-
elif index.actual_state == IndexActualState.FAILED:
179-
has_failed = True
180-
index_info["error"] = index.error_message
181-
182-
result["indexes"][index.index_type] = index_info
183-
184-
# Determine overall status
185-
if has_failed:
186-
result["overall_status"] = "failed"
187-
elif has_creating:
188-
result["overall_status"] = "running"
189-
else:
190-
result["overall_status"] = "complete"
191-
192-
return result
90+
# Mark for deletion
91+
doc_index.status = DocumentIndexStatus.DELETING
92+
doc_index.gmt_updated = utc_now()
93+
logger.debug(f"Marked index {document_id}:{index_type.value} for deletion")
19394

19495

19596
# Global instance
196-
document_index_manager = FrontendIndexManager()
97+
document_index_manager = DocumentIndexManager()

0 commit comments

Comments
 (0)