Skip to content

Commit 7b04be2

Browse files
authored
feat: Fix some index bugs (#982)
* feat: add UT
* feat: fix celery task
1 parent 6b7cf4d commit 7b04be2

18 files changed

Lines changed: 413 additions & 258 deletions

File tree

aperag/api/components/schemas/document.yaml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,9 +103,9 @@ rebuildIndexesRequest:
103103
items:
104104
type: string
105105
enum:
106-
- vector
107-
- fulltext
108-
- graph
106+
- VECTOR
107+
- FULLTEXT
108+
- GRAPH
109109
description: Types of indexes to rebuild
110110
minItems: 1
111111
required:

aperag/db/models.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,9 @@ class DocumentStatus(str, Enum):
7878
class DocumentIndexType(str, Enum):
7979
"""Document index type enumeration"""
8080

81-
VECTOR = "vector"
82-
FULLTEXT = "fulltext"
83-
GRAPH = "graph"
81+
VECTOR = "VECTOR"
82+
FULLTEXT = "FULLTEXT"
83+
GRAPH = "GRAPH"
8484

8585

8686
class BotStatus(str, Enum):

aperag/index/base.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -21,9 +21,9 @@
2121
class IndexType(Enum):
2222
"""Index type enumeration"""
2323

24-
VECTOR = "vector"
25-
FULLTEXT = "fulltext"
26-
GRAPH = "graph"
24+
VECTOR = "VECTOR"
25+
FULLTEXT = "FULLTEXT"
26+
GRAPH = "GRAPH"
2727

2828

2929
class IndexStatus(Enum):

aperag/index/manager.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,9 @@ async def rebuild_document_indexes(
137137
doc_index.gmt_updated = utc_now()
138138
logger.info(f"Triggered rebuild for {index_type.value} index of document {document_id}")
139139
else:
140-
logger.warning(f"Cannot rebuild {index_type.value} index for document {document_id}: index not present")
140+
logger.warning(
141+
f"Cannot rebuild {index_type.value} index for document {document_id}: index not present"
142+
)
141143
else:
142144
logger.warning(f"No {index_type.value} index found for document {document_id}")
143145

aperag/migration/versions/20250621002836-2768dfee8bbc.py

Lines changed: 0 additions & 70 deletions
This file was deleted.

aperag/migration/versions/20250623110658-23c0533b6b63.py

Lines changed: 0 additions & 50 deletions
This file was deleted.

aperag/migration/versions/20250617113447-dcc0b6c56552.py renamed to aperag/migration/versions/20250624093005-eb8aa708478f.py

Lines changed: 52 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
"""empty message
22
3-
Revision ID: dcc0b6c56552
3+
Revision ID: eb8aa708478f
44
Revises:
5-
Create Date: 2025-06-17 11:34:47.015293
5+
Create Date: 2025-06-24 09:30:05.267898
66
77
"""
88
from typing import Sequence, Union
@@ -12,7 +12,7 @@
1212
from pgvector.sqlalchemy import Vector
1313

1414
# revision identifiers, used by Alembic.
15-
revision: str = 'dcc0b6c56552'
15+
revision: str = 'eb8aa708478f'
1616
down_revision: Union[str, None] = None
1717
branch_labels: Union[str, Sequence[str], None] = None
1818
depends_on: Union[str, Sequence[str], None] = None
@@ -36,6 +36,36 @@ def upgrade() -> None:
3636
op.create_index(op.f('ix_api_key_gmt_deleted'), 'api_key', ['gmt_deleted'], unique=False)
3737
op.create_index(op.f('ix_api_key_status'), 'api_key', ['status'], unique=False)
3838
op.create_index(op.f('ix_api_key_user'), 'api_key', ['user'], unique=False)
39+
op.create_table('audit_log',
40+
sa.Column('id', sa.String(length=36), nullable=False),
41+
sa.Column('user_id', sa.String(length=36), nullable=True, comment='User ID'),
42+
sa.Column('username', sa.String(length=255), nullable=True, comment='Username'),
43+
sa.Column('resource_type', sa.Enum('collection', 'document', 'bot', 'chat', 'message', 'api_key', 'llm_provider', 'llm_provider_model', 'model_service_provider', 'user', 'config', 'invitation', 'auth', 'chat_completion', 'search', 'llm', 'flow', 'system', name='auditresource'), nullable=True, comment='Resource type'),
44+
sa.Column('resource_id', sa.String(length=255), nullable=True, comment='Resource ID (extracted at query time)'),
45+
sa.Column('api_name', sa.String(length=255), nullable=False, comment='API operation name'),
46+
sa.Column('http_method', sa.String(length=10), nullable=False, comment='HTTP method (POST, PUT, DELETE)'),
47+
sa.Column('path', sa.String(length=512), nullable=False, comment='API path'),
48+
sa.Column('status_code', sa.Integer(), nullable=True, comment='HTTP status code'),
49+
sa.Column('request_data', sa.Text(), nullable=True, comment='Request data (JSON)'),
50+
sa.Column('response_data', sa.Text(), nullable=True, comment='Response data (JSON)'),
51+
sa.Column('error_message', sa.Text(), nullable=True, comment='Error message if failed'),
52+
sa.Column('ip_address', sa.String(length=45), nullable=True, comment='Client IP address'),
53+
sa.Column('user_agent', sa.String(length=500), nullable=True, comment='User agent string'),
54+
sa.Column('request_id', sa.String(length=255), nullable=False, comment='Request ID for tracking'),
55+
sa.Column('start_time', sa.BigInteger(), nullable=False, comment='Request start time (milliseconds since epoch)'),
56+
sa.Column('end_time', sa.BigInteger(), nullable=True, comment='Request end time (milliseconds since epoch)'),
57+
sa.Column('gmt_created', sa.DateTime(timezone=True), nullable=False, comment='Created time'),
58+
sa.PrimaryKeyConstraint('id')
59+
)
60+
op.create_index('idx_audit_api_name', 'audit_log', ['api_name'], unique=False)
61+
op.create_index('idx_audit_gmt_created', 'audit_log', ['gmt_created'], unique=False)
62+
op.create_index('idx_audit_http_method', 'audit_log', ['http_method'], unique=False)
63+
op.create_index('idx_audit_request_id', 'audit_log', ['request_id'], unique=False)
64+
op.create_index('idx_audit_resource_id', 'audit_log', ['resource_id'], unique=False)
65+
op.create_index('idx_audit_resource_type', 'audit_log', ['resource_type'], unique=False)
66+
op.create_index('idx_audit_start_time', 'audit_log', ['start_time'], unique=False)
67+
op.create_index('idx_audit_status_code', 'audit_log', ['status_code'], unique=False)
68+
op.create_index('idx_audit_user_id', 'audit_log', ['user_id'], unique=False)
3969
op.create_table('bot',
4070
sa.Column('id', sa.String(length=24), nullable=False),
4171
sa.Column('user', sa.String(length=256), nullable=False),
@@ -125,7 +155,7 @@ def upgrade() -> None:
125155
op.create_table('document_index',
126156
sa.Column('id', sa.Integer(), nullable=False),
127157
sa.Column('document_id', sa.String(length=24), nullable=False),
128-
sa.Column('index_type', sa.Enum('vector', 'fulltext', 'graph', name='documentindextype'), nullable=False),
158+
sa.Column('index_type', sa.Enum('VECTOR', 'FULLTEXT', 'GRAPH', name='documentindextype'), nullable=False),
129159
sa.Column('desired_state', sa.Enum('present', 'absent', name='indexdesiredstate'), nullable=False),
130160
sa.Column('version', sa.Integer(), nullable=False),
131161
sa.Column('created_by', sa.String(length=256), nullable=False),
@@ -293,7 +323,7 @@ def upgrade() -> None:
293323
op.create_index(op.f('ix_model_service_provider_gmt_deleted'), 'model_service_provider', ['gmt_deleted'], unique=False)
294324
op.create_index(op.f('ix_model_service_provider_name'), 'model_service_provider', ['name'], unique=False)
295325
op.create_index(op.f('ix_model_service_provider_status'), 'model_service_provider', ['status'], unique=False)
296-
op.create_table('searchtesthistory',
326+
op.create_table('searchhistory',
297327
sa.Column('id', sa.String(length=24), nullable=False),
298328
sa.Column('user', sa.String(length=256), nullable=False),
299329
sa.Column('collection_id', sa.String(length=24), nullable=True),
@@ -306,9 +336,9 @@ def upgrade() -> None:
306336
sa.Column('gmt_deleted', sa.DateTime(timezone=True), nullable=True),
307337
sa.PrimaryKeyConstraint('id')
308338
)
309-
op.create_index(op.f('ix_searchtesthistory_collection_id'), 'searchtesthistory', ['collection_id'], unique=False)
310-
op.create_index(op.f('ix_searchtesthistory_gmt_deleted'), 'searchtesthistory', ['gmt_deleted'], unique=False)
311-
op.create_index(op.f('ix_searchtesthistory_user'), 'searchtesthistory', ['user'], unique=False)
339+
op.create_index(op.f('ix_searchhistory_collection_id'), 'searchhistory', ['collection_id'], unique=False)
340+
op.create_index(op.f('ix_searchhistory_gmt_deleted'), 'searchhistory', ['gmt_deleted'], unique=False)
341+
op.create_index(op.f('ix_searchhistory_user'), 'searchhistory', ['user'], unique=False)
312342
op.create_table('user',
313343
sa.Column('id', sa.String(length=24), nullable=False),
314344
sa.Column('username', sa.String(length=256), nullable=False),
@@ -344,10 +374,10 @@ def downgrade() -> None:
344374
# ### commands auto generated by Alembic - please adjust! ###
345375
op.drop_table('user_quota')
346376
op.drop_table('user')
347-
op.drop_index(op.f('ix_searchtesthistory_user'), table_name='searchtesthistory')
348-
op.drop_index(op.f('ix_searchtesthistory_gmt_deleted'), table_name='searchtesthistory')
349-
op.drop_index(op.f('ix_searchtesthistory_collection_id'), table_name='searchtesthistory')
350-
op.drop_table('searchtesthistory')
377+
op.drop_index(op.f('ix_searchhistory_user'), table_name='searchhistory')
378+
op.drop_index(op.f('ix_searchhistory_gmt_deleted'), table_name='searchhistory')
379+
op.drop_index(op.f('ix_searchhistory_collection_id'), table_name='searchhistory')
380+
op.drop_table('searchhistory')
351381
op.drop_index(op.f('ix_model_service_provider_status'), table_name='model_service_provider')
352382
op.drop_index(op.f('ix_model_service_provider_name'), table_name='model_service_provider')
353383
op.drop_index(op.f('ix_model_service_provider_gmt_deleted'), table_name='model_service_provider')
@@ -393,6 +423,16 @@ def downgrade() -> None:
393423
op.drop_index(op.f('ix_bot_status'), table_name='bot')
394424
op.drop_index(op.f('ix_bot_gmt_deleted'), table_name='bot')
395425
op.drop_table('bot')
426+
op.drop_index('idx_audit_user_id', table_name='audit_log')
427+
op.drop_index('idx_audit_status_code', table_name='audit_log')
428+
op.drop_index('idx_audit_start_time', table_name='audit_log')
429+
op.drop_index('idx_audit_resource_type', table_name='audit_log')
430+
op.drop_index('idx_audit_resource_id', table_name='audit_log')
431+
op.drop_index('idx_audit_request_id', table_name='audit_log')
432+
op.drop_index('idx_audit_http_method', table_name='audit_log')
433+
op.drop_index('idx_audit_gmt_created', table_name='audit_log')
434+
op.drop_index('idx_audit_api_name', table_name='audit_log')
435+
op.drop_table('audit_log')
396436
op.drop_index(op.f('ix_api_key_user'), table_name='api_key')
397437
op.drop_index(op.f('ix_api_key_status'), table_name='api_key')
398438
op.drop_index(op.f('ix_api_key_gmt_deleted'), table_name='api_key')

aperag/migration/versions/20250617113448-12ea6d2bf365.py renamed to aperag/migration/versions/20250624093016-dc0829e062eb.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,20 @@
11
"""Initialize model configurations data
22
3-
Revision ID: 12ea6d2bf365
4-
Revises: 6b5cab1cd8d8
5-
Create Date: 2025-06-11 22:35:16.362747
3+
Revision ID: dc0829e062eb
4+
Revises: eb8aa708478f
5+
Create Date: 2025-06-24 09:30:16.549135
66
77
"""
88
from typing import Sequence, Union
99

1010
from alembic import op
1111
import sqlalchemy as sa
12-
from aperag.migration.utils import execute_sql_file
1312

13+
from aperag.migration.utils import execute_sql_file
1414

1515
# revision identifiers, used by Alembic.
16-
revision: str = '12ea6d2bf365'
17-
down_revision: Union[str, None] = 'dcc0b6c56552'
16+
revision: str = 'dc0829e062eb'
17+
down_revision: Union[str, None] = 'eb8aa708478f'
1818
branch_labels: Union[str, Sequence[str], None] = None
1919
depends_on: Union[str, Sequence[str], None] = None
2020

aperag/schema/view_models.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,20 @@
1+
# Copyright 2025 ApeCloud, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
115
# generated by datamodel-codegen:
216
# filename: openapi.merged.yaml
3-
# timestamp: 2025-06-23T10:33:05+00:00
17+
# timestamp: 2025-06-24T01:35:54+00:00
418

519
from __future__ import annotations
620

@@ -585,7 +599,7 @@ class DocumentUpdate(BaseModel):
585599

586600

587601
class RebuildIndexesRequest(BaseModel):
588-
index_types: list[Literal['vector', 'fulltext', 'graph']] = Field(
602+
index_types: list[Literal['VECTOR', 'FULLTEXT', 'GRAPH']] = Field(
589603
..., description='Types of indexes to rebuild', min_items=1
590604
)
591605

0 commit comments

Comments
 (0)