Skip to content

Commit 2d78626

Browse files
hibiki233i, sourcery-ai[bot], gemini-code-assist[bot], zouyonghe
authored
fix SQLAlchemy compatibility issues on macOS (#7724)
* Stabilize packaged SQLite knowledge base initialization
* Apply suggestion from @sourcery-ai[bot]
  Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
* Apply suggestion from @gemini-code-assist[bot]
  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
* fix: updating database URL handling and ensuring unique document IDs
* fix: preserve sqlite pragmas with null pool

---------

Co-authored-by: sourcery-ai[bot] <58596630+sourcery-ai[bot]@users.noreply.github.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: 邹永赫 <1259085392@qq.com>
1 parent ff28eca commit 2d78626

5 files changed

Lines changed: 153 additions & 11 deletions

File tree

astrbot/core/db/__init__.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,10 @@
55
from dataclasses import dataclass
66

77
from deprecated import deprecated
8+
from sqlalchemy import event
9+
from sqlalchemy.engine import make_url
810
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
11+
from sqlalchemy.pool import NullPool
912

1013
from astrbot.core.db.po import (
1114
ApiKey,
@@ -28,6 +31,19 @@
2831
)
2932

3033

34+
def _configure_sqlite_connection(dbapi_connection, connection_record) -> None:
    """Apply the SQLite tuning PRAGMAs to a DBAPI connection.

    Intended for use as a SQLAlchemy ``connect`` event listener, which passes
    the raw DBAPI connection and its pool record.

    Args:
        dbapi_connection: Raw DBAPI connection; only ``cursor()`` is used.
        connection_record: Pool record supplied by the event hook; unused.
    """
    # Executed in this exact order on every call.
    pragmas = (
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA cache_size=20000",
        "PRAGMA temp_store=MEMORY",
        "PRAGMA mmap_size=134217728",  # 128 MiB
        "PRAGMA optimize",
    )
    cursor = dbapi_connection.cursor()
    try:
        for statement in pragmas:
            cursor.execute(statement)
    finally:
        # Release the cursor even when one of the PRAGMAs fails.
        cursor.close()
45+
46+
3147
@dataclass
3248
class BaseDatabase(abc.ABC):
3349
"""数据库基类"""
@@ -40,14 +56,29 @@ def __init__(self) -> None:
4056
# second write is attempted. Setting timeout=30 tells SQLite to
4157
# wait up to 30 s for the lock, which is enough to ride out brief
4258
# write bursts from concurrent agent/metrics/session operations.
43-
is_sqlite = "sqlite" in self.DATABASE_URL
59+
db_url = make_url(self.DATABASE_URL)
60+
is_sqlite = db_url.get_backend_name() == "sqlite"
4461
connect_args = {"timeout": 30} if is_sqlite else {}
62+
engine_kwargs = {
63+
"echo": False,
64+
"future": True,
65+
"connect_args": connect_args,
66+
}
67+
if is_sqlite:
68+
# Keep SQLite async engines off SQLAlchemy's default async queue
69+
# pool so packaged runtimes don't depend on dialect-specific pool
70+
# event support.
71+
engine_kwargs["poolclass"] = NullPool
4572
self.engine = create_async_engine(
4673
self.DATABASE_URL,
47-
echo=False,
48-
future=True,
49-
connect_args=connect_args,
74+
**engine_kwargs,
5075
)
76+
if is_sqlite:
77+
event.listen(
78+
self.engine.sync_engine,
79+
"connect",
80+
_configure_sqlite_connection,
81+
)
5182
self.AsyncSessionLocal = async_sessionmaker(
5283
self.engine,
5384
class_=AsyncSession,

astrbot/core/db/vec_db/faiss_impl/document_storage.py

Lines changed: 56 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
from pathlib import Path
66

77
from sqlalchemy import Column, Text, bindparam
8+
from sqlalchemy.dialects import sqlite
89
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine
910
from sqlalchemy.orm import sessionmaker
11+
from sqlalchemy.pool import NullPool
12+
from sqlalchemy.schema import CreateTable
1013
from sqlmodel import Field, MetaData, SQLModel, col, func, select, text
1114

1215
from astrbot.core import logger
@@ -34,7 +37,7 @@ class Document(BaseDocModel, table=True):
3437
primary_key=True,
3538
sa_column_kwargs={"autoincrement": True},
3639
)
37-
doc_id: str = Field(nullable=False)
40+
doc_id: str = Field(nullable=False, unique=True)
3841
text: str = Field(nullable=False)
3942
metadata_: str | None = Field(default=None, sa_column=Column("metadata", Text))
4043
created_at: datetime | None = Field(default=None)
@@ -60,8 +63,7 @@ async def initialize(self) -> None:
6063
"""Initialize the SQLite database and create the documents table if it doesn't exist."""
6164
await self.connect()
6265
async with self.engine.begin() as conn: # type: ignore
63-
# Create tables using SQLModel
64-
await conn.run_sync(BaseDocModel.metadata.create_all)
66+
await self._ensure_documents_table(conn)
6567

6668
try:
6769
await conn.execute(
@@ -94,6 +96,56 @@ async def initialize(self) -> None:
9496
await self._initialize_fts5(conn)
9597
await conn.commit()
9698

99+
async def _ensure_documents_table(self, executor) -> None:
    """Create the document table from the SQLModel definition if it is missing.

    Looks the table up in ``sqlite_master``; when absent, compiles the
    ``Document`` table's ``CREATE TABLE IF NOT EXISTS`` DDL with the SQLite
    dialect and executes it as plain SQL text. In both cases the ``doc_id``
    unique-index step runs afterwards.

    Args:
        executor: Object exposing an awaitable ``execute(statement, params)``;
            presumably an async connection or session — confirm against callers.
    """
    probe = text(
        """
        SELECT 1
        FROM sqlite_master
        WHERE type='table' AND name=:table_name
        LIMIT 1
        """,
    )
    result = await executor.execute(
        probe,
        {"table_name": Document.__tablename__},
    )

    if result.scalar_one_or_none() is None:
        create_table = CreateTable(Document.__table__, if_not_exists=True)  # type: ignore[attr-defined]
        ddl = str(create_table.compile(dialect=sqlite.dialect()))
        await executor.execute(text(ddl))

    # Runs for both pre-existing and freshly created tables.
    await self._ensure_doc_id_unique_index(executor)
122+
123+
async def _ensure_doc_id_unique_index(self, executor) -> None:
    """Add a unique index on ``documents.doc_id`` unless duplicates exist.

    If any ``doc_id`` value occurs more than once, the index is not created
    and a warning naming ``self.db_path`` is logged instead. Otherwise
    ``idx_documents_doc_id_unique`` is created with ``IF NOT EXISTS``.

    Args:
        executor: Object exposing an awaitable ``execute(statement)``.
    """
    duplicate_result = await executor.execute(
        text(
            """
            SELECT doc_id
            FROM documents
            GROUP BY doc_id
            HAVING COUNT(*) > 1
            LIMIT 1
            """,
        ),
    )
    if duplicate_result.scalar_one_or_none() is not None:
        # Existing duplicates: skip index creation and only warn.
        logger.warning(
            "Skipping documents.doc_id unique index migration because duplicate "
            f"doc_id values already exist in {self.db_path}.",
        )
        return

    await executor.execute(
        text(
            "CREATE UNIQUE INDEX IF NOT EXISTS "
            "idx_documents_doc_id_unique ON documents(doc_id)",
        ),
    )
148+
97149
async def _initialize_fts5(self, executor) -> None:
98150
try:
99151
await self._create_fts5_table(executor, if_not_exists=True)
@@ -197,6 +249,7 @@ async def connect(self) -> None:
197249
self.DATABASE_URL,
198250
echo=False,
199251
future=True,
252+
poolclass=NullPool,
200253
)
201254
self.async_session_maker = sessionmaker(
202255
self.engine, # type: ignore

astrbot/core/knowledge_base/kb_db_sqlite.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
from pathlib import Path
33
from typing import TYPE_CHECKING
44

5-
from sqlalchemy import delete, func, select, text, update
5+
from sqlalchemy import delete, event, func, select, text, update
66
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
7+
from sqlalchemy.pool import NullPool
78
from sqlmodel import col, desc
89

910
from astrbot.core import logger
@@ -19,6 +20,19 @@
1920
from astrbot.core.db.vec_db.faiss_impl import FaissVecDB
2021

2122

23+
def _configure_sqlite_connection(dbapi_connection, connection_record) -> None:
    """Apply the SQLite tuning PRAGMAs to a DBAPI connection.

    Intended for use as a SQLAlchemy ``connect`` event listener, which passes
    the raw DBAPI connection and its pool record.

    Args:
        dbapi_connection: Raw DBAPI connection; only ``cursor()`` is used.
        connection_record: Pool record supplied by the event hook; unused.
    """
    # Executed in this exact order on every call.
    pragmas = (
        "PRAGMA journal_mode=WAL",
        "PRAGMA synchronous=NORMAL",
        "PRAGMA cache_size=20000",
        "PRAGMA temp_store=MEMORY",
        "PRAGMA mmap_size=134217728",  # 128 MiB
        "PRAGMA optimize",
    )
    cursor = dbapi_connection.cursor()
    try:
        for statement in pragmas:
            cursor.execute(statement)
    finally:
        # Release the cursor even when one of the PRAGMAs fails.
        cursor.close()
34+
35+
2236
class KBSQLiteDatabase:
2337
def __init__(self, db_path: str | None = None) -> None:
2438
"""初始化知识库数据库
@@ -40,8 +54,12 @@ def __init__(self, db_path: str | None = None) -> None:
4054
self.engine = create_async_engine(
4155
self.DATABASE_URL,
4256
echo=False,
43-
pool_pre_ping=True,
44-
pool_recycle=3600,
57+
poolclass=NullPool,
58+
)
59+
event.listen(
60+
self.engine.sync_engine,
61+
"connect",
62+
_configure_sqlite_connection,
4563
)
4664

4765
# 创建会话工厂

main.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
get_dashboard_version,
2828
should_use_bundled_dashboard_dist,
2929
)
30+
from astrbot.core.utils.runtime_env import is_packaged_desktop_runtime # noqa: E402
3031

3132
# 将父目录添加到 sys.path
3233
sys.path.append(Path(__file__).parent.as_posix())
@@ -52,7 +53,10 @@ def check_env() -> None:
5253
sys.path.insert(0, astrbot_root)
5354

5455
site_packages_path = get_astrbot_site_packages_path()
55-
if site_packages_path not in sys.path:
56+
if not is_packaged_desktop_runtime() and site_packages_path not in sys.path:
57+
# Packaged desktop runtime keeps shared plugin dependencies out of the
58+
# global import path so bundled core libraries don't mix with user-
59+
# installed wheels from ~/.astrbot/data/site-packages.
5660
sys.path.append(site_packages_path)
5761

5862
os.makedirs(get_astrbot_config_path(), exist_ok=True)

tests/unit/test_document_storage_fts.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import sqlite3
22

33
import pytest
4+
from sqlalchemy.exc import IntegrityError
45

56
from astrbot.core.db.vec_db.faiss_impl.document_storage import DocumentStorage
67

@@ -101,3 +102,38 @@ async def test_document_storage_fts_recovers_from_legacy_non_fts_table(tmp_path)
101102
assert [result["doc_id"] for result in results] == ["legacy-fix"]
102103

103104
await storage.close()
105+
106+
107+
@pytest.mark.asyncio
async def test_document_storage_adds_unique_doc_id_index_to_existing_table(tmp_path):
    """A legacy table without a doc_id constraint rejects duplicates after init."""
    db_path = tmp_path / "doc.db"

    # Seed a legacy-schema table (no UNIQUE on doc_id) with one row.
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            """
            CREATE TABLE documents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                doc_id VARCHAR NOT NULL,
                text VARCHAR NOT NULL,
                metadata TEXT,
                created_at DATETIME,
                updated_at DATETIME
            )
            """,
        )
        conn.execute(
            "INSERT INTO documents (doc_id, text) VALUES ('legacy-chunk', 'legacy text')"
        )
        conn.commit()
    finally:
        conn.close()

    storage = DocumentStorage(str(db_path))
    await storage.initialize()
    try:
        # Inserting the same doc_id again must now violate the unique index.
        with pytest.raises(IntegrityError):
            await storage.insert_document(
                doc_id="legacy-chunk",
                text="duplicate text",
                metadata={},
            )
    finally:
        # Close the storage even when the assertion above fails.
        await storage.close()

0 commit comments

Comments
 (0)