-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathdatabase.py
More file actions
103 lines (88 loc) · 3.31 KB
/
database.py
File metadata and controls
103 lines (88 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import importlib
import importlib.resources
import sqlite3
from .settings import Settings
class Database:
    """Database initialization and schema management for SQLiteRag."""

    @staticmethod
    def new_connection(db_path: str = "./sqliterag.sqlite") -> sqlite3.Connection:
        """Create a new SQLite connection to the specified database path.

        Args:
            db_path: Path handed to ``sqlite3.connect``.

        Returns:
            A connection whose ``row_factory`` is ``sqlite3.Row``.
        """
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        return conn

    @staticmethod
    def initialize(conn: sqlite3.Connection, settings: "Settings") -> sqlite3.Connection:
        """Initialize the database with extensions and schema.

        Loads the ``ai`` and ``vector`` extensions from their Python
        packages, checks that both answer a version query, then creates
        the schema.

        Args:
            conn: Connection to initialize.
            settings: Reads ``use_gpu`` here to pick the ``ai`` binary; the
                schema step reads the vector-related fields.

        Returns:
            The same connection, initialized.

        Raises:
            RuntimeError: If an extension package cannot be located, an
                extension fails to load, or the version probe fails.
        """
        ai_package = "sqliteai.binaries." + ("gpu" if settings.use_gpu else "cpu")

        conn.enable_load_extension(True)
        try:
            conn.load_extension(str(importlib.resources.files(ai_package) / "ai"))
            conn.load_extension(
                str(importlib.resources.files("sqlite_vector.binaries") / "vector")
            )
        except (sqlite3.OperationalError, ImportError) as e:
            # ImportError covers the packages not being installed at all,
            # which is the case the install hint below is written for.
            raise RuntimeError(
                "Failed to load extensions: "
                + str(e)
                + """\n
Install via pip:
pip install sqlite-ai sqliteai-vector
See more:
sqlite-ai: https://github.com/sqliteai/sqlite-ai/releases
sqlite-vector: https://github.com/sqliteai/sqlite-vector/releases
"""
            ) from e
        finally:
            # Always turn extension loading back off, including on failure.
            conn.enable_load_extension(False)

        try:
            # Check if extensions are available
            conn.execute("SELECT vector_version()")
            conn.execute("SELECT ai_version()")
        except sqlite3.OperationalError as e:
            raise RuntimeError("Extensions are not loaded correctly.") from e

        Database._create_schema(conn, settings)
        return conn

    @staticmethod
    def _create_schema(conn: sqlite3.Connection, settings: "Settings"):
        """Create the tables and register the vector column, then commit.

        Creates ``documents``, ``chunks`` and the ``chunks_fts`` FTS5 table
        if they do not exist, and calls ``vector_init`` for
        ``chunks.embedding`` with options built from ``settings``.

        Args:
            conn: Connection with the extensions already loaded.
            settings: Reads ``vector_type``, ``embedding_dim`` and
                ``other_vector_options``.
        """
        vector_options = (
            f"type={settings.vector_type},"
            f"dimension={settings.embedding_dim},"
            f"{settings.other_vector_options}"
        )

        cursor = conn.cursor()
        try:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id TEXT PRIMARY KEY,
                    hash TEXT NOT NULL UNIQUE,
                    uri TEXT,
                    content TEXT,
                    metadata TEXT DEFAULT '{}',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );
                """
            )

            # TODO: this table is not ready for sqlite-sync, it uses the id AUTOINCREMENT
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS chunks (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    document_id TEXT,
                    content TEXT,
                    embedding BLOB,
                    FOREIGN KEY (document_id) REFERENCES documents (id) ON DELETE CASCADE
                );
                """
            )

            cursor.execute(
                """
                CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content, content='chunks', content_rowid='id');
                """
            )

            # Bind the options instead of splicing them into the SQL text,
            # so a quote character in a setting cannot break the statement.
            cursor.execute(
                "SELECT vector_init('chunks', 'embedding', ?);",
                (vector_options,),
            )
            conn.commit()
        finally:
            cursor.close()