-
Notifications
You must be signed in to change notification settings - Fork 13
Expand file tree
/
Copy pathdatabase.py
More file actions
125 lines (109 loc) · 3.93 KB
/
database.py
File metadata and controls
125 lines (109 loc) · 3.93 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import importlib
import importlib.resources
import sqlite3
from .settings import Settings
class Database:
    """Database initialization and schema management for SQLiteRag."""

    # Appended to the RuntimeError raised when the extensions cannot be loaded.
    _INSTALL_HINT = (
        "\n\n"
        "Install via pip:\n"
        "pip install sqlite-ai sqliteai-vector\n"
        "See more:\n"
        "sqlite-ai: https://github.com/sqliteai/sqlite-ai/releases\n"
        "sqlite-vector: https://github.com/sqliteai/sqlite-vector/releases\n"
    )

    @staticmethod
    def new_connection(db_path: str = "./sqliterag.sqlite") -> sqlite3.Connection:
        """Create a new SQLite connection to the specified database path.

        The returned connection uses ``sqlite3.Row`` as its row factory, so
        result columns can be accessed by name as well as by index.
        """
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row
        return conn

    @staticmethod
    def initialize(conn: sqlite3.Connection, settings: Settings) -> sqlite3.Connection:
        """Initialize the database with extensions and schema.

        Loads the ``ai`` and ``vector`` extensions into ``conn``, checks that
        their version functions are callable, then creates the schema.

        Args:
            conn: An open SQLite connection.
            settings: Project settings; ``use_gpu`` selects the extension
                binary and ``get_vector_init_options()`` supplies the
                options passed to ``vector_init``.

        Returns:
            The same connection that was passed in.

        Raises:
            RuntimeError: If the extensions cannot be located or loaded, or
                if their version functions are not available afterwards.
        """
        Database._load_extensions(conn, settings)
        Database._verify_extensions(conn)
        Database._create_schema(conn, settings)
        return conn

    @staticmethod
    def _load_extensions(conn: sqlite3.Connection, settings: Settings) -> None:
        """Load the ``ai`` and ``vector`` extension binaries into ``conn``."""
        variant = "gpu" if settings.use_gpu else "cpu"
        conn.enable_load_extension(True)
        try:
            ai_binary = importlib.resources.files(f"sqliteai.binaries.{variant}") / "ai"
            conn.load_extension(str(ai_binary))
            vector_binary = importlib.resources.files("sqlite_vector.binaries") / "vector"
            conn.load_extension(str(vector_binary))
        except (sqlite3.OperationalError, ModuleNotFoundError) as e:
            # ModuleNotFoundError is what importlib raises when the pip
            # packages are missing, which is exactly the case the install
            # hint is meant for.
            raise RuntimeError(
                "Failed to load extensions: " + str(e) + Database._INSTALL_HINT
            ) from e
        finally:
            # Never leave extension loading enabled on the caller's
            # connection, even when loading failed part-way through.
            conn.enable_load_extension(False)

    @staticmethod
    def _verify_extensions(conn: sqlite3.Connection) -> None:
        """Check that both extensions registered their version functions."""
        try:
            conn.execute("SELECT vector_version()")
            conn.execute("SELECT ai_version()")
        except sqlite3.OperationalError as e:
            raise RuntimeError("Extensions are not loaded correctly.") from e

    @staticmethod
    def _create_schema(conn: sqlite3.Connection, settings: Settings):
        """Create tables, indexes and the FTS table, then commit.

        Every CREATE statement uses ``IF NOT EXISTS``. After the tables
        exist, ``vector_init`` is called for the ``embedding`` column of both
        ``chunks`` and ``sentences`` (presumably provided by the vector
        extension loaded in ``initialize`` -- confirm against its docs).
        """
        vector_options = settings.get_vector_init_options()
        cursor = conn.cursor()
        try:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS documents (
                    id TEXT PRIMARY KEY,
                    hash TEXT NOT NULL UNIQUE,
                    uri TEXT,
                    content TEXT,
                    metadata TEXT DEFAULT '{}',
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );
                """
            )

            # TODO: this table is not ready for sqlite-sync, it uses the id AUTOINCREMENT
            cursor.executescript(
                """
                CREATE TABLE IF NOT EXISTS chunks (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    document_id TEXT,
                    content TEXT,
                    embedding BLOB
                );
                CREATE INDEX IF NOT EXISTS idx_chunks_document_id ON chunks (document_id);
                """
            )

            cursor.executescript(
                """
                CREATE TABLE IF NOT EXISTS sentences (
                    id TEXT PRIMARY KEY,
                    chunk_id INTEGER,
                    content TEXT,
                    embedding BLOB,
                    start_offset INTEGER,
                    end_offset INTEGER
                );
                CREATE INDEX IF NOT EXISTS idx_sentences_chunk_id ON sentences (chunk_id);
                """
            )

            # Full-text index whose content is stored in the chunks table.
            cursor.execute(
                """
                CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(content, content='chunks', content_rowid='id');
                """
            )

            cursor.execute(
                """
                SELECT vector_init('chunks', 'embedding', ?);
                """,
                (vector_options,),
            )
            # TODO: same configuration as chunks (or different options?)
            cursor.execute(
                """
                SELECT vector_init('sentences', 'embedding', ?);
                """,
                (vector_options,),
            )
            conn.commit()
        finally:
            cursor.close()