Skip to content

Commit bb72bcd

Browse files
Add vectorstore API support (#63)
* Add vectorstore API support
1 parent e851d08 commit bb72bcd

File tree

7 files changed

+263
-2
lines changed

7 files changed

+263
-2
lines changed

.github/workflows/pre-commit.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ jobs:
99
strategy:
1010
matrix:
1111
python-version:
12-
- "3.8"
1312
- "3.9"
1413
- "3.10"
1514
- "3.11"

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ parsimonious
33
PyJWT
44
requests
55
setuptools
6+
singlestore-vectorstore>=0.1.2
67
sqlparams
78
tomli>=1.1.0; python_version < '3.11'
89
typing_extensions<=4.13.2

setup.cfg

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ install_requires =
2424
parsimonious
2525
requests
2626
setuptools
27+
singlestore-vectorstore>=0.1.2
2728
sqlparams
2829
wheel
2930
tomli>=1.1.0;python_version < '3.11'
3031
typing-extensions<=4.13.2;python_version < '3.11'
31-
python_requires = >=3.8
32+
python_requires = >=3.9
3233
include_package_data = True
3334
tests_require =
3435
coverage

singlestoredb/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,14 @@
3131
Date, Time, Timestamp, DateFromTicks, TimeFromTicks, TimestampFromTicks,
3232
Binary, STRING, BINARY, NUMBER, DATETIME, ROWID,
3333
)
34+
from .vectorstore import (
35+
vector_db, IndexInterface, IndexList, IndexModel, MatchTypedDict,
36+
Metric, IndexStatsTypedDict, NamespaceStatsTypedDict, Vector,
37+
VectorDictMetadataValue, VectorMetadataTypedDict, VectorTuple,
38+
VectorTupleWithMetadata, DeletionProtection, AndFilter, EqFilter,
39+
ExactMatchFilter, FilterTypedDict, GteFilter, GtFilter, InFilter,
40+
LteFilter, LtFilter, NeFilter, NinFilter, OrFilter, SimpleFilter,
41+
)
3442

3543

3644
#

singlestoredb/connection.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#!/usr/bin/env python
22
"""SingleStoreDB connections and cursors."""
33
import abc
4+
import functools
45
import inspect
56
import io
67
import queue
@@ -1288,6 +1289,14 @@ def show(self) -> ShowAccessor:
12881289
"""Access server properties managed by the SHOW statement."""
12891290
return ShowAccessor(self)
12901291

1292+
@functools.cached_property
def vector_db(self) -> Any:
    """
    Get vectorstore API accessor

    The accessor is a ``vectorstore.VectorDB`` built on top of this
    connection. Because this is a ``functools.cached_property``, the
    object is constructed on first access and the same instance is
    returned on later accesses of the attribute.

    Returns
    -------
    :class:`VectorDB`

    """
    # Imported at call time so the ``vectorstore`` package is only
    # loaded when the accessor is actually requested.
    import vectorstore
    return vectorstore.VectorDB(connection=self)
1299+
12911300

12921301
#
12931302
# NOTE: When adding parameters to this function, you should always
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
import os
2+
import unittest
3+
4+
from vectorstore import VectorDB
5+
6+
import singlestoredb as s2
7+
from . import utils
8+
9+
10+
class TestVectorDB(unittest.TestCase):
    """Exercise the vectorstore accessors exposed by ``singlestoredb``."""

    driver = s2

    # Filled in by setUpClass from the result of utils.load_sql().
    dbname: str = ''
    dbexisted: bool = False

    @classmethod
    def setUpClass(cls) -> None:
        """Load ``empty.sql`` from this directory and record the database."""
        here = os.path.dirname(__file__)
        loaded = utils.load_sql(os.path.join(here, 'empty.sql'))
        cls.dbname, cls.dbexisted = loaded  # type: ignore

    @classmethod
    def tearDownClass(cls) -> None:
        """Drop the database unless it already existed before the tests."""
        if cls.dbexisted:
            return
        utils.drop_database(cls.dbname)  # type: ignore

    def test_vectordb_from_params(self) -> None:
        """Create an index through the module-level ``s2.vector_db``."""
        store: VectorDB = s2.vector_db(database=self.dbname)
        created = store.create_index(
            name='test_index',
            dimension=3,
            tags={'name': 'test_tag'},
        )
        assert created.name == 'test_index'
        assert created.dimension == 3
        assert created.tags == {'name': 'test_tag'}
        assert store.has_index('test_index')

    def test_vectordb_from_connection(self) -> None:
        """Create an index through the ``vector_db`` connection attribute."""
        with s2.connect(database=self.dbname) as conn:
            store: VectorDB = conn.vector_db
            created = store.create_index(
                name='test_index_1',
                dimension=4,
                tags={'name': 'test_tag'},
            )
            assert created.name == 'test_index_1'
            assert created.dimension == 4
            assert created.tags == {'name': 'test_tag'}
            assert store.has_index('test_index_1')

            # A second access of the attribute must still see the index.
            store_again: VectorDB = conn.vector_db
            assert store_again.has_index('test_index_1')

singlestoredb/vectorstore.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
from typing import Any
2+
from typing import Callable
3+
from typing import Dict
4+
from typing import Optional
5+
6+
from vectorstore import AndFilter # noqa: F401
7+
from vectorstore import DeletionProtection # noqa: F401
8+
from vectorstore import EqFilter # noqa: F401
9+
from vectorstore import ExactMatchFilter # noqa: F401
10+
from vectorstore import FilterTypedDict # noqa: F401
11+
from vectorstore import GteFilter # noqa: F401
12+
from vectorstore import GtFilter # noqa: F401
13+
from vectorstore import IndexInterface # noqa: F401
14+
from vectorstore import IndexList # noqa: F401
15+
from vectorstore import IndexModel # noqa: F401
16+
from vectorstore import IndexStatsTypedDict # noqa: F401
17+
from vectorstore import InFilter # noqa: F401
18+
from vectorstore import LteFilter # noqa: F401
19+
from vectorstore import LtFilter # noqa: F401
20+
from vectorstore import MatchTypedDict # noqa: F401
21+
from vectorstore import Metric # noqa: F401
22+
from vectorstore import NamespaceStatsTypedDict # noqa: F401
23+
from vectorstore import NeFilter # noqa: F401
24+
from vectorstore import NinFilter # noqa: F401
25+
from vectorstore import OrFilter # noqa: F401
26+
from vectorstore import SimpleFilter # noqa: F401
27+
from vectorstore import Vector # noqa: F401
28+
from vectorstore import VectorDictMetadataValue # noqa: F401
29+
from vectorstore import VectorMetadataTypedDict # noqa: F401
30+
from vectorstore import VectorTuple # noqa: F401
31+
from vectorstore import VectorTupleWithMetadata # noqa: F401
32+
33+
34+
def vector_db(
    host: Optional[str] = None, user: Optional[str] = None,
    password: Optional[str] = None, port: Optional[int] = None,
    database: Optional[str] = None, driver: Optional[str] = None,
    pure_python: Optional[bool] = None, local_infile: Optional[bool] = None,
    charset: Optional[str] = None,
    ssl_key: Optional[str] = None, ssl_cert: Optional[str] = None,
    ssl_ca: Optional[str] = None, ssl_disabled: Optional[bool] = None,
    ssl_cipher: Optional[str] = None, ssl_verify_cert: Optional[bool] = None,
    tls_sni_servername: Optional[str] = None,
    ssl_verify_identity: Optional[bool] = None,
    conv: Optional[Dict[int, Callable[..., Any]]] = None,
    credential_type: Optional[str] = None,
    autocommit: Optional[bool] = None,
    results_type: Optional[str] = None,
    buffered: Optional[bool] = None,
    results_format: Optional[str] = None,
    program_name: Optional[str] = None,
    conn_attrs: Optional[Dict[str, str]] = None,
    multi_statements: Optional[bool] = None,
    client_found_rows: Optional[bool] = None,
    connect_timeout: Optional[int] = None,
    nan_as_null: Optional[bool] = None,
    inf_as_null: Optional[bool] = None,
    encoding_errors: Optional[str] = None,
    track_env: Optional[bool] = None,
    enable_extended_data_types: Optional[bool] = None,
    vector_data_format: Optional[str] = None,
    parse_json: Optional[bool] = None,
    pool_size: Optional[int] = 5,
    max_overflow: Optional[int] = 10,
    timeout: Optional[float] = 30,
) -> Any:
    """
    Return a vectorstore API connection.
    Database should be specified in the URL or as a keyword.

    Every argument is forwarded by keyword to ``vectorstore.VectorDB``.

    Parameters
    ----------
    host : str, optional
        Hostname, IP address, or URL that describes the connection.
        The scheme or protocol defines which database connector to use.
        By default, the ``mysql`` scheme is used. To connect to the
        HTTP API, the scheme can be set to ``http`` or ``https``. The username,
        password, host, and port are specified as in a standard URL. The path
        indicates the database name. The overall form of the URL is:
        ``scheme://user:password@host:port/db_name``.  The scheme can
        typically be left off (unless you are using the HTTP API):
        ``user:password@host:port/db_name``.
    user : str, optional
        Database user name
    password : str, optional
        Database user password
    port : int, optional
        Database port. This defaults to 3306 for non-HTTP connections, 80
        for HTTP connections, and 443 for HTTPS connections.
    database : str, optional
        Database name.
    driver : str, optional
        Passed through to ``VectorDB`` unchanged.
    pure_python : bool, optional
        Use the connector in pure Python mode
    local_infile : bool, optional
        Allow local file uploads
    charset : str, optional
        Character set for string values
    ssl_key : str, optional
        File containing SSL key
    ssl_cert : str, optional
        File containing SSL certificate
    ssl_ca : str, optional
        File containing SSL certificate authority
    ssl_cipher : str, optional
        Sets the SSL cipher list
    ssl_disabled : bool, optional
        Disable SSL usage
    ssl_verify_cert : bool, optional
        Verify the server's certificate. This is automatically enabled if
        ``ssl_ca`` is also specified.
    tls_sni_servername : str, optional
        Passed through to ``VectorDB`` unchanged.
    ssl_verify_identity : bool, optional
        Verify the server's identity
    conv : dict[int, Callable], optional
        Dictionary of data conversion functions
    credential_type : str, optional
        Type of authentication to use: auth.PASSWORD, auth.JWT, or auth.BROWSER_SSO
    autocommit : bool, optional
        Enable autocommits
    results_type : str, optional
        The form of the query results: tuples, namedtuples, dicts,
        numpy, polars, pandas, arrow
    buffered : bool, optional
        Should the entire query result be buffered in memory? This is the default
        behavior which allows full cursor control of the result, but does consume
        more memory.
    results_format : str, optional
        Deprecated. This option has been renamed to results_type.
    program_name : str, optional
        Name of the program
    conn_attrs : dict, optional
        Additional connection attributes for telemetry. Example:
        {'program_version': "1.0.2", "_connector_name": "dbt connector"}
        When omitted, a new empty dict is passed to ``VectorDB``.
    multi_statements: bool, optional
        Should multiple statements be allowed within a single query?
    client_found_rows : bool, optional
        Passed through to ``VectorDB`` unchanged.
    connect_timeout : int, optional
        The timeout for connecting to the database in seconds.
        (default: 10, min: 1, max: 31536000)
    nan_as_null : bool, optional
        Should NaN values be treated as NULLs when used in parameter
        substitutions including uploaded data?
    inf_as_null : bool, optional
        Should Inf values be treated as NULLs when used in parameter
        substitutions including uploaded data?
    encoding_errors : str, optional
        The error handler name for value decoding errors
    track_env : bool, optional
        Should the connection track the SINGLESTOREDB_URL environment variable?
    enable_extended_data_types : bool, optional
        Should extended data types (BSON, vector) be enabled?
    vector_data_format : str, optional
        Format for vector types: json or binary
    parse_json : bool, optional
        Passed through to ``VectorDB`` unchanged.
    pool_size : int, optional
        The number of connections to keep in the connection pool. Default is 5.
    max_overflow : int, optional
        The maximum number of connections to allow beyond the pool size.
        Default is 10.
    timeout : float, optional
        The timeout for acquiring a connection from the pool in seconds.
        Default is 30 seconds.

    See Also
    --------
    :class:`Connection`

    Returns
    -------
    :class:`VectorDB`

    """
    # Imported at call time so the ``vectorstore`` package is only loaded
    # when a vectorstore connection is actually requested.
    from vectorstore import VectorDB

    # The signature previously used ``{}`` as the default, which is a single
    # dict object shared by every call. Build a fresh one per call instead so
    # nothing downstream can leak attributes between unrelated connections.
    if conn_attrs is None:
        conn_attrs = {}

    return VectorDB(
        host=host, user=user, password=password, port=port,
        database=database, driver=driver, pure_python=pure_python,
        local_infile=local_infile, charset=charset,
        ssl_key=ssl_key, ssl_cert=ssl_cert, ssl_ca=ssl_ca,
        ssl_disabled=ssl_disabled, ssl_cipher=ssl_cipher,
        ssl_verify_cert=ssl_verify_cert,
        tls_sni_servername=tls_sni_servername,
        ssl_verify_identity=ssl_verify_identity, conv=conv,
        credential_type=credential_type, autocommit=autocommit,
        results_type=results_type, buffered=buffered,
        results_format=results_format, program_name=program_name,
        conn_attrs=conn_attrs, multi_statements=multi_statements,
        client_found_rows=client_found_rows,
        connect_timeout=connect_timeout, nan_as_null=nan_as_null,
        inf_as_null=inf_as_null, encoding_errors=encoding_errors,
        track_env=track_env,
        enable_extended_data_types=enable_extended_data_types,
        vector_data_format=vector_data_format,
        parse_json=parse_json, pool_size=pool_size,
        max_overflow=max_overflow, timeout=timeout,
    )

0 commit comments

Comments
 (0)