From 5b83c354e295e04d36b7078f2caddd9727947ac2 Mon Sep 17 00:00:00 2001 From: shashwatrai <46484010+shashwatrai@users.noreply.github.com> Date: Mon, 19 May 2025 16:39:12 +0530 Subject: [PATCH 1/3] Update pgvector.py Added support for halfvec, in case the dimension of the embeddings are more than 2000 --- gptcache/manager/vector_data/pgvector.py | 34 ++++++++++++++++++++---- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/gptcache/manager/vector_data/pgvector.py b/gptcache/manager/vector_data/pgvector.py index 55ab0c99..9a149f97 100644 --- a/gptcache/manager/vector_data/pgvector.py +++ b/gptcache/manager/vector_data/pgvector.py @@ -25,10 +25,13 @@ class _VectorType(UserDefinedType): """ cache_ok = True - def __init__(self, precision=8): + def __init__(self, precision=8, use_halfvec=False): self.precision = precision + self.use_halfvec = use_halfvec def get_col_spec(self, **_): + if self.use_halfvec + return f"halfvec({self.precision})" return f"vector({self.precision})" # pylint: disable=unused-argument @@ -40,7 +43,7 @@ def result_processor(self, dialect, coltype): return lambda value: value -def _get_model_and_index(table_prefix, vector_dimension, index_type, lists): +def _get_model_and_index(table_prefix, vector_dimension, index_type, lists, use_halfvec=False): class VectorStoreTable(Base): """ vector store table @@ -49,7 +52,7 @@ class VectorStoreTable(Base): __tablename__ = table_prefix + "_pg_vector_store" __table_args__ = {"extend_existing": True} id = Column(Integer, primary_key=True, autoincrement=False) - embedding = Column(_VectorType(vector_dimension), nullable=False) + embedding = Column(_VectorType(vector_dimension, use_halfvec), nullable=False) vector_store_index = Index( f"idx_{table_prefix}_pg_vector_store_embedding", @@ -76,12 +79,16 @@ class PGVector(VectorBase): :param index_params: the index parameters for pgvector, defaults to 'vector_l2_ops' index: {"index_type": "L2", "params": {"lists": 100, "probes": 10}. :type index_params: dict + :param use_halfvec: whether to use half-precision vector, defaults to False + :type use_halfvec: bool """ INDEX_PARAM = { "L2": {"operator": "<->", "name": "vector_l2_ops"}, # The only one supported now "cosine": {"operator": "<=>", "name": "vector_cosine_ops"}, "inner_product": {"operator": "<->", "name": "vector_ip_ops"}, + "halfvec_l2": {"operator": "<->", "name": "halfvec_l2_ops"}, + "halfvec_cosine": {"operator": "<=>", "name": "halfvec_cosine_ops"}, } def __init__( @@ -91,6 +98,7 @@ def __init__( collection_name: str = "gptcache", dimension: int = 0, top_k: int = 1, + use_halfvec: bool = False; ): if dimension <= 0: raise ValueError( @@ -100,11 +108,18 @@ def __init__( self.top_k = top_k self.index_params = index_params self._url = url + self.use_halfvec = use_halfvec + + #correcting the index type passed by user + if use_halfvec and "halfvec" not in index_params["index_type"]: + index_params["index_type"] = f"halfvec_{index_params['index_type'].lower()" + self._store, self._index = _get_model_and_index( collection_name, dimension, index_type=self.INDEX_PARAM[index_params["index_type"]]["name"], - lists=index_params["params"]["lists"] + lists=index_params["params"]["lists"], + use_halfvec=use_halfvec ) self._connect(url) self._create_collection() @@ -124,11 +139,17 @@ def _query(self, session): return session.query(self._store) def _format_data_for_search(self, data): + if self.use_halfvec: + data = np.array(data,dtype=np.float16) + return f"[{','.join(map(str, data))}]" def mul_add(self, datas: List[VectorData]): data_array, id_array = map(list, zip(*((data.data, data.id) for data in datas))) - np_data = np.array(data_array).astype("float32") + if self.use_halfvec: + np_data = np.array(data_array).astype("float16") + else: + np_data = np.array(data_array).astype("float32") entities = [{"id": id, "embedding": embedding.tolist()} for id, embedding in zip(id_array, np_data)] with self._session() as session: @@ -139,6 +160,9 @@ def search(self, data: np.ndarray, top_k: int = -1): if top_k == -1: top_k = self.top_k + if self.use_halfvec: + data = data.astype(np.float16) + formatted_data = self._format_data_for_search(data.reshape(1, -1)[0].tolist()) index_config = self.INDEX_PARAM[self.index_params["index_type"]] similarity = self._store.embedding.op(index_config["operator"])(formatted_data) From 68bd2ed1694fb20fdc778b1a1f7c37f35d4189ff Mon Sep 17 00:00:00 2001 From: shashwatrai <46484010+shashwatrai@users.noreply.github.com> Date: Mon, 19 May 2025 16:55:11 +0530 Subject: [PATCH 2/3] Update pgvector.py removed redundant conversions --- gptcache/manager/vector_data/pgvector.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gptcache/manager/vector_data/pgvector.py b/gptcache/manager/vector_data/pgvector.py index 9a149f97..f9957be4 100644 --- a/gptcache/manager/vector_data/pgvector.py +++ b/gptcache/manager/vector_data/pgvector.py @@ -112,7 +112,7 @@ def __init__( #correcting the index type passed by user if use_halfvec and "halfvec" not in index_params["index_type"]: - index_params["index_type"] = f"halfvec_{index_params['index_type'].lower()" + index_params["index_type"] = f"halfvec_{index_params['index_type'].lower()}" self._store, self._index = _get_model_and_index( collection_name, @@ -131,7 +131,9 @@ def _connect(self, url): def _create_collection(self): with self._engine.connect() as con: con.execution_options(isolation_level="AUTOCOMMIT").execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) - + if self.use_halfvec: + con.execution_options(isolation_level="AUTOCOMMIT").execute(text("CREATE EXTENSION IF NOT EXISTS halfvec;")) + self._store.__table__.create(bind=self._engine, checkfirst=True) self._index.create(bind=self._engine, checkfirst=True) @@ -139,9 +141,7 @@ def _query(self, session): return session.query(self._store) def _format_data_for_search(self, data): - if self.use_halfvec: - data = np.array(data,dtype=np.float16) - + return f"[{','.join(map(str, data))}]" def mul_add(self, datas: List[VectorData]): From 78103eb1a8b6d59efbe3b3aee5f2b3bdec5a8303 Mon Sep 17 00:00:00 2001 From: shashwatrai <46484010+shashwatrai@users.noreply.github.com> Date: Mon, 19 May 2025 17:48:35 +0530 Subject: [PATCH 3/3] Update pgvector.py removed unwanted code --- gptcache/manager/vector_data/pgvector.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gptcache/manager/vector_data/pgvector.py b/gptcache/manager/vector_data/pgvector.py index f9957be4..61e7f590 100644 --- a/gptcache/manager/vector_data/pgvector.py +++ b/gptcache/manager/vector_data/pgvector.py @@ -131,8 +131,6 @@ def _connect(self, url): def _create_collection(self): with self._engine.connect() as con: con.execution_options(isolation_level="AUTOCOMMIT").execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) - if self.use_halfvec: - con.execution_options(isolation_level="AUTOCOMMIT").execute(text("CREATE EXTENSION IF NOT EXISTS halfvec;")) self._store.__table__.create(bind=self._engine, checkfirst=True) self._index.create(bind=self._engine, checkfirst=True)