66import uuid
77from typing import Any , Dict , List , Optional
88
9+ _GLIDE_AVAILABLE = False
910try :
1011 from glide_sync import (
12+ Batch ,
13+ ConnectionError as GlideConnectionError ,
1114 DataType ,
1215 DistanceMetricType ,
1316 Field ,
1720 GlideClient ,
1821 GlideClientConfiguration ,
1922 NodeAddress ,
23+ RequestError ,
2024 ReturnField ,
2125 ServerCredentials ,
2226 TagField ,
2327 TextField ,
28+ TimeoutError as GlideTimeoutError ,
2429 VectorAlgorithm ,
2530 VectorField ,
2631 VectorFieldAttributesFlat ,
2732 VectorFieldAttributesHnsw ,
2833 VectorType ,
2934 ft ,
3035 )
36+
37+ _GLIDE_AVAILABLE = True
3138except ImportError :
32- raise ImportError (
33- "Could not import valkey-glide-sync. "
34- "Please install with `pip install valkey-glide-sync`."
35- )
39+ pass
3640
37- from application .core .settings import settings
38- from application .vectorstore .base import BaseVectorStore
39- from application .vectorstore .document_class import Document
41+ from application .core .settings import settings # noqa: E402
42+ from application .vectorstore .base import BaseVectorStore # noqa: E402
43+ from application .vectorstore .document_class import Document # noqa: E402
4044
4145logger = logging .getLogger (__name__ )
4246
4347# Characters that must be escaped in Valkey tag field query values.
44- _TAG_SPECIAL_CHARS = set (r".,<>{}[]\"':;!@#$%^&*()-+=~ /|" )
48+ # Includes '?' which is a single-character wildcard in valkey-search TAG queries.
49+ _TAG_SPECIAL_CHARS = set (r".,<>{}[]\"':;!@#$%^&*()-+=~ /|?" )
4550
4651# Batch size for DELETE operations in delete_index.
4752_DELETE_BATCH_SIZE = 100
@@ -70,7 +75,15 @@ def __init__(
7075 source_id: Identifier for the document source, used to
7176 namespace and filter documents.
7277 embeddings_key: Key name or API key for the embeddings provider.
78+
79+ Raises:
80+ ImportError: If valkey-glide-sync is not installed.
7381 """
82+ if not _GLIDE_AVAILABLE :
83+ raise ImportError (
84+ "Could not import valkey-glide-sync. "
85+ "Please install with `pip install valkey-glide-sync`."
86+ )
7487 super ().__init__ ()
7588 self ._source_id = str (source_id ).replace ("application/indexes/" , "" ).rstrip ("/" )
7689 self ._embedding = self ._get_embeddings (settings .EMBEDDINGS_NAME , embeddings_key )
@@ -127,7 +140,10 @@ def _ensure_index_exists(self):
127140 Uses VALKEY_DISTANCE_METRIC, VALKEY_VECTOR_TYPE, and VALKEY_VECTOR_ALGORITHM
128141 from settings. Falls back to cosine/float32/hnsw if values are unrecognized.
129142 """
130- embedding_dim = getattr (self ._embedding , "dimension" , 768 )
143+ embedding_dim = getattr (self ._embedding , "dimension" , None )
144+ if embedding_dim is None :
145+ probe = self ._embedding .embed_query ("dimension probe" )
146+ embedding_dim = len (probe )
131147
132148 distance_metric = self ._resolve_distance_metric (settings .VALKEY_DISTANCE_METRIC )
133149 vector_type = self ._resolve_vector_type (settings .VALKEY_VECTOR_TYPE )
@@ -310,7 +326,7 @@ def search(self, question: str, k: int = 2, *args, **kwargs) -> List[Document]:
310326
311327 return self ._parse_search_results (results )
312328
313- except Exception as e :
329+ except ( RequestError , GlideConnectionError , GlideTimeoutError ) as e :
314330 logger .error (f"Error searching Valkey: { e } " , exc_info = True )
315331 return []
316332
@@ -406,6 +422,8 @@ def add_texts(
406422 metadatas = metadatas or [{}] * len (texts )
407423 doc_ids : List [str ] = []
408424
425+ # Use non-atomic Batch (pipeline) to reduce network round trips.
426+ batch = Batch (False )
409427 for text , embedding , metadata in zip (texts , embeddings , metadatas ):
410428 doc_id = str (uuid .uuid4 ())
411429 key = self ._doc_key (doc_id )
@@ -418,15 +436,17 @@ def add_texts(
418436 "embedding" : vector_bytes ,
419437 }
420438
421- try :
422- self ._client .hset (key , fields )
423- doc_ids .append (doc_id )
424- except Exception as e :
425- logger .error (
426- f"Error adding document to Valkey (wrote { len (doc_ids )} /{ len (texts )} "
427- f"before failure): { e } "
428- )
429- raise
439+ batch .hset (key , fields )
440+ doc_ids .append (doc_id )
441+
442+ try :
443+ self ._client .exec (batch , raise_on_error = True )
444+ except (RequestError , GlideConnectionError , GlideTimeoutError ) as e :
445+ logger .error (
446+ f"Error adding documents to Valkey via pipeline "
447+ f"({ len (doc_ids )} documents): { e } "
448+ )
449+ raise
430450
431451 return doc_ids
432452
@@ -488,7 +508,7 @@ def delete_index(self, *args, **kwargs):
488508 batch = keys [i : i + _DELETE_BATCH_SIZE ]
489509 self ._client .delete (batch )
490510
491- except Exception as e :
511+ except ( RequestError , GlideConnectionError , GlideTimeoutError ) as e :
492512 logger .error (f"Error deleting index from Valkey: { e } " , exc_info = True )
493513
494514 def save_local (self , * args , ** kwargs ):
@@ -550,7 +570,7 @@ def get_chunks(self) -> List[Dict[str, Any]]:
550570
551571 return chunks
552572
553- except Exception as e :
573+ except ( RequestError , GlideConnectionError , GlideTimeoutError ) as e :
554574 logger .error (f"Error getting chunks from Valkey: { e } " , exc_info = True )
555575 return []
556576
@@ -586,7 +606,7 @@ def add_chunk(self, text: str, metadata: Optional[Dict[str, Any]] = None) -> str
586606 try :
587607 self ._client .hset (key , fields )
588608 return doc_id
589- except Exception as e :
609+ except ( RequestError , GlideConnectionError , GlideTimeoutError ) as e :
590610 logger .error (f"Error adding chunk to Valkey: { e } " )
591611 raise
592612
@@ -603,6 +623,6 @@ def delete_chunk(self, chunk_id: str) -> bool:
603623 key = self ._doc_key (chunk_id )
604624 result = self ._client .delete ([key ])
605625 return result > 0
606- except Exception as e :
626+ except ( RequestError , GlideConnectionError , GlideTimeoutError ) as e :
607627 logger .error (f"Error deleting chunk from Valkey: { e } " , exc_info = True )
608628 return False
0 commit comments