2121from common .utils .common import get_file_content
2222from common .utils .ts_vecto_util import to_ts_vector , to_query
2323from knowledge .models import Embedding , SearchMode , SourceType
24- from knowledge .vector .base_vector import BaseVectorStore
24+ from knowledge .vector .base_vector import BaseVectorStore , normalize_for_embedding
2525from maxkb .conf import PROJECT_DIR
2626
2727
@@ -46,6 +46,7 @@ def _save(self, text, source_type: SourceType, knowledge_id: str, document_id: s
4646 source_id : str ,
4747 is_active : bool ,
4848 embedding : Embeddings ):
49+ text = normalize_for_embedding (text )
4950 text_embedding = [float (x ) for x in embedding .embed_query (text )]
5051 embedding = Embedding (
5152 id = uuid .uuid7 (),
@@ -62,7 +63,7 @@ def _save(self, text, source_type: SourceType, knowledge_id: str, document_id: s
6263 return True
6364
6465 def _batch_save (self , text_list : List [Dict ], embedding : Embeddings , is_the_task_interrupted ):
65- texts = [row .get ('text' ) for row in text_list ]
66+ texts = [normalize_for_embedding ( row .get ('text' ) ) for row in text_list ]
6667 embeddings = embedding .embed_documents (texts )
6768 embedding_list = [
6869 Embedding (
@@ -87,6 +88,7 @@ def hit_test(self, query_text, knowledge_id_list: list[str], exclude_document_id
8788 if knowledge_id_list is None or len (knowledge_id_list ) == 0 :
8889 return []
8990 exclude_dict = {}
91+ query_text = normalize_for_embedding (query_text )
9092 embedding_query = embedding .embed_query (query_text )
9193 query_set = QuerySet (Embedding ).filter (knowledge_id__in = knowledge_id_list , is_active = True )
9294 if exclude_document_id_list is not None and len (exclude_document_id_list ) > 0 :
0 commit comments