@@ -132,6 +132,9 @@ private boolean isValidCollection(String collectionName) {
132132 public VectorDocument autoAddFolderToKnowledgeBase () {
133133 try {
134134 String folderPath = System .getenv ("FOLDER_PATH" );
135+ if (folderPath == null || folderPath .isBlank ()) {
136+ throw new ServiceException (ExceptionEnum .CM329 .getResultCode (), "FOLDER_PATH does not exist: " + folderPath );
137+ }
135138 // 验证文件夹路径
136139 Path folder = Paths .get (folderPath );
137140 if (!Files .exists (folder ) || !Files .isDirectory (folder )) {
@@ -585,26 +588,30 @@ public DeleteResult deleteByFilePath(String filePath, String collectionName) {
585588 */
586589 private List <EmbeddingMatch <TextSegment >> searchBySource (String sourcePath , String collectionName ) {
587590 try {
591+ // 使用更合理的查询文本
592+ String queryText = "document content analysis" ;
593+
588594 EmbeddingSearchRequest searchRequest = EmbeddingSearchRequest .builder ()
589- .queryEmbedding (embeddingModel .embed ("test" ).content ())
590- .maxResults (10000 )
591- .minScore (0.0 )
592- .build ();
595+ .queryEmbedding (embeddingModel .embed (queryText ).content ())
596+ .maxResults (1000 )
597+ .minScore (0.1 )
598+ .build ();
593599
594600 List <EmbeddingMatch <TextSegment >> allMatches = embeddingStore .search (searchRequest ).matches ();
595601
596- // 根据源文件路径和集合名称过滤
602+ // 在应用层过滤
597603 return allMatches .stream ()
598- .filter (match -> {
599- String source = match .embedded ().metadata ().getString ("source" );
600- String collection = match .embedded ().metadata ().getString ("collection" );
604+ .filter (match -> {
605+ String source = match .embedded ().metadata ().getString ("source" );
606+ String collection = match .embedded ().metadata ().getString ("collection" );
601607
602- boolean sourceMatch = source != null && source .equals (sourcePath );
603- boolean collectionMatch = collectionName == null ||
604- (collection != null && collection .equals (collectionName ));
608+ boolean sourceMatch = source != null && source .equals (sourcePath );
609+ boolean collectionMatch = collectionName == null ||
610+ (collection != null && collection .equals (collectionName ));
605611
606- return sourceMatch && collectionMatch ;
607- }).collect (Collectors .toList ());
612+ return sourceMatch && collectionMatch ;
613+ })
614+ .collect (Collectors .toList ());
608615
609616 } catch (Exception e ) {
610617 log .error ("Failed to search vectors by source: {} in collection: {}" , sourcePath , collectionName , e );
0 commit comments