Skip to content

Commit 24df01e

Browse files
Fruank4claude
andcommitted
fix(rag): fix document ID compatibility with Python and respect defaultConfig limit
- Document.generateDocumentId: use getContentText() instead of getContent() so the JSON key contains a plain string matching the Python implementation's _map_text_to_uuid, ensuring Java and Python generate identical UUIDs for the same document content when sharing a vector store. - KnowledgeRetrievalTools.retrieveKnowledge: fall back to defaultConfig.getLimit() instead of hardcoded 5 when the LLM omits the limit parameter, so the limit configured at construction time is honoured. Add tests: DocumentTest.testDocumentIdUsesTextNotContentBlockObject and KnowledgeRetrievalToolsTest covering both limit fallback and explicit override. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 762672e commit 24df01e

4 files changed

Lines changed: 182 additions & 2 deletions

File tree

agentscope-core/src/main/java/io/agentscope/core/rag/KnowledgeRetrievalTools.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ public String retrieveKnowledge(
133133

134134
// Set default value
135135
if (limit == null) {
136-
limit = 5;
136+
limit = defaultConfig.getLimit();
137137
}
138138

139139
// Extract conversation history from agent if available

agentscope-core/src/main/java/io/agentscope/core/rag/model/Document.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ private static String generateDocumentId(DocumentMetadata metadata) {
214214
Map<String, Object> keyMap = new LinkedHashMap<>();
215215
keyMap.put("doc_id", metadata.getDocId());
216216
keyMap.put("chunk_id", metadata.getChunkId());
217-
keyMap.put("content", metadata.getContent());
217+
keyMap.put("content", metadata.getContentText());
218218

219219
// Serialize to JSON (ensure_ascii=False in Python, so we use default UTF-8)
220220
String jsonKey = JsonUtils.getJsonCodec().toJson(keyMap);
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
/*
2+
* Copyright 2024-2026 the original author or authors.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package io.agentscope.core.rag;
17+
18+
import static org.junit.jupiter.api.Assertions.assertEquals;
19+
import static org.junit.jupiter.api.Assertions.assertNotNull;
20+
import static org.junit.jupiter.api.Assertions.assertTrue;
21+
22+
import io.agentscope.core.message.TextBlock;
23+
import io.agentscope.core.rag.model.Document;
24+
import io.agentscope.core.rag.model.DocumentMetadata;
25+
import io.agentscope.core.rag.model.RetrieveConfig;
26+
import java.util.List;
27+
import java.util.concurrent.atomic.AtomicReference;
28+
import org.junit.jupiter.api.DisplayName;
29+
import org.junit.jupiter.api.Tag;
30+
import org.junit.jupiter.api.Test;
31+
import reactor.core.publisher.Mono;
32+
33+
/**
34+
* Unit tests for KnowledgeRetrievalTools.
35+
*/
36+
@Tag("unit")
37+
@DisplayName("KnowledgeRetrievalTools Unit Tests")
38+
class KnowledgeRetrievalToolsTest {
39+
40+
private static Document makeDoc(String text) {
41+
return new Document(
42+
new DocumentMetadata(TextBlock.builder().text(text).build(), "doc-1", "0"));
43+
}
44+
45+
@Test
46+
@DisplayName("When LLM omits limit, defaultConfig.getLimit() should be used")
47+
void testNullLimitFallsBackToDefaultConfig() {
48+
AtomicReference<RetrieveConfig> capturedConfig = new AtomicReference<>();
49+
50+
Knowledge knowledge =
51+
new Knowledge() {
52+
@Override
53+
public Mono<Void> addDocuments(List<Document> documents) {
54+
return Mono.empty();
55+
}
56+
57+
@Override
58+
public Mono<List<Document>> retrieve(String query, RetrieveConfig config) {
59+
capturedConfig.set(config);
60+
return Mono.just(List.of());
61+
}
62+
};
63+
64+
RetrieveConfig defaultConfig = RetrieveConfig.builder().limit(8).build();
65+
KnowledgeRetrievalTools tools = new KnowledgeRetrievalTools(knowledge, defaultConfig);
66+
67+
// Simulate LLM not providing limit (null)
68+
tools.retrieveKnowledge("test query", null, null);
69+
70+
assertNotNull(capturedConfig.get());
71+
assertEquals(
72+
8,
73+
capturedConfig.get().getLimit(),
74+
"When limit is null, defaultConfig.getLimit() should be used, not hardcoded 5");
75+
}
76+
77+
@Test
78+
@DisplayName("When LLM provides explicit limit, it should override defaultConfig")
79+
void testExplicitLimitOverridesDefault() {
80+
AtomicReference<RetrieveConfig> capturedConfig = new AtomicReference<>();
81+
82+
Knowledge knowledge =
83+
new Knowledge() {
84+
@Override
85+
public Mono<Void> addDocuments(List<Document> documents) {
86+
return Mono.empty();
87+
}
88+
89+
@Override
90+
public Mono<List<Document>> retrieve(String query, RetrieveConfig config) {
91+
capturedConfig.set(config);
92+
return Mono.just(List.of());
93+
}
94+
};
95+
96+
RetrieveConfig defaultConfig = RetrieveConfig.builder().limit(8).build();
97+
KnowledgeRetrievalTools tools = new KnowledgeRetrievalTools(knowledge, defaultConfig);
98+
99+
tools.retrieveKnowledge("test query", 3, null);
100+
101+
assertNotNull(capturedConfig.get());
102+
assertEquals(
103+
3,
104+
capturedConfig.get().getLimit(),
105+
"Explicit limit from LLM should override defaultConfig.getLimit()");
106+
}
107+
108+
@Test
109+
@DisplayName("formatDocumentsForTool should return no-results message for empty list")
110+
void testFormatEmptyResults() {
111+
Knowledge knowledge =
112+
new Knowledge() {
113+
@Override
114+
public Mono<Void> addDocuments(List<Document> docs) {
115+
return Mono.empty();
116+
}
117+
118+
@Override
119+
public Mono<List<Document>> retrieve(String query, RetrieveConfig config) {
120+
return Mono.just(List.of());
121+
}
122+
};
123+
124+
KnowledgeRetrievalTools tools = new KnowledgeRetrievalTools(knowledge);
125+
String result = tools.retrieveKnowledge("query", null, null);
126+
127+
assertTrue(
128+
result.contains("No relevant documents found"),
129+
"Empty results should return no-results message");
130+
}
131+
132+
@Test
133+
@DisplayName("formatDocumentsForTool should include document content and score")
134+
void testFormatDocumentsWithScore() {
135+
Document doc = makeDoc("Important content");
136+
doc.setScore(0.87);
137+
138+
Knowledge knowledge =
139+
new Knowledge() {
140+
@Override
141+
public Mono<Void> addDocuments(List<Document> docs) {
142+
return Mono.empty();
143+
}
144+
145+
@Override
146+
public Mono<List<Document>> retrieve(String query, RetrieveConfig config) {
147+
return Mono.just(List.of(doc));
148+
}
149+
};
150+
151+
KnowledgeRetrievalTools tools = new KnowledgeRetrievalTools(knowledge);
152+
String result = tools.retrieveKnowledge("query", null, null);
153+
154+
assertTrue(result.contains("Important content"));
155+
assertTrue(result.contains("0.870"));
156+
}
157+
}

agentscope-core/src/test/java/io/agentscope/core/rag/model/DocumentTest.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,29 @@ void testDocumentIdConsistency() {
6969
assertEquals(doc1.getId(), doc2.getId());
7070
}
7171

72+
@Test
73+
@DisplayName("Should generate ID based on text content, not ContentBlock object structure")
74+
void testDocumentIdUsesTextNotContentBlockObject() {
75+
// Two TextBlock instances with identical text but different object references
76+
// must produce the same document ID, proving the ID is derived from the text
77+
// string rather than the ContentBlock object's serialized form.
78+
TextBlock content1 = TextBlock.builder().text("hello world").build();
79+
TextBlock content2 = TextBlock.builder().text("hello world").build();
80+
DocumentMetadata metadata1 = new DocumentMetadata(content1, "doc-1", "0");
81+
DocumentMetadata metadata2 = new DocumentMetadata(content2, "doc-1", "0");
82+
83+
Document doc1 = new Document(metadata1);
84+
Document doc2 = new Document(metadata2);
85+
86+
assertEquals(doc1.getId(), doc2.getId());
87+
88+
// Also verify that different text produces a different ID
89+
TextBlock differentContent = TextBlock.builder().text("different text").build();
90+
DocumentMetadata metadata3 = new DocumentMetadata(differentContent, "doc-1", "0");
91+
Document doc3 = new Document(metadata3);
92+
assertFalse(doc1.getId().equals(doc3.getId()));
93+
}
94+
7295
@Test
7396
@DisplayName("Should generate different IDs for different content")
7497
void testDocumentIdUniqueness() {

0 commit comments

Comments
 (0)