Skip to content

Commit b313e8a

Browse files
committed
Address code review comments from @Zabuzard and @tj-wazei
- refactor(XkcdRetriever): rename to XkcdService - doc(ChatGptService): clarify the meaning of RAG and vector store - XkcdService: improve fetchAllXkcdPosts method - XkcdCommand: do not allow more than 40KB of chat messages Signed-off-by: Chris Sdogkos <work@chris-sdogkos.com>
1 parent d4323eb commit b313e8a

File tree

3 files changed

+63
-33
lines changed

3 files changed

+63
-33
lines changed

application/src/main/java/org/togetherjava/tjbot/features/chatgpt/ChatGptService.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,10 @@ public Optional<String> uploadFileIfNotExists(Path filePath, FilePurpose purpose
167167
* Creates a new vector store with the given file ID if none exists or returns the ID of the
168168
* existing vector store with that name.
169169
* <p>
170-
* You can use this for RAG purposes, it is an effective way to give ChatGPT extra information
171-
* from what it has been trained.
170+
* A vector store indexes document content as embeddings for semantic search. You can use this
171+
* for RAG (Retrieval-Augmented Generation), where the model retrieves relevant context from
172+
* your documents before generating responses, effectively giving it access to information
173+
* beyond its training data.
172174
*
173175
* @param fileId The ID of the file to include in the new vector store.
174176
* @return The vector store ID (existing or newly created).

application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdCommand.java

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import net.dv8tion.jda.api.interactions.commands.OptionType;
1212
import net.dv8tion.jda.api.interactions.commands.build.OptionData;
1313
import net.dv8tion.jda.api.interactions.commands.build.SubcommandData;
14+
import org.apache.commons.lang3.IntegerRange;
1415
import org.slf4j.Logger;
1516
import org.slf4j.LoggerFactory;
1617

@@ -37,7 +38,7 @@
3738
* <li>{@code /xkcd custom <id>} - Posts a specific XKCD comic by ID from local cache.</li>
3839
* </ul>
3940
*
40-
* Relies on {@link XkcdRetriever} for local XKCD data and {@link ChatGptService} for AI-powered
41+
* Relies on {@link XkcdService} for local XKCD data and {@link ChatGptService} for AI-powered
4142
* relevance matching via OpenAI's file search tool and vector stores.
4243
*/
4344
public final class XkcdCommand extends SlashCommandAdapter {
@@ -50,21 +51,22 @@ public final class XkcdCommand extends SlashCommandAdapter {
5051
private static final String LAST_MESSAGES_AMOUNT_OPTION_NAME = "amount";
5152
private static final String XKCD_ID_OPTION_NAME = "id";
5253
private static final int MAXIMUM_MESSAGE_HISTORY = 100;
54+
private static final int MESSAGE_HISTORY_CUTOFF_SIZE_KB = 40_000;
5355
private static final String VECTOR_STORE_XKCD = "xkcd-comics";
5456
private static final ChatGptModel CHAT_GPT_MODEL = ChatGptModel.FAST;
5557
private static final Pattern XKCD_POST_PATTERN = Pattern.compile("^\\D*(\\d+)");
5658
private static final String CHATGPT_NO_ID_MESSAGE =
5759
"ChatGPT could not respond with a XKCD post ID.";
5860

5961
private final ChatGptService chatGptService;
60-
private final XkcdRetriever xkcdRetriever;
62+
private final XkcdService xkcdService;
6163

6264
public XkcdCommand(ChatGptService chatGptService) {
6365
super(COMMAND_NAME, "Post a relevant XKCD from the chat or your own",
6466
CommandVisibility.GLOBAL);
6567

6668
this.chatGptService = chatGptService;
67-
this.xkcdRetriever = new XkcdRetriever(chatGptService);
69+
this.xkcdService = new XkcdService(chatGptService);
6870

6971
OptionData lastMessagesAmountOption =
7072
new OptionData(OptionType.INTEGER, LAST_MESSAGES_AMOUNT_OPTION_NAME,
@@ -81,7 +83,7 @@ public XkcdCommand(ChatGptService chatGptService) {
8183
"The XKCD number to post to the chat")
8284
.setMinValue(0)
8385
.setRequired(true)
84-
.setMaxValue(xkcdRetriever.getXkcdPosts().size());
86+
.setMaxValue(xkcdService.getXkcdPosts().size());
8587

8688
SubcommandData customSubcommand = new SubcommandData(SUBCOMMAND_CUSTOM,
8789
"Post your own XKCD regardless of the recent chat messages")
@@ -149,17 +151,18 @@ private void handleCustomXkcd(SlashCommandInteractionEvent event) {
149151

150152
private void sendRelevantXkcdEmbedFromMessages(List<Message> messages,
151153
SlashCommandInteractionEvent event) {
152-
String discordChat = formatDiscordChatHistory(messages);
153-
String xkcdComicsFileId = xkcdRetriever.getXkcdUploadedFileId();
154+
List<Message> discordChatCutoff = cutoffDiscordChatHistory(messages);
155+
String discordChatFormatted = formatDiscordChatHistory(discordChatCutoff);
156+
String xkcdComicsFileId = xkcdService.getXkcdUploadedFileId();
154157
String xkcdVectorStore =
155158
chatGptService.createOrGetVectorStore(xkcdComicsFileId, VECTOR_STORE_XKCD);
156159
FileSearchTool fileSearch =
157160
FileSearchTool.builder().vectorStoreIds(List.of(xkcdVectorStore)).build();
158161

159162
Tool tool = Tool.ofFileSearch(fileSearch);
160163

161-
Optional<String> responseOptional = chatGptService
162-
.sendPrompt(getChatgptRelevantPrompt(discordChat), CHAT_GPT_MODEL, List.of(tool));
164+
Optional<String> responseOptional = chatGptService.sendPrompt(
165+
getChatgptRelevantPrompt(discordChatFormatted), CHAT_GPT_MODEL, List.of(tool));
163166

164167
Optional<Integer> responseIdOptional = getXkcdIdFromMessage(responseOptional.orElseThrow());
165168

@@ -174,11 +177,15 @@ private void sendRelevantXkcdEmbedFromMessages(List<Message> messages,
174177
Optional<MessageEmbed> embedOptional =
175178
constructEmbed(responseId, "Most relevant XKCD according to ChatGPT.");
176179

177-
embedOptional.ifPresent(embed -> event.getHook().sendMessageEmbeds(embed).queue());
180+
embedOptional.ifPresentOrElse(embed -> event.getHook().sendMessageEmbeds(embed).queue(),
181+
() -> event.getHook()
182+
.setEphemeral(true)
183+
.sendMessage("I could not find post with ID " + responseId)
184+
.queue());
178185
}
179186

180187
private Optional<MessageEmbed> constructEmbed(int xkcdId, String footer) {
181-
Optional<XkcdPost> xkcdPostOptional = xkcdRetriever.getXkcdPost(xkcdId);
188+
Optional<XkcdPost> xkcdPostOptional = xkcdService.getXkcdPost(xkcdId);
182189

183190
if (xkcdPostOptional.isEmpty()) {
184191
logger.warn("Could not find XKCD post with ID {} from local map", xkcdId);
@@ -220,6 +227,23 @@ private String formatDiscordChatHistory(List<Message> messages) {
220227
.toString();
221228
}
222229

230+
private List<Message> cutoffDiscordChatHistory(List<Message> messages) {
231+
int cutoffMessageIndex = (int) IntegerRange.of(0, messages.size() - 1)
232+
.toIntStream()
233+
.map(index -> countMessagesLength(messages.subList(0, index)))
234+
.filter(length -> length < MESSAGE_HISTORY_CUTOFF_SIZE_KB)
235+
.count();
236+
237+
return messages.subList(0, cutoffMessageIndex);
238+
}
239+
240+
private int countMessagesLength(List<Message> messages) {
241+
return messages.stream()
242+
.mapToInt(message -> message.getContentRaw().length()
243+
+ message.getAuthor().getName().length())
244+
.sum();
245+
}
246+
223247
private static String getChatgptRelevantPrompt(String discordChat) {
224248
return """
225249
<discord-chat>

application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdRetriever.java renamed to application/src/main/java/org/togetherjava/tjbot/features/xkcd/XkcdService.java

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@
2020
import java.nio.file.Path;
2121
import java.time.Duration;
2222
import java.util.HashMap;
23+
import java.util.List;
2324
import java.util.Map;
2425
import java.util.Optional;
2526
import java.util.concurrent.CompletableFuture;
27+
import java.util.concurrent.ExecutionException;
2628
import java.util.concurrent.ExecutorService;
2729
import java.util.concurrent.Executors;
30+
import java.util.concurrent.Future;
2831
import java.util.concurrent.Semaphore;
2932

3033
/**
@@ -36,25 +39,23 @@
3639
* Posts are cached locally in {@value #SAVED_XKCD_PATH} as JSON and uploaded to OpenAI using the
3740
* provided {@link ChatGptService} if not already present.
3841
*/
39-
public class XkcdRetriever {
42+
public class XkcdService {
4043

41-
private static final Logger logger = LoggerFactory.getLogger(XkcdRetriever.class);
44+
private static final Logger logger = LoggerFactory.getLogger(XkcdService.class);
4245

4346
private static final HttpClient CLIENT =
4447
HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build();
4548
private static final String XKCD_GET_URL = "https://xkcd.com/%d/info.0.json";
4649
private static final String SAVED_XKCD_PATH = "xkcd.generated.json";
4750
private static final int XKCD_POSTS_AMOUNT = 3201;
48-
private static final int FETCH_XCKD_POSTS_POOL_SIZE = 20;
4951
private static final int FETCH_XKCD_POSTS_SEMAPHORE_SIZE = 10;
50-
private static final int FETCH_XKCD_POSTS_THREAD_SLEEP_MS = 50;
5152
private static final ObjectMapper objectMapper = new ObjectMapper();
5253

5354
private final Map<Integer, XkcdPost> xkcdPosts = new HashMap<>();
5455
private final ChatGptService chatGptService;
5556
private String xkcdUploadedFileId;
5657

57-
public XkcdRetriever(ChatGptService chatGptService) {
58+
public XkcdService(ChatGptService chatGptService) {
5859
this.chatGptService = chatGptService;
5960

6061
Optional<String> xkcdUploadedFileIdOptional =
@@ -97,27 +98,30 @@ public Map<Integer, XkcdPost> getXkcdPosts() {
9798

9899
private void fetchAllXkcdPosts(Path savedXckdsPath) {
99100
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
100-
Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE);
101101

102102
logger.info("Fetching {} XKCD posts...", XKCD_POSTS_AMOUNT);
103-
try (ExecutorService executor = Executors.newFixedThreadPool(FETCH_XCKD_POSTS_POOL_SIZE)) {
104-
CompletableFuture.allOf(IntegerRange.of(1, XKCD_POSTS_AMOUNT)
103+
try (ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor()) {
104+
Semaphore semaphore = new Semaphore(FETCH_XKCD_POSTS_SEMAPHORE_SIZE);
105+
List<? extends Future<?>> futures = IntegerRange.of(1, XKCD_POSTS_AMOUNT)
105106
.toIntStream()
106107
.filter(id -> id != 404) // XKCD has a joke on comic ID 404 so exclude
107-
.mapToObj(xkcdId -> CompletableFuture.runAsync(() -> {
108+
.mapToObj(xkcdId -> executor.submit(() -> {
108109
semaphore.acquireUninterruptibly();
109-
try {
110-
Optional<XkcdPost> postOptional = this.retrieveXkcdPost(xkcdId).join();
111-
postOptional.ifPresent(post -> xkcdPosts.put(xkcdId, post));
112-
113-
Thread.sleep(FETCH_XKCD_POSTS_THREAD_SLEEP_MS);
114-
} catch (InterruptedException _) {
115-
Thread.currentThread().interrupt();
116-
} finally {
117-
semaphore.release();
118-
}
119-
}, executor))
120-
.toArray(CompletableFuture[]::new)).join();
110+
retrieveXkcdPost(xkcdId).join().ifPresent(post -> xkcdPosts.put(xkcdId, post));
111+
semaphore.release();
112+
}))
113+
.toList();
114+
115+
try {
116+
for (Future<?> future : futures) {
117+
future.get();
118+
}
119+
} catch (InterruptedException e) {
120+
logger.error("Failed to wait for future", e);
121+
Thread.currentThread().interrupt();
122+
} catch (ExecutionException e) {
123+
logger.error("Could not get result from future", e);
124+
}
121125
}
122126

123127
saveToFile(savedXckdsPath, xkcdPosts);

0 commit comments

Comments
 (0)