fix(codescanner): strip URLs before scanning to prevent false positives

BigMichi1 · BigMichi1 · commit 4f63785fdbea · 2026-05-15T11:57:54.000+02:00
Twitch URLs (e.g. https://www.twitch.tv/dungeonscrawlers) were being uppercased and their path segments matched as valid codes. Add a stripUrls helper that removes http/https URLs prior to code extraction, applied in both scanMessageForCodes and extractCodesFromText. Add three new tests covering Twitch username false positives and http URLs. Signed-off-by: Michael Cramer <michael@bigmichi1.de>
diff --git a/src/bot/handlers/codeScanner.test.ts b/src/bot/handlers/codeScanner.test.ts
@@ -32,6 +32,23 @@ describe('extractCodesFromText', () => {
     });
   });
 
+  describe('URL stripping', () => {
+    test('does not extract Twitch username as code', () => {
+      const text = 'LATU-EGIS-TOCK\n\nhttps://www.twitch.tv/dungeonscrawlers\n1x Electrum Chest';
+      expect(extractCodesFromText(text)).toEqual(['LATUEGISTOCK']);
+    });
+
+    test('does not extract long URL path segments as codes', () => {
+      const text = 'GOEL-ARNA-VIDS\nhttps://www.twitch.tv/jasoncharlesmiller\n1x Electrum Chest';
+      expect(extractCodesFromText(text)).toEqual(['GOELARNAVIDS']);
+    });
+
+    test('strips http URLs as well as https', () => {
+      const text = 'ABCD1234EFGH http://example.com/SOMETHINGLONG123456';
+      expect(extractCodesFromText(text)).toEqual(['ABCD1234EFGH']);
+    });
+  });
+
   describe('Discord emoji stripping', () => {
     test('strips static emoji tags before matching', () => {
       const text = 'Redeem <:gem:123456789012345678> this: ABCD1234EFGH';
diff --git a/src/bot/handlers/codeScanner.ts b/src/bot/handlers/codeScanner.ts
@@ -10,11 +10,16 @@ function stripDiscordEmojis(text: string): string {
   return text.replace(/<a?:[^:]+:\d+>/g, '');
 }
 
+// Strip http/https URLs (scheme through the next whitespace) before scanning so URL paths/usernames are not matched as codes.
+function stripUrls(text: string): string {
+  return text.replace(/https?:\/\/\S+/gi, '');
+}
+
 export async function scanMessageForCodes(message: Message): Promise<string[]> {
   try {
 
-    // Strip emoji tags then uppercase for matching
-    const messageText = stripDiscordEmojis(message.content).toUpperCase();
+    // Strip emoji tags, then URLs, then uppercase for matching
+    const messageText = stripUrls(stripDiscordEmojis(message.content)).toUpperCase();
 
     const codeMatches = messageText.match(CODE_REGEX) || [];
     const codes: string[] = [];
@@ -40,6 +45,6 @@ export async function scanMessageForCodes(message: Message): Promise<string[]> {
 }
 
 export function extractCodesFromText(text: string): string[] {
-  const codeMatches = stripDiscordEmojis(text).toUpperCase().match(CODE_REGEX) || [];
+  const codeMatches = stripUrls(stripDiscordEmojis(text)).toUpperCase().match(CODE_REGEX) || [];
   return codeMatches.map((code) => code.replaceAll('-', ''));
 }