Skip to content

Commit 4f63785

Browse files
committed
fix(codescanner): strip URLs before scanning to prevent false positives
Twitch URLs (e.g. https://www.twitch.tv/dungeonscrawlers) were being uppercased and their path segments matched as valid codes. Add a stripUrls helper that removes http/https URLs prior to code extraction, applied in both scanMessageForCodes and extractCodesFromText. Add three new tests covering Twitch username false positives and http URLs. Signed-off-by: Michael Cramer <michael@bigmichi1.de>
1 parent 7fc7c07 commit 4f63785

2 files changed

Lines changed: 25 additions & 3 deletions

File tree

src/bot/handlers/codeScanner.test.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,23 @@ describe('extractCodesFromText', () => {
3232
});
3333
});
3434

35+
// URLs embedded in a message must not contribute codes: only the real code
// that appears outside the link is expected back.
describe('URL stripping', () => {
  test('does not extract Twitch username as code', () => {
    // The code is followed by a blank line and then a Twitch channel link.
    const message = 'LATU-EGIS-TOCK\n\nhttps://www.twitch.tv/dungeonscrawlers\n1x Electrum Chest';

    const found = extractCodesFromText(message);

    expect(found).toEqual(['LATUEGISTOCK']);
  });

  test('does not extract long URL path segments as codes', () => {
    // Same layout, with a longer channel name in the URL path.
    const message = 'GOEL-ARNA-VIDS\nhttps://www.twitch.tv/jasoncharlesmiller\n1x Electrum Chest';

    const found = extractCodesFromText(message);

    expect(found).toEqual(['GOELARNAVIDS']);
  });

  test('strips http URLs as well as https', () => {
    // Plain-http link on the same line as the code.
    const message = 'ABCD1234EFGH http://example.com/SOMETHINGLONG123456';

    const found = extractCodesFromText(message);

    expect(found).toEqual(['ABCD1234EFGH']);
  });
});
51+
3552
describe('Discord emoji stripping', () => {
3653
test('strips static emoji tags before matching', () => {
3754
const text = 'Redeem <:gem:123456789012345678> this: ABCD1234EFGH';

src/bot/handlers/codeScanner.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,16 @@ function stripDiscordEmojis(text: string): string {
1010
return text.replace(/<a?:[^:]+:\d+>/g, '');
1111
}
1212

13+
/**
 * Removes every `http://` / `https://` link from the given text.
 *
 * A link is matched case-insensitively from its scheme up to the next
 * whitespace character, so the host, path and any username segment are all
 * deleted. This keeps URL paths/usernames from being picked up as false
 * positives when the text is scanned afterwards.
 *
 * @param text - Raw text that may contain URLs.
 * @returns The text with each matched URL replaced by an empty string.
 */
function stripUrls(text: string): string {
  const urlPattern = /https?:\/\/\S+/gi;
  return text.replace(urlPattern, '');
}
17+
1318
export async function scanMessageForCodes(message: Message): Promise<string[]> {
1419
try {
1520

16-
// Strip emoji tags then uppercase for matching
17-
const messageText = stripDiscordEmojis(message.content).toUpperCase();
21+
// Strip URLs and emoji tags, then uppercase for matching
22+
const messageText = stripUrls(stripDiscordEmojis(message.content)).toUpperCase();
1823

1924
const codeMatches = messageText.match(CODE_REGEX) || [];
2025
const codes: string[] = [];
@@ -40,6 +45,6 @@ export async function scanMessageForCodes(message: Message): Promise<string[]> {
4045
}
4146

4247
/**
 * Extracts codes from free-form text.
 *
 * Discord emoji tags are removed first, then URLs; the remainder is
 * uppercased and matched against CODE_REGEX. Each match is returned with its
 * dashes removed.
 *
 * @param text - Text to search for codes.
 * @returns The dash-free matches, or an empty array when nothing matches.
 */
export function extractCodesFromText(text: string): string[] {
  const cleaned = stripUrls(stripDiscordEmojis(text));
  const matches = cleaned.toUpperCase().match(CODE_REGEX) ?? [];
  return matches.map((match) => match.replaceAll('-', ''));
}

0 commit comments

Comments
 (0)