|
| 1 | +import { test } from 'node:test'; |
| 2 | +import assert from 'node:assert/strict'; |
| 3 | +import { extractEmails, hasAddIntent } from './parser.js'; |
| 4 | + |
| 5 | +// ── hasAddIntent ────────────────────────────────────────────────── |
| 6 | + |
test('hasAddIntent: matches add/invite/include verbs', () => {
  // One phrase per verb this case covers; each must be reported as add intent.
  const phrasesWithIntent = [
    'please add alice@example.com',
    'invite Bob to the team',
    'include this user',
    'onboard the new hire',
  ];
  for (const phrase of phrasesWithIntent) {
    assert.strictEqual(hasAddIntent(phrase), true);
  }
});
| 13 | + |
test('hasAddIntent: ignores text inside <at>...</at> mention tags', () => {
  // The only "add" in this message sits inside the bot's display name within
  // the mention markup, so on its own it must not count as add intent.
  const mentionOnly = '<at>add-bot</at> hello there';
  assert.strictEqual(hasAddIntent(mentionOnly), false);
});
| 18 | + |
test('hasAddIntent: returns false for empty/missing text', () => {
  // Empty string, null and undefined are all expected to yield false.
  for (const blank of ['', null, undefined]) {
    assert.strictEqual(hasAddIntent(blank), false);
  }
});
| 24 | + |
| 25 | +// ── extractEmails: mailto chips (commit 22f49c6) ────────────────── |
| 26 | + |
test('extractEmails: pulls addresses from mailto: hrefs in flattened bullet lists', () => {
  // Contact chips arrive from Teams as <a href="mailto:..."> anchors. Relying on
  // the email regex alone would glue the list items together, which is why
  // mailto extraction runs first.
  const expected = ['alice@example.com', 'bob@example.com', 'carol@example.org'];
  const html = `<at>admin-bot</at> add the following:
  <li><a href="mailto:alice@example.com">Alice</a></li>
  <li><a href="mailto:bob@example.com">Bob</a></li>
  <li><a href="mailto:carol@example.org">Carol</a></li>`;
  assert.deepStrictEqual(extractEmails(html), expected);
});
| 40 | + |
test('extractEmails: dedupes when the same address appears in both mailto and link text', () => {
  // The address occurs twice in the input: once in the href and once as the
  // visible link text. Only one copy may come back.
  const anchor = '<a href="mailto:alice@example.com">alice@example.com</a>';
  const found = extractEmails(anchor);
  assert.deepStrictEqual(found, ['alice@example.com']);
});
| 45 | + |
test('extractEmails: lowercases extracted addresses', () => {
  // Mixed-case input is expected back entirely in lower case.
  const mixedCase = 'add ALICE@Example.COM';
  const found = extractEmails(mixedCase);
  assert.deepStrictEqual(found, ['alice@example.com']);
});
| 49 | + |
| 50 | +// ── extractEmails: concatenated bullet emails (commit 74a0fc4) ──── |
| 51 | + |
test('extractEmails: splits emails Teams glued together at common TLD boundaries', () => {
  // When Teams flattens <li> items without any separator the result looks like
  // "a@example.comb@example.com". A common TLD immediately followed by a letter
  // can only come from that concatenation, so the split happens there.
  const expected = ['alice@example.com', 'bob@example.org', 'carol@example.net'];
  const glued = 'add alice@example.combob@example.orgcarol@example.net';
  assert.deepStrictEqual(extractEmails(glued), expected);
});
| 63 | + |
test('extractEmails: decodes HTML entities before email extraction', () => {
  // Teams sometimes HTML-encodes the entire payload (&lt;li&gt; instead of <li>).
  // decodeEntities runs first so tag stripping and TLD-glue splitting still work.
  // The fixture must carry real entities: with literal <li> tags this case would
  // still pass if entity decoding were removed, so it would guard nothing.
  const encoded =
    'add &lt;li&gt;alice@example.com&lt;/li&gt;&lt;li&gt;bob@example.com&lt;/li&gt;';
  assert.deepEqual(extractEmails(encoded), [
    'alice@example.com',
    'bob@example.com',
  ]);
});
| 73 | + |
| 74 | +// ── extractEmails: end-of-string TLD (commit 865b472) ───────────── |
| 75 | + |
test('extractEmails: does not split a .com TLD at end of string into .co + m', () => {
  // Regression guard. The lookahead used to require only a following letter, so
  // at end of string the regex backtracked from .com to .co plus a lone "m" and
  // returned "alice@example.co". The lookahead now requires another local@.
  const found = extractEmails('add alice@example.com');
  assert.deepStrictEqual(found, ['alice@example.com']);
});
| 82 | + |
test('extractEmails: preserves trailing dot-co address when nothing follows', () => {
  // .co is a common TLD in its own right, so an address ending in it at end of
  // string has to come back untouched.
  const found = extractEmails('add alice@example.co');
  assert.deepStrictEqual(found, ['alice@example.co']);
});
| 87 | + |
| 88 | +// ── extractEmails: RFC-5322 angle brackets (commit c8f3ff1) ─────── |
| 89 | + |
test('extractEmails: extracts addresses wrapped in <...> mailbox brackets', () => {
  // Copy-paste from Outlook yields "Name <email@domain>". The HTML tag stripper
  // previously swallowed the whole <...> span; it now leaves spans containing
  // an @ alone.
  const found = extractEmails('add Alice Example <alice@example.com>');
  assert.deepStrictEqual(found, ['alice@example.com']);
});
| 96 | + |
test('extractEmails: handles a semicolon-separated RFC-5322 list (the real failure case)', () => {
  // Same message shape as the one behind the silent-fail incident; names and
  // domains have been anonymized.
  const expected = [
    'alice@example.com',
    'bob.sample@partner.example',
    'carol@example.com',
    'dan_p@example.com',
    'eve@example.com',
  ];
  const text = `add the following email addressees to this chat

Alice Example <alice@example.com>; Bob Sample <bob.sample@partner.example>; Carol Demo <carol@example.com>; Dan Placeholder <dan_p@example.com>; Eve Tester <eve@example.com>`;
  assert.deepStrictEqual(extractEmails(text), expected);
});
| 111 | + |
test('extractEmails: handles HTML-encoded angle-bracket addresses', () => {
  // The mailbox brackets are written as &lt; / &gt; entities on purpose. With
  // raw < and > this case would merely repeat the plain angle-bracket test and
  // never exercise the HTML-encoded form named in the title.
  const text = 'add Alice &lt;alice@example.com&gt;; Bob &lt;bob@example.com&gt;';
  assert.deepEqual(extractEmails(text), [
    'alice@example.com',
    'bob@example.com',
  ]);
});
| 119 | + |
| 120 | +// ── extractEmails: misc invariants ──────────────────────────────── |
| 121 | + |
test('extractEmails: strips <at> mention so bot name is not matched', () => {
  // The bot's own @mention markup carries its display name, which must never be
  // returned as an address even if it superficially looks like one.
  const message = '<at>admin-bot</at> add alice@example.com';
  const found = extractEmails(message);
  assert.deepStrictEqual(found, ['alice@example.com']);
});
| 128 | + |
test('extractEmails: returns empty array on empty/missing input', () => {
  // Empty string, null and undefined are all expected to yield an empty array.
  for (const blank of ['', null, undefined]) {
    assert.deepStrictEqual(extractEmails(blank), []);
  }
});
0 commit comments