Skip to content

Commit 9818ab1

Browse files
committed
test: add parser tests covering recent bug-fix use cases
Anonymized regression tests for the four recent parser fixes:
- 22f49c6: mailto chip extraction from bullet lists
- 74a0fc4: HTML-entity decode and concatenated-TLD splitting
- 865b472: end-of-string .com not split into .co + m
- c8f3ff1: RFC-5322 angle-bracket mailbox form (Name <a@b>)

Uses node:test from the standard library — no new dependencies. Run with `npm test`.
1 parent c8f3ff1 commit 9818ab1

2 files changed

Lines changed: 134 additions & 0 deletions

File tree

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"scripts": {
88
"dev": "wrangler dev",
99
"deploy": "wrangler deploy",
10+
"test": "node --test \"src/**/*.test.js\"",
1011
"db:create": "wrangler d1 create teams-admin-agent",
1112
"db:migrate": "wrangler d1 migrations apply teams-admin-agent --local",
1213
"db:migrate:prod": "wrangler d1 migrations apply teams-admin-agent --remote"

src/parser.test.js

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
import { test } from 'node:test';
2+
import assert from 'node:assert/strict';
3+
import { extractEmails, hasAddIntent } from './parser.js';
4+
5+
// ── hasAddIntent ──────────────────────────────────────────────────
6+
7+
test('hasAddIntent: matches add/invite/include verbs', () => {
  // Every phrase below contains a verb that is expected to be recognized as
  // a request to add someone; they are checked in order.
  const phrasesWithIntent = [
    'please add alice@example.com',
    'invite Bob to the team',
    'include this user',
    'onboard the new hire',
  ];
  for (const phrase of phrasesWithIntent) {
    assert.equal(hasAddIntent(phrase), true);
  }
});
13+
14+
test('hasAddIntent: ignores text inside <at>...</at> mention tags', () => {
  // The mentioned bot name contains "add"; with no other verb in the
  // message, that alone must not be reported as add intent.
  const mentionOnlyMessage = '<at>add-bot</at> hello there';
  const detected = hasAddIntent(mentionOnlyMessage);
  assert.equal(detected, false);
});
18+
19+
test('hasAddIntent: returns false for empty/missing text', () => {
  // Empty string, null and undefined are each expected to produce false.
  const blankInputs = ['', null, undefined];
  for (const blank of blankInputs) {
    assert.equal(hasAddIntent(blank), false);
  }
});
24+
25+
// ── extractEmails: mailto chips (commit 22f49c6) ──────────────────
26+
27+
test('extractEmails: pulls addresses from mailto: hrefs in flattened bullet lists', () => {
  // Contact chips arrive from Teams as <a href="mailto:..."> anchors. Running
  // only the plain email regex would glue adjacent list items together, so
  // the mailto hrefs are expected to be harvested first.
  const chipMarkup = `<at>admin-bot</at> add the following:
<li><a href="mailto:alice@example.com">Alice</a></li>
<li><a href="mailto:bob@example.com">Bob</a></li>
<li><a href="mailto:carol@example.org">Carol</a></li>`;
  const expectedAddresses = [
    'alice@example.com',
    'bob@example.com',
    'carol@example.org',
  ];
  assert.deepEqual(extractEmails(chipMarkup), expectedAddresses);
});
40+
41+
test('extractEmails: dedupes when the same address appears in both mailto and link text', () => {
  // The address occurs twice in the anchor (href and visible text) but is
  // expected to be returned only once.
  const anchor = `<a href="mailto:alice@example.com">alice@example.com</a>`;
  const found = extractEmails(anchor);
  assert.deepEqual(found, ['alice@example.com']);
});
45+
46+
test('extractEmails: lowercases extracted addresses', () => {
  // Mixed-case input is expected to come back fully lowercased.
  const mixedCase = 'add ALICE@Example.COM';
  const found = extractEmails(mixedCase);
  assert.deepEqual(found, ['alice@example.com']);
});
49+
50+
// ── extractEmails: concatenated bullet emails (commit 74a0fc4) ────
51+
52+
test('extractEmails: splits emails Teams glued together at common TLD boundaries', () => {
  // When Teams flattens <li> items without any separator the result looks
  // like "a@example.comb@example.com". A common TLD immediately followed by
  // a letter can only arise from that concatenation, so a split is expected
  // at each such boundary.
  const gluedMessage = 'add alice@example.combob@example.orgcarol@example.net';
  const expectedAddresses = [
    'alice@example.com',
    'bob@example.org',
    'carol@example.net',
  ];
  assert.deepEqual(extractEmails(gluedMessage), expectedAddresses);
});
63+
64+
test('extractEmails: decodes HTML entities before email extraction', () => {
  // Teams occasionally HTML-encodes the whole payload (&lt;li&gt; in place of
  // <li>). Entity decoding is expected to happen first so that tag stripping
  // and TLD-glue splitting still apply.
  const encodedMessage = 'add &lt;li&gt;alice@example.com&lt;/li&gt;&lt;li&gt;bob@example.com&lt;/li&gt;';
  const expectedAddresses = [
    'alice@example.com',
    'bob@example.com',
  ];
  assert.deepEqual(extractEmails(encodedMessage), expectedAddresses);
});
73+
74+
// ── extractEmails: end-of-string TLD (commit 865b472) ─────────────
75+
76+
test('extractEmails: does not split a .com TLD at end of string into .co + m', () => {
  // Regression guard: the lookahead once demanded only a following letter,
  // so at end of string the regex backtracked from .com to .co plus a stray
  // "m" and yielded "alice@example.co". The lookahead now needs another
  // local@ part to follow.
  const message = 'add alice@example.com';
  const found = extractEmails(message);
  assert.deepEqual(found, ['alice@example.com']);
});
82+
83+
test('extractEmails: preserves trailing dot-co address when nothing follows', () => {
  // .co is a common TLD in its own right, so an address ending in it at end
  // of string is expected to stay whole.
  const message = 'add alice@example.co';
  const found = extractEmails(message);
  assert.deepEqual(found, ['alice@example.co']);
});
87+
88+
// ── extractEmails: RFC-5322 angle brackets (commit c8f3ff1) ───────
89+
90+
test('extractEmails: extracts addresses wrapped in <...> mailbox brackets', () => {
  // Copy-pasting from Outlook gives "Name <email@domain>". The HTML tag
  // stripper formerly swallowed the whole <...> span; spans containing "@"
  // are now left alone.
  const mailbox = 'add Alice Example <alice@example.com>';
  const found = extractEmails(mailbox);
  assert.deepEqual(found, ['alice@example.com']);
});
96+
97+
test('extractEmails: handles a semicolon-separated RFC-5322 list (the real failure case)', () => {
  // Mirrors the message shape behind the silent-fail incident; names and
  // domains have been anonymized.
  const incidentMessage = `add the following email addressees to this chat

Alice Example <alice@example.com>; Bob Sample <bob.sample@partner.example>; Carol Demo <carol@example.com>; Dan Placeholder <dan_p@example.com>; Eve Tester <eve@example.com>`;
  const expectedAddresses = [
    'alice@example.com',
    'bob.sample@partner.example',
    'carol@example.com',
    'dan_p@example.com',
    'eve@example.com',
  ];
  assert.deepEqual(extractEmails(incidentMessage), expectedAddresses);
});
111+
112+
test('extractEmails: handles HTML-encoded angle-bracket addresses', () => {
  // Same mailbox form as "Name <addr>", but with the brackets delivered as
  // &lt; / &gt; entities.
  const encodedMailboxes = 'add Alice &lt;alice@example.com&gt;; Bob &lt;bob@example.com&gt;';
  const expectedAddresses = [
    'alice@example.com',
    'bob@example.com',
  ];
  assert.deepEqual(extractEmails(encodedMailboxes), expectedAddresses);
});
119+
120+
// ── extractEmails: misc invariants ────────────────────────────────
121+
122+
test('extractEmails: strips <at> mention so bot name is not matched', () => {
  // The bot's @mention markup carries its display name, which must never be
  // picked up as an email even if it superficially looks like one.
  const mentionThenAddress = '<at>admin-bot</at> add alice@example.com';
  const found = extractEmails(mentionThenAddress);
  assert.deepEqual(found, ['alice@example.com']);
});
128+
129+
test('extractEmails: returns empty array on empty/missing input', () => {
  // Empty string, null and undefined are each expected to give an empty list.
  const blankInputs = ['', null, undefined];
  for (const blank of blankInputs) {
    assert.deepEqual(extractEmails(blank), []);
  }
});

0 commit comments

Comments
 (0)