Skip to content

Commit ab01b32

Browse files
committed
parse emails with simplified regex (and add specific plaintext tests)
1 parent 7a546b9 commit ab01b32

3 files changed

Lines changed: 130 additions & 107 deletions

File tree

src/resources/filters/quarto-post/email.lua

Lines changed: 20 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,11 @@ function str_truthy_falsy(str)
6565
return false
6666
end
6767

68-
-- Parse recipients from inline code output or plain text
69-
-- Supports multiple formats:
70-
-- 1. Python list: ['a', 'b'] or ["a", "b"]
71-
-- 2. R vector: "a" "b" "c"
72-
-- 3. Comma-separated: a, b, c
73-
-- 4. Line-separated: a\nb\nc
74-
-- Returns an empty array if parsing fails
68+
-- Parse recipients using regex to find email addresses
69+
-- Matches pattern: local-part@domain.tld
70+
-- Handles any format: Python lists, R vectors, comma-separated,
71+
-- space-separated, quoted, unquoted, etc.
72+
-- Returns an empty array if no valid emails found
7573
function parse_recipients(recipient_str)
7674
recipient_str = str_trunc_trim(recipient_str, 10000)
7775

@@ -80,110 +78,26 @@ function parse_recipients(recipient_str)
8078
end
8179

8280
local recipients = {}
83-
84-
-- Try Python list format ['...', '...'] or ["...", "..."]
85-
if string.match(recipient_str, "^%[") and string.match(recipient_str, "%]$") then
86-
local content = string.sub(recipient_str, 2, -2)
87-
88-
-- Try to parse as Python/R list by splitting on commas
89-
-- and stripping quotes and brackets from each item
90-
recipients = {}
91-
for item in string.gmatch(content, "[^,]+") do
92-
local trimmed = str_trunc_trim(item, 1000)
93-
-- Strip leading/trailing brackets
94-
trimmed = string.gsub(trimmed, "^%[", "")
95-
trimmed = string.gsub(trimmed, "%]$", "")
96-
trimmed = str_trunc_trim(trimmed, 1000)
97-
98-
-- Strip leading/trailing quotes (ASCII single/double and UTF-8 curly quotes)
99-
-- ASCII single quote '
100-
trimmed = string.gsub(trimmed, "^'", "")
101-
trimmed = string.gsub(trimmed, "'$", "")
102-
-- ASCII double quote "
103-
trimmed = string.gsub(trimmed, '^"', "")
104-
trimmed = string.gsub(trimmed, '"$', "")
105-
-- UTF-8 curly single quotes ‘ and ’ (U+2018, U+2019)
106-
trimmed = string.gsub(trimmed, "^" .. string.char(226, 128, 152), "")
107-
trimmed = string.gsub(trimmed, string.char(226, 128, 153) .. "$", "")
108-
-- UTF-8 curly double quotes “ and ” (U+201C, U+201D)
109-
trimmed = string.gsub(trimmed, "^" .. string.char(226, 128, 156), "")
110-
trimmed = string.gsub(trimmed, string.char(226, 128, 157) .. "$", "")
111-
112-
trimmed = str_trunc_trim(trimmed, 1000)
113-
if trimmed ~= "" then
114-
table.insert(recipients, trimmed)
115-
end
116-
end
117-
if #recipients > 0 then
118-
return recipients
119-
end
120-
end
121-
122-
-- Try R-style quoted format (space-separated quoted strings outside of brackets)
123-
recipients = {}
124-
local found_any = false
125-
126-
-- Try single quotes: 'a' 'b' 'c'
127-
for quoted_pair in string.gmatch(recipient_str, "'([^']*)'") do
128-
local trimmed = str_trunc_trim(quoted_pair, 1000)
129-
if trimmed ~= "" then
130-
table.insert(recipients, trimmed)
131-
found_any = true
81+
-- Match anything that's not a separator (quotes, commas, spaces, brackets, parens)
82+
-- This allows international characters while stopping at separators
83+
for email in string.gmatch(recipient_str, "[^%s,'\"%[%]%(%)]+@[^%s,'\"%[%]%(%)]+%.[^%s,'\"%[%]%(%)]+") do
84+
-- Strip any leading/trailing quote characters (both straight and curly)
85+
-- Straight quotes: ' "
86+
-- Curly single quotes: ‘ ’ (U+2018, U+2019)
87+
-- Curly double quotes: “ ” (U+201C, U+201D)
88+
email = string.gsub(email, "^['\"" .. string.char(226, 128, 152) .. string.char(226, 128, 153) .. string.char(226, 128, 156) .. string.char(226, 128, 157) .. "]+", "")
89+
email = string.gsub(email, "['\"" .. string.char(226, 128, 152) .. string.char(226, 128, 153) .. string.char(226, 128, 156) .. string.char(226, 128, 157) .. "]+$", "")
90+
91+
if email ~= "" and string.match(email, "@") then
92+
table.insert(recipients, email)
13293
end
13394
end
134-
if found_any then
135-
return recipients
136-
end
13795

138-
-- Try double quotes: "a" "b" "c"
139-
recipients = {}
140-
for quoted_pair in string.gmatch(recipient_str, '"([^"]*)"') do
141-
local trimmed = str_trunc_trim(quoted_pair, 1000)
142-
if trimmed ~= "" then
143-
table.insert(recipients, trimmed)
144-
found_any = true
145-
end
146-
end
147-
if found_any then
148-
return recipients
149-
end
150-
151-
-- Try line-separated format (newlines or spaces)
152-
-- Check if there are newlines or multiple space-separated emails
153-
if string.match(recipient_str, "\n") or
154-
(string.match(recipient_str, "@.*%s+.*@") and not string.match(recipient_str, ",")) then
155-
recipients = {}
156-
-- Split on newlines or spaces
157-
for item in string.gmatch(recipient_str, "[^\n%s]+") do
158-
local trimmed = str_trunc_trim(item, 1000)
159-
if trimmed ~= "" and string.match(trimmed, "@") then
160-
table.insert(recipients, trimmed)
161-
found_any = true
162-
end
163-
end
164-
if found_any then
165-
return recipients
166-
end
167-
end
168-
169-
-- Try comma-separated format without quotes
170-
-- Split by comma and trim each part
171-
recipients = {}
172-
found_any = false
173-
for part in string.gmatch(recipient_str, "[^,]+") do
174-
local trimmed = str_trunc_trim(part, 1000)
175-
if trimmed ~= "" and not string.match(trimmed, "^[%[%]]") then
176-
table.insert(recipients, trimmed)
177-
found_any = true
178-
end
179-
end
180-
if found_any then
181-
return recipients
96+
if #recipients == 0 then
97+
quarto.log.warning("Could not parse recipients format: " .. recipient_str)
18298
end
18399

184-
-- Could not parse - log warning and return empty
185-
quarto.log.warning("Could not parse recipients format: " .. recipient_str)
186-
return {}
100+
return recipients
187101
end
188102

189103
local html_email_template_1 = [[

tests/docs/email/email-recipients-plaintext-formats.qmd

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,4 +46,77 @@ Second email with comma-separated recipients.
4646

4747
:::
4848

49+
::: {.email}
50+
51+
::: {.subject}
52+
Funky Email Formats - Dots and Hyphens
53+
:::
54+
55+
::: {.recipients}
56+
first.last@example.com, john-doe@test-domain.org, user_name@example.co.uk
57+
:::
58+
59+
::: {.email-text}
60+
Email with dots, hyphens, and underscores in addresses.
61+
:::
62+
63+
Third email with funky formats.
64+
65+
:::
66+
67+
::: {.email}
68+
69+
::: {.subject}
70+
Funky Email Formats - Plus Signs
71+
:::
72+
73+
::: {.recipients}
74+
user+tag@example.com, test+filter@domain.org, admin+reports@company.com
75+
:::
76+
77+
::: {.email-text}
78+
Email with plus signs in addresses.
79+
:::
80+
81+
Fourth email with plus signs.
82+
83+
:::
84+
85+
::: {.email}
86+
87+
::: {.subject}
88+
Mixed Separators and Quotes
89+
:::
90+
91+
::: {.recipients}
92+
"user1@example.com" 'user2@example.com' user3@example.com, user4@example.com
93+
user5@example.com
94+
:::
95+
96+
::: {.email-text}
97+
Email with mixed quotes, commas, spaces, and newlines.
98+
:::
99+
100+
Fifth email with mixed separators.
101+
102+
:::
103+
104+
::: {.email}
105+
106+
::: {.subject}
107+
Complex Local Parts
108+
:::
109+
110+
::: {.recipients}
111+
first.middle.last@example.com, user+tag1+tag2@domain.org, test_user-name.v2@example.co.uk
112+
:::
113+
114+
::: {.email-text}
115+
Email with complex combinations in local parts.
116+
:::
117+
118+
Sixth email with complex local parts.
119+
120+
:::
121+
49122
Done with test emails.

tests/smoke/render/render-email.test.ts

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,11 @@ testRender(docs("email/email-mixed-metadata-v2.qmd"), "email", false, [
230230
testRender(docs("email/email-recipients-plaintext-formats.qmd"), "email", false, [
231231
fileExists(previewFileV2_1),
232232
fileExists(previewFileV2_2),
233-
validJsonWithMultipleEmails(jsonFile, 2, {
233+
fileExists(docs("email/email-preview/email_id-3.html")),
234+
fileExists(docs("email/email-preview/email_id-4.html")),
235+
fileExists(docs("email/email-preview/email_id-5.html")),
236+
fileExists(docs("email/email-preview/email_id-6.html")),
237+
validJsonWithMultipleEmails(jsonFile, 6, {
234238
"0": {
235239
"email_id": 1,
236240
"subject": "Line-Separated Recipients",
@@ -246,6 +250,38 @@ testRender(docs("email/email-recipients-plaintext-formats.qmd"), "email", false,
246250
"attachments": [],
247251
"suppress_scheduled": false,
248252
"send_report_as_attachment": false
253+
},
254+
"2": {
255+
"email_id": 3,
256+
"subject": "Funky Email Formats - Dots and Hyphens",
257+
"recipients": ["first.last@example.com", "john-doe@test-domain.org", "user_name@example.co.uk"],
258+
"attachments": [],
259+
"suppress_scheduled": false,
260+
"send_report_as_attachment": false
261+
},
262+
"3": {
263+
"email_id": 4,
264+
"subject": "Funky Email Formats - Plus Signs",
265+
"recipients": ["user+tag@example.com", "test+filter@domain.org", "admin+reports@company.com"],
266+
"attachments": [],
267+
"suppress_scheduled": false,
268+
"send_report_as_attachment": false
269+
},
270+
"4": {
271+
"email_id": 5,
272+
"subject": "Mixed Separators and Quotes",
273+
"recipients": ["user1@example.com", "user2@example.com", "user3@example.com", "user4@example.com", "user5@example.com"],
274+
"attachments": [],
275+
"suppress_scheduled": false,
276+
"send_report_as_attachment": false
277+
},
278+
"5": {
279+
"email_id": 6,
280+
"subject": "Complex Local Parts",
281+
"recipients": ["first.middle.last@example.com", "user+tag1+tag2@domain.org", "test_user-name.v2@example.co.uk"],
282+
"attachments": [],
283+
"suppress_scheduled": false,
284+
"send_report_as_attachment": false
249285
}
250286
})
251287
], {

0 commit comments

Comments
 (0)