Skip to content

Commit 1261be6

Browse files
authored
Fix email-to-issue sync: clean up comment formatting (#61)
- Fix GitHub search query to find existing issues by Gmail Thread ID in comments (in:comments), preventing duplicate issue creation when emails arrive across different scheduler ticks - Strip mailing list [tag] and Re:/Fwd: prefixes from email subjects before using them as issue titles - Include Gmail-Thread-ID and Subject only in the first comment (issue creation), not on every reply - Separate email header metadata from message body with a horizontal rule Signed-off-by: Bruno Oliveira da Silva <bruno@abstractj.com>
1 parent c5e1dde commit 1261be6

7 files changed

Lines changed: 399 additions & 9 deletions

File tree

src/main/java/org/keycloak/gh/bot/security/command/CommandParser.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ private boolean isAuthorizedRepository(GHEventPayload.IssueComment payload) {
4545
protected void success(GHEventPayload.IssueComment payload) throws IOException {
4646
payload.getComment().createReaction(ReactionContent.PLUS_ONE);
4747
}
48+
4849
protected void fail(GHEventPayload.IssueComment payload, String reason) throws IOException {
4950
LOGGER.errorf("Execution aborted: %s", reason);
5051
payload.getComment().createReaction(ReactionContent.MINUS_ONE);

src/main/java/org/keycloak/gh/bot/security/email/MailProcessor.java

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@
2121
import java.util.List;
2222
import java.util.Map;
2323
import java.util.Optional;
24+
import java.util.regex.Pattern;
2425

2526
@ApplicationScoped
2627
public class MailProcessor {
2728

2829
private static final Logger LOGGER = Logger.getLogger(MailProcessor.class);
30+
private static final Pattern BRACKET_PREFIX_PATTERN = Pattern.compile("^.*?\\[.*?]\\s*");
31+
private static final Pattern REPLY_PREFIX_PATTERN = Pattern.compile("^(?:\\s*(?:Re|Fwd|Fw)\\s*:\\s*)+", Pattern.CASE_INSENSITIVE);
2932

3033
@ConfigProperty(name = "google.group.target")
3134
String targetGroupEmail;
@@ -98,7 +101,7 @@ private void processSingleMessage(Message msgSummary, GitHub github, GHRepositor
98101

99102
var threadId = msg.getThreadId();
100103
var messageId = headers.getOrDefault("Message-ID", "").replaceAll("^<|>$", "");
101-
var subject = headers.getOrDefault("Subject", "(No Subject)").trim();
104+
var subject = normalizeSubject(headers.getOrDefault("Subject", "(No Subject)").trim());
102105

103106
var body = bodySanitizer.sanitize(gmail.getBody(msg)).orElse("(No content)");
104107
var attachments = gmail.getAttachments(msg);
@@ -113,7 +116,7 @@ private void processSingleMessage(Message msgSummary, GitHub github, GHRepositor
113116
issue.reopen();
114117
LOGGER.infof("Reopened existing closed issue #%d for thread %s", issue.getNumber(), threadId);
115118
}
116-
appendComment(issue, from, subject, body, threadId, attachmentSection);
119+
appendComment(issue, from, body, attachmentSection);
117120
} else {
118121
var newIssue = createNewIssue(repository, threadId, subject, from, body, attachmentSection);
119122
issueCache.put(threadId, newIssue.getNumber());
@@ -129,7 +132,7 @@ private void processSingleMessage(Message msgSummary, GitHub github, GHRepositor
129132
private Optional<GHIssue> resolveIssue(GitHub github, GHRepository repository, String threadId) {
130133
Integer issueNumber = issueCache.get(threadId, id -> {
131134
try {
132-
var query = "repo:%s \"%s\" label:%s is:issue".formatted(repositoryName, id, Labels.SOURCE_EMAIL);
135+
var query = "repo:%s \"%s\" label:%s is:issue in:comments".formatted(repositoryName, id, Labels.SOURCE_EMAIL);
133136
var iterator = github.searchIssues().q(query).list().iterator();
134137
return iterator.hasNext() ? iterator.next().getNumber() : null;
135138
} catch (Exception e) {
@@ -170,23 +173,32 @@ private void handleProcessingFailure(String messageId, Exception e) {
170173
LOGGER.errorf(e, "Failure processing message %s. It will remain unread and be retried.", messageId);
171174
}
172175

173-
private void appendComment(GHIssue issue, String from, String subject, String body, String threadId, String attachmentSection) throws IOException {
174-
issue.comment(formatGitHubComment(threadId, from, subject, body, attachmentSection));
176+
private void appendComment(GHIssue issue, String from, String body, String attachmentSection) throws IOException {
177+
issue.comment(formatReplyComment(from, body, attachmentSection));
175178
}
176179

177180
private GHIssue createNewIssue(GHRepository repo, String threadId, String subject, String from, String body, String attachmentSection) throws IOException {
178181
var issue = repo.createIssue(subject).body(Constants.ISSUE_DESCRIPTION_TEMPLATE).create();
179182
issue.addLabels(Labels.STATUS_TRIAGE, Labels.SOURCE_EMAIL);
180-
issue.comment(formatGitHubComment(threadId, from, subject, body, attachmentSection));
183+
issue.comment(formatNewIssueComment(threadId, subject, from, body, attachmentSection));
181184
return issue;
182185
}
183186

184-
private String formatGitHubComment(String threadId, String from, String subject, String body, String attachmentSection) {
185-
return "%s %s\nSubject: %s\nFrom: %s\n\n%s%s".formatted(
187+
static String formatNewIssueComment(String threadId, String subject, String from, String body, String attachmentSection) {
188+
return "%s %s\nSubject: %s\nFrom: %s\n\n---\n\n%s%s".formatted(
186189
Constants.GMAIL_THREAD_ID_PREFIX, threadId, subject, from, body, attachmentSection
187190
);
188191
}
189192

193+
static String formatReplyComment(String from, String body, String attachmentSection) {
194+
return "From: %s\n\n---\n\n%s%s".formatted(from, body, attachmentSection);
195+
}
196+
197+
static String normalizeSubject(String subject) {
198+
String result = BRACKET_PREFIX_PATTERN.matcher(subject).replaceFirst("");
199+
return REPLY_PREFIX_PATTERN.matcher(result).replaceFirst("");
200+
}
201+
190202
private boolean isFromBot(String from) {
191203
return from != null && from.toLowerCase().contains(botEmail.toLowerCase());
192204
}

src/main/java/org/keycloak/gh/bot/security/email/TargetGroup.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import java.net.URLEncoder;
44
import java.nio.charset.StandardCharsets;
55

6-
/** Encapsulates the Google Group identity, structural validation, and URL generation. */
6+
/**
7+
* Encapsulates the Google Group identity, structural validation, and URL generation.
8+
*/
79
public record TargetGroup(String email, String id, String domain) {
810

911
public static TargetGroup from(String emailAddress) {
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package org.keycloak.gh.bot.security.email;
2+
3+
import org.junit.jupiter.api.Test;
4+
import org.junit.jupiter.params.ParameterizedTest;
5+
import org.junit.jupiter.params.provider.NullAndEmptySource;
6+
import org.junit.jupiter.params.provider.ValueSource;
7+
8+
import static org.junit.jupiter.api.Assertions.assertEquals;
9+
import static org.junit.jupiter.api.Assertions.assertTrue;
10+
11+
public class EmailBodySanitizerTest {
12+
13+
private final EmailBodySanitizer sanitizer = new EmailBodySanitizer();
14+
15+
@ParameterizedTest
16+
@NullAndEmptySource
17+
@ValueSource(strings = {" ", "\n\n"})
18+
void sanitize_returnsEmptyForBlankInput(String input) {
19+
assertTrue(sanitizer.sanitize(input).isEmpty());
20+
}
21+
22+
@Test
23+
void sanitize_returnsPlainTextAsIs() {
24+
var result = sanitizer.sanitize("This is a vulnerability report.");
25+
assertTrue(result.isPresent());
26+
assertEquals("This is a vulnerability report.", result.get());
27+
}
28+
29+
@Test
30+
void sanitize_stripsGoogleGroupsFooter() {
31+
String body = "Important content.\nYou received this message because you are subscribed to the group.";
32+
var result = sanitizer.sanitize(body);
33+
assertTrue(result.isPresent());
34+
assertEquals("Important content.", result.get());
35+
}
36+
37+
@Test
38+
void sanitize_stripsSignatureDivider() {
39+
String body = "Important content.\n-- \nJohn Doe\njohn@example.com";
40+
var result = sanitizer.sanitize(body);
41+
assertTrue(result.isPresent());
42+
assertEquals("Important content.", result.get());
43+
}
44+
45+
@Test
46+
void sanitize_wrapsGmailThreadHistoryInDetails() {
47+
String body = "New reply here.\n\nOn Mon, Jan 1, 2024 at 10:00 AM Alice wrote:\n> Original message";
48+
var result = sanitizer.sanitize(body);
49+
assertTrue(result.isPresent());
50+
assertTrue(result.get().startsWith("New reply here."));
51+
assertTrue(result.get().contains("<details>"));
52+
assertTrue(result.get().contains("Thread history"));
53+
assertTrue(result.get().contains("Original message"));
54+
}
55+
56+
@Test
57+
void sanitize_wrapsOutlookPlainDividerInDetails() {
58+
String body = "Fresh content.\n\n-----Original Message-----\nFrom: bob@example.com";
59+
var result = sanitizer.sanitize(body);
60+
assertTrue(result.isPresent());
61+
assertTrue(result.get().startsWith("Fresh content."));
62+
assertTrue(result.get().contains("<details>"));
63+
}
64+
65+
@Test
66+
void sanitize_wrapsOutlookHtmlDividerInDetails() {
67+
String body = "Fresh content.\n\n________________________________\nFrom: bob@example.com";
68+
var result = sanitizer.sanitize(body);
69+
assertTrue(result.isPresent());
70+
assertTrue(result.get().startsWith("Fresh content."));
71+
assertTrue(result.get().contains("<details>"));
72+
}
73+
74+
@Test
75+
void sanitize_wrapsExchangeHeaderInDetails() {
76+
String body = "My reply.\n\nFrom: Alice Smith\nSent: Monday, January 1, 2024\nTo: Bob";
77+
var result = sanitizer.sanitize(body);
78+
assertTrue(result.isPresent());
79+
assertTrue(result.get().startsWith("My reply."));
80+
assertTrue(result.get().contains("<details>"));
81+
}
82+
83+
@Test
84+
void sanitize_wrapsQuotedLinesInDetails() {
85+
String body = "My reply.\n\n> quoted line one\n> quoted line two";
86+
var result = sanitizer.sanitize(body);
87+
assertTrue(result.isPresent());
88+
assertTrue(result.get().startsWith("My reply."));
89+
assertTrue(result.get().contains("<details>"));
90+
assertTrue(result.get().contains("> quoted line one"));
91+
}
92+
93+
@Test
94+
void sanitize_returnsEmptyWhenOnlySignature() {
95+
String body = "-- \nJohn Doe";
96+
assertTrue(sanitizer.sanitize(body).isEmpty());
97+
}
98+
}
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
package org.keycloak.gh.bot.security.email;
2+
3+
import com.google.api.services.gmail.model.Message;
4+
import com.google.api.services.gmail.model.MessagePart;
5+
import com.google.api.services.gmail.model.MessagePartBody;
6+
import com.google.api.services.gmail.model.MessagePartHeader;
7+
import org.junit.jupiter.api.Test;
8+
9+
import java.nio.charset.StandardCharsets;
10+
import java.util.Base64;
11+
import java.util.List;
12+
13+
import static org.junit.jupiter.api.Assertions.assertEquals;
14+
import static org.junit.jupiter.api.Assertions.assertTrue;
15+
16+
public class GmailAdapterTest {
17+
18+
private final GmailAdapter adapter = new GmailAdapter();
19+
20+
@Test
21+
void getHeadersMap_extractsHeadersFromMessage() {
22+
var msg = messageWithHeaders(
23+
header("From", "alice@example.com"),
24+
header("Subject", "CVE report"),
25+
header("List-ID", "<keycloak-security.googlegroups.com>")
26+
);
27+
28+
var headers = adapter.getHeadersMap(msg);
29+
assertEquals("alice@example.com", headers.get("From"));
30+
assertEquals("CVE report", headers.get("Subject"));
31+
}
32+
33+
@Test
34+
void getHeadersMap_isCaseInsensitive() {
35+
var msg = messageWithHeaders(header("From", "alice@example.com"));
36+
var headers = adapter.getHeadersMap(msg);
37+
assertEquals("alice@example.com", headers.get("from"));
38+
assertEquals("alice@example.com", headers.get("FROM"));
39+
}
40+
41+
@Test
42+
void getHeadersMap_keepsFirstValueOnDuplicateHeaders() {
43+
var msg = messageWithHeaders(
44+
header("From", "first@example.com"),
45+
header("From", "second@example.com")
46+
);
47+
assertEquals("first@example.com", adapter.getHeadersMap(msg).get("From"));
48+
}
49+
50+
@Test
51+
void getBody_extractsDirectPayloadBody() {
52+
var msg = new Message().setPayload(
53+
new MessagePart().setBody(new MessagePartBody().setData(encode("Hello world")))
54+
);
55+
assertEquals("Hello world", adapter.getBody(msg));
56+
}
57+
58+
@Test
59+
void getBody_prefersPlainTextOverHtml() {
60+
var htmlPart = part("text/html", "<b>Hello</b>");
61+
var plainPart = part("text/plain", "Hello plain");
62+
63+
var msg = new Message().setPayload(
64+
new MessagePart().setParts(List.of(htmlPart, plainPart))
65+
);
66+
assertEquals("Hello plain", adapter.getBody(msg));
67+
}
68+
69+
@Test
70+
void getBody_fallsBackToHtmlWhenNoPlainText() {
71+
var htmlPart = part("text/html", "<b>Hello</b>");
72+
73+
var msg = new Message().setPayload(
74+
new MessagePart().setParts(List.of(htmlPart))
75+
);
76+
assertEquals("<b>Hello</b>", adapter.getBody(msg));
77+
}
78+
79+
@Test
80+
void getBody_extractsFromNestedParts() {
81+
var nested = part("text/plain", "Nested content");
82+
var wrapper = new MessagePart().setMimeType("multipart/alternative").setParts(List.of(nested));
83+
84+
var msg = new Message().setPayload(new MessagePart().setParts(List.of(wrapper)));
85+
assertEquals("Nested content", adapter.getBody(msg));
86+
}
87+
88+
@Test
89+
void getAttachments_collectsAttachmentsWithIds() {
90+
var attachment = new MessagePart()
91+
.setFilename("report.pdf")
92+
.setMimeType("application/pdf")
93+
.setBody(new MessagePartBody().setAttachmentId("att-1"));
94+
var textPart = part("text/plain", "body");
95+
96+
var msg = new Message().setPayload(new MessagePart().setParts(List.of(textPart, attachment)));
97+
var result = adapter.getAttachments(msg);
98+
99+
assertEquals(1, result.size());
100+
assertEquals("report.pdf", result.get(0).fileName());
101+
assertEquals("application/pdf", result.get(0).mimeType());
102+
}
103+
104+
@Test
105+
void getAttachments_ignoresPartsWithoutAttachmentId() {
106+
var inline = new MessagePart()
107+
.setFilename("image.png")
108+
.setMimeType("image/png")
109+
.setBody(new MessagePartBody());
110+
111+
var msg = new Message().setPayload(new MessagePart().setParts(List.of(inline)));
112+
assertTrue(adapter.getAttachments(msg).isEmpty());
113+
}
114+
115+
@Test
116+
void getAttachments_ignoresPartsWithBlankFilename() {
117+
var noName = new MessagePart()
118+
.setFilename("")
119+
.setMimeType("application/octet-stream")
120+
.setBody(new MessagePartBody().setAttachmentId("att-1"));
121+
122+
var msg = new Message().setPayload(new MessagePart().setParts(List.of(noName)));
123+
assertTrue(adapter.getAttachments(msg).isEmpty());
124+
}
125+
126+
@Test
127+
void getAttachments_collectsNestedAttachments() {
128+
var deepAttachment = new MessagePart()
129+
.setFilename("deep.zip")
130+
.setMimeType("application/zip")
131+
.setBody(new MessagePartBody().setAttachmentId("att-deep"));
132+
var wrapper = new MessagePart().setMimeType("multipart/mixed").setParts(List.of(deepAttachment));
133+
134+
var msg = new Message().setPayload(new MessagePart().setParts(List.of(wrapper)));
135+
assertEquals(1, adapter.getAttachments(msg).size());
136+
assertEquals("deep.zip", adapter.getAttachments(msg).get(0).fileName());
137+
}
138+
139+
private static Message messageWithHeaders(MessagePartHeader... headers) {
140+
return new Message().setPayload(new MessagePart().setHeaders(List.of(headers)));
141+
}
142+
143+
private static MessagePartHeader header(String name, String value) {
144+
return new MessagePartHeader().setName(name).setValue(value);
145+
}
146+
147+
private static MessagePart part(String mimeType, String content) {
148+
return new MessagePart()
149+
.setMimeType(mimeType)
150+
.setBody(new MessagePartBody().setData(encode(content)));
151+
}
152+
153+
private static String encode(String text) {
154+
return Base64.getUrlEncoder().encodeToString(text.getBytes(StandardCharsets.UTF_8));
155+
}
156+
}

0 commit comments

Comments
 (0)