Skip to content

Commit 2dfc9da

Browse files
authored
Share generated text sanitization and strip commit spam (#479)
- Move attribution stripping into `packages/shared`
- Add commit-msg hook to remove provider attribution from commit messages
- Expand tests and clean up duplicate PR utility exports
1 parent 3b1cfa7 commit 2dfc9da

File tree

6 files changed

+196
-133
lines changed

6 files changed

+196
-133
lines changed

.husky/commit-msg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Run the attribution-stripping script on the commit message file whose path
# is passed to this hook as "$1".
bun run scripts/strip-provider-attribution-from-commit-message.ts "$1"

apps/server/src/git/generatedTextSanitization.test.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
sanitizeGeneratedCommitSubject,
66
sanitizeGeneratedPrBody,
77
sanitizeGeneratedPrTitle,
8+
stripProviderAttribution,
89
} from "./generatedTextSanitization.ts";
910

1011
describe("generatedTextSanitization", () => {
@@ -73,4 +74,26 @@ describe("generatedTextSanitization", () => {
7374
].join("\n"),
7475
);
7576
});
77+
78+
it("removes equivalent agent attribution variants and preserves human trailers", () => {
  // Mixes free-form attribution lines, agent trailers, and one human trailer.
  const message = [
    "Refine provider session recovery",
    "",
    "Authored by Codex",
    "Written with ChatGPT",
    "Signed-off-by: Claude Opus 4.6 <noreply@anthropic.com>",
    "Co-Authored-By: GPT-5 Codex <noreply@openai.com>",
    "Co-authored-by: Val Alexander <bunsthedev@gmail.com>",
  ].join("\n");

  // Only the subject and the human co-author trailer should survive.
  const expected = [
    "Refine provider session recovery",
    "",
    "Co-authored-by: Val Alexander <bunsthedev@gmail.com>",
  ].join("\n");

  expect(stripProviderAttribution(message)).toBe(expected);
});
7699
});
Lines changed: 7 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -1,133 +1,7 @@
1-
/**
 * Lower-case substrings that identify an AI provider or agent.
 * Matching is done with a case-insensitive substring test by the helper
 * that consumes this list.
 */
const PROVIDER_ATTRIBUTION_MARKERS = [
  // Anthropic
  "claude code",
  "anthropic",
  // OpenAI
  "codex",
  "openai codex",
  "openai",
  // Other assistants
  "github copilot",
  "copilot",
  "cursor",
  "gemini",
  // Bot e-mail addresses seen in trailers
  "noreply@anthropic.com",
  "noreply@openai.com",
  "copilot@github.com",
] as const;

/** Matches the start of a `Co-authored-by:` or `Signed-off-by:` trailer (any case). */
const TRAILER_LINE_PATTERN = /^(?:co-authored-by|signed-off-by):/i;

/**
 * Matches lines such as "Generated with X" or "This PR was written by X.";
 * capture group 1 is the X part, without one trailing "." or "!".
 */
const ATTRIBUTION_LINE_PATTERN =
  /^(?:this (?:commit|pull request|pr) was\s+)?(?:generated|created|authored|written)\s+(?:with|by)\s+(.+?)(?:[.!])?$/i;
19-
20-
/**
 * Reduces a line to plain text so the attribution patterns can match it.
 *
 * Steps, in order: trim, replace markdown links `[text](url)` with `text`,
 * drop one leading `-`/`*` bullet, drop a leading robot emoji, and collapse
 * every whitespace run to a single space.
 */
function normalizeAttributionLine(line: string): string {
  const withoutLinks = line.trim().replace(/\[([^\]]+)\]\((?:[^)]+)\)/g, "$1");
  const withoutBullet = withoutLinks.replace(/^[-*]\s+/, "");
  const withoutRobot = withoutBullet.replace(/^🤖\s*/, "");
  return withoutRobot.replace(/\s+/g, " ");
}
28-
29-
/**
 * Reports whether `value` contains any entry of PROVIDER_ATTRIBUTION_MARKERS
 * as a substring, ignoring case.
 */
function containsProviderAttributionMarker(value: string): boolean {
  const haystack = value.toLowerCase();
  for (const marker of PROVIDER_ATTRIBUTION_MARKERS) {
    if (haystack.includes(marker)) {
      return true;
    }
  }
  return false;
}
33-
34-
/**
 * Decides whether the tail of an attribution sentence looks like a provider
 * name: it must contain a provider marker and be at most four words long
 * once brackets and quote characters are removed.
 */
function isLikelyProviderLabel(value: string): boolean {
  const label = value
    .trim()
    .replace(/[()[\]{}"'`]/g, "")
    .replace(/\s+/g, " ");
  const wordCount = label.split(" ").filter((word) => word.length > 0).length;
  return containsProviderAttributionMarker(label) && wordCount <= 4;
}
44-
45-
/**
 * Reports whether a single line is provider attribution that should be
 * dropped. Two shapes qualify:
 *  - a `Co-authored-by:` / `Signed-off-by:` trailer containing a provider marker;
 *  - a "generated/created/authored/written with|by X" sentence whose X looks
 *    like a provider label.
 * Lines that are empty after normalization never qualify.
 */
function isProviderAttributionLine(line: string): boolean {
  const candidate = normalizeAttributionLine(line);
  if (candidate.length === 0) {
    return false;
  }

  const isAgentTrailer =
    TRAILER_LINE_PATTERN.test(candidate) && containsProviderAttributionMarker(candidate);
  if (isAgentTrailer) {
    return true;
  }

  const sentence = ATTRIBUTION_LINE_PATTERN.exec(candidate);
  return sentence !== null && isLikelyProviderLabel(sentence[1] ?? "");
}
63-
64-
/**
 * Tidies a list of lines: drops leading and trailing blank lines, strips
 * trailing whitespace from every remaining line, and collapses each run of
 * blank lines into a single empty string.
 *
 * @param lines Lines to tidy; the input array is not modified.
 * @returns A new array of tidied lines (empty when every line is blank).
 */
function trimBlankLines(lines: readonly string[]): string[] {
  const isBlank = (line: string | undefined): boolean => line?.trim().length === 0;

  let first = 0;
  while (first < lines.length && isBlank(lines[first])) {
    first += 1;
  }
  let last = lines.length - 1;
  while (last >= first && isBlank(lines[last])) {
    last -= 1;
  }

  return lines.slice(first, last + 1).reduce<string[]>((kept, line) => {
    const text = line.trimEnd();
    // Skip a blank line only when the previously kept line was blank too.
    const repeatsBlank = text.length === 0 && kept.length > 0 && kept[kept.length - 1] === "";
    if (!repeatsBlank) {
      kept.push(text);
    }
    return kept;
  }, []);
}
97-
98-
/**
 * Removes provider-attribution lines from generated text.
 *
 * Line endings (`\r\n`, `\r`, `\n`) are all treated as line breaks; the
 * surviving lines are tidied with `trimBlankLines`, joined with `\n`, and
 * the result is trimmed.
 */
export function stripProviderAttribution(raw: string): string {
  const survivors: string[] = [];
  for (const line of raw.split(/\r\n?|\n/)) {
    if (!isProviderAttributionLine(line)) {
      survivors.push(line);
    }
  }
  return trimBlankLines(survivors).join("\n").trim();
}
103-
104-
/**
 * Produces a single-line commit subject from generated text.
 *
 * Provider attribution is stripped, only the first remaining line is used,
 * trailing periods are removed, and the result is cut to at most 72 UTF-16
 * code units. Falls back to "Update project files" when nothing is left.
 *
 * @param raw Generated text that may contain attribution lines.
 * @returns The sanitized subject line; never empty.
 */
export function sanitizeGeneratedCommitSubject(raw: string): string {
  const maxSubjectLength = 72;

  const sanitized = stripProviderAttribution(raw);
  const firstLine = sanitized.split("\n")[0]?.trim() ?? "";
  const subject = firstLine.replace(/[.]+$/g, "").trim();
  if (subject.length === 0) {
    return "Update project files";
  }
  if (subject.length <= maxSubjectLength) {
    return subject;
  }

  let truncated = subject.slice(0, maxSubjectLength);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop a dangling high
  // surrogate rather than emit an unpaired code unit.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }
  return truncated.trimEnd();
}
117-
118-
/** Returns the generated commit body with provider attribution lines removed. */
export function sanitizeGeneratedCommitBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}
121-
122-
/**
 * Produces a single-line pull request title from generated text: strips
 * provider attribution and keeps the trimmed first remaining line, falling
 * back to "Update project changes" when that line is empty.
 */
export function sanitizeGeneratedPrTitle(raw: string): string {
  const [firstLine = ""] = stripProviderAttribution(raw).split("\n");
  const title = firstLine.trim();
  return title.length > 0 ? title : "Update project changes";
}
130-
131-
/** Returns the generated pull request body with provider attribution lines removed. */
export function sanitizeGeneratedPrBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}
1+
export {
2+
sanitizeGeneratedCommitBody,
3+
sanitizeGeneratedCommitSubject,
4+
sanitizeGeneratedPrBody,
5+
sanitizeGeneratedPrTitle,
6+
stripProviderAttribution,
7+
} from "@okcode/shared/generatedTextSanitization";

packages/shared/package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,10 @@
7171
"./projectIcons": {
7272
"types": "./src/projectIcons.ts",
7373
"import": "./src/projectIcons.ts"
74+
},
75+
"./generatedTextSanitization": {
76+
"types": "./src/generatedTextSanitization.ts",
77+
"import": "./src/generatedTextSanitization.ts"
7478
}
7579
},
7680
"scripts": {
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
/**
 * Lower-case substrings that identify an AI provider or agent.
 * Matching is done with a case-insensitive substring test by the helper
 * that consumes this list.
 */
const PROVIDER_ATTRIBUTION_MARKERS = [
  // Anthropic
  "claude code",
  "claude opus",
  "claude sonnet",
  "claude haiku",
  "anthropic",
  // OpenAI
  "chatgpt",
  "gpt-",
  "gpt ",
  "codex",
  "openai codex",
  "openai",
  // Other assistants
  "github copilot",
  "copilot",
  "cursor",
  "gemini",
  // Bot e-mail addresses seen in trailers
  "noreply@anthropic.com",
  "noreply@openai.com",
  "copilot@github.com",
] as const;

/** Matches the start of a `Co-authored-by:` or `Signed-off-by:` trailer (any case). */
const TRAILER_LINE_PATTERN = /^(?:co-authored-by|signed-off-by):/i;

/**
 * Matches lines such as "Generated with X" or "This PR was written by X.";
 * capture group 1 is the X part, without one trailing "." or "!".
 */
const ATTRIBUTION_LINE_PATTERN =
  /^(?:this (?:commit|pull request|pr) was\s+)?(?:generated|created|authored|written)\s+(?:with|by)\s+(.+?)(?:[.!])?$/i;
25+
26+
/**
 * Reduces a line to plain text so the attribution patterns can match it.
 *
 * Steps, in order: trim, replace markdown links `[text](url)` with `text`,
 * drop one leading `-`/`*` bullet, drop a leading robot emoji, and collapse
 * every whitespace run to a single space.
 */
function normalizeAttributionLine(line: string): string {
  const withoutLinks = line.trim().replace(/\[([^\]]+)\]\((?:[^)]+)\)/g, "$1");
  const withoutBullet = withoutLinks.replace(/^[-*]\s+/, "");
  const withoutRobot = withoutBullet.replace(/^🤖\s*/, "");
  return withoutRobot.replace(/\s+/g, " ");
}
34+
35+
/**
 * Reports whether `value` contains any entry of PROVIDER_ATTRIBUTION_MARKERS
 * as a substring, ignoring case.
 */
function containsProviderAttributionMarker(value: string): boolean {
  const haystack = value.toLowerCase();
  for (const marker of PROVIDER_ATTRIBUTION_MARKERS) {
    if (haystack.includes(marker)) {
      return true;
    }
  }
  return false;
}
39+
40+
/**
 * Decides whether the tail of an attribution sentence looks like a provider
 * name: it must contain a provider marker and be at most four words long
 * once brackets and quote characters are removed.
 */
function isLikelyProviderLabel(value: string): boolean {
  const label = value
    .trim()
    .replace(/[()[\]{}"'`]/g, "")
    .replace(/\s+/g, " ");
  const wordCount = label.split(" ").filter((word) => word.length > 0).length;
  return containsProviderAttributionMarker(label) && wordCount <= 4;
}
50+
51+
/**
 * Reports whether a single line is provider attribution that should be
 * dropped. Two shapes qualify:
 *  - a `Co-authored-by:` / `Signed-off-by:` trailer containing a provider marker;
 *  - a "generated/created/authored/written with|by X" sentence whose X looks
 *    like a provider label.
 * Lines that are empty after normalization never qualify.
 */
function isProviderAttributionLine(line: string): boolean {
  const candidate = normalizeAttributionLine(line);
  if (candidate.length === 0) {
    return false;
  }

  const isAgentTrailer =
    TRAILER_LINE_PATTERN.test(candidate) && containsProviderAttributionMarker(candidate);
  if (isAgentTrailer) {
    return true;
  }

  const sentence = ATTRIBUTION_LINE_PATTERN.exec(candidate);
  return sentence !== null && isLikelyProviderLabel(sentence[1] ?? "");
}
69+
70+
/**
 * Tidies a list of lines: drops leading and trailing blank lines, strips
 * trailing whitespace from every remaining line, and collapses each run of
 * blank lines into a single empty string.
 *
 * @param lines Lines to tidy; the input array is not modified.
 * @returns A new array of tidied lines (empty when every line is blank).
 */
function trimBlankLines(lines: readonly string[]): string[] {
  const isBlank = (line: string | undefined): boolean => line?.trim().length === 0;

  let first = 0;
  while (first < lines.length && isBlank(lines[first])) {
    first += 1;
  }
  let last = lines.length - 1;
  while (last >= first && isBlank(lines[last])) {
    last -= 1;
  }

  return lines.slice(first, last + 1).reduce<string[]>((kept, line) => {
    const text = line.trimEnd();
    // Skip a blank line only when the previously kept line was blank too.
    const repeatsBlank = text.length === 0 && kept.length > 0 && kept[kept.length - 1] === "";
    if (!repeatsBlank) {
      kept.push(text);
    }
    return kept;
  }, []);
}
103+
104+
/**
 * Removes provider-attribution lines from generated text.
 *
 * Line endings (`\r\n`, `\r`, `\n`) are all treated as line breaks; the
 * surviving lines are tidied with `trimBlankLines`, joined with `\n`, and
 * the result is trimmed.
 */
export function stripProviderAttribution(raw: string): string {
  const survivors: string[] = [];
  for (const line of raw.split(/\r\n?|\n/)) {
    if (!isProviderAttributionLine(line)) {
      survivors.push(line);
    }
  }
  return trimBlankLines(survivors).join("\n").trim();
}
109+
110+
/**
 * Produces a single-line commit subject from generated text.
 *
 * Provider attribution is stripped, only the first remaining line is used,
 * trailing periods are removed, and the result is cut to at most 72 UTF-16
 * code units. Falls back to "Update project files" when nothing is left.
 *
 * @param raw Generated text that may contain attribution lines.
 * @returns The sanitized subject line; never empty.
 */
export function sanitizeGeneratedCommitSubject(raw: string): string {
  const maxSubjectLength = 72;

  const sanitized = stripProviderAttribution(raw);
  const firstLine = sanitized.split("\n")[0]?.trim() ?? "";
  const subject = firstLine.replace(/[.]+$/g, "").trim();
  if (subject.length === 0) {
    return "Update project files";
  }
  if (subject.length <= maxSubjectLength) {
    return subject;
  }

  let truncated = subject.slice(0, maxSubjectLength);
  // slice() counts UTF-16 code units, so the cut can land between the two
  // halves of a surrogate pair (e.g. an emoji). Drop a dangling high
  // surrogate rather than emit an unpaired code unit.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    truncated = truncated.slice(0, -1);
  }
  return truncated.trimEnd();
}
123+
124+
/** Returns the generated commit body with provider attribution lines removed. */
export function sanitizeGeneratedCommitBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}
127+
128+
/**
 * Produces a single-line pull request title from generated text: strips
 * provider attribution and keeps the trimmed first remaining line, falling
 * back to "Update project changes" when that line is empty.
 */
export function sanitizeGeneratedPrTitle(raw: string): string {
  const [firstLine = ""] = stripProviderAttribution(raw).split("\n");
  const title = firstLine.trim();
  return title.length > 0 ? title : "Update project changes";
}
136+
137+
/** Returns the generated pull request body with provider attribution lines removed. */
export function sanitizeGeneratedPrBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import { readFile, writeFile } from "node:fs/promises";
2+
3+
import { stripProviderAttribution } from "@okcode/shared/generatedTextSanitization";
4+
5+
/**
 * Rewrites the commit message file named by the first CLI argument with
 * provider attribution removed. The file is only written when the sanitized
 * text (always ending in a single newline) differs from what was read.
 * Prints usage and exits with status 1 when no path is given.
 */
async function main() {
  const [, , commitMessagePath] = process.argv;
  if (!commitMessagePath) {
    console.error(
      "Usage: bun run scripts/strip-provider-attribution-from-commit-message.ts <commit-message-file>",
    );
    process.exit(1);
  }

  const before = await readFile(commitMessagePath, "utf8");
  const after = `${stripProviderAttribution(before)}\n`;
  if (after === before) {
    // Nothing to strip; leave the file untouched.
    return;
  }

  await writeFile(commitMessagePath, after, "utf8");
}

await main();

0 commit comments

Comments
 (0)