Skip to content

Commit 1bee696

Browse files
authored
Strip provider attribution from generated git text (#354)
- centralize sanitization for generated commit and PR content
- remove AI/provider signatures, trailers, and footers before writing output
- add tests for attribution stripping and fallback subjects
1 parent 6cf2a72 commit 1bee696

4 files changed

Lines changed: 234 additions & 33 deletions

File tree

apps/server/src/git/Layers/CodexTextGeneration.ts

Lines changed: 12 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,12 @@ import { sanitizeBranchFragment, sanitizeFeatureBranchName } from "@okcode/share
1414
import { resolveAttachmentPath } from "../../attachmentStore.ts";
1515
import { ServerConfig } from "../../config.ts";
1616
import { getRuntimeEnv } from "../../runtimeEnvironment.ts";
17+
import {
18+
sanitizeGeneratedCommitBody,
19+
sanitizeGeneratedCommitSubject,
20+
sanitizeGeneratedPrBody,
21+
sanitizeGeneratedPrTitle,
22+
} from "../generatedTextSanitization.ts";
1723
import { TextGenerationError } from "../Errors.ts";
1824
import {
1925
type BranchNameGenerationInput,
@@ -80,27 +86,6 @@ function limitSection(value: string, maxChars: number): string {
8086
return `${truncated}\n\n[truncated]`;
8187
}
8288

83-
function sanitizeCommitSubject(raw: string): string {
84-
const singleLine = raw.trim().split(/\r?\n/g)[0]?.trim() ?? "";
85-
const withoutTrailingPeriod = singleLine.replace(/[.]+$/g, "").trim();
86-
if (withoutTrailingPeriod.length === 0) {
87-
return "Update project files";
88-
}
89-
90-
if (withoutTrailingPeriod.length <= 72) {
91-
return withoutTrailingPeriod;
92-
}
93-
return withoutTrailingPeriod.slice(0, 72).trimEnd();
94-
}
95-
96-
function sanitizePrTitle(raw: string): string {
97-
const singleLine = raw.trim().split(/\r?\n/g)[0]?.trim() ?? "";
98-
if (singleLine.length > 0) {
99-
return singleLine;
100-
}
101-
return "Update project changes";
102-
}
103-
10489
const makeCodexTextGeneration = Effect.gen(function* () {
10590
const fileSystem = yield* FileSystem.FileSystem;
10691
const path = yield* Path.Path;
@@ -341,6 +326,7 @@ const makeCodexTextGeneration = Effect.gen(function* () {
341326
? ["- branch must be a short semantic git branch fragment for this change"]
342327
: []),
343328
"- capture the primary user-visible or developer-visible change",
329+
"- do not include AI/provider attribution, signatures, trailers, or generated-with footers",
344330
"",
345331
`Branch: ${input.branch ?? "(detached)"}`,
346332
"",
@@ -372,8 +358,8 @@ const makeCodexTextGeneration = Effect.gen(function* () {
372358
Effect.map(
373359
(generated) =>
374360
({
375-
subject: sanitizeCommitSubject(generated.subject),
376-
body: generated.body.trim(),
361+
subject: sanitizeGeneratedCommitSubject(generated.subject),
362+
body: sanitizeGeneratedCommitBody(generated.body),
377363
...("branch" in generated && typeof generated.branch === "string"
378364
? { branch: sanitizeFeatureBranchName(generated.branch) }
379365
: {}),
@@ -391,6 +377,7 @@ const makeCodexTextGeneration = Effect.gen(function* () {
391377
"- body must be markdown and include headings '## Summary' and '## Testing'",
392378
"- under Summary, provide short bullet points",
393379
"- under Testing, include bullet points with concrete checks or 'Not run' where appropriate",
380+
"- do not include AI/provider attribution, co-author trailers, or generated-with footers",
394381
"",
395382
`Base branch: ${input.baseBranch}`,
396383
`Head branch: ${input.headBranch}`,
@@ -418,8 +405,8 @@ const makeCodexTextGeneration = Effect.gen(function* () {
418405
Effect.map(
419406
(generated) =>
420407
({
421-
title: sanitizePrTitle(generated.title),
422-
body: generated.body.trim(),
408+
title: sanitizeGeneratedPrTitle(generated.title),
409+
body: sanitizeGeneratedPrBody(generated.body),
423410
}) satisfies PrContentGenerationResult,
424411
),
425412
);

apps/server/src/git/Layers/GitManager.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ import {
1919
import { GitCore } from "../Services/GitCore.ts";
2020
import { GitHubCli } from "../Services/GitHubCli.ts";
2121
import { TextGeneration } from "../Services/TextGeneration.ts";
22+
import {
23+
sanitizeGeneratedCommitBody,
24+
sanitizeGeneratedCommitSubject,
25+
sanitizeGeneratedPrBody,
26+
sanitizeGeneratedPrTitle,
27+
} from "../generatedTextSanitization.ts";
2228
import { buildGitActionFailure } from "../actionFailure.ts";
2329

2430
const COMMIT_TIMEOUT_MS = 10 * 60_000;
@@ -202,12 +208,9 @@ function sanitizeCommitMessage(generated: {
202208
body: string;
203209
branch?: string | undefined;
204210
} {
205-
const rawSubject = generated.subject.trim().split(/\r?\n/g)[0]?.trim() ?? "";
206-
const subject = rawSubject.replace(/[.]+$/g, "").trim();
207-
const safeSubject = subject.length > 0 ? subject.slice(0, 72).trimEnd() : "Update project files";
208211
return {
209-
subject: safeSubject,
210-
body: generated.body.trim(),
212+
subject: sanitizeGeneratedCommitSubject(generated.subject),
213+
body: sanitizeGeneratedCommitBody(generated.body),
211214
...(generated.branch !== undefined ? { branch: generated.branch } : {}),
212215
};
213216
}
@@ -1044,10 +1047,12 @@ export const makeGitManager = Effect.gen(function* () {
10441047
diffPatch: limitContext(rangeContext.diffPatch, 60_000),
10451048
...(model ? { model } : {}),
10461049
});
1050+
const sanitizedPrTitle = sanitizeGeneratedPrTitle(generated.title);
1051+
const sanitizedPrBody = sanitizeGeneratedPrBody(generated.body);
10471052

10481053
const bodyFile = path.join(tempDir, `okcode-pr-body-${process.pid}-${randomUUID()}.md`);
10491054
yield* fileSystem
1050-
.writeFileString(bodyFile, generated.body)
1055+
.writeFileString(bodyFile, sanitizedPrBody)
10511056
.pipe(
10521057
Effect.mapError((cause) =>
10531058
gitManagerError("runPrStep", "Failed to write pull request body temp file.", cause),
@@ -1058,7 +1063,7 @@ export const makeGitManager = Effect.gen(function* () {
10581063
cwd,
10591064
baseBranch,
10601065
headSelector: headContext.preferredHeadSelector,
1061-
title: generated.title,
1066+
title: sanitizedPrTitle,
10621067
bodyFile,
10631068
})
10641069
.pipe(Effect.ensuring(fileSystem.remove(bodyFile).pipe(Effect.catch(() => Effect.void))));
@@ -1069,7 +1074,7 @@ export const makeGitManager = Effect.gen(function* () {
10691074
status: "created" as const,
10701075
baseBranch,
10711076
headBranch: headContext.headBranch,
1072-
title: generated.title,
1077+
title: sanitizedPrTitle,
10731078
};
10741079
}
10751080

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { describe, expect, it } from "vitest";
2+
3+
import {
4+
sanitizeGeneratedCommitBody,
5+
sanitizeGeneratedCommitSubject,
6+
sanitizeGeneratedPrBody,
7+
sanitizeGeneratedPrTitle,
8+
} from "./generatedTextSanitization.ts";
9+
10+
describe("generatedTextSanitization", () => {
  // Builds the multi-line text handed to the sanitizers from individual lines.
  const joinLines = (...lines: string[]): string => lines.join("\n");

  it("removes provider attribution trailers from generated commit bodies", () => {
    const body = joinLines(
      "- Add server-side fallback",
      "",
      "Generated with [Claude Code](https://claude.ai/code)",
      "Co-authored-by: Codex <noreply@openai.com>",
    );

    expect(sanitizeGeneratedCommitBody(body)).toBe("- Add server-side fallback");
  });

  it("falls back when the generated commit subject is only provider attribution", () => {
    const subject = sanitizeGeneratedCommitSubject("Generated with Claude Code");

    expect(subject).toBe("Update project files");
  });

  it("removes provider attribution notes from generated PR bodies", () => {
    const body = joinLines(
      "## Summary",
      "- Tighten generated git text sanitizing",
      "",
      "## Testing",
      "- Not run",
      "",
      "Generated by OpenAI Codex",
      "Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>",
    );
    const expected = joinLines(
      "## Summary",
      "- Tighten generated git text sanitizing",
      "",
      "## Testing",
      "- Not run",
    );

    expect(sanitizeGeneratedPrBody(body)).toBe(expected);
  });

  it("falls back when the generated PR title is only provider attribution", () => {
    const title = sanitizeGeneratedPrTitle("Generated by OpenAI Codex");

    expect(title).toBe("Update project changes");
  });

  it("does not strip normal summary lines that mention providers in prose", () => {
    // The input contains no attribution, so it must come back unchanged.
    const body = joinLines(
      "## Summary",
      "- Document OpenAI provider failover behavior",
      "",
      "## Testing",
      "- Not run",
    );

    expect(sanitizeGeneratedPrBody(body)).toBe(body);
  });
});
Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
/**
 * Lower-case fragments that identify an AI tool or provider.
 *
 * `containsProviderAttributionMarker` lower-cases its input and reports a hit
 * when any of these fragments occurs in it, so every entry must itself be
 * lower-case. Order is irrelevant to that check; entries are grouped by vendor
 * purely for readability.
 */
const PROVIDER_ATTRIBUTION_MARKERS = [
  // Anthropic
  "claude code",
  "anthropic",
  "noreply@anthropic.com",
  // OpenAI
  "codex",
  "openai codex",
  "openai",
  "noreply@openai.com",
  // GitHub
  "github copilot",
  "copilot",
  "copilot@github.com",
  // Other assistants
  "cursor",
  "gemini",
] as const;
15+
16+
/**
 * Matches a line that begins with a `Co-authored-by:` or `Signed-off-by:`
 * trailer key, ignoring case. Only the key is checked; the value is not.
 */
const TRAILER_LINE_PATTERN = /^(?:co-authored-by|signed-off-by):/i;

/**
 * Matches a whole line shaped like
 * "[This commit|pull request|PR was] generated|created|authored|written with|by <tail>",
 * ignoring case. Capture group 1 is `<tail>` without a single optional
 * trailing "." or "!".
 */
const ATTRIBUTION_LINE_PATTERN =
  /^(?:this (?:commit|pull request|pr) was\s+)?(?:generated|created|authored|written)\s+(?:with|by)\s+(.+?)(?:[.!])?$/i;
19+
20+
/**
 * Reduces a line to plain text so it can be compared against the attribution
 * patterns.
 *
 * Steps, in order: trim the line, replace markdown links `[text](url)` with
 * `text`, drop one leading `-`/`*` bullet, drop a leading robot emoji, and
 * collapse every whitespace run to a single space.
 *
 * @param line - One line of generated text.
 * @returns The normalized line (possibly empty).
 */
function normalizeAttributionLine(line: string): string {
  const withoutLinks = line.trim().replace(/\[([^\]]+)\]\((?:[^)]+)\)/g, "$1");
  const withoutBullet = withoutLinks.replace(/^[-*]\s+/, "");
  const withoutRobot = withoutBullet.replace(/^🤖\s*/, "");
  return withoutRobot.replace(/\s+/g, " ");
}
28+
29+
/**
 * Reports whether `value` mentions one of `PROVIDER_ATTRIBUTION_MARKERS`.
 *
 * Matching is case-insensitive. A marker only counts when it is not glued to
 * a letter on either side, so "cursor" matches "Cursor" and "cursor-agent"
 * but not "precursor", and "openai" matches "noreply@openai.com" but not
 * "openairlines". A plain substring test would treat those longer words as
 * provider mentions and cause unrelated human trailers to be removed.
 *
 * @param value - Text to inspect, e.g. a normalized trailer line.
 * @returns `true` when at least one marker occurs as a standalone token.
 */
function containsProviderAttributionMarker(value: string): boolean {
  const lower = value.toLowerCase();
  const letter = /\p{L}/u;
  const isLetter = (char: string | undefined): boolean =>
    char !== undefined && letter.test(char);

  return PROVIDER_ATTRIBUTION_MARKERS.some((marker) => {
    // Check every occurrence: an embedded hit may be followed by a standalone one.
    for (
      let index = lower.indexOf(marker);
      index !== -1;
      index = lower.indexOf(marker, index + 1)
    ) {
      const before = index > 0 ? lower[index - 1] : undefined;
      const after = lower[index + marker.length];
      if (!isLetter(before) && !isLetter(after)) {
        return true;
      }
    }
    return false;
  });
}
33+
34+
/**
 * Decides whether the tail of an attribution phrase is a short provider name
 * rather than ordinary prose.
 *
 * Brackets, braces, parentheses and quote characters are removed and
 * whitespace is collapsed first. The result must contain a provider marker
 * and consist of at most four space-separated words.
 *
 * @param value - Text following "generated with" / "created by" and similar.
 * @returns `true` when the text looks like a provider label.
 */
function isLikelyProviderLabel(value: string): boolean {
  const label = value
    .trim()
    .replace(/[()[\]{}"'`]/g, "")
    .replace(/\s+/g, " ");
  const wordCount = label.split(" ").filter((word) => word.length > 0).length;
  return containsProviderAttributionMarker(label) && wordCount <= 4;
}
44+
45+
/**
 * Classifies a single line as provider attribution or regular content.
 *
 * A line is attribution when, after normalization, it is either
 * - a `Co-authored-by:` / `Signed-off-by:` trailer that mentions a provider
 *   marker, or
 * - a "generated/created/authored/written with/by ..." sentence whose tail is
 *   a short provider label.
 *
 * Blank lines are never attribution.
 *
 * @param line - One raw line of generated text.
 * @returns `true` when the line should be removed.
 */
function isProviderAttributionLine(line: string): boolean {
  const candidate = normalizeAttributionLine(line);
  if (candidate.length === 0) {
    return false;
  }

  const isProviderTrailer =
    TRAILER_LINE_PATTERN.test(candidate) && containsProviderAttributionMarker(candidate);
  if (isProviderTrailer) {
    return true;
  }

  const match = ATTRIBUTION_LINE_PATTERN.exec(candidate);
  return match !== null && isLikelyProviderLabel(match[1] ?? "");
}
63+
64+
/**
 * Tidies the blank lines of a text given as an array of lines.
 *
 * - Leading and trailing whitespace-only lines are dropped.
 * - Runs of consecutive blank lines collapse into a single empty line.
 * - Trailing whitespace is removed from every remaining line.
 *
 * @param lines - Lines to tidy; the array is not modified.
 * @returns A new array with the tidied lines.
 */
function trimBlankLines(lines: readonly string[]): string[] {
  const isBlank = (line: string | undefined): boolean =>
    line !== undefined && line.trim().length === 0;

  let first = 0;
  while (first < lines.length && isBlank(lines[first])) {
    first += 1;
  }

  let last = lines.length;
  while (last > first && isBlank(lines[last - 1])) {
    last -= 1;
  }

  return lines.slice(first, last).reduce<string[]>((result, line) => {
    const text = line.trimEnd();
    // Only blank lines are stored as "", so an empty last entry means the
    // previous line was blank and this one would start a duplicate run.
    const repeatsBlank = text.length === 0 && result[result.length - 1] === "";
    if (!repeatsBlank) {
      result.push(text);
    }
    return result;
  }, []);
}
97+
98+
/**
 * Removes provider attribution lines from generated text.
 *
 * Line endings are normalized to `\n`, every line recognized by
 * `isProviderAttributionLine` is dropped, the remaining blank lines are tidied
 * with `trimBlankLines`, and the joined result is trimmed.
 *
 * @param raw - Generated text, possibly multi-line.
 * @returns The text without attribution lines; may be empty.
 */
export function stripProviderAttribution(raw: string): string {
  const kept: string[] = [];
  for (const line of raw.replace(/\r\n?/g, "\n").split("\n")) {
    if (!isProviderAttributionLine(line)) {
      kept.push(line);
    }
  }
  return trimBlankLines(kept).join("\n").trim();
}
103+
104+
/**
 * Produces a commit subject from generated text.
 *
 * Provider attribution is stripped, the first remaining line is taken, and
 * trailing periods/whitespace are removed. Subjects longer than 72 UTF-16
 * code units are truncated. Truncation never splits a surrogate pair (which
 * would leave a lone surrogate in the commit message), and trailing
 * periods/whitespace exposed by the cut are removed again.
 *
 * @param raw - Generated subject text, possibly multi-line.
 * @returns The sanitized subject, or "Update project files" when nothing
 *   usable remains.
 */
export function sanitizeGeneratedCommitSubject(raw: string): string {
  const maxLength = 72;
  const fallback = "Update project files";
  const stripTrailingPeriods = (value: string): string => value.replace(/[\s.]+$/, "");

  const firstLine = stripProviderAttribution(raw).split("\n")[0]?.trim() ?? "";
  const subject = stripTrailingPeriods(firstLine);
  if (subject.length === 0) {
    return fallback;
  }
  if (subject.length <= maxLength) {
    return subject;
  }

  // If the last kept code unit is a high surrogate, its partner lies beyond
  // the limit; drop it too rather than emit half a character.
  const lastKept = subject.charCodeAt(maxLength - 1);
  const cut = lastKept >= 0xd800 && lastKept <= 0xdbff ? maxLength - 1 : maxLength;

  const truncated = stripTrailingPeriods(subject.slice(0, cut));
  return truncated.length > 0 ? truncated : fallback;
}
117+
118+
/**
 * Sanitizes a generated commit body by removing provider attribution lines.
 *
 * @param raw - Generated commit body.
 * @returns The result of `stripProviderAttribution` for `raw`.
 */
export function sanitizeGeneratedCommitBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}
121+
122+
/**
 * Produces a pull request title from generated text.
 *
 * Provider attribution is stripped and the first remaining line, trimmed, is
 * used as the title.
 *
 * @param raw - Generated title text, possibly multi-line.
 * @returns The sanitized title, or "Update project changes" when nothing
 *   usable remains.
 */
export function sanitizeGeneratedPrTitle(raw: string): string {
  const [firstLine = ""] = stripProviderAttribution(raw).split("\n");
  const title = firstLine.trim();
  return title.length > 0 ? title : "Update project changes";
}
130+
131+
/**
 * Sanitizes a generated pull request body by removing provider attribution
 * lines.
 *
 * @param raw - Generated pull request body.
 * @returns The result of `stripProviderAttribution` for `raw`.
 */
export function sanitizeGeneratedPrBody(raw: string): string {
  const body = stripProviderAttribution(raw);
  return body;
}

0 commit comments

Comments
 (0)