Skip to content

Commit 6ea7127

Browse files
committed
feat: Enhance GitHub and Bitbucket diff handling with fallback for large diffs and improved headers
1 parent 65c6010 commit 6ea7127

9 files changed

Lines changed: 281 additions & 17 deletions

File tree

java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/bitbucket/cloud/actions/GetPullRequestDiffAction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public String getPullRequestDiff(String workspace, String repoSlug, String prNum
3838

3939
Request req = new Request.Builder()
4040
.url(apiUrl)
41+
.header("Accept", "text/plain") // Required for Bitbucket diff endpoint to avoid 406
4142
.get()
4243
.build();
4344

java-ecosystem/libs/vcs-client/src/main/java/org/rostilos/codecrow/vcsclient/github/actions/GetPullRequestDiffAction.java

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
package org.rostilos.codecrow.vcsclient.github.actions;
22

3+
import com.fasterxml.jackson.databind.JsonNode;
4+
import com.fasterxml.jackson.databind.ObjectMapper;
35
import okhttp3.OkHttpClient;
46
import okhttp3.Request;
57
import okhttp3.Response;
@@ -8,10 +10,14 @@
810
import org.slf4j.LoggerFactory;
911

1012
import java.io.IOException;
13+
import java.util.regex.Matcher;
14+
import java.util.regex.Pattern;
1115

1216
public class GetPullRequestDiffAction {
1317

1418
private static final Logger log = LoggerFactory.getLogger(GetPullRequestDiffAction.class);
19+
private static final ObjectMapper objectMapper = new ObjectMapper();
20+
private static final Pattern LINK_NEXT_PATTERN = Pattern.compile("<([^>]+)>;\\s*rel=\"next\"");
1521
private final OkHttpClient authorizedOkHttpClient;
1622

1723
public GetPullRequestDiffAction(OkHttpClient authorizedOkHttpClient) {
@@ -24,12 +30,16 @@ public String getPullRequestDiff(String owner, String repo, int pullRequestNumbe
2430

2531
Request req = new Request.Builder()
2632
.url(apiUrl)
27-
.header("Accept", "application/vnd.github.diff")
33+
.header("Accept", "application/vnd.github.v3.diff")
2834
.header("X-GitHub-Api-Version", "2022-11-28")
2935
.get()
3036
.build();
3137

3238
try (Response resp = authorizedOkHttpClient.newCall(req).execute()) {
39+
if (resp.code() == 406) {
40+
log.warn("GitHub returned 406 for diff endpoint (likely too large), falling back to /files endpoint for PR #{}", pullRequestNumber);
41+
return getPullRequestDiffFromFiles(owner, repo, pullRequestNumber);
42+
}
3343
if (!resp.isSuccessful()) {
3444
String body = resp.body() != null ? resp.body().string() : "";
3545
String msg = String.format("GitHub returned non-success response %d for URL %s: %s",
@@ -40,4 +50,73 @@ public String getPullRequestDiff(String owner, String repo, int pullRequestNumbe
4050
return resp.body() != null ? resp.body().string() : "";
4151
}
4252
}
53+
54+
/**
55+
* Fallback method when the diff endpoint returns 406 (diff too large).
56+
* Uses the /files endpoint which returns patches for each file with pagination.
57+
*/
58+
private String getPullRequestDiffFromFiles(String owner, String repo, int pullRequestNumber) throws IOException {
59+
StringBuilder combinedDiff = new StringBuilder();
60+
String nextUrl = String.format("%s/repos/%s/%s/pulls/%d/files?per_page=100",
61+
GitHubConfig.API_BASE, owner, repo, pullRequestNumber);
62+
63+
while (nextUrl != null) {
64+
Request req = new Request.Builder()
65+
.url(nextUrl)
66+
.header("Accept", "application/vnd.github.v3+json")
67+
.header("X-GitHub-Api-Version", "2022-11-28")
68+
.get()
69+
.build();
70+
71+
try (Response resp = authorizedOkHttpClient.newCall(req).execute()) {
72+
if (!resp.isSuccessful()) {
73+
String body = resp.body() != null ? resp.body().string() : "";
74+
String msg = String.format("GitHub returned non-success response %d for files endpoint: %s", resp.code(), body);
75+
log.warn(msg);
76+
throw new IOException(msg);
77+
}
78+
79+
String responseBody = resp.body() != null ? resp.body().string() : "[]";
80+
JsonNode files = objectMapper.readTree(responseBody);
81+
82+
for (JsonNode file : files) {
83+
String filename = file.has("filename") ? file.get("filename").asText() : "";
84+
String patch = file.has("patch") ? file.get("patch").asText() : "";
85+
String status = file.has("status") ? file.get("status").asText() : "";
86+
String previousFilename = file.has("previous_filename") ? file.get("previous_filename").asText() : "";
87+
88+
if (!patch.isEmpty()) {
89+
// Build a unified diff header
90+
String fromFile = "renamed".equals(status) && !previousFilename.isEmpty() ? previousFilename : filename;
91+
combinedDiff.append("diff --git a/").append(fromFile).append(" b/").append(filename).append("\n");
92+
93+
if ("added".equals(status)) {
94+
combinedDiff.append("new file mode 100644\n");
95+
} else if ("removed".equals(status)) {
96+
combinedDiff.append("deleted file mode 100644\n");
97+
} else if ("renamed".equals(status)) {
98+
combinedDiff.append("rename from ").append(previousFilename).append("\n");
99+
combinedDiff.append("rename to ").append(filename).append("\n");
100+
}
101+
102+
combinedDiff.append("--- a/").append(fromFile).append("\n");
103+
combinedDiff.append("+++ b/").append(filename).append("\n");
104+
combinedDiff.append(patch).append("\n");
105+
}
106+
}
107+
108+
// Check for next page in Link header
109+
nextUrl = null;
110+
String linkHeader = resp.header("Link");
111+
if (linkHeader != null) {
112+
Matcher matcher = LINK_NEXT_PATTERN.matcher(linkHeader);
113+
if (matcher.find()) {
114+
nextUrl = matcher.group(1);
115+
}
116+
}
117+
}
118+
}
119+
120+
return combinedDiff.toString();
121+
}
43122
}

java-ecosystem/mcp-servers/bitbucket-mcp/src/main/java/org/rostilos/codecrow/mcp/bitbucket/cloud/BitbucketCloudClientImpl.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,7 @@ public String getPullRequestDiff(String workspace, String repoSlug, String pullR
518518
String apiUrl = String.format("https://api.bitbucket.org/2.0/repositories/%s/%s/pullrequests/%s/diff", workspace, repoSlug, pullRequestId);
519519
Request request = new Request.Builder()
520520
.url(apiUrl)
521+
.header("Accept", "text/plain") // Required for Bitbucket diff endpoint to avoid 406
521522
.get()
522523
.build();
523524

java-ecosystem/mcp-servers/bitbucket-mcp/src/main/java/org/rostilos/codecrow/mcp/github/GitHubClientFactory.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,13 @@ public GitHubMcpClientImpl createClient() {
3636
.readTimeout(60, TimeUnit.SECONDS)
3737
.writeTimeout(60, TimeUnit.SECONDS)
3838
.addInterceptor(chain -> {
39-
okhttp3.Request.Builder builder = chain.request().newBuilder()
39+
okhttp3.Request originalRequest = chain.request();
40+
okhttp3.Request.Builder builder = originalRequest.newBuilder()
4041
.header("Authorization", "Bearer " + accessToken)
4142
.header("X-GitHub-Api-Version", "2022-11-28");
42-
if (chain.request().header("Accept") == null) {
43+
// Only set default Accept header if not already specified in the request
44+
// This allows methods like getPullRequestDiff to use their own Accept header
45+
if (originalRequest.header("Accept") == null) {
4346
builder.header("Accept", "application/vnd.github+json");
4447
}
4548
return chain.proceed(builder.build());

java-ecosystem/mcp-servers/bitbucket-mcp/src/main/java/org/rostilos/codecrow/mcp/github/GitHubMcpClientImpl.java

Lines changed: 119 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -308,17 +308,132 @@ public String getPullRequestDiff(String owner, String repoSlug, String pullReque
308308
String url = String.format("%s/repos/%s/%s/pulls/%s", API_BASE, owner, repoSlug, pullRequestId);
309309
Request req = new Request.Builder()
310310
.url(url)
311-
.header("Accept", "application/vnd.github.diff")
311+
.header("Accept", "application/vnd.github.v3.diff")
312312
.get()
313313
.build();
314+
LOGGER.info("getPullRequestDiff request - URL: {}, Accept header: {}", url, req.header("Accept"));
314315
try (Response resp = httpClient.newCall(req).execute()) {
315-
if (!resp.isSuccessful()) {
316-
throw new IOException("Failed to get PR diff: " + resp.code());
316+
if (resp.isSuccessful()) {
317+
return resp.body().string();
317318
}
318-
return resp.body().string();
319+
320+
// If 406 (diff too large - exceeds GitHub's 20,000 line limit), fall back to per-file approach
321+
if (resp.code() == 406) {
322+
String body = resp.body() != null ? resp.body().string() : "";
323+
LOGGER.warn("PR diff too large (406), falling back to files endpoint. Response: {}", body);
324+
return getPullRequestDiffFromFiles(owner, repoSlug, pullRequestId);
325+
}
326+
327+
String body = resp.body() != null ? resp.body().string() : "";
328+
LOGGER.error("getPullRequestDiff failed - code: {}, body: {}", resp.code(), body);
329+
throw new IOException("Failed to get PR diff: " + resp.code() + " - " + body);
319330
}
320331
}
321332

333+
/**
334+
* Fetches diff using the /files endpoint when the full PR diff exceeds GitHub's 20,000 line limit.
335+
* This endpoint returns patches for each file in a single paginated request (up to 3000 files, 100 per page).
336+
*
337+
* @see <a href="https://docs.github.com/en/rest/pulls/pulls#list-pull-requests-files">GitHub API: List PR Files</a>
338+
*/
339+
private String getPullRequestDiffFromFiles(String owner, String repoSlug, String pullRequestId) throws IOException {
340+
StringBuilder combinedDiff = new StringBuilder();
341+
int page = 1;
342+
int perPage = 100; // GitHub max per page
343+
int totalFiles = 0;
344+
boolean hasMorePages = true;
345+
346+
while (hasMorePages) {
347+
String filesUrl = String.format("%s/repos/%s/%s/pulls/%s/files?per_page=%d&page=%d",
348+
API_BASE, owner, repoSlug, pullRequestId, perPage, page);
349+
Request filesReq = new Request.Builder().url(filesUrl).get().build();
350+
351+
try (Response filesResp = httpClient.newCall(filesReq).execute()) {
352+
if (!filesResp.isSuccessful()) {
353+
throw new IOException("Failed to get PR files: " + filesResp.code());
354+
}
355+
356+
JsonNode filesNode = objectMapper.readTree(filesResp.body().string());
357+
358+
// Check if we got any files
359+
if (!filesNode.isArray() || filesNode.isEmpty()) {
360+
hasMorePages = false;
361+
continue;
362+
}
363+
364+
int filesInPage = filesNode.size();
365+
totalFiles += filesInPage;
366+
367+
for (JsonNode file : filesNode) {
368+
// Respect file limit if configured
369+
if (fileLimit > 0 && totalFiles > fileLimit) {
370+
LOGGER.info("Reached file limit of {}, stopping diff collection", fileLimit);
371+
hasMorePages = false;
372+
break;
373+
}
374+
375+
String filename = file.get("filename").asText();
376+
String status = file.get("status").asText();
377+
String patch = file.has("patch") && !file.get("patch").isNull()
378+
? file.get("patch").asText() : null;
379+
380+
// Build unified diff format header
381+
combinedDiff.append("diff --git a/").append(filename).append(" b/").append(filename).append("\n");
382+
383+
// Add status-specific headers
384+
switch (status) {
385+
case "added":
386+
combinedDiff.append("new file mode 100644\n");
387+
combinedDiff.append("--- /dev/null\n");
388+
combinedDiff.append("+++ b/").append(filename).append("\n");
389+
break;
390+
case "removed":
391+
combinedDiff.append("deleted file mode 100644\n");
392+
combinedDiff.append("--- a/").append(filename).append("\n");
393+
combinedDiff.append("+++ /dev/null\n");
394+
break;
395+
case "renamed":
396+
String previousFilename = file.has("previous_filename")
397+
? file.get("previous_filename").asText() : filename;
398+
combinedDiff.append("rename from ").append(previousFilename).append("\n");
399+
combinedDiff.append("rename to ").append(filename).append("\n");
400+
combinedDiff.append("--- a/").append(previousFilename).append("\n");
401+
combinedDiff.append("+++ b/").append(filename).append("\n");
402+
break;
403+
default: // modified, copied, etc.
404+
combinedDiff.append("--- a/").append(filename).append("\n");
405+
combinedDiff.append("+++ b/").append(filename).append("\n");
406+
break;
407+
}
408+
409+
// Add the patch content
410+
if (patch != null && !patch.isEmpty()) {
411+
combinedDiff.append(patch).append("\n");
412+
} else {
413+
// Binary or too large file - GitHub doesn't provide patch
414+
combinedDiff.append("@@ -0,0 +0,0 @@\n");
415+
combinedDiff.append("\\ No patch available (binary or truncated file)\n");
416+
}
417+
418+
combinedDiff.append("\n");
419+
}
420+
421+
// Check if there are more pages (less than perPage means last page)
422+
hasMorePages = filesInPage == perPage;
423+
page++;
424+
425+
// Safety limit to prevent infinite loops
426+
if (page > 50) {
427+
LOGGER.warn("Reached maximum page limit (50), stopping pagination");
428+
hasMorePages = false;
429+
}
430+
}
431+
}
432+
433+
LOGGER.info("Constructed diff from {} files across {} pages", totalFiles, page - 1);
434+
return combinedDiff.toString();
435+
}
436+
322437
@Override
323438
public Object getPullRequestCommits(String owner, String repoSlug, String pullRequestId) throws IOException {
324439
String url = String.format("%s/repos/%s/%s/pulls/%s/commits", API_BASE, owner, repoSlug, pullRequestId);

java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/handler/processor/SummarizeCommandProcessor.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ private String formatSummaryForPosting(SummaryResult result, PrSummarizeCache.Di
489489
}
490490

491491
sb.append("\n\n---\n");
492-
sb.append("_Generated by CodeCrow_ 🦅");
492+
sb.append("_Generated by CodeCrow_ \uD83D\uDC26\u200D");
493493

494494
String content = sb.toString();
495495
if (content.length() > MAX_SUMMARY_LENGTH) {

python-ecosystem/.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
### IntelliJ IDEA ###
2+
.idea
3+
*.iws
4+
*.iml
5+
*.ipr
6+
.github
7+
docker-compose.yml
8+
mcp-client/logs/**

0 commit comments

Comments
 (0)