Skip to content

Commit d608c4b

Browse files
Add tree-sitter structural diff for code files
Extends the compare_file_contents tool with AST-based structural diffing for code files using tree-sitter. Instead of line-based diffs, this shows declaration-level changes (functions, classes, types) which gives models more concise and semantically meaningful context. Supported languages: Go, Python, JavaScript, TypeScript, Ruby, Rust, Java, C/C++. Requires CGO_ENABLED=1 for the tree-sitter C bindings. Windows builds are removed from goreleaser as CGO cross-compilation is not supported without additional toolchain setup. For unsupported languages, falls back to unified line-based diff.
1 parent cec7b00 commit d608c4b

File tree

9 files changed

+882
-39
lines changed

9 files changed

+882
-39
lines changed

.goreleaser.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,11 @@ before:
77

88
builds:
99
- env:
10-
- CGO_ENABLED=0
10+
- CGO_ENABLED=1
1111
ldflags:
1212
- -s -w -X main.version={{.Version}} -X main.commit={{.Commit}} -X main.date={{.Date}}
1313
goos:
1414
- linux
15-
- windows
1615
- darwin
1716
main: ./cmd/github-mcp-server
1817

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,16 @@ ARG VERSION="dev"
44
# Set the working directory
55
WORKDIR /build
66

7-
# Install git
7+
# Install git and C compiler for CGO (tree-sitter)
88
RUN --mount=type=cache,target=/var/cache/apk \
9-
apk add git
9+
apk add git gcc musl-dev
1010

1111
# Build the server
1212
# go build automatically downloads required module dependencies to /go/pkg/mod
1313
RUN --mount=type=cache,target=/go/pkg/mod \
1414
--mount=type=cache,target=/root/.cache/go-build \
1515
--mount=type=bind,target=. \
16-
CGO_ENABLED=0 go build -ldflags="-s -w -X main.version=${VERSION} -X main.commit=$(git rev-parse HEAD) -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
16+
CGO_ENABLED=1 go build -ldflags="-s -w -X main.version=${VERSION} -X main.commit=$(git rev-parse HEAD) -X main.date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \
1717
-o /bin/github-mcp-server ./cmd/github-mcp-server
1818

1919
# Make a stage to run the app

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ require (
1313
github.com/stretchr/testify v1.11.1
1414
)
1515

16+
require github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82
17+
1618
require (
1719
github.com/aymerick/douceur v0.2.0 // indirect
1820
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect

go.sum

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7 h1:cYCy18SHPKRkv
7474
github.com/shurcooL/githubv4 v0.0.0-20240727222349-48295856cce7/go.mod h1:zqMwyHmnN/eDOZOdiTohqIUKUrTFX62PNlu7IJdu0q8=
7575
github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466 h1:17JxqqJY66GmZVHkmAsGEkcIu0oCe3AM420QDgGwZx0=
7676
github.com/shurcooL/graphql v0.0.0-20230722043721-ed46e5a46466/go.mod h1:9dIRpgIY7hVhoqfe0/FcYp0bpInZaT7dc3BYOprrIUE=
77+
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4=
78+
github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw=
7779
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw=
7880
github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U=
7981
github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I=

pkg/github/compare_file_contents_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func Test_CompareFileContents(t *testing.T) {
114114
expectDiff: `host: "localhost" → "production.db"`,
115115
},
116116
{
117-
name: "unsupported format falls back to unified diff",
117+
name: "Go file uses structural diff",
118118
mockedClient: MockHTTPClientWithHandlers(map[string]http.HandlerFunc{
119119
GetReposContentsByOwnerByRepoByPath: mockContentsForRef(map[string]string{
120120
"main": "func main() {}\n",
@@ -128,8 +128,8 @@ func Test_CompareFileContents(t *testing.T) {
128128
"base": "main",
129129
"head": "feature",
130130
},
131-
expectFormat: "unified",
132-
expectDiff: "--- a/main.go",
131+
expectFormat: "structural",
132+
expectDiff: "function_declaration main: modified",
133133
},
134134
{
135135
name: "missing required parameter - owner",

pkg/github/semantic_diff.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ func SemanticDiff(path string, base, head []byte) SemanticDiffResult {
8080
case ".toml":
8181
return semanticDiffTOML(path, base, head)
8282
default:
83+
// Try tree-sitter structural diff for code files
84+
if languageForPath(path) != nil {
85+
return structuralDiff(path, base, head)
86+
}
8387
return SemanticDiffResult{
8488
Format: DiffFormatUnified,
8589
Diff: unifiedDiff(path, base, head),
@@ -542,6 +546,9 @@ func DetectDiffFormat(path string) DiffFormat {
542546
case ".toml":
543547
return DiffFormatTOML
544548
default:
549+
if languageForPath(path) != nil {
550+
return DiffFormatStructural
551+
}
545552
return DiffFormatUnified
546553
}
547554
}

pkg/github/semantic_diff_test.go

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -302,36 +302,17 @@ func TestSemanticDiffTOML(t *testing.T) {
302302
}
303303

304304
func TestSemanticDiffUnifiedFallback(t *testing.T) {
305-
tests := []struct {
306-
name string
307-
path string
308-
base string
309-
head string
310-
expectedDiff string
311-
}{
312-
{
313-
name: "unsupported extension uses unified diff",
314-
path: "main.go",
315-
base: "func main() {\n}\n",
316-
head: "func main() {\n\tfmt.Println(\"hello\")\n}\n",
317-
expectedDiff: "--- a/main.go",
318-
},
319-
{
320-
name: "no extension uses unified diff",
321-
path: "Makefile",
322-
base: "all:\n\techo hello\n",
323-
head: "all:\n\techo world\n",
324-
expectedDiff: "--- a/Makefile",
325-
},
326-
}
305+
t.Run("Go file uses structural diff", func(t *testing.T) {
306+
result := SemanticDiff("main.go", []byte("func main() {\n}\n"), []byte("func main() {\n\tfmt.Println(\"hello\")\n}\n"))
307+
assert.Equal(t, DiffFormatStructural, result.Format)
308+
assert.Contains(t, result.Diff, "function_declaration main: modified")
309+
})
327310

328-
for _, tc := range tests {
329-
t.Run(tc.name, func(t *testing.T) {
330-
result := SemanticDiff(tc.path, []byte(tc.base), []byte(tc.head))
331-
assert.Equal(t, DiffFormatUnified, result.Format)
332-
assert.Contains(t, result.Diff, tc.expectedDiff)
333-
})
334-
}
311+
t.Run("no extension uses unified diff", func(t *testing.T) {
312+
result := SemanticDiff("Makefile", []byte("all:\n\techo hello\n"), []byte("all:\n\techo world\n"))
313+
assert.Equal(t, DiffFormatUnified, result.Format)
314+
assert.Contains(t, result.Diff, "--- a/Makefile")
315+
})
335316
}
336317

337318
func TestSemanticDiffFileSizeLimit(t *testing.T) {
@@ -373,7 +354,7 @@ func TestSemanticDiffNewAndDeletedFiles(t *testing.T) {
373354

374355
t.Run("deleted Go file", func(t *testing.T) {
375356
result := SemanticDiff("main.go", []byte("package main\n"), nil)
376-
assert.Equal(t, DiffFormatUnified, result.Format)
357+
assert.Equal(t, DiffFormatStructural, result.Format)
377358
assert.Equal(t, "file deleted", result.Diff)
378359
})
379360

@@ -394,7 +375,7 @@ func TestDetectDiffFormat(t *testing.T) {
394375
{"config.yml", DiffFormatYAML},
395376
{"data.csv", DiffFormatCSV},
396377
{"config.toml", DiffFormatTOML},
397-
{"main.go", DiffFormatUnified},
378+
{"main.go", DiffFormatStructural},
398379
{"README.md", DiffFormatUnified},
399380
{"Makefile", DiffFormatUnified},
400381
}

0 commit comments

Comments
 (0)