Skip to content

Commit cddd8d5

Browse files
committed
Implemented download and repo traversal based on single zip download
1 parent 2708d8e commit cddd8d5

2 files changed

Lines changed: 109 additions & 139 deletions

File tree

services/scan_service.go

Lines changed: 32 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"time"
3030

3131
"github.com/CycloneDX/cyclonedx-go"
32-
"github.com/google/go-github/v62/github"
3332
"github.com/google/uuid"
3433
"github.com/l3montree-dev/devguard/database/models"
3534
databasetypes "github.com/l3montree-dev/devguard/database/types"
@@ -67,14 +66,7 @@ type scanService struct {
6766
utils.FireAndForgetSynchronizer
6867
}
6968

70-
var newGitHubClient = func() *github.Client {
71-
return github.NewClient(&http.Client{
72-
Transport: utils.EgressTransport,
73-
Timeout: 10 * time.Minute,
74-
})
75-
}
76-
77-
var downloadRawFileFn = DownloadRawFile
69+
var downloadRawFileFn = DownloadGithubRepoAsZip
7870

7971
var _ shared.ScanService = (*scanService)(nil)
8072

@@ -878,57 +870,51 @@ func (s *scanService) ScanSBOMWithoutSaving(ctx context.Context, bom *cyclonedx.
878870
}, nil
879871
}
880872

881-
func (s *scanService) FetchOpenVexFromGitHub(ctx context.Context, targetURL string) (vexReports []*normalize.VexReportOpenVEX, err error) {
882-
client := newGitHubClient()
873+
func (s *scanService) FetchOpenVexFromGitHub(ctx context.Context, targetURL string, targetBranch string) (vexReports []*normalize.VexReportOpenVEX, err error) {
883874
owner, repo, err := ParseGitHubURL(targetURL)
884875
if err != nil {
885876
return nil, err
886877
}
887878

888879
// Use the caller-supplied branch; fall back to "main" when none is given
889-
repository, _, err := client.Repositories.Get(ctx, owner, repo)
890-
if err != nil {
891-
return nil, err
892-
}
893-
branch := repository.GetDefaultBranch()
880+
branch := targetBranch
894881
if branch == "" {
895882
branch = "main"
896883
}
897884

898-
tree, _, err := client.Git.GetTree(
899-
ctx,
900-
owner,
901-
repo,
902-
branch,
903-
true, // recursive
904-
)
885+
resp, err := downloadRawFileFn(ctx, owner, repo, branch)
905886
if err != nil {
906-
907887
return nil, err
908888
}
909-
for _, entry := range tree.Entries {
910-
if entry.GetType() != "blob" {
889+
890+
repoZip, err := utils.ZipReaderFromResponse(resp)
891+
if err != nil {
892+
return nil, fmt.Errorf("could not read obtained zip: %w", err)
893+
}
894+
895+
for _, fileEntry := range repoZip.File {
896+
if fileEntry.FileInfo().IsDir() {
911897
continue
912898
}
913-
filePath := entry.GetPath()
914-
filename := strings.ToLower(path.Base(filePath))
899+
filename := strings.ToLower(path.Base(fileEntry.Name))
915900
if !strings.HasSuffix(filename, ".json") {
916901
continue
917902
}
918903

919-
content, err := downloadRawFileFn(
920-
ctx,
921-
owner,
922-
repo,
923-
branch,
924-
filePath,
925-
)
904+
fileRead, err := fileEntry.Open()
905+
if err != nil {
906+
slog.Info("openvex document could not be opened, skipping this file for parsing", "filename", fileEntry.Name, "err", err)
907+
continue
908+
}
909+
data, err := io.ReadAll(fileRead)
910+
fileRead.Close()
926911
if err != nil {
927-
slog.Info("download of openVEX failed", "err", err)
912+
slog.Info("openvex document could not be opened, skipping this file for parsing", "filename", fileEntry.Name, "err", err)
928913
continue
929914
}
915+
930916
var openVEX ov.VEX
931-
err = json.Unmarshal(content, &openVEX)
917+
err = json.Unmarshal(data, &openVEX)
932918
if err != nil {
933919
slog.Info("could not unmarshal openVEX failed", "err", err)
934920
continue
@@ -948,42 +934,38 @@ func ParseGitHubURL(rawURL string) (owner string, repo string, err error) {
948934
return "", "", err
949935
}
950936
const githubDomain = "github.com"
937+
const gitSuffix = ".git"
938+
const trailingSlashSuffix = "/"
951939
if u.Host != githubDomain {
952940
return "", "", fmt.Errorf("invalid github repository url")
953941
}
954-
parts := strings.Split(strings.Trim(u.Path, "/"), "/")
942+
parts := strings.Split(strings.TrimSuffix(strings.Trim(u.Path, trailingSlashSuffix), gitSuffix), "/")
955943
if len(parts) < 2 {
956944
return "", "", fmt.Errorf("invalid github repository url path: expected /{owner}/{repo}, got %q", u.Path)
957945
}
958946
owner = parts[0]
959-
repo = strings.TrimSuffix(parts[1], ".git")
947+
repo = parts[1]
960948
if owner == "" || repo == "" {
961949
return "", "", fmt.Errorf("invalid github repository url path: expected non-empty owner and repo, got %q", u.Path)
962950
}
963951
return owner, repo, nil
964952
}
965953

966-
func DownloadRawFile(ctx context.Context, owner, repo, branch, filePath string) ([]byte, error) {
967-
968-
rawURL := fmt.Sprintf(
969-
"https://raw.githubusercontent.com/%s/%s/%s/%s",
954+
func DownloadGithubRepoAsZip(ctx context.Context, owner, repo, branch string) (*http.Response, error) {
955+
url := fmt.Sprintf(
956+
"https://github.com/%s/%s/archive/refs/heads/%s.zip",
970957
owner,
971958
repo,
972959
branch,
973-
filePath,
974960
)
975-
resp, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil)
961+
resp, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
976962
if err != nil {
977963
return nil, err
978964
}
979965
defer resp.Body.Close()
980966
switch resp.Response.StatusCode {
981967
case http.StatusOK:
982-
file, err := io.ReadAll(resp.Body)
983-
if err != nil {
984-
return nil, fmt.Errorf("401 Unauthorized")
985-
}
986-
return file, nil
968+
return resp.Response, nil
987969
case http.StatusNotFound:
988970
return nil, fmt.Errorf("404 Source not found")
989971
case http.StatusUnauthorized:

services/scan_service_test.go

Lines changed: 77 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@
1515
package services
1616

1717
import (
18+
"archive/zip"
19+
"bytes"
1820
"context"
1921
"encoding/json"
22+
"io"
2023
"net/http"
2124
"net/http/httptest"
22-
"net/url"
25+
"sort"
2326
"testing"
2427
"time"
2528

26-
"github.com/google/go-github/v62/github"
2729
"github.com/google/uuid"
2830
"github.com/l3montree-dev/devguard/database/models"
2931
"github.com/l3montree-dev/devguard/dtos"
@@ -297,62 +299,67 @@ func TestFetchSbomsFromUpstream_PassesURLNotRef(t *testing.T) {
297299
}
298300

299301
func TestFetchOpenVexFromGitHub(t *testing.T) {
300-
originalNewGitHubClient := newGitHubClient
301302
originalDownloadRawFileFn := downloadRawFileFn
302303
t.Cleanup(func() {
303-
newGitHubClient = originalNewGitHubClient
304304
downloadRawFileFn = originalDownloadRawFileFn
305305
})
306306

307-
t.Run("should fetch openvex reports from json files in the repository", func(t *testing.T) {
308-
mockGitHub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
309-
switch {
310-
case r.Method == http.MethodGet && r.URL.Path == "/repos/octo-org/openvex-repo":
311-
_, _ = w.Write([]byte(`{"default_branch":"main"}`))
312-
case r.Method == http.MethodGet && r.URL.Path == "/repos/octo-org/openvex-repo/git/trees/main":
313-
if got := r.URL.Query().Get("recursive"); got != "1" {
314-
t.Fatalf("expected recursive=1, got %q", got)
315-
}
316-
_, _ = w.Write([]byte(`{"tree":[{"path":"reports/openvex.json","type":"blob"},{"path":"README.md","type":"blob"}]}`))
317-
default:
318-
t.Fatalf("unexpected github api request: %s %s", r.Method, r.URL.String())
319-
}
320-
}))
321-
defer mockGitHub.Close()
307+
newZipResponse := func(t *testing.T, files map[string]string) *http.Response {
308+
t.Helper()
322309

323-
newGitHubClient = func() *github.Client {
324-
client := github.NewClient(mockGitHub.Client())
325-
baseURL, err := url.Parse(mockGitHub.URL + "/")
310+
var buf bytes.Buffer
311+
zw := zip.NewWriter(&buf)
312+
paths := make([]string, 0, len(files))
313+
for filePath := range files {
314+
paths = append(paths, filePath)
315+
}
316+
sort.Strings(paths)
317+
for _, filePath := range paths {
318+
content := files[filePath]
319+
entry, err := zw.Create(filePath)
326320
if err != nil {
327-
t.Fatalf("failed to parse mock github url: %v", err)
321+
t.Fatalf("failed to create zip entry %s: %v", filePath, err)
322+
}
323+
if _, err := entry.Write([]byte(content)); err != nil {
324+
t.Fatalf("failed to write zip entry %s: %v", filePath, err)
328325
}
329-
client.BaseURL = baseURL
330-
client.UploadURL = baseURL
331-
return client
326+
}
327+
if err := zw.Close(); err != nil {
328+
t.Fatalf("failed to close zip writer: %v", err)
332329
}
333330

331+
return &http.Response{
332+
StatusCode: http.StatusOK,
333+
Status: "200 OK",
334+
Header: make(http.Header),
335+
Body: io.NopCloser(bytes.NewReader(buf.Bytes())),
336+
}
337+
}
338+
339+
t.Run("should fetch openvex reports from json files in the repository", func(t *testing.T) {
334340
calls := 0
335-
downloadRawFileFn = func(ctx context.Context, owner, repo, branch, filePath string) ([]byte, error) {
341+
downloadRawFileFn = func(ctx context.Context, owner, repo, branch string) (*http.Response, error) {
336342
calls++
337343
assert.Equal(t, "octo-org", owner)
338344
assert.Equal(t, "openvex-repo", repo)
339345
assert.Equal(t, "main", branch)
340-
assert.Equal(t, "reports/openvex.json", filePath)
341346

342347
ts := time.Date(2026, time.May, 20, 12, 0, 0, 0, time.UTC)
343-
payload := map[string]any{
344-
"@context": "https://openvex.dev/ns/v0.2.0",
345-
"@id": "openvex-1",
346-
"author": "test-author",
347-
"timestamp": ts,
348-
"version": 1,
349-
"statements": []any{},
350-
}
351-
return json.Marshal(payload)
348+
return newZipResponse(t, map[string]string{
349+
"reports/openvex.json": mustMarshalJSON(t, map[string]any{
350+
"@context": "https://openvex.dev/ns/v0.2.0",
351+
"@id": "openvex-1",
352+
"author": "test-author",
353+
"timestamp": ts,
354+
"version": 1,
355+
"statements": []any{},
356+
}),
357+
"README.md": "# ignore me",
358+
}), nil
352359
}
353360

354361
service := &scanService{}
355-
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://github.com/octo-org/openvex-repo")
362+
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://github.com/octo-org/openvex-repo", "")
356363
assert.NoError(t, err)
357364
assert.Len(t, reports, 1)
358365
assert.Equal(t, "https://github.com/octo-org/openvex-repo", reports[0].Source)
@@ -363,65 +370,37 @@ func TestFetchOpenVexFromGitHub(t *testing.T) {
363370
})
364371

365372
t.Run("should fetch multiple openvex reports from multiple json files", func(t *testing.T) {
366-
mockGitHub := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
367-
switch {
368-
case r.Method == http.MethodGet && r.URL.Path == "/repos/octo-org/multi-vex-repo":
369-
_, _ = w.Write([]byte(`{"default_branch":"develop"}`))
370-
case r.Method == http.MethodGet && r.URL.Path == "/repos/octo-org/multi-vex-repo/git/trees/develop":
371-
if got := r.URL.Query().Get("recursive"); got != "1" {
372-
t.Fatalf("expected recursive=1, got %q", got)
373-
}
374-
_, _ = w.Write([]byte(`{"tree":[{"path":"vex/vex1.json","type":"blob"},{"path":"vex/vex2.json","type":"blob"},{"path":"README.md","type":"blob"}]}`))
375-
default:
376-
t.Fatalf("unexpected github api request: %s %s", r.Method, r.URL.String())
377-
}
378-
}))
379-
defer mockGitHub.Close()
380-
381-
newGitHubClient = func() *github.Client {
382-
client := github.NewClient(mockGitHub.Client())
383-
baseURL, err := url.Parse(mockGitHub.URL + "/")
384-
if err != nil {
385-
t.Fatalf("failed to parse mock github url: %v", err)
386-
}
387-
client.BaseURL = baseURL
388-
client.UploadURL = baseURL
389-
return client
390-
}
391-
392373
calls := 0
393-
downloadRawFileFn = func(ctx context.Context, owner, repo, branch, filePath string) ([]byte, error) {
374+
downloadRawFileFn = func(ctx context.Context, owner, repo, branch string) (*http.Response, error) {
394375
calls++
395376
assert.Equal(t, "octo-org", owner)
396377
assert.Equal(t, "multi-vex-repo", repo)
397378
assert.Equal(t, "develop", branch)
398379

399380
ts := time.Date(2026, time.May, 20, 12, 0, 0, 0, time.UTC)
400-
var id, author string
401-
switch filePath {
402-
case "vex/vex1.json":
403-
id = "openvex-first"
404-
author = "author-one"
405-
case "vex/vex2.json":
406-
id = "openvex-second"
407-
author = "author-two"
408-
default:
409-
t.Fatalf("unexpected file path: %s", filePath)
410-
}
411-
412-
payload := map[string]any{
413-
"@context": "https://openvex.dev/ns/v0.2.0",
414-
"@id": id,
415-
"author": author,
416-
"timestamp": ts,
417-
"version": 1,
418-
"statements": []any{},
419-
}
420-
return json.Marshal(payload)
381+
return newZipResponse(t, map[string]string{
382+
"vex/vex1.json": mustMarshalJSON(t, map[string]any{
383+
"@context": "https://openvex.dev/ns/v0.2.0",
384+
"@id": "openvex-first",
385+
"author": "author-one",
386+
"timestamp": ts,
387+
"version": 1,
388+
"statements": []any{},
389+
}),
390+
"vex/vex2.json": mustMarshalJSON(t, map[string]any{
391+
"@context": "https://openvex.dev/ns/v0.2.0",
392+
"@id": "openvex-second",
393+
"author": "author-two",
394+
"timestamp": ts,
395+
"version": 1,
396+
"statements": []any{},
397+
}),
398+
"README.md": "# ignore me",
399+
}), nil
421400
}
422401

423402
service := &scanService{}
424-
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://github.com/octo-org/multi-vex-repo")
403+
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://github.com/octo-org/multi-vex-repo", "develop")
425404
assert.NoError(t, err)
426405
assert.Len(t, reports, 2)
427406
assert.Equal(t, "https://github.com/octo-org/multi-vex-repo", reports[0].Source)
@@ -430,14 +409,23 @@ func TestFetchOpenVexFromGitHub(t *testing.T) {
430409
assert.Equal(t, "openvex-second", reports[1].Report.ID)
431410
assert.Equal(t, "author-one", reports[0].Report.Author)
432411
assert.Equal(t, "author-two", reports[1].Report.Author)
433-
assert.Equal(t, 2, calls)
412+
assert.Equal(t, 1, calls)
434413
})
435414

436415
t.Run("should reject non github urls", func(t *testing.T) {
437416
service := &scanService{}
438-
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://example.com/repo")
417+
reports, err := service.FetchOpenVexFromGitHub(context.Background(), "https://example.com/repo", "")
439418
assert.Error(t, err)
440419
assert.Nil(t, reports)
441420
assert.Contains(t, err.Error(), "invalid github repository url")
442421
})
443422
}
423+
424+
func mustMarshalJSON(t *testing.T, value any) string {
425+
t.Helper()
426+
data, err := json.Marshal(value)
427+
if err != nil {
428+
t.Fatalf("failed to marshal json: %v", err)
429+
}
430+
return string(data)
431+
}

0 commit comments

Comments
 (0)