Skip to content

Commit f548b19

Browse files
committed
feat(github): add collaborator permissions extraction
Extract repository collaborators with has_access_to edges that include the permission level (admin, maintain, push, triage, pull). This surfaces who has access to what repositories in the metadata graph.
1 parent 1e46d79 commit f548b19

3 files changed

Lines changed: 194 additions & 14 deletions

File tree

plugins/extractors/github/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ source:
2929
| :-- | :--- | :------- | :---------- |
3030
| `org` | `string` | Yes | Name of the GitHub organisation. |
3131
| `token` | `string` | Yes | GitHub API access token. |
32-
| `extract` | `[]string` | No | Entity types to extract. Defaults to all: `users`, `repositories`, `teams`, `documents`. |
32+
| `extract` | `[]string` | No | Entity types to extract. Defaults to all: `users`, `repositories`, `teams`, `documents`, `collaborators`. |
3333
| `docs.repos` | `[]string` | No | Repositories to scan for documents. Defaults to all org repos. |
3434
| `docs.paths` | `[]string` | No | Directory paths to scan within each repo. Defaults to `["docs"]`. |
3535
| `docs.pattern` | `string` | No | Glob pattern to match files. Defaults to `"*.md"`. |
@@ -100,6 +100,11 @@ The extractor emits four entity types and their relationships as edges.
100100
| `owned_by` | `repository` | `user` | Repository is owned by a user |
101101
| `member_of` | `user` | `team` | User is a member of a team |
102102
| `belongs_to` | `document` | `repository` | Document belongs to a repository |
103+
| `has_access_to` | `user` | `repository` | User has access to a repository (properties: `permission`) |
104+
105+
### Collaborator Permissions
106+
107+
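When `collaborators` is included in `extract`, the extractor lists the collaborators of each repository in the organisation and emits `has_access_to` edges with a `permission` property indicating the highest access level: `admin`, `maintain`, `push`, `triage`, or `pull`. If none of these levels is reported for a collaborator, `pull` is used. Repositories whose collaborators cannot be listed are logged and skipped.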
103108

104109
## Contributing
105110

plugins/extractors/github/github.go

Lines changed: 89 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"github.com/raystack/meteor/registry"
1515
log "github.com/raystack/salt/observability/logger"
1616
"golang.org/x/oauth2"
17+
"google.golang.org/protobuf/types/known/structpb"
1718
)
1819

1920
//go:embed README.md
@@ -36,12 +37,13 @@ var sampleConfig = `
3637
org: raystack
3738
token: github_token
3839
# extract specifies which entity types to extract.
39-
# Defaults to all: ["users", "repositories", "teams"]
40+
# Defaults to all: ["users", "repositories", "teams", "documents", "collaborators"]
4041
extract:
4142
- users
4243
- repositories
4344
- teams
4445
- documents
46+
- collaborators
4547
# docs configures document extraction (only used when "documents" is in extract).
4648
docs:
4749
# repos limits which repositories to scan. If empty, scans all org repos.
@@ -53,7 +55,7 @@ docs:
5355
pattern: "*.md"`
5456

5557
var info = plugins.Info{
56-
Description: "Extract metadata from a GitHub organisation including users, repositories, teams, and documents.",
58+
Description: "Extract metadata from a GitHub organisation including users, repositories, teams, documents, and collaborator permissions.",
5759
SampleConfig: sampleConfig,
5860
Summary: summary,
5961
Tags: []string{"platform", "extractor"},
@@ -83,10 +85,11 @@ func (e *Extractor) Init(ctx context.Context, config plugins.Config) error {
8385
e.client = gh.NewClient(tc)
8486

8587
e.extract = map[string]bool{
86-
"users": true,
87-
"repositories": true,
88-
"teams": true,
89-
"documents": true,
88+
"users": true,
89+
"repositories": true,
90+
"teams": true,
91+
"documents": true,
92+
"collaborators": true,
9093
}
9194
if len(e.config.Extract) > 0 {
9295
e.extract = make(map[string]bool, len(e.config.Extract))
@@ -124,6 +127,11 @@ func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) error {
124127
return fmt.Errorf("extract documents: %w", err)
125128
}
126129
}
130+
if e.extract["collaborators"] {
131+
if err := e.extractCollaborators(ctx, emit); err != nil {
132+
return fmt.Errorf("extract collaborators: %w", err)
133+
}
134+
}
127135
return nil
128136
}
129137

@@ -428,6 +436,81 @@ func (e *Extractor) emitDocument(ctx context.Context, emit plugins.Emit, repo *g
428436
return nil
429437
}
430438

439+
func (e *Extractor) extractCollaborators(ctx context.Context, emit plugins.Emit) error {
440+
repoOpts := &gh.RepositoryListByOrgOptions{
441+
ListOptions: gh.ListOptions{PerPage: 100},
442+
}
443+
for {
444+
repos, resp, err := e.client.Repositories.ListByOrg(ctx, e.config.Org, repoOpts)
445+
if err != nil {
446+
return fmt.Errorf("list repositories: %w", err)
447+
}
448+
449+
for _, repo := range repos {
450+
if err := e.extractRepoCollaborators(ctx, emit, repo); err != nil {
451+
e.logger.Warn("failed to extract collaborators, skipping",
452+
"repo", repo.GetFullName(), "error", err)
453+
}
454+
}
455+
456+
if resp.NextPage == 0 {
457+
break
458+
}
459+
repoOpts.Page = resp.NextPage
460+
}
461+
return nil
462+
}
463+
464+
func (e *Extractor) extractRepoCollaborators(ctx context.Context, emit plugins.Emit, repo *gh.Repository) error {
465+
repoURN := models.NewURN("github", e.UrnScope, "repository", repo.GetNodeID())
466+
opts := &gh.ListCollaboratorsOptions{
467+
ListOptions: gh.ListOptions{PerPage: 100},
468+
}
469+
470+
var edges []*meteorv1beta1.Edge
471+
for {
472+
collaborators, resp, err := e.client.Repositories.ListCollaborators(ctx, e.config.Org, repo.GetName(), opts)
473+
if err != nil {
474+
return fmt.Errorf("list collaborators for %s: %w", repo.GetName(), err)
475+
}
476+
477+
for _, collab := range collaborators {
478+
userURN := models.NewURN("github", e.UrnScope, "user", collab.GetNodeID())
479+
props, _ := structpb.NewStruct(map[string]any{
480+
"permission": resolvePermission(collab.GetPermissions()),
481+
})
482+
edges = append(edges, &meteorv1beta1.Edge{
483+
SourceUrn: userURN,
484+
TargetUrn: repoURN,
485+
Type: "has_access_to",
486+
Source: "github",
487+
Properties: props,
488+
})
489+
}
490+
491+
if resp.NextPage == 0 {
492+
break
493+
}
494+
opts.Page = resp.NextPage
495+
}
496+
497+
if len(edges) > 0 {
498+
entity := models.NewEntity(repoURN, "repository", repo.GetName(), "github", nil)
499+
emit(models.NewRecord(entity, edges...))
500+
}
501+
return nil
502+
}
503+
504+
// resolvePermission picks the strongest access level that is set in perms,
// checking admin, maintain, push and triage in that order. When none of them
// is set (including a nil or empty map) it reports "pull".
func resolvePermission(perms map[string]bool) string {
	switch {
	case perms["admin"]:
		return "admin"
	case perms["maintain"]:
		return "maintain"
	case perms["push"]:
		return "push"
	case perms["triage"]:
		return "triage"
	default:
		// Covers both an explicit pull flag and the absence of any flag.
		return "pull"
	}
}
513+
431514
func init() {
432515
if err := registry.Extractors.Register("github", func() plugins.Extractor {
433516
return New(plugins.GetLog())

plugins/extractors/github/github_test.go

Lines changed: 99 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
gh "github.com/google/go-github/v68/github"
1515
"github.com/raystack/meteor/models"
16+
meteorv1beta1 "github.com/raystack/meteor/models/raystack/meteor/v1beta1"
1617
"github.com/raystack/meteor/plugins"
1718
extractor "github.com/raystack/meteor/plugins/extractors/github"
1819
"github.com/raystack/meteor/test/mocks"
@@ -487,6 +488,76 @@ func TestExtract(t *testing.T) {
487488
assert.Contains(t, err.Error(), "extract users")
488489
})
489490

491+
t.Run("should extract collaborators with has_access_to edges", func(t *testing.T) {
492+
server := setupServer(t, serverConfig{
493+
repos: []*gh.Repository{
494+
{
495+
NodeID: strPtr("R_repo1"),
496+
Name: strPtr("meteor"),
497+
FullName: strPtr("my-org/meteor"),
498+
},
499+
},
500+
repoCollaborators: map[string][]*gh.User{
501+
"meteor": {
502+
{NodeID: strPtr("U_alice"), Login: strPtr("alice"), Permissions: map[string]bool{"admin": true, "push": true, "pull": true}},
503+
{NodeID: strPtr("U_bob"), Login: strPtr("bob"), Permissions: map[string]bool{"push": true, "pull": true}},
504+
},
505+
},
506+
})
507+
defer server.Close()
508+
509+
extr := initExtractor(t, server.URL, map[string]any{
510+
"extract": []string{"collaborators"},
511+
})
512+
513+
emitter := mocks.NewEmitter()
514+
err := extr.Extract(context.Background(), emitter.Push)
515+
require.NoError(t, err)
516+
517+
records := emitter.Get()
518+
require.Len(t, records, 1)
519+
520+
entity := records[0].Entity()
521+
assert.Equal(t, models.NewURN("github", urnScope, "repository", "R_repo1"), entity.GetUrn())
522+
assert.Equal(t, "repository", entity.GetType())
523+
524+
edges := records[0].Edges()
525+
require.Len(t, edges, 2)
526+
527+
for _, edge := range edges {
528+
assert.Equal(t, "has_access_to", edge.GetType())
529+
assert.Equal(t, models.NewURN("github", urnScope, "repository", "R_repo1"), edge.GetTargetUrn())
530+
}
531+
532+
// Check permission levels via edge properties.
533+
aliceEdge := findEdgeBySource(edges, models.NewURN("github", urnScope, "user", "U_alice"))
534+
require.NotNil(t, aliceEdge)
535+
assert.Equal(t, "admin", aliceEdge.GetProperties().AsMap()["permission"])
536+
537+
bobEdge := findEdgeBySource(edges, models.NewURN("github", urnScope, "user", "U_bob"))
538+
require.NotNil(t, bobEdge)
539+
assert.Equal(t, "push", bobEdge.GetProperties().AsMap()["permission"])
540+
})
541+
542+
t.Run("should skip collaborators for repos that fail", func(t *testing.T) {
543+
server := setupServer(t, serverConfig{
544+
repos: []*gh.Repository{
545+
{NodeID: strPtr("R_repo1"), Name: strPtr("meteor"), FullName: strPtr("my-org/meteor")},
546+
},
547+
// No repoCollaborators entry → will 404
548+
})
549+
defer server.Close()
550+
551+
extr := initExtractor(t, server.URL, map[string]any{
552+
"extract": []string{"collaborators"},
553+
})
554+
555+
emitter := mocks.NewEmitter()
556+
err := extr.Extract(context.Background(), emitter.Push)
557+
require.NoError(t, err)
558+
assert.Empty(t, emitter.Get())
559+
})
560+
490561
t.Run("should extract repos without owner edge when owner is nil", func(t *testing.T) {
491562
server := setupServer(t, serverConfig{
492563
repos: []*gh.Repository{
@@ -537,12 +608,13 @@ func initExtractor(t *testing.T, serverURL string, extraConfig map[string]any) *
537608
}
538609

539610
type serverConfig struct {
540-
members []*gh.User
541-
userDetails map[string]*gh.User
542-
repos []*gh.Repository
543-
teams []*gh.Team
544-
teamMembers map[string][]*gh.User
545-
repoContents map[string]map[string]any // key: "repo/path" -> {"type":"dir","entries":[]} or {"type":"file","file":*RepositoryContent}
611+
members []*gh.User
612+
userDetails map[string]*gh.User
613+
repos []*gh.Repository
614+
teams []*gh.Team
615+
teamMembers map[string][]*gh.User
616+
repoContents map[string]map[string]any // key: "repo/path" -> {"type":"dir","entries":[]} or {"type":"file","file":*RepositoryContent}
617+
repoCollaborators map[string][]*gh.User // key: repo name -> collaborators
546618
}
547619

548620
func setupServer(t *testing.T, cfg serverConfig) *httptest.Server {
@@ -580,7 +652,7 @@ func setupServer(t *testing.T, cfg serverConfig) *httptest.Server {
580652
writeJSON(w, []*gh.User{})
581653
}
582654
})
583-
// Individual repo endpoint for docs.repos config.
655+
// Individual repo endpoint for docs.repos config, contents, and collaborators.
584656
mux.HandleFunc("/api/v3/repos/my-org/", func(w http.ResponseWriter, r *http.Request) {
585657
urlPath := r.URL.Path
586658
const prefix = "/api/v3/repos/my-org/"
@@ -603,6 +675,17 @@ func setupServer(t *testing.T, cfg serverConfig) *httptest.Server {
603675
return
604676
}
605677

678+
// Check if this is a collaborators request: "{repo}/collaborators"
679+
if strings.HasSuffix(rest, "/collaborators") {
680+
repoName := rest[:len(rest)-len("/collaborators")]
681+
if collabs, ok := cfg.repoCollaborators[repoName]; ok {
682+
writeJSON(w, collabs)
683+
} else {
684+
w.WriteHeader(http.StatusNotFound)
685+
}
686+
return
687+
}
688+
606689
// Otherwise it's a repo get: "{repo}"
607690
repoName := rest
608691
for _, repo := range cfg.repos {
@@ -668,3 +751,12 @@ func intPtr(i int) *int { return &i }
668751

669752
func indexOf(s, substr string) int { return strings.Index(s, substr) }
670753

754+
func findEdgeBySource(edges []*meteorv1beta1.Edge, sourceURN string) *meteorv1beta1.Edge {
755+
for _, e := range edges {
756+
if e.GetSourceUrn() == sourceURN {
757+
return e
758+
}
759+
}
760+
return nil
761+
}
762+

0 commit comments

Comments
 (0)