Skip to content

Commit 6a37b18

Browse files
committed
Prevent stale issue cleanup from crossing repository boundaries
The previous missing-issue cleanup only keyed deletes by connection and issue identifiers, which could reach too broadly when an issue was transferred between repositories under the same DevLake connection. This change scopes cleanup to the source repository by carrying RepoId through the refresh path and requiring the source RawDataOrigin parameters before deleting stale tool rows. Constraint: Keep the fix inside the existing github_graphql collector path without schema changes Rejected: Continue deleting by connection_id + github_id only | could remove destination-repository rows after transfers Rejected: Disable stale cleanup for all missing issues | would leave deleted-issue failures recurring Confidence: medium Scope-risk: moderate Directive: Preserve source-scope guards if future cleanup paths touch transferred issues again Tested: go test ./plugins/github_graphql/tasks; go build ./helpers/pluginhelper/api ./plugins/github_graphql/tasks Not-tested: End-to-end transfer scenario against a live GitHub repository
1 parent c8b8116 commit 6a37b18

2 files changed

Lines changed: 154 additions & 15 deletions

File tree

backend/plugins/github_graphql/tasks/issue_collector.go

Lines changed: 87 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ type GraphqlQueryIssue struct {
9090

9191
type missingGithubIssueRef struct {
9292
ConnectionId uint64
93+
RepoId int
94+
GithubId int
95+
Number int
96+
RawDataOrigin common.RawDataOrigin
97+
}
98+
99+
type missingGithubIssueCleanupScope struct {
100+
ConnectionId uint64
101+
RepoId int
93102
GithubId int
94103
Number int
95104
RawDataOrigin common.RawDataOrigin
@@ -195,6 +204,7 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
195204
issueUpdatedAt[inputIssue.Number] = inputIssue.GithubUpdatedAt
196205
query.requestedIssues[inputIssue.Number] = missingGithubIssueRef{
197206
ConnectionId: inputIssue.ConnectionId,
207+
RepoId: inputIssue.RepoId,
198208
GithubId: inputIssue.GithubId,
199209
Number: inputIssue.Number,
200210
RawDataOrigin: inputIssue.RawDataOrigin,
@@ -261,18 +271,82 @@ func findMissingGithubIssues(requestedIssues map[int]missingGithubIssueRef, reso
261271
func cleanupMissingGithubIssues(db dal.Dal, logger log.Logger, issues []missingGithubIssueRef) errors.Error {
262272
var allErrors []error
263273
for _, issue := range issues {
264-
logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, deleting stale local data", issue.Number)
265-
err := cleanupMissingGithubIssue(db, issue)
274+
scope, ok := buildMissingGithubIssueCleanupScope(issue)
275+
if !ok {
276+
logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, but source scope is incomplete so stale cleanup is skipped", issue.Number)
277+
continue
278+
}
279+
logger.Warn(nil, "GitHub issue #%d no longer resolves from the source API, deleting stale local data for the current repository scope", issue.Number)
280+
err := cleanupMissingGithubIssue(db, scope)
266281
if err != nil {
267282
allErrors = append(allErrors, err)
268283
}
269284
}
270285
return errors.Default.Combine(allErrors)
271286
}
272287

273-
func cleanupMissingGithubIssue(db dal.Dal, issue missingGithubIssueRef) errors.Error {
288+
func buildMissingGithubIssueCleanupScope(issue missingGithubIssueRef) (*missingGithubIssueCleanupScope, bool) {
289+
if issue.ConnectionId == 0 || issue.RepoId == 0 || issue.GithubId == 0 || issue.RawDataOrigin.RawDataTable == "" || issue.RawDataOrigin.RawDataParams == "" {
290+
return nil, false
291+
}
292+
return &missingGithubIssueCleanupScope{
293+
ConnectionId: issue.ConnectionId,
294+
RepoId: issue.RepoId,
295+
GithubId: issue.GithubId,
296+
Number: issue.Number,
297+
RawDataOrigin: issue.RawDataOrigin,
298+
}, true
299+
}
300+
301+
func (scope *missingGithubIssueCleanupScope) issueScopedClauses() []dal.Clause {
302+
return []dal.Clause{
303+
dal.Where(
304+
"connection_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
305+
scope.ConnectionId,
306+
scope.GithubId,
307+
scope.RawDataOrigin.RawDataTable,
308+
scope.RawDataOrigin.RawDataParams,
309+
),
310+
}
311+
}
312+
313+
func (scope *missingGithubIssueCleanupScope) assigneeScopedClauses() []dal.Clause {
314+
return []dal.Clause{
315+
dal.Where(
316+
"connection_id = ? AND repo_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
317+
scope.ConnectionId,
318+
scope.RepoId,
319+
scope.GithubId,
320+
scope.RawDataOrigin.RawDataTable,
321+
scope.RawDataOrigin.RawDataParams,
322+
),
323+
}
324+
}
325+
326+
func (scope *missingGithubIssueCleanupScope) githubIssueScopedClauses() []dal.Clause {
327+
return []dal.Clause{
328+
dal.Where(
329+
"connection_id = ? AND repo_id = ? AND github_id = ? AND _raw_data_table = ? AND _raw_data_params = ?",
330+
scope.ConnectionId,
331+
scope.RepoId,
332+
scope.GithubId,
333+
scope.RawDataOrigin.RawDataTable,
334+
scope.RawDataOrigin.RawDataParams,
335+
),
336+
}
337+
}
338+
339+
func (scope *missingGithubIssueCleanupScope) rawDataScopedClauses() []dal.Clause {
340+
if scope.RawDataOrigin.RawDataId == 0 {
341+
return nil
342+
}
343+
return []dal.Clause{dal.Where("id = ?", scope.RawDataOrigin.RawDataId)}
344+
}
345+
346+
func cleanupMissingGithubIssue(db dal.Dal, scope *missingGithubIssueCleanupScope) errors.Error {
274347
deleteByIssueId := func(model any, table string) errors.Error {
275-
err := db.Delete(model, dal.From(table), dal.Where("connection_id = ? AND issue_id = ?", issue.ConnectionId, issue.GithubId))
348+
clauses := append([]dal.Clause{dal.From(table)}, scope.issueScopedClauses()...)
349+
err := db.Delete(model, clauses...)
276350
if err != nil {
277351
return errors.Default.Wrap(err, "failed to delete stale github issue data from "+table)
278352
}
@@ -291,31 +365,31 @@ func cleanupMissingGithubIssue(db dal.Dal, issue missingGithubIssueRef) errors.E
291365
if err != nil {
292366
return err
293367
}
294-
err = deleteByIssueId(&models.GithubIssueAssignee{}, models.GithubIssueAssignee{}.TableName())
368+
err = db.Delete(
369+
&models.GithubIssueAssignee{},
370+
append([]dal.Clause{dal.From(models.GithubIssueAssignee{}.TableName())}, scope.assigneeScopedClauses()...)...,
371+
)
295372
if err != nil {
296-
return err
373+
return errors.Default.Wrap(err, "failed to delete stale github issue assignees")
297374
}
298375
err = db.Delete(
299376
&models.GithubPrIssue{},
300-
dal.From(models.GithubPrIssue{}.TableName()),
301-
dal.Where("connection_id = ? AND issue_id = ?", issue.ConnectionId, issue.GithubId),
377+
append([]dal.Clause{dal.From(models.GithubPrIssue{}.TableName())}, scope.issueScopedClauses()...)...,
302378
)
303379
if err != nil {
304380
return errors.Default.Wrap(err, "failed to delete stale github pull request issue links")
305381
}
306382
err = db.Delete(
307383
&models.GithubIssue{},
308-
dal.From(models.GithubIssue{}.TableName()),
309-
dal.Where("connection_id = ? AND github_id = ?", issue.ConnectionId, issue.GithubId),
384+
append([]dal.Clause{dal.From(models.GithubIssue{}.TableName())}, scope.githubIssueScopedClauses()...)...,
310385
)
311386
if err != nil {
312387
return errors.Default.Wrap(err, "failed to delete stale github issue")
313388
}
314-
if issue.RawDataOrigin.RawDataTable != "" && issue.RawDataOrigin.RawDataId != 0 {
389+
if rawDataClauses := scope.rawDataScopedClauses(); len(rawDataClauses) > 0 {
315390
err = db.Delete(
316391
&api.RawData{},
317-
dal.From(issue.RawDataOrigin.RawDataTable),
318-
dal.Where("id = ?", issue.RawDataOrigin.RawDataId),
392+
append([]dal.Clause{dal.From(scope.RawDataOrigin.RawDataTable)}, rawDataClauses...)...,
319393
)
320394
if err != nil {
321395
return errors.Default.Wrap(err, "failed to delete stale raw github issue")

backend/plugins/github_graphql/tasks/issue_collector_test.go

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package tasks
2020
import (
2121
"testing"
2222

23+
"github.com/apache/incubator-devlake/core/dal"
2324
"github.com/apache/incubator-devlake/core/models/common"
2425
"github.com/stretchr/testify/assert"
2526
)
@@ -28,15 +29,18 @@ func TestFindMissingGithubIssues(t *testing.T) {
2829
requestedIssues := map[int]missingGithubIssueRef{
2930
17: {
3031
ConnectionId: 1,
32+
RepoId: 101,
3133
GithubId: 1700,
3234
Number: 17,
3335
RawDataOrigin: common.RawDataOrigin{
34-
RawDataTable: "_raw_github_graphql_issues",
35-
RawDataId: 10,
36+
RawDataTable: "_raw_github_graphql_issues",
37+
RawDataParams: "{\"connectionId\":1,\"name\":\"repo-a\"}",
38+
RawDataId: 10,
3639
},
3740
},
3841
18: {
3942
ConnectionId: 1,
43+
RepoId: 101,
4044
GithubId: 1800,
4145
Number: 18,
4246
},
@@ -51,6 +55,7 @@ func TestFindMissingGithubIssues(t *testing.T) {
5155
if assert.Len(t, missingIssues, 1) {
5256
assert.Equal(t, 17, missingIssues[0].Number)
5357
assert.Equal(t, 1700, missingIssues[0].GithubId)
58+
assert.Equal(t, 101, missingIssues[0].RepoId)
5459
assert.Equal(t, uint64(10), missingIssues[0].RawDataOrigin.RawDataId)
5560
}
5661
}
@@ -72,3 +77,63 @@ func TestFindMissingGithubIssuesSkipsZeroValueResponses(t *testing.T) {
7277
assert.Equal(t, 17, missingIssues[0].Number)
7378
}
7479
}
80+
81+
func TestBuildMissingGithubIssueCleanupScopeRequiresSourceRawDataParams(t *testing.T) {
82+
scope, ok := buildMissingGithubIssueCleanupScope(missingGithubIssueRef{
83+
ConnectionId: 1,
84+
RepoId: 101,
85+
GithubId: 1700,
86+
Number: 17,
87+
RawDataOrigin: common.RawDataOrigin{
88+
RawDataTable: "_raw_github_graphql_issues",
89+
},
90+
})
91+
92+
assert.False(t, ok)
93+
assert.Nil(t, scope)
94+
}
95+
96+
func TestBuildMissingGithubIssueCleanupScopeBuildsRepoScopedClauses(t *testing.T) {
97+
scope, ok := buildMissingGithubIssueCleanupScope(missingGithubIssueRef{
98+
ConnectionId: 1,
99+
RepoId: 101,
100+
GithubId: 1700,
101+
Number: 17,
102+
RawDataOrigin: common.RawDataOrigin{
103+
RawDataTable: "_raw_github_graphql_issues",
104+
RawDataParams: "{\"connectionId\":1,\"name\":\"repo-a\"}",
105+
RawDataId: 10,
106+
},
107+
})
108+
if !assert.True(t, ok) {
109+
return
110+
}
111+
112+
issueClause := scope.issueScopedClauses()[0]
113+
issueWhere, ok := issueClause.Data.(dal.DalClause)
114+
if assert.True(t, ok) {
115+
assert.Equal(t, "connection_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?", issueWhere.Expr)
116+
assert.Equal(t, []interface{}{uint64(1), 1700, "_raw_github_graphql_issues", "{\"connectionId\":1,\"name\":\"repo-a\"}"}, issueWhere.Params)
117+
}
118+
119+
assigneeClause := scope.assigneeScopedClauses()[0]
120+
assigneeWhere, ok := assigneeClause.Data.(dal.DalClause)
121+
if assert.True(t, ok) {
122+
assert.Equal(t, "connection_id = ? AND repo_id = ? AND issue_id = ? AND _raw_data_table = ? AND _raw_data_params = ?", assigneeWhere.Expr)
123+
assert.Equal(t, []interface{}{uint64(1), 101, 1700, "_raw_github_graphql_issues", "{\"connectionId\":1,\"name\":\"repo-a\"}"}, assigneeWhere.Params)
124+
}
125+
126+
githubIssueClause := scope.githubIssueScopedClauses()[0]
127+
githubIssueWhere, ok := githubIssueClause.Data.(dal.DalClause)
128+
if assert.True(t, ok) {
129+
assert.Equal(t, "connection_id = ? AND repo_id = ? AND github_id = ? AND _raw_data_table = ? AND _raw_data_params = ?", githubIssueWhere.Expr)
130+
assert.Equal(t, []interface{}{uint64(1), 101, 1700, "_raw_github_graphql_issues", "{\"connectionId\":1,\"name\":\"repo-a\"}"}, githubIssueWhere.Params)
131+
}
132+
133+
rawDataClause := scope.rawDataScopedClauses()[0]
134+
rawDataWhere, ok := rawDataClause.Data.(dal.DalClause)
135+
if assert.True(t, ok) {
136+
assert.Equal(t, "id = ?", rawDataWhere.Expr)
137+
assert.Equal(t, []interface{}{uint64(10)}, rawDataWhere.Params)
138+
}
139+
}

0 commit comments

Comments
 (0)