@@ -20,11 +20,14 @@ package tasks
2020import (
2121 "encoding/json"
2222 "reflect"
23+ "sort"
2324 "strings"
2425 "time"
2526
2627 "github.com/apache/incubator-devlake/core/dal"
2728 "github.com/apache/incubator-devlake/core/errors"
29+ "github.com/apache/incubator-devlake/core/log"
30+ "github.com/apache/incubator-devlake/core/models/common"
2831 "github.com/apache/incubator-devlake/core/plugin"
2932 "github.com/apache/incubator-devlake/core/utils"
3033 "github.com/apache/incubator-devlake/helpers/pluginhelper/api"
@@ -49,7 +52,8 @@ type GraphqlQueryIssueWrapper struct {
4952}
5053
5154type GraphqlQueryIssueDetailWrapper struct {
52- RateLimit struct {
55+ requestedIssues map [int ]missingGithubIssueRef
56+ RateLimit struct {
5357 Cost int
5458 }
5559 Repository struct {
@@ -84,6 +88,13 @@ type GraphqlQueryIssue struct {
8488 } `graphql:"labels(first: 100)"`
8589}
8690
91+ type missingGithubIssueRef struct {
92+ ConnectionId uint64
93+ GithubId int
94+ Number int
95+ RawDataOrigin common.RawDataOrigin
96+ }
97+
8798var CollectIssuesMeta = plugin.SubTaskMeta {
8899 Name : "Collect Issues" ,
89100 EntryPoint : CollectIssues ,
@@ -175,12 +186,19 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
175186 ownerName := strings .Split (data .Options .Name , "/" )
176187 inputIssues := reqData .Input .([]interface {})
177188 outputIssues := []map [string ]interface {}{}
189+ query .requestedIssues = make (map [int ]missingGithubIssueRef , len (inputIssues ))
178190 for _ , i := range inputIssues {
179191 inputIssue := i .(* models.GithubIssue )
180192 outputIssues = append (outputIssues , map [string ]interface {}{
181193 `number` : graphql .Int (inputIssue .Number ),
182194 })
183195 issueUpdatedAt [inputIssue .Number ] = inputIssue .GithubUpdatedAt
196+ query .requestedIssues [inputIssue .Number ] = missingGithubIssueRef {
197+ ConnectionId : inputIssue .ConnectionId ,
198+ GithubId : inputIssue .GithubId ,
199+ Number : inputIssue .Number ,
200+ RawDataOrigin : inputIssue .RawDataOrigin ,
201+ }
184202 }
185203 variables := map [string ]interface {}{
186204 "issue" : outputIssues ,
@@ -193,10 +211,17 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
193211 query := queryWrapper .(* GraphqlQueryIssueDetailWrapper )
194212 issues := query .Repository .Issues
195213 for _ , rawL := range issues {
214+ if rawL .DatabaseId == 0 || rawL .Number == 0 {
215+ continue
216+ }
196217 if rawL .UpdatedAt .After (issueUpdatedAt [rawL .Number ]) {
197218 messages = append (messages , errors .Must1 (json .Marshal (rawL )))
198219 }
199220 }
221+ missingIssues := findMissingGithubIssues (query .requestedIssues , issues )
222+ if len (missingIssues ) > 0 {
223+ err = cleanupMissingGithubIssues (db , taskCtx .GetLogger (), missingIssues )
224+ }
200225 return
201226 },
202227 })
@@ -206,3 +231,95 @@ func CollectIssues(taskCtx plugin.SubTaskContext) errors.Error {
206231
207232 return apiCollector .Execute ()
208233}
234+
235+ func findMissingGithubIssues (requestedIssues map [int ]missingGithubIssueRef , resolvedIssues []GraphqlQueryIssue ) []missingGithubIssueRef {
236+ if len (requestedIssues ) == 0 {
237+ return nil
238+ }
239+
240+ resolvedNumbers := make (map [int ]struct {}, len (resolvedIssues ))
241+ for _ , issue := range resolvedIssues {
242+ if issue .DatabaseId == 0 || issue .Number == 0 {
243+ continue
244+ }
245+ resolvedNumbers [issue .Number ] = struct {}{}
246+ }
247+
248+ missingIssues := make ([]missingGithubIssueRef , 0 )
249+ for number , issue := range requestedIssues {
250+ if _ , ok := resolvedNumbers [number ]; ok {
251+ continue
252+ }
253+ missingIssues = append (missingIssues , issue )
254+ }
255+ sort .Slice (missingIssues , func (i , j int ) bool {
256+ return missingIssues [i ].Number < missingIssues [j ].Number
257+ })
258+ return missingIssues
259+ }
260+
261+ func cleanupMissingGithubIssues (db dal.Dal , logger log.Logger , issues []missingGithubIssueRef ) errors.Error {
262+ var allErrors []error
263+ for _ , issue := range issues {
264+ logger .Warn (nil , "GitHub issue #%d no longer resolves from the source API, deleting stale local data" , issue .Number )
265+ err := cleanupMissingGithubIssue (db , issue )
266+ if err != nil {
267+ allErrors = append (allErrors , err )
268+ }
269+ }
270+ return errors .Default .Combine (allErrors )
271+ }
272+
273+ func cleanupMissingGithubIssue (db dal.Dal , issue missingGithubIssueRef ) errors.Error {
274+ deleteByIssueId := func (model any , table string ) errors.Error {
275+ err := db .Delete (model , dal .From (table ), dal .Where ("connection_id = ? AND issue_id = ?" , issue .ConnectionId , issue .GithubId ))
276+ if err != nil {
277+ return errors .Default .Wrap (err , "failed to delete stale github issue data from " + table )
278+ }
279+ return nil
280+ }
281+
282+ err := deleteByIssueId (& models.GithubIssueComment {}, models.GithubIssueComment {}.TableName ())
283+ if err != nil {
284+ return err
285+ }
286+ err = deleteByIssueId (& models.GithubIssueEvent {}, models.GithubIssueEvent {}.TableName ())
287+ if err != nil {
288+ return err
289+ }
290+ err = deleteByIssueId (& models.GithubIssueLabel {}, models.GithubIssueLabel {}.TableName ())
291+ if err != nil {
292+ return err
293+ }
294+ err = deleteByIssueId (& models.GithubIssueAssignee {}, models.GithubIssueAssignee {}.TableName ())
295+ if err != nil {
296+ return err
297+ }
298+ err = db .Delete (
299+ & models.GithubPrIssue {},
300+ dal .From (models.GithubPrIssue {}.TableName ()),
301+ dal .Where ("connection_id = ? AND issue_id = ?" , issue .ConnectionId , issue .GithubId ),
302+ )
303+ if err != nil {
304+ return errors .Default .Wrap (err , "failed to delete stale github pull request issue links" )
305+ }
306+ err = db .Delete (
307+ & models.GithubIssue {},
308+ dal .From (models.GithubIssue {}.TableName ()),
309+ dal .Where ("connection_id = ? AND github_id = ?" , issue .ConnectionId , issue .GithubId ),
310+ )
311+ if err != nil {
312+ return errors .Default .Wrap (err , "failed to delete stale github issue" )
313+ }
314+ if issue .RawDataOrigin .RawDataTable != "" && issue .RawDataOrigin .RawDataId != 0 {
315+ err = db .Delete (
316+ & api.RawData {},
317+ dal .From (issue .RawDataOrigin .RawDataTable ),
318+ dal .Where ("id = ?" , issue .RawDataOrigin .RawDataId ),
319+ )
320+ if err != nil {
321+ return errors .Default .Wrap (err , "failed to delete stale raw github issue" )
322+ }
323+ }
324+ return nil
325+ }
0 commit comments