Skip to content

Commit ea4311f

Browse files
committed
change: deletion tasks get more resilient
If any of the 3 deletion tasks (delete-repository-snapshost, delete-snapshots, delete-templates) failed to delete a resource, it was not exactly clear where and why it happened. To make it more resilient, this was changed: - detailed failure messages were added to identify easily which op caused the failure - logging warnings if a resource was not found or nil - loops now return errors at the end, so all the resources in a loop get a chance to be deleted
1 parent 5e0fac4 commit ea4311f

3 files changed

Lines changed: 159 additions & 72 deletions

File tree

pkg/tasks/delete_repository_snapshots.go

Lines changed: 74 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ type DeleteRepositorySnapshots struct {
3333

3434
// This org may or may not have a domain created in pulp, so make sure the domain exists and if not, return a nil pulpClient
3535
func lookupOptionalPulpClient(ctx context.Context, globalClient pulp_client.PulpGlobalClient, task *models.TaskInfo, daoReg *dao.DaoRegistry) (*pulp_client.PulpClient, error) {
36+
logger := LogForTask(task.Id.String(), task.Typename, task.RequestID)
3637
if !config.PulpConfigured() {
38+
logger.Debug().Msg("pulp not configured, skipping pulp client setup")
3739
return nil, nil
3840
}
3941
domainName, err := daoReg.Domain.FetchOrCreateDomain(ctx, task.OrgId)
@@ -51,6 +53,10 @@ func lookupOptionalPulpClient(ctx context.Context, globalClient pulp_client.Pulp
5153
client := pulp_client.GetPulpClientWithDomain(domainName)
5254
return &client, nil
5355
}
56+
logger.Debug().
57+
Str("org_id", task.OrgId).
58+
Str("domain_name", domainName).
59+
Msg("no pulp domain found for org, org has never snapshotted, skipping pulp cleanup")
5460
return nil, nil
5561
}
5662

@@ -100,13 +106,13 @@ func (d *DeleteRepositorySnapshots) Run() error {
100106
// Do not throw an error if not found
101107
latestDistro, err := d.getPulpClient().FindDistributionByPath(d.ctx, latestPathIdent)
102108
if err != nil {
103-
return err
109+
return fmt.Errorf("failed to find latest distribution by path %v: %w", latestPathIdent, err)
104110
}
105111

106112
if latestDistro != nil {
107113
_, err := d.deleteRpmDistribution(*latestDistro.PulpHref)
108114
if err != nil {
109-
return err
115+
return fmt.Errorf("failed to delete latest distribution %v: %w", *latestDistro.PulpHref, err)
110116
}
111117
}
112118

@@ -115,20 +121,24 @@ func (d *DeleteRepositorySnapshots) Run() error {
115121
return err
116122
}
117123

124+
var snapErrs []error
118125
for _, snap := range snaps {
119126
_, err = d.deleteRpmDistribution(snap.DistributionHref)
120127
if err != nil {
121-
return err
128+
snapErrs = append(snapErrs, err)
129+
continue
122130
}
123-
err = d.deleteTemplateSnapshot(snap.UUID)
124-
if err != nil {
125-
return err
131+
if err = d.deleteTemplateSnapshot(snap.UUID); err != nil {
132+
snapErrs = append(snapErrs, err)
126133
}
127-
err = d.deleteSnapshot(snap.UUID)
128-
if err != nil {
129-
return err
134+
if err = d.deleteSnapshot(snap.UUID); err != nil {
135+
snapErrs = append(snapErrs, err)
130136
}
131137
}
138+
if err = errors.Join(snapErrs...); err != nil {
139+
return err
140+
}
141+
// only runs if all snaps deletes succeed
132142
_, _, err = d.deleteRpmRepoAndRemote()
133143
if err != nil {
134144
return err
@@ -160,60 +170,80 @@ func (d *DeleteRepositorySnapshots) fetchSnapshots() ([]models.Snapshot, error)
160170
}
161171

162172
func (d *DeleteRepositorySnapshots) deleteRpmDistribution(snapDistributionHref string) (*zest.TaskResponse, error) {
173+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
163174
deleteDistributionHref, err := d.getPulpClient().DeleteRpmDistribution(d.ctx, snapDistributionHref)
164175
if err != nil {
165-
return nil, err
176+
return nil, fmt.Errorf("failed to delete rpm distribution %v: %w", snapDistributionHref, err)
166177
}
167178
if deleteDistributionHref == nil {
179+
logger.Debug().
180+
Str("distribution_href", snapDistributionHref).
181+
Msg("no task href returned for distribution deletion, distribution may have already been deleted")
168182
return nil, nil
169183
}
170184
task, err := d.getPulpClient().PollTask(d.ctx, *deleteDistributionHref)
171185
if err != nil {
172-
return task, err
186+
return task, fmt.Errorf("error polling distribution deletion task for %v: %w", snapDistributionHref, err)
173187
}
174188
return task, nil
175189
}
176190

177191
func (d *DeleteRepositorySnapshots) deleteRpmRepoAndRemote() (taskRepo, taskRemote *zest.TaskResponse, err error) {
192+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
193+
178194
remoteResp, err := d.getPulpClient().GetRpmRemoteByName(d.ctx, d.payload.RepoConfigUUID)
179195
if err != nil {
180-
return nil, nil, err
196+
return nil, nil, fmt.Errorf("failed to look up rpm remote %v: %w", d.payload.RepoConfigUUID, err)
197+
}
198+
if remoteResp == nil {
199+
logger.Debug().
200+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
201+
Msg("no rpm remote found, skipping remote deletion")
181202
}
182203
if remoteResp != nil {
183204
remoteHref := remoteResp.PulpHref
184205
deleteRemoteHref, err := d.getPulpClient().DeleteRpmRemote(d.ctx, *remoteHref)
185206
if err != nil {
186-
return taskRepo, nil, err
207+
return taskRepo, nil, fmt.Errorf("failed to delete rpm remote %v: %w", *remoteHref, err)
187208
}
188209
taskRemote, err = d.getPulpClient().PollTask(d.ctx, deleteRemoteHref)
189210
if err != nil {
190-
return taskRepo, taskRemote, err
211+
return taskRepo, taskRemote, fmt.Errorf("error polling rpm remote deletion task for %v: %w", *remoteHref, err)
191212
}
192213
}
193214

194215
repoResp, err := d.getPulpClient().GetRpmRepositoryByName(d.ctx, d.payload.RepoConfigUUID)
195216
if err != nil {
196-
return nil, nil, err
217+
return nil, nil, fmt.Errorf("failed to look up rpm repository %v: %w", d.payload.RepoConfigUUID, err)
218+
}
219+
if repoResp == nil {
220+
logger.Debug().
221+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
222+
Msg("no rpm repository found, skipping repository deletion")
197223
}
198224
if repoResp != nil {
199225
repoHref := repoResp.PulpHref
200226
deleteRepoHref, err := d.getPulpClient().DeleteRpmRepository(d.ctx, *repoHref)
201227
if err != nil {
202-
return nil, nil, err
228+
return nil, nil, fmt.Errorf("failed to delete rpm repository %v: %w", *repoHref, err)
203229
}
204230
taskRepo, err = d.getPulpClient().PollTask(d.ctx, deleteRepoHref)
205231
if err != nil {
206-
return taskRepo, nil, err
232+
return taskRepo, nil, fmt.Errorf("error polling rpm repository deletion task for %v: %w", *repoHref, err)
207233
}
208234
}
209235
return taskRepo, taskRemote, nil
210236
}
211237

212238
func (d *DeleteRepositorySnapshots) deleteSnapshot(snapUUID string) error {
239+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
213240
err := d.daoReg.Snapshot.Delete(d.ctx, snapUUID)
214241
if err != nil {
215242
var daoErr *ce.DaoError
216243
if errors.As(err, &daoErr) && daoErr.NotFound {
244+
logger.Warn().
245+
Str("snapshot_uuid", snapUUID).
246+
Msg("snapshot not found during deletion, already deleted")
217247
return nil
218248
}
219249
return fmt.Errorf("error deleting snapshot %v: %w", snapUUID, err)
@@ -222,13 +252,17 @@ func (d *DeleteRepositorySnapshots) deleteSnapshot(snapUUID string) error {
222252
}
223253

224254
func (d *DeleteRepositorySnapshots) deleteRepoConfig() error {
255+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
225256
err := d.daoReg.RepositoryConfig.Delete(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
226257
if err != nil {
227258
var daoErr *ce.DaoError
228259
if errors.As(err, &daoErr) && daoErr.NotFound {
260+
logger.Warn().
261+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
262+
Msg("repo config not found during deletion, already deleted")
229263
return nil
230264
}
231-
return fmt.Errorf("error deleting repository configuration: %w", err)
265+
return fmt.Errorf("error deleting repository configuration %v: %w", d.payload.RepoConfigUUID, err)
232266
}
233267
return nil
234268
}
@@ -249,13 +283,15 @@ func (d *DeleteRepositorySnapshots) candlepinRHContentId(templateOrgId string, r
249283
}
250284

251285
func (d *DeleteRepositorySnapshots) deleteCandlepinContent() error {
286+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
252287
if !config.CandlepinConfigured() {
288+
logger.Debug().Msg("candlepin not configured, skipping candlepin content deletion")
253289
return nil
254290
}
255291
if d.task.OrgId == config.RedHatOrg {
256292
templates, err := d.daoReg.Template.InternalOnlyGetTemplatesForRepoConfig(d.ctx, d.payload.RepoConfigUUID, false)
257293
if err != nil {
258-
return fmt.Errorf("couldn't get templates for repo config")
294+
return fmt.Errorf("couldn't get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
259295
}
260296
for _, template := range templates {
261297
// We have to lookup the content ID for RH content, as its based on the repo label
@@ -265,45 +301,48 @@ func (d *DeleteRepositorySnapshots) deleteCandlepinContent() error {
265301
}
266302
err = d.cpClient.DemoteContentFromEnvironment(d.ctx, template.UUID, []string{contentId})
267303
if err != nil {
268-
return fmt.Errorf("couldn't demote content from environment, %v", err)
304+
return fmt.Errorf("couldn't demote content from environment for template %v: %w", template.UUID, err)
269305
}
270306
}
271307
} else {
272308
err := d.cpClient.RemoveContentFromProduct(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
273309
if err != nil {
274-
return err
310+
return fmt.Errorf("failed to remove content from candlepin product for repo %v: %w", d.payload.RepoConfigUUID, err)
275311
}
276312

277313
err = d.cpClient.DeleteContent(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
278314
if err != nil {
279-
return err
315+
return fmt.Errorf("error deleting candlepin content for repo %v: %w", d.payload.RepoConfigUUID, err)
280316
}
281317
}
282318

283319
return nil
284320
}
285321

286-
func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() (err error) {
322+
func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() error {
287323
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
324+
var errs []error
288325

289326
var templates []api.TemplateResponse
290327
if d.task.OrgId == config.RedHatOrg {
328+
var err error
291329
templates, err = d.daoReg.Template.InternalOnlyGetTemplatesForRepoConfig(d.ctx, d.payload.RepoConfigUUID, false)
292330
if err != nil {
293-
return err
331+
return fmt.Errorf("failed to get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
294332
}
295333
} else {
296334
templateResponse, _, err := d.daoReg.Template.List(d.ctx, d.task.OrgId, true, api.PaginationData{Limit: -1}, api.TemplateFilterData{RepositoryUUIDs: []string{d.payload.RepoConfigUUID}})
297335
if err != nil {
298-
return err
336+
return fmt.Errorf("failed to get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
299337
}
300338
templates = templateResponse.Data
301339
}
302340

303341
for _, template := range templates {
304342
distHref, err := d.daoReg.Template.GetDistributionHref(d.ctx, template.UUID, d.payload.RepoConfigUUID)
305343
if err != nil {
306-
return err
344+
errs = append(errs, fmt.Errorf("failed to get distribution href for template %v: %w", template.UUID, err))
345+
continue
307346
}
308347

309348
if distHref == nil {
@@ -315,25 +354,29 @@ func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() (err error
315354

316355
taskHref, err := d.getPulpClient().DeleteRpmDistribution(d.ctx, *distHref)
317356
if err != nil {
318-
return err
357+
errs = append(errs, fmt.Errorf("failed to delete rpm distribution %v for template %v: %w", *distHref, template.UUID, err))
358+
continue
319359
}
320360

321361
if taskHref != nil {
322-
_, err = d.getPulpClient().PollTask(d.ctx, *taskHref)
323-
if err != nil {
324-
return err
362+
if _, err = d.getPulpClient().PollTask(d.ctx, *taskHref); err != nil {
363+
errs = append(errs, fmt.Errorf("error polling distribution deletion task for template %v: %w", template.UUID, err))
325364
}
326365
}
327366
}
328367

329-
return nil
368+
return errors.Join(errs...)
330369
}
331370

332371
func (d *DeleteRepositorySnapshots) deleteTemplateSnapshot(snapshotUUID string) error {
372+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
333373
err := d.daoReg.Template.DeleteTemplateSnapshot(d.ctx, snapshotUUID)
334374
if err != nil {
335375
var daoErr *ce.DaoError
336376
if errors.As(err, &daoErr) && daoErr.NotFound {
377+
logger.Warn().
378+
Str("snapshot_uuid", snapshotUUID).
379+
Msg("template snapshot association not found during deletion, already deleted")
337380
return nil
338381
}
339382
return fmt.Errorf("error deleting template snapshot %v: %w", snapshotUUID, err)

0 commit comments

Comments
 (0)