Skip to content

Commit 4e19406

Browse files
committed
refactor: deletion tasks get more resilient
Previously, if any of the 3 deletion tasks (delete-repository-snapshost, delete-snapshots, delete-templates) failed to delete a resource, it was not exactly clear where and why it happened. To make it more resilient, this was changed: - detailed failure messages were added to identify easily which op caused the failure - logging warnings if a resource was not found or nil - loops now return errors at the end, so all the resources in a loop get a chance to be deleted
1 parent 5e0fac4 commit 4e19406

3 files changed

Lines changed: 160 additions & 72 deletions

File tree

pkg/tasks/delete_repository_snapshots.go

Lines changed: 75 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ type DeleteRepositorySnapshots struct {
3333

3434
// This org may or may not have a domain created in pulp, so make sure the domain exists and if not, return a nil pulpClient
3535
func lookupOptionalPulpClient(ctx context.Context, globalClient pulp_client.PulpGlobalClient, task *models.TaskInfo, daoReg *dao.DaoRegistry) (*pulp_client.PulpClient, error) {
36+
logger := LogForTask(task.Id.String(), task.Typename, task.RequestID)
3637
if !config.PulpConfigured() {
38+
logger.Debug().Msg("pulp not configured, skipping pulp client setup")
3739
return nil, nil
3840
}
3941
domainName, err := daoReg.Domain.FetchOrCreateDomain(ctx, task.OrgId)
@@ -51,6 +53,10 @@ func lookupOptionalPulpClient(ctx context.Context, globalClient pulp_client.Pulp
5153
client := pulp_client.GetPulpClientWithDomain(domainName)
5254
return &client, nil
5355
}
56+
logger.Debug().
57+
Str("org_id", task.OrgId).
58+
Str("domain_name", domainName).
59+
Msg("no pulp domain found for org, org has never snapshotted, skipping pulp cleanup")
5460
return nil, nil
5561
}
5662

@@ -100,13 +106,13 @@ func (d *DeleteRepositorySnapshots) Run() error {
100106
// Do not throw an error if not found
101107
latestDistro, err := d.getPulpClient().FindDistributionByPath(d.ctx, latestPathIdent)
102108
if err != nil {
103-
return err
109+
return fmt.Errorf("failed to find latest distribution by path %v: %w", latestPathIdent, err)
104110
}
105111

106112
if latestDistro != nil {
107113
_, err := d.deleteRpmDistribution(*latestDistro.PulpHref)
108114
if err != nil {
109-
return err
115+
return fmt.Errorf("failed to delete latest distribution %v: %w", *latestDistro.PulpHref, err)
110116
}
111117
}
112118

@@ -115,20 +121,25 @@ func (d *DeleteRepositorySnapshots) Run() error {
115121
return err
116122
}
117123

124+
var snapErrs []error
118125
for _, snap := range snaps {
119126
_, err = d.deleteRpmDistribution(snap.DistributionHref)
120127
if err != nil {
121-
return err
128+
snapErrs = append(snapErrs, err)
129+
continue
122130
}
123-
err = d.deleteTemplateSnapshot(snap.UUID)
124-
if err != nil {
125-
return err
131+
if err = d.deleteTemplateSnapshot(snap.UUID); err != nil {
132+
snapErrs = append(snapErrs, err)
133+
continue
126134
}
127-
err = d.deleteSnapshot(snap.UUID)
128-
if err != nil {
129-
return err
135+
if err = d.deleteSnapshot(snap.UUID); err != nil {
136+
snapErrs = append(snapErrs, err)
130137
}
131138
}
139+
if err = errors.Join(snapErrs...); err != nil {
140+
return err
141+
}
142+
// only runs if all snaps deletes succeed
132143
_, _, err = d.deleteRpmRepoAndRemote()
133144
if err != nil {
134145
return err
@@ -160,60 +171,80 @@ func (d *DeleteRepositorySnapshots) fetchSnapshots() ([]models.Snapshot, error)
160171
}
161172

162173
func (d *DeleteRepositorySnapshots) deleteRpmDistribution(snapDistributionHref string) (*zest.TaskResponse, error) {
174+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
163175
deleteDistributionHref, err := d.getPulpClient().DeleteRpmDistribution(d.ctx, snapDistributionHref)
164176
if err != nil {
165-
return nil, err
177+
return nil, fmt.Errorf("failed to delete rpm distribution %v: %w", snapDistributionHref, err)
166178
}
167179
if deleteDistributionHref == nil {
180+
logger.Debug().
181+
Str("distribution_href", snapDistributionHref).
182+
Msg("no task href returned for distribution deletion, distribution may have already been deleted")
168183
return nil, nil
169184
}
170185
task, err := d.getPulpClient().PollTask(d.ctx, *deleteDistributionHref)
171186
if err != nil {
172-
return task, err
187+
return task, fmt.Errorf("error polling distribution deletion task for %v: %w", snapDistributionHref, err)
173188
}
174189
return task, nil
175190
}
176191

177192
func (d *DeleteRepositorySnapshots) deleteRpmRepoAndRemote() (taskRepo, taskRemote *zest.TaskResponse, err error) {
193+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
194+
178195
remoteResp, err := d.getPulpClient().GetRpmRemoteByName(d.ctx, d.payload.RepoConfigUUID)
179196
if err != nil {
180-
return nil, nil, err
197+
return nil, nil, fmt.Errorf("failed to look up rpm remote %v: %w", d.payload.RepoConfigUUID, err)
198+
}
199+
if remoteResp == nil {
200+
logger.Debug().
201+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
202+
Msg("no rpm remote found, skipping remote deletion")
181203
}
182204
if remoteResp != nil {
183205
remoteHref := remoteResp.PulpHref
184206
deleteRemoteHref, err := d.getPulpClient().DeleteRpmRemote(d.ctx, *remoteHref)
185207
if err != nil {
186-
return taskRepo, nil, err
208+
return taskRepo, nil, fmt.Errorf("failed to delete rpm remote %v: %w", *remoteHref, err)
187209
}
188210
taskRemote, err = d.getPulpClient().PollTask(d.ctx, deleteRemoteHref)
189211
if err != nil {
190-
return taskRepo, taskRemote, err
212+
return taskRepo, taskRemote, fmt.Errorf("error polling rpm remote deletion task for %v: %w", *remoteHref, err)
191213
}
192214
}
193215

194216
repoResp, err := d.getPulpClient().GetRpmRepositoryByName(d.ctx, d.payload.RepoConfigUUID)
195217
if err != nil {
196-
return nil, nil, err
218+
return nil, nil, fmt.Errorf("failed to look up rpm repository %v: %w", d.payload.RepoConfigUUID, err)
219+
}
220+
if repoResp == nil {
221+
logger.Debug().
222+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
223+
Msg("no rpm repository found, skipping repository deletion")
197224
}
198225
if repoResp != nil {
199226
repoHref := repoResp.PulpHref
200227
deleteRepoHref, err := d.getPulpClient().DeleteRpmRepository(d.ctx, *repoHref)
201228
if err != nil {
202-
return nil, nil, err
229+
return nil, nil, fmt.Errorf("failed to delete rpm repository %v: %w", *repoHref, err)
203230
}
204231
taskRepo, err = d.getPulpClient().PollTask(d.ctx, deleteRepoHref)
205232
if err != nil {
206-
return taskRepo, nil, err
233+
return taskRepo, nil, fmt.Errorf("error polling rpm repository deletion task for %v: %w", *repoHref, err)
207234
}
208235
}
209236
return taskRepo, taskRemote, nil
210237
}
211238

212239
func (d *DeleteRepositorySnapshots) deleteSnapshot(snapUUID string) error {
240+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
213241
err := d.daoReg.Snapshot.Delete(d.ctx, snapUUID)
214242
if err != nil {
215243
var daoErr *ce.DaoError
216244
if errors.As(err, &daoErr) && daoErr.NotFound {
245+
logger.Warn().
246+
Str("snapshot_uuid", snapUUID).
247+
Msg("snapshot not found during deletion, already deleted")
217248
return nil
218249
}
219250
return fmt.Errorf("error deleting snapshot %v: %w", snapUUID, err)
@@ -222,13 +253,17 @@ func (d *DeleteRepositorySnapshots) deleteSnapshot(snapUUID string) error {
222253
}
223254

224255
func (d *DeleteRepositorySnapshots) deleteRepoConfig() error {
256+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
225257
err := d.daoReg.RepositoryConfig.Delete(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
226258
if err != nil {
227259
var daoErr *ce.DaoError
228260
if errors.As(err, &daoErr) && daoErr.NotFound {
261+
logger.Warn().
262+
Str("repo_config_uuid", d.payload.RepoConfigUUID).
263+
Msg("repo config not found during deletion, already deleted")
229264
return nil
230265
}
231-
return fmt.Errorf("error deleting repository configuration: %w", err)
266+
return fmt.Errorf("error deleting repository configuration %v: %w", d.payload.RepoConfigUUID, err)
232267
}
233268
return nil
234269
}
@@ -249,13 +284,15 @@ func (d *DeleteRepositorySnapshots) candlepinRHContentId(templateOrgId string, r
249284
}
250285

251286
func (d *DeleteRepositorySnapshots) deleteCandlepinContent() error {
287+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
252288
if !config.CandlepinConfigured() {
289+
logger.Debug().Msg("candlepin not configured, skipping candlepin content deletion")
253290
return nil
254291
}
255292
if d.task.OrgId == config.RedHatOrg {
256293
templates, err := d.daoReg.Template.InternalOnlyGetTemplatesForRepoConfig(d.ctx, d.payload.RepoConfigUUID, false)
257294
if err != nil {
258-
return fmt.Errorf("couldn't get templates for repo config")
295+
return fmt.Errorf("couldn't get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
259296
}
260297
for _, template := range templates {
261298
// We have to lookup the content ID for RH content, as its based on the repo label
@@ -265,45 +302,48 @@ func (d *DeleteRepositorySnapshots) deleteCandlepinContent() error {
265302
}
266303
err = d.cpClient.DemoteContentFromEnvironment(d.ctx, template.UUID, []string{contentId})
267304
if err != nil {
268-
return fmt.Errorf("couldn't demote content from environment, %v", err)
305+
return fmt.Errorf("couldn't demote content from environment for template %v: %w", template.UUID, err)
269306
}
270307
}
271308
} else {
272309
err := d.cpClient.RemoveContentFromProduct(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
273310
if err != nil {
274-
return err
311+
return fmt.Errorf("failed to remove content from candlepin product for repo %v: %w", d.payload.RepoConfigUUID, err)
275312
}
276313

277314
err = d.cpClient.DeleteContent(d.ctx, d.task.OrgId, d.payload.RepoConfigUUID)
278315
if err != nil {
279-
return err
316+
return fmt.Errorf("error deleting candlepin content for repo %v: %w", d.payload.RepoConfigUUID, err)
280317
}
281318
}
282319

283320
return nil
284321
}
285322

286-
func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() (err error) {
323+
func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() error {
287324
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
325+
var errs []error
288326

289327
var templates []api.TemplateResponse
290328
if d.task.OrgId == config.RedHatOrg {
329+
var err error
291330
templates, err = d.daoReg.Template.InternalOnlyGetTemplatesForRepoConfig(d.ctx, d.payload.RepoConfigUUID, false)
292331
if err != nil {
293-
return err
332+
return fmt.Errorf("failed to get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
294333
}
295334
} else {
296335
templateResponse, _, err := d.daoReg.Template.List(d.ctx, d.task.OrgId, true, api.PaginationData{Limit: -1}, api.TemplateFilterData{RepositoryUUIDs: []string{d.payload.RepoConfigUUID}})
297336
if err != nil {
298-
return err
337+
return fmt.Errorf("failed to get templates for repo config %v: %w", d.payload.RepoConfigUUID, err)
299338
}
300339
templates = templateResponse.Data
301340
}
302341

303342
for _, template := range templates {
304343
distHref, err := d.daoReg.Template.GetDistributionHref(d.ctx, template.UUID, d.payload.RepoConfigUUID)
305344
if err != nil {
306-
return err
345+
errs = append(errs, fmt.Errorf("failed to get distribution href for template %v: %w", template.UUID, err))
346+
continue
307347
}
308348

309349
if distHref == nil {
@@ -315,25 +355,29 @@ func (d *DeleteRepositorySnapshots) deleteTemplateRepoDistributions() (err error
315355

316356
taskHref, err := d.getPulpClient().DeleteRpmDistribution(d.ctx, *distHref)
317357
if err != nil {
318-
return err
358+
errs = append(errs, fmt.Errorf("failed to delete rpm distribution %v for template %v: %w", *distHref, template.UUID, err))
359+
continue
319360
}
320361

321362
if taskHref != nil {
322-
_, err = d.getPulpClient().PollTask(d.ctx, *taskHref)
323-
if err != nil {
324-
return err
363+
if _, err = d.getPulpClient().PollTask(d.ctx, *taskHref); err != nil {
364+
errs = append(errs, fmt.Errorf("error polling distribution deletion task for template %v: %w", template.UUID, err))
325365
}
326366
}
327367
}
328368

329-
return nil
369+
return errors.Join(errs...)
330370
}
331371

332372
func (d *DeleteRepositorySnapshots) deleteTemplateSnapshot(snapshotUUID string) error {
373+
logger := LogForTask(d.task.Id.String(), d.task.Typename, d.task.RequestID)
333374
err := d.daoReg.Template.DeleteTemplateSnapshot(d.ctx, snapshotUUID)
334375
if err != nil {
335376
var daoErr *ce.DaoError
336377
if errors.As(err, &daoErr) && daoErr.NotFound {
378+
logger.Warn().
379+
Str("snapshot_uuid", snapshotUUID).
380+
Msg("template snapshot association not found during deletion, already deleted")
337381
return nil
338382
}
339383
return fmt.Errorf("error deleting template snapshot %v: %w", snapshotUUID, err)

0 commit comments

Comments
 (0)