@@ -278,10 +278,15 @@ func reconcileTaskState(ctx context.Context, w *worker, assignments []*api.Assig
278278
279279 removeTaskAssignment := func (taskID string ) error {
280280 ctx := log .WithLogger (ctx , log .G (ctx ).WithField ("task.id" , taskID ))
281- if err := SetTaskAssignment (tx , taskID , false ); err != nil {
282- log .G (ctx ).WithError (err ).Error ("error setting task assignment in database" )
281+ // if a task is no longer assigned, then we do not have to keep track
282+ // of it. a task will only be unassigned when it is deleted on the
283+ // manager. instead of SetTaskAssignment to false, we'll just remove the
284+ // task now.
285+ if err := DeleteTask (tx , taskID ); err != nil {
286+ log .G (ctx ).WithError (err ).Error ("error removing de-assigned task" )
287+ return err
283288 }
284- return err
289+ return nil
285290 }
286291
287292 // If this was a complete set of assignments, we're going to remove all the remaining
@@ -500,6 +505,21 @@ func (w *worker) newTaskManager(ctx context.Context, tx *bolt.Tx, task *api.Task
500505// updateTaskStatus reports statuses to listeners, read lock must be held.
501506func (w * worker ) updateTaskStatus (ctx context.Context , tx * bolt.Tx , taskID string , status * api.TaskStatus ) error {
502507 if err := PutTaskStatus (tx , taskID , status ); err != nil {
508+ // we shouldn't fail to put a task status. however, there exists the
509+ // possibility of a race in which we try to put a task status after the
510+ // task has been deleted. because this whole contraption is a careful
511+ // dance of too-tightly-coupled concurrent parts, fixing that race is
512+ // fraught with hazards. instead, we'll recognize that it can occur,
513+ // log the error, and then ignore it.
514+ if err == errTaskUnknown {
515+ // log at info level. debug logging in docker is already really
516+ // verbose, so many people disable it. the race that causes this
517+ // behavior should be very rare, but if it occurs, we should know
518+ // about it, because if there is some case where it is _not_ rare,
519+ // then knowing about it will go a long way toward debugging.
520+ log .G (ctx ).Info ("attempted to update status for a task that has been removed" )
521+ return nil
522+ }
503523 log .G (ctx ).WithError (err ).Error ("failed writing status to disk" )
504524 return err
505525 }
0 commit comments