Skip to content

Commit 97adc6b

Browse files
committed
Fail tasks stuck in PENDING
1 parent 13d9fc2 commit 97adc6b

File tree

2 files changed

+58
-5
lines changed

2 files changed

+58
-5
lines changed

lib/cloud_controller/diego/tasks_sync.rb

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@ module VCAP::CloudController
22
module Diego
33
class TasksSync
44
BATCH_SIZE = 500
5+
PENDING_TASK_EXPIRATION = 300
56

67
# Error class scoped to TasksSync; it adds no behavior of its own on top of
# StandardError.
Error = Class.new(StandardError)
@@ -21,10 +22,17 @@ def sync
2122

2223
to_update = []
2324
to_cancel = []
25+
expired_pending = []
2426

2527
batched_cc_tasks do |cc_tasks|
2628
cc_tasks.each do |cc_task|
2729
diego_task = diego_tasks.delete(cc_task.guid)
30+
31+
if cc_task.state == TaskModel::PENDING_STATE
32+
expired_pending << cc_task.guid if cc_task.created_at < Time.now.utc - PENDING_TASK_EXPIRATION
33+
next
34+
end
35+
2836
next unless [TaskModel::RUNNING_STATE, TaskModel::CANCELING_STATE].include? cc_task.state
2937

3038
if diego_task.nil?
@@ -35,6 +43,8 @@ def sync
3543
end
3644
end
3745

46+
fail_expired_pending_tasks(expired_pending)
47+
3848
update_missing_diego_tasks(to_update)
3949
cancel_cc_tasks(to_cancel)
4050
cancel_missing_cc_tasks(diego_tasks)
@@ -97,6 +107,16 @@ def cancel_cc_tasks(to_cancel)
97107
end
98108
end
99109

110+
# Moves every task named in +expired_pending+ to the FAILED state, provided
# it is still PENDING when looked up here.
#
# Each guid is queried again with a PENDING-state filter immediately before
# the update, so a task that has left PENDING (or no longer exists) since
# the guid was collected is skipped instead of being failed.
#
# @param expired_pending [Array] guids of tasks the caller judged expired
def fail_expired_pending_tasks(expired_pending)
  expired_pending.each do |task_guid|
    still_pending = TaskModel.where(guid: task_guid, state: TaskModel::PENDING_STATE)
    stale_task = still_pending.first

    if stale_task
      stale_task.update(state: TaskModel::FAILED_STATE, failure_reason: 'Task expired in PENDING state')
      # Only log tasks that were actually transitioned.
      logger.info('expired-pending-task', task_guid: task_guid)
    end
  end
end
119+
100120
def cancel_missing_cc_tasks(to_cancel_missing)
101121
to_cancel_missing.each_key do |task_guid|
102122
workpool.submit(task_guid) do |guid|
@@ -111,7 +131,7 @@ def batched_cc_tasks
111131
loop do
112132
tasks = TaskModel.where(
113133
Sequel.lit('tasks.id > ?', last_id)
114-
).order(:id).limit(BATCH_SIZE).select(:id, :guid, :state).all
134+
).order(:id).limit(BATCH_SIZE).select(:id, :guid, :state, :created_at).all
115135

116136
yield tasks
117137
return if tasks.count < BATCH_SIZE

spec/unit/lib/cloud_controller/diego/tasks_sync_spec.rb

Lines changed: 37 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -107,10 +107,43 @@ def exceptions
107107
let!(:succeeded_task) { TaskModel.make(:succeeded, created_at: 1.minute.ago) }
108108
let(:bbs_tasks) { [] }
109109

110-
it 'does nothing to the task' do
111-
expect { subject.sync }.not_to(change do
112-
[pending_task.reload.state, succeeded_task.reload.state]
113-
end)
110+
# A task created in the :succeeded state must keep that state across a sync,
# even though the BBS reports no tasks in this context.
it 'does not change the succeeded task' do
111+
expect { subject.sync }.not_to(change { succeeded_task.reload.state })
112+
end
113+
114+
# A PENDING task must be left untouched by a sync.
# NOTE(review): pending_task is defined outside this hunk; presumably its
# created_at is newer than PENDING_TASK_EXPIRATION — confirm.
it 'does not change a recently created pending task' do
115+
expect { subject.sync }.not_to(change { pending_task.reload.state })
116+
end
117+
118+
# A sync must call bump_freshness on the BBS task client exactly once.
it 'bumps freshness' do
119+
subject.sync
120+
expect(bbs_task_client).to have_received(:bump_freshness).once
121+
end
122+
end
123+
124+
context 'when a pending task has expired' do
125+
# PENDING task whose created_at is backdated to 10 minutes ago, i.e. older
# than the 300-second PENDING_TASK_EXPIRATION used by the sync.
# The timestamp is written through the row's dataset (`this`) and the model
# is reloaded afterwards to pick the new value up.
# NOTE(review): presumably done this way because created_at cannot be set
# through the model directly — confirm.
let!(:expired_pending_task) do
  TaskModel.make(:pending).tap { |stale| stale.this.update(created_at: 10.minutes.ago) }.reload
end
130+
let!(:recent_pending_task) { TaskModel.make(:pending) }
131+
let(:bbs_tasks) { [] }
132+
133+
# After a sync, the backdated PENDING task must be in the FAILED state and
# carry the expiry failure reason.
it 'fails the expired pending task' do
134+
subject.sync
135+
136+
expect(expired_pending_task.reload.state).to eq(TaskModel::FAILED_STATE)
137+
expect(expired_pending_task.reload.failure_reason).to eq('Task expired in PENDING state')
138+
end
139+
140+
# The PENDING task created without backdating must keep its state, even
# though an expired PENDING task is failed in the same sync.
it 'does not fail the recent pending task' do
141+
expect { subject.sync }.not_to(change { recent_pending_task.reload.state })
142+
end
143+
144+
# The sync must emit an info log tagged 'expired-pending-task' that carries
# the guid of the task it failed.
it 'logs the expired pending task' do
145+
subject.sync
146+
expect(logger).to have_received(:info).with('expired-pending-task', task_guid: expired_pending_task.guid)
114147
end
115148

116149
it 'bumps freshness' do

0 commit comments

Comments
 (0)