Skip to content

Commit c9dc2ee

Browse files
committed
Add recover_from_failure hook to SynchronizeBrokerCatalogJob
When a create-service-broker job fails during a database outage, the broker can get stuck in SYNCHRONIZING state. The failure occurs after the state was set but before the rescue block can revert it, leaving the broker permanently unusable. This adds a recover_from_failure hook that runs when the delayed_job transitions to FAILED state (after all retries are exhausted). At this point the database is likely back up, allowing the broker state to be set to SYNCHRONIZATION_FAILED. The WHERE clause ensures we only update brokers still in SYNCHRONIZING state, preventing overwrites of newer state changes. This mirrors the fix applied to UpdateBrokerJob and ensures both create and update operations handle database failures gracefully.
1 parent 1c425fc commit c9dc2ee

File tree

7 files changed

+40
-6
lines changed

7 files changed

+40
-6
lines changed

app/jobs/pollable_job_wrapper.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ def error(job, exception)
7272
def failure(job)
7373
begin
7474
unwrapped_job = Enqueuer.unwrap_job(@handler)
75-
unwrapped_job.recover_from_failure if unwrapped_job.respond_to?(:recover_from_failure)
75+
unwrapped_job.recover_from_failure
7676
rescue StandardError => e
77-
logger.error("failure recovery failed for #{unwrapped_job.class.name}: #{e.class}: #{e.message}")
77+
logger.error("failure recovery failed: #{e.class}: #{e.message}")
7878
end
7979

8080
change_state(job, PollableJobModel::FAILED_STATE)

app/jobs/v3/services/synchronize_broker_catalog_job.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ def display_name
3434
'service_broker.catalog.synchronize'
3535
end
3636

37+
def recover_from_failure
38+
ServiceBroker.where(guid: broker_guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
39+
update(state: ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
40+
rescue StandardError => e
41+
logger = Steno.logger('cc.background')
42+
logger.error("Failed to recover broker state for #{broker_guid}: #{e.class}: #{e.message}")
43+
end
44+
3745
private
3846

3947
attr_reader :broker_guid, :user_audit_info

app/jobs/v3/services/update_broker_job.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def display_name
3939

4040
def recover_from_failure
4141
ServiceBroker.where(guid: broker_guid, state: ServiceBrokerStateEnum::SYNCHRONIZING).
42-
update(state: previous_broker_state)
42+
update(state: ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
4343
rescue StandardError => e
4444
logger = Steno.logger('cc.background')
4545
logger.error("Failed to recover broker state for #{broker_guid}: #{e.class}: #{e.message}")

app/jobs/wrapping_job.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ def error(job, e)
5151
handler.error(job, e) if handler.respond_to?(:error)
5252
end
5353

54+
def recover_from_failure
55+
handler.recover_from_failure if handler.respond_to?(:recover_from_failure)
56+
end
57+
5458
def display_name
5559
handler.respond_to?(:display_name) ? handler.display_name : handler.class.name
5660
end

spec/unit/jobs/pollable_job_wrapper_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ class BigException < StandardError
235235
execute_all_jobs(expected_successes: 0, expected_failures: 1)
236236

237237
broker.reload
238-
expect(broker.state).to eq('AVAILABLE')
238+
expect(broker.state).to eq('SYNCHRONIZATION_FAILED')
239239
end
240240
end
241241

spec/unit/jobs/v3/services/update_broker_job_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,11 +496,11 @@ module V3
496496
broker.update(state: ServiceBrokerStateEnum::SYNCHRONIZING)
497497
end
498498

499-
it 'reverts the broker to the previous state' do
499+
it 'sets the broker to SYNCHRONIZATION_FAILED' do
500500
job.recover_from_failure
501501

502502
broker.reload
503-
expect(broker.state).to eq(ServiceBrokerStateEnum::AVAILABLE)
503+
expect(broker.state).to eq(ServiceBrokerStateEnum::SYNCHRONIZATION_FAILED)
504504
end
505505
end
506506

spec/unit/jobs/wrapping_job_spec.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,28 @@ module Jobs
198198
end
199199
end
200200
end
201+
202+
describe '#recover_from_failure' do
203+
context 'when the wrapped job has the recover_from_failure method defined' do
204+
it 'delegates to the handler' do
205+
handler = double('Job', recover_from_failure: nil)
206+
job = WrappingJob.new(handler)
207+
208+
expect(handler).to receive(:recover_from_failure)
209+
job.recover_from_failure
210+
end
211+
end
212+
213+
context 'when the wrapped job does not have the recover_from_failure method defined' do
214+
it 'does not raise an exception' do
215+
handler = Object.new
216+
job = WrappingJob.new(handler)
217+
expect do
218+
job.recover_from_failure
219+
end.not_to raise_error
220+
end
221+
end
222+
end
201223
end
202224
end
203225
end

0 commit comments

Comments
 (0)