Skip to content

Commit 3afc75e

Browse files
authored
Make user lookup resilient and consistent (#3529)
* Make user lookup resilient and consistent * Address PR feedback
1 parent d9eab13 commit 3afc75e

4 files changed

Lines changed: 152 additions & 40 deletions

File tree

app/controllers/v3/roles_controller.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ def index
2828
message: message,
2929
decorators: decorators
3030
)
31+
rescue UaaUnavailable
32+
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
3133
end
3234

3335
def show
@@ -43,6 +45,8 @@ def show
4345
resource_not_found!(:role) unless role
4446

4547
render status: :ok, json: Presenters::V3::RolePresenter.new(role, decorators:)
48+
rescue UaaUnavailable
49+
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
4650
end
4751

4852
def create

app/controllers/v3/users_controller.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ def show
3333
user_not_found! unless user
3434

3535
render status: :ok, json: Presenters::V3::UserPresenter.new(user, uaa_users: User.uaa_users_info([user.guid]))
36+
rescue VCAP::CloudController::UaaUnavailable
37+
raise CloudController::Errors::ApiError.new_from_details('UaaUnavailable')
3638
end
3739

3840
def create

lib/cloud_controller/uaa/uaa_client.rb

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,7 @@ def token_info
3838
end
3939

4040
def users_for_ids(user_ids)
41-
fetch_users(user_ids)
42-
rescue UaaUnavailable, CF::UAA::UAAError => e
43-
logger.error("Failed to retrieve users from UAA: #{e.inspect}")
44-
{}
41+
with_request_error_handling { fetch_users(user_ids) }
4542
end
4643

4744
def usernames_for_ids(user_ids)
@@ -64,22 +61,21 @@ def id_for_username(username, origin: nil)
6461
end
6562

6663
def ids_for_usernames_and_origins(usernames, origins, precise_username_match=true)
67-
operator = precise_username_match ? 'eq' : 'co'
68-
username_filter_string = usernames&.map { |u| "username #{operator} \"#{u}\"" }&.join(' or ')
69-
origin_filter_string = origins&.map { |o| "origin eq \"#{o}\"" }&.join(' or ')
64+
with_request_error_handling do
65+
operator = precise_username_match ? 'eq' : 'co'
66+
username_filter_string = usernames&.map { |u| "username #{operator} \"#{u}\"" }&.join(' or ')
67+
origin_filter_string = origins&.map { |o| "origin eq \"#{o}\"" }&.join(' or ')
7068

71-
filter_string = construct_filter_string(username_filter_string, origin_filter_string)
69+
filter_string = construct_filter_string(username_filter_string, origin_filter_string)
7270

73-
results = if precise_username_match
74-
query(:user_id, includeInactive: true, filter: filter_string)
75-
else
76-
query(:user, filter: filter_string, attributes: 'id')
77-
end
71+
results = if precise_username_match
72+
query(:user_id, includeInactive: true, filter: filter_string)
73+
else
74+
query(:user, filter: filter_string, attributes: 'id')
75+
end
7876

79-
results['resources'].pluck('id')
80-
rescue CF::UAA::UAAError => e
81-
logger.error("Failed to retrieve user ids from UAA: #{e.inspect}")
82-
raise UaaUnavailable
77+
results['resources'].pluck('id')
78+
end
8379
end
8480

8581
def construct_filter_string(username_filter_string, origin_filter_string)
@@ -104,6 +100,32 @@ def info
104100
CF::UAA::Info.new(uaa_target, uaa_connection_opts)
105101
end
106102

103+
def with_request_error_handling
104+
delay = 0.25
105+
max_delay = 5
106+
retry_until = Time.now.utc + 60 # retry for 1 minute from now
107+
factor = 2
108+
109+
begin
110+
yield
111+
rescue CF::UAA::InvalidToken => e
112+
logger.error("UAA request for token failed: #{e.inspect}")
113+
raise
114+
rescue UaaUnavailable, CF::UAA::UAAError => e
115+
if Time.now.utc > retry_until
116+
logger.error('Unable to establish a connection to UAA, no more retries, raising an exception.')
117+
raise UaaUnavailable
118+
else
119+
sleep_time = [delay, max_delay].min
120+
logger.error("Failed to retrieve details from UAA: #{e.inspect}")
121+
logger.info("Attempting to connect to the UAA. Total #{(retry_until - Time.now.utc).round(2)} seconds remaining. Next retry after #{sleep_time} seconds.")
122+
sleep(sleep_time)
123+
delay *= factor
124+
retry
125+
end
126+
end
127+
end
128+
107129
private
108130

109131
def query(type, **)

spec/unit/lib/uaa/uaa_client_spec.rb

Lines changed: 107 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -315,10 +315,11 @@ module VCAP::CloudController
315315
context 'when UAA is unavailable' do
316316
before do
317317
allow(uaa_client).to receive(:token_info).and_raise(UaaUnavailable)
318+
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
318319
end
319320

320-
it 'returns an empty hash' do
321-
expect(uaa_client.users_for_ids([userid_1])).to eq({})
321+
it 'raises an exception' do
322+
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
322323
end
323324
end
324325

@@ -378,21 +379,23 @@ module VCAP::CloudController
378379

379380
context 'when the endpoint returns an error' do
380381
let(:uaa_error) { CF::UAA::UAAError.new('some error') }
381-
let(:mock_logger) { double(:steno_logger, error: nil) }
382+
let(:mock_logger) { double(:steno_logger, error: nil, info: nil) }
382383

383384
before do
384385
scim = instance_double(CF::UAA::Scim)
385386
allow(scim).to receive(:query).and_raise(uaa_error)
386387
allow(uaa_client).to receive_messages(scim: scim, logger: mock_logger)
388+
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
387389
end
388390

389-
it 'returns an empty hash' do
390-
expect(uaa_client.users_for_ids([userid_1])).to eq({})
391+
it 'raises an exception' do
392+
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
391393
end
392394

393-
it 'logs the error' do
394-
uaa_client.users_for_ids([userid_1])
395-
expect(mock_logger).to have_received(:error).with("Failed to retrieve users from UAA: #{uaa_error.inspect}")
395+
it 'retries, raises an exception after 17 attempts' do
396+
expect { uaa_client.users_for_ids([userid_1]) }.to raise_error(UaaUnavailable)
397+
expect(uaa_client).to have_received(:scim).exactly(17).times
398+
expect(uaa_client).to have_received(:sleep).exactly(16).times
396399
end
397400
end
398401

@@ -439,8 +442,9 @@ module VCAP::CloudController
439442
context 'when token is invalid or expired twice' do
440443
let(:auth_header) { 'bearer invalid' }
441444

442-
it 'retries once and then returns no usernames' do
443-
expect(uaa_client.users_for_ids([userid_1, userid_2])).to eq({})
445+
it 'fails immediately without retries' do
446+
expect { uaa_client.users_for_ids([userid_1, userid_2]) }.to raise_error(CF::UAA::InvalidToken)
447+
expect(uaa_client).not_to receive(:sleep)
444448
end
445449
end
446450
end
@@ -548,6 +552,7 @@ module VCAP::CloudController
548552
let(:username1) { 'user1@example.com' }
549553
let(:username2) { 'user2@example.com' }
550554
let(:partial_username) { 'user' }
555+
let(:mock_logger) { double(:steno_logger, error: nil, info: nil) }
551556

552557
context 'with usernames but no origins' do
553558
it 'returns the ids for the usernames' do
@@ -675,13 +680,14 @@ module VCAP::CloudController
675680

676681
context 'when UAA is unavailable' do
677682
before do
678-
allow(uaa_client).to receive(:token_info).and_raise(UaaUnavailable)
683+
allow(uaa_client).to receive(:query).and_raise(UaaUnavailable)
684+
allow(uaa_client).to receive(:sleep) { |n| Timecop.travel(n) }
679685
end
680686

681-
it 'raises UaaUnavailable' do
682-
expect do
683-
uaa_client.ids_for_usernames_and_origins([username1], nil)
684-
end.to raise_error(UaaUnavailable)
687+
it 'retries, raises an exception after 17 attempts' do
688+
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
689+
expect(uaa_client).to have_received(:query).exactly(17).times
690+
expect(uaa_client).to have_received(:sleep).exactly(16).times
685691
end
686692
end
687693

@@ -690,12 +696,13 @@ module VCAP::CloudController
690696
scim = double('scim')
691697
allow(scim).to receive(:query).and_raise(CF::UAA::TargetError)
692698
allow(uaa_client).to receive(:scim).and_return(scim)
699+
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
693700
end
694701

695-
it 'raises UaaUnavailable' do
696-
expect do
697-
uaa_client.ids_for_usernames_and_origins([username1], nil)
698-
end.to raise_error(UaaUnavailable)
702+
it 'retries, raises an exception after 17 attempts' do
703+
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
704+
expect(uaa_client).to have_received(:scim).exactly(17).times
705+
expect(uaa_client).to have_received(:sleep).exactly(16).times
699706
end
700707
end
701708

@@ -704,12 +711,13 @@ module VCAP::CloudController
704711
scim = double('scim')
705712
allow(scim).to receive(:query).and_raise(CF::UAA::BadTarget)
706713
allow(uaa_client).to receive(:scim).and_return(scim)
714+
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
707715
end
708716

709-
it 'raises UaaUnavailable' do
710-
expect do
711-
uaa_client.ids_for_usernames_and_origins([username1], nil)
712-
end.to raise_error(UaaUnavailable)
717+
it 'retries, raises an exception after 17 attempts' do
718+
expect { uaa_client.ids_for_usernames_and_origins([username1], nil) }.to raise_error(UaaUnavailable)
719+
expect(uaa_client).to have_received(:scim).exactly(17).times
720+
expect(uaa_client).to have_received(:sleep).exactly(16).times
713721
end
714722
end
715723
end
@@ -814,5 +822,81 @@ module VCAP::CloudController
814822
end
815823
end
816824
end
825+
826+
describe '#with_request_error_handling' do
827+
before do
828+
allow(subject).to receive(:sleep) { |n| Timecop.travel(n) }
829+
end
830+
831+
context 'when the block succeeds immediately' do
832+
it 'does not sleep or raise an exception' do
833+
expect { uaa_client.with_request_error_handling {} }.not_to raise_error
834+
end
835+
end
836+
837+
context 'when the block raises an exception' do
838+
let(:successful_block) do
839+
proc {
840+
@count ||= 0
841+
@count += 1
842+
@count == 2 ? true : raise(UaaUnavailable)
843+
}
844+
end
845+
846+
it 'retries once and eventually succeeds' do
847+
expect { subject.with_request_error_handling(&successful_block) }.not_to raise_error
848+
end
849+
850+
it 'fails immediately if invalidToken exception has been thrown' do
851+
expect { subject.with_request_error_handling { raise CF::UAA::InvalidToken } }.to raise_error(CF::UAA::InvalidToken)
852+
expect(uaa_client).not_to receive(:sleep)
853+
end
854+
855+
it 'retries and eventually raises an error when the block fails' do
856+
attempts = 0
857+
858+
expect do
859+
subject.with_request_error_handling do
860+
attempts += 1
861+
Timecop.travel(Time.now.utc + 50)
862+
raise UaaUnavailable
863+
end
864+
end.to raise_error(UaaUnavailable)
865+
expect(uaa_client).to have_received(:sleep).exactly(1).times
866+
expect(attempts).to eq(2)
867+
end
868+
869+
it 'stops retrying after 60 seconds and raises an exception' do
870+
start_time = Time.now.utc
871+
872+
Timecop.freeze do
873+
expect do
874+
subject.with_request_error_handling do
875+
Timecop.travel(start_time + 61)
876+
raise UaaUnavailable
877+
end
878+
end.to raise_error(UaaUnavailable)
879+
end
880+
expect(uaa_client).not_to receive(:sleep)
881+
end
882+
883+
it 'raises an error after 17 attempts in approximately 1 minute when each yield call immediately' do
884+
attempts = 0
885+
start_time = Time.now.utc
886+
887+
expect do
888+
subject.with_request_error_handling do
889+
attempts += 1
890+
raise UaaUnavailable
891+
end
892+
end.to raise_error(UaaUnavailable)
893+
end_time = Time.now.utc
894+
duration = end_time.to_f - start_time.to_f
895+
expect(attempts).to be_within(1).of(17)
896+
expect(duration).to be_within(1).of(62)
897+
expect(uaa_client).to have_received(:sleep).exactly(16).times
898+
end
899+
end
900+
end
817901
end
818902
end

0 commit comments

Comments
 (0)