Skip to content

Commit cd3d425

Browse files
maennchenclaude
andauthored
Serialize device auth in hex_cli_auth to reuse concurrent logins (#197)
device_auth was the only auth-resolution path in hex_cli_auth not wrapped in global:trans. Concurrent callers that all needed to authenticate (e.g. parallel commands with no credentials) each kicked off their own device auth flow, prompting the user and persisting a token multiple times instead of sharing a single login. Wrap maybe_authenticate_and_retry in a single global lock (the device auth token is always persisted to the global OAuth scope, so the lock is not keyed by repo). Inside the lock, re-resolve auth first and reuse a token that differs from the one we arrived with: a waiting caller picks up the login a prior winner just persisted instead of prompting again. The "differs from current" check covers both entry reasons — no_credentials (no key in config) and token_refresh_failed (the rejected key in config) — and avoids an infinite retry loop when the server keeps rejecting a token that still looks valid by expiry. Add a regression test that drives concurrent callers through a barrier and blocks the winner inside should_authenticate, asserting exactly one caller is prompted and the persisted login is reused by the rest. Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 1c80486 commit cd3d425

2 files changed

Lines changed: 180 additions & 1 deletion

File tree

src/hex_cli_auth.erl

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,38 @@ repo_name(_) ->
321321

322322
%% @private
323323
%% Ask user if they want to authenticate, and if yes, initiate device auth.
324+
%%
325+
%% Serialized with a global lock so concurrent callers don't each trigger their
326+
%% own device auth flow. The first caller to acquire the lock runs device auth
327+
%% and persists the resulting token; subsequent callers re-check for an existing
328+
%% (now-valid) token inside the lock and reuse it instead of re-authenticating.
324329
maybe_authenticate_and_retry(BaseConfig, Fun, Reason, Opts) ->
330+
global:trans(
331+
{{?MODULE, device_auth}, self()},
332+
fun() ->
333+
do_maybe_authenticate_and_retry(BaseConfig, Fun, Reason, Opts)
334+
end,
335+
[node()],
336+
infinity
337+
).
338+
339+
%% @private
340+
%% Another caller may have authenticated while we waited for the lock. Re-resolve
341+
%% and, if we get a token that differs from the one we arrived with (none when
342+
%% credentials were missing; the rejected one on token_refresh_failed), reuse it
343+
%% instead of prompting again. Otherwise proceed to prompt + device auth.
344+
do_maybe_authenticate_and_retry(BaseConfig, Fun, Reason, Opts) ->
345+
CurrentApiKey = maps:get(api_key, BaseConfig, undefined),
346+
case resolve_api_auth(write, BaseConfig) of
347+
{ok, ApiKey, AuthContext} when ApiKey =/= CurrentApiKey ->
348+
Config = BaseConfig#{api_key => ApiKey},
349+
execute_with_retry(Config, Fun, AuthContext, 0, undefined, Opts);
350+
_ ->
351+
prompt_and_device_auth(BaseConfig, Fun, Reason, Opts)
352+
end.
353+
354+
%% @private
355+
prompt_and_device_auth(BaseConfig, Fun, Reason, Opts) ->
325356
case call_callback(BaseConfig, should_authenticate, [Reason]) of
326357
true ->
327358
case device_auth(BaseConfig, <<"api repositories">>, Opts) of

test/hex_cli_auth_SUITE.erl

Lines changed: 149 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ all() ->
6969
with_repo_optional_token_refresh_failed_test,
7070

7171
%% concurrency tests
72-
resolve_oauth_token_concurrent_refresh_serialized_test
72+
resolve_oauth_token_concurrent_refresh_serialized_test,
73+
device_auth_concurrent_serialized_reuses_login_test
7374
].
7475

7576
%%====================================================================
@@ -908,6 +909,153 @@ resolve_oauth_token_concurrent_refresh_serialized_test(_Config) ->
908909
end,
909910
ok.
910911

912+
device_auth_concurrent_serialized_reuses_login_test(_Config) ->
913+
%% Multiple concurrent callers that all need to authenticate via device auth
914+
%% must serialize: the FIRST caller runs the device auth flow, persists the
915+
%% resulting token, and every subsequent caller reuses that login instead of
916+
%% kicking off its own device auth flow.
917+
%%
918+
%% To make the race deterministic, all callers first sync at a barrier (so
919+
%% they enter `with_api` together) and the `should_authenticate' callback
920+
%% blocks the caller that reaches it until the test releases it. Then:
921+
%% * With the global lock, only ONE caller can be inside the device auth
922+
%% section at a time, so only one `should_authenticate' arrives while the
923+
%% others are still blocked on the lock.
924+
%% * Without the lock, ALL callers reach `should_authenticate' concurrently.
925+
%% We assert exactly one caller entered the prompt, then release it; the
926+
%% persisted token is reused by everyone else.
927+
NumCallers = 5,
928+
Self = self(),
929+
930+
PromptCount = counters:new(1, [atomics]),
931+
PersistCount = counters:new(1, [atomics]),
932+
933+
%% Persisted token store, updated by the winning device auth flow and read by
934+
%% every subsequent caller. Starts empty so the first caller has no credentials.
935+
TokenStore = ets:new(token_store, [public, set]),
936+
true = ets:insert(TokenStore, {oauth_tokens, error}),
937+
938+
GetOAuthTokensFn = fun() ->
939+
[{oauth_tokens, Tokens}] = ets:lookup(TokenStore, oauth_tokens),
940+
Tokens
941+
end,
942+
943+
Config = config_with_callbacks(#{
944+
get_oauth_tokens => GetOAuthTokensFn,
945+
should_authenticate => fun(no_credentials) ->
946+
counters:add(PromptCount, 1, 1),
947+
%% Signal arrival and block until the test releases us. This holds the
948+
%% global lock (if any), keeping other callers out of this section.
949+
Self ! {entered_prompt, self()},
950+
receive
951+
release -> ok
952+
end,
953+
true
954+
end,
955+
persist_oauth_tokens => fun(global, Access, Refresh, Expires) ->
956+
%% The winning caller persists the device token; make it valid so
957+
%% subsequent callers reuse it instead of authenticating again.
958+
ets:insert(
959+
TokenStore,
960+
{oauth_tokens,
961+
{ok, #{
962+
access_token => Access,
963+
refresh_token => Refresh,
964+
expires_at => Expires
965+
}}}
966+
),
967+
counters:add(PersistCount, 1, 1),
968+
ok
969+
end
970+
}),
971+
972+
%% The device auth poll reads the queued oauth_device_response from the
973+
%% *calling* process's mailbox; each caller plants its own success response.
974+
AccessToken = <<"device_token">>,
975+
SuccessPayload = #{
976+
<<"access_token">> => AccessToken,
977+
<<"refresh_token">> => <<"device_refresh">>,
978+
<<"token_type">> => <<"Bearer">>,
979+
<<"expires_in">> => 3600
980+
},
981+
Headers = #{<<"content-type">> => <<"application/vnd.hex+erlang; charset=utf-8">>},
982+
DeviceResponse =
983+
{hex_http_test, oauth_device_response,
984+
{ok, {200, Headers, term_to_binary(SuccessPayload)}}},
985+
986+
%% Spawn concurrent callers, all with no initial credentials. They sync at a
987+
%% barrier so they enter with_api as simultaneously as possible.
988+
[
989+
spawn(fun() ->
990+
self() ! DeviceResponse,
991+
Self ! {ready, self()},
992+
receive
993+
go -> ok
994+
end,
995+
Result = hex_cli_auth:with_api(
996+
write,
997+
Config,
998+
fun(Cfg) -> maps:get(api_key, Cfg) end,
999+
[{oauth_open_browser, false}]
1000+
),
1001+
Self ! {result, N, Result}
1002+
end)
1003+
|| N <- lists:seq(1, NumCallers)
1004+
],
1005+
1006+
%% Barrier: wait for all callers to be ready, then release them together.
1007+
Pids = [
1008+
receive
1009+
{ready, Pid} -> Pid
1010+
after 1000 ->
1011+
error(caller_not_ready)
1012+
end
1013+
|| _ <- lists:seq(1, NumCallers)
1014+
],
1015+
[Pid ! go || Pid <- Pids],
1016+
1017+
%% Exactly one caller may reach the prompt; give the others time to (wrongly)
1018+
%% reach it too if the lock is missing.
1019+
receive
1020+
{entered_prompt, PromptPid} ->
1021+
%% Allow other callers to race into the (un)locked section.
1022+
timer:sleep(200),
1023+
?assertEqual(
1024+
1,
1025+
counters:get(PromptCount, 1),
1026+
"device auth was not serialized: multiple callers prompted concurrently"
1027+
),
1028+
%% Release the winner so it completes device auth and persists.
1029+
PromptPid ! release
1030+
after 2000 ->
1031+
error(no_prompt)
1032+
end,
1033+
1034+
%% Collect all results. Every caller must end up with the same bearer token,
1035+
%% either because it ran the (single) device auth flow or because it reused
1036+
%% the login persisted by the winner.
1037+
Results = [
1038+
receive
1039+
{result, _N, R} -> R
1040+
after 5000 ->
1041+
error(caller_timed_out)
1042+
end
1043+
|| _ <- lists:seq(1, NumCallers)
1044+
],
1045+
1046+
[
1047+
?assertEqual(<<"Bearer device_token">>, R)
1048+
|| R <- Results
1049+
],
1050+
1051+
%% The user was prompted, and the token persisted, exactly once across all
1052+
%% callers — proving calls were serialized and the login was reused.
1053+
?assertEqual(1, counters:get(PromptCount, 1)),
1054+
?assertEqual(1, counters:get(PersistCount, 1)),
1055+
1056+
ets:delete(TokenStore),
1057+
ok.
1058+
9111059
%%====================================================================
9121060
%% Helper Functions
9131061
%%====================================================================

0 commit comments

Comments
 (0)