Skip to content

Commit e1cbac8

Browse files
committed
fix(upstreams): reuse fresh quota evidence for transient reconciliation probes
Scheduled account reconciliation could report quota refresh failures when every live usage probe was temporarily unavailable, even though the identity already had fresh, usable account-primary quota evidence. Reuse only persisted 5h or monthly account-primary windows that remain usable. Probe auth failures and 429s stay fail-closed, as does evidence that is stale, missing, resetless, exhausted, weekly-only, model-scoped, or of unknown duration.
1 parent aab6f80 commit e1cbac8

2 files changed

Lines changed: 569 additions & 4 deletions

File tree

lib/codex_pooler/upstreams/reconciliation/pool_reconciliation.ex

Lines changed: 41 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
55

66
alias CodexPooler.Jobs
77
alias CodexPooler.Pools.Pool
8+
alias CodexPooler.Quotas.WindowClassifier
89
alias CodexPooler.Repo
910
alias CodexPooler.Upstreams.Auth.TokenRefresh
1011
alias CodexPooler.Upstreams.EndpointMetadata
@@ -18,6 +19,7 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
1819
@eligible PoolUpstreamAssignment.eligible_status()
1920
@health_active PoolUpstreamAssignment.active_health_status()
2021
@account_quota_key "account"
22+
@fallback_denied_usage_statuses [401, 403, 429, :auth_rejected]
2123
@usage_auth_refresh_skew_seconds 5 * 60
2224
@codex_usage_paths [
2325
"/api/codex/usage",
@@ -131,6 +133,11 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
131133
:usage_unavailable ->
132134
step_result(:failed, "quota_refresh_unavailable", "quota windows were not available")
133135

136+
{:persisted_windows, windows} ->
137+
step_result(:succeeded, "quota_reused_fresh", "fresh quota windows reused", %{
138+
"window_count" => length(windows)
139+
})
140+
134141
{:windows, windows, identity_attrs} ->
135142
upsert_reconciliation_quota(identity, windows, identity_attrs, nil)
136143

@@ -153,7 +160,9 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
153160
{:windows, windows, %{}}
154161

155162
true ->
156-
codex_usage_quota_windows(identity, assignment, opts)
163+
identity
164+
|> codex_usage_quota_windows(assignment, opts)
165+
|> maybe_reuse_persisted_quota_windows(identity)
157166
end
158167
end
159168

@@ -208,8 +217,8 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
208217
{:error, {:upstream_status, status}} when status in [401, 403] ->
209218
maybe_retry_codex_usage_after_token_refresh(identity, assignment, observed_at, opts)
210219

211-
_error ->
212-
:usage_unavailable
220+
{:error, reason} ->
221+
{:usage_unavailable, reason}
213222
end
214223
else
215224
_unavailable -> :auth_unavailable
@@ -219,11 +228,40 @@ defmodule CodexPooler.Upstreams.Reconciliation.PoolReconciliation do
219228
defp maybe_retry_codex_usage_after_token_refresh(identity, assignment, observed_at, opts) do
220229
if access_token_refresh_due_after_usage_auth_failure?(identity, observed_at) do
221230
retry_codex_usage_after_token_refresh(identity, assignment, opts)
231+
else
232+
{:usage_unavailable, {:upstream_status, :auth_rejected}}
233+
end
234+
end
235+
236+
defp maybe_reuse_persisted_quota_windows(
237+
{:usage_unavailable, {:upstream_status, status}},
238+
_identity
239+
)
240+
when status in @fallback_denied_usage_statuses,
241+
do: :usage_unavailable
242+
243+
defp maybe_reuse_persisted_quota_windows({:usage_unavailable, _reason}, identity) do
244+
timestamp = now()
245+
246+
windows =
247+
identity
248+
|> Quota.Windows.list_quota_windows()
249+
|> Enum.filter(&reusable_persisted_quota_window?(&1, timestamp))
250+
251+
if windows != [] do
252+
{:persisted_windows, windows}
222253
else
223254
:usage_unavailable
224255
end
225256
end
226257

258+
defp maybe_reuse_persisted_quota_windows(result, _identity), do: result
259+
260+
defp reusable_persisted_quota_window?(window, timestamp) do
261+
(WindowClassifier.primary_5h?(window) or WindowClassifier.monthly_primary?(window)) and
262+
Quota.Windows.usable_window?(window, timestamp)
263+
end
264+
227265
defp access_token_refresh_due_after_usage_auth_failure?(
228266
%UpstreamIdentity{} = identity,
229267
%DateTime{} = observed_at

0 commit comments

Comments
 (0)