Skip to content

Commit a9f711e

Browse files
authored
feat: auto failover for API with LK Cloud (#936)
* feat: auto failover for API with LK Cloud — automatically fail over on 5xx and transport errors
* fix codeql perms
* tweaks
* address feedback
* clean up failover logic, handle per-attempt timeout
* sane default timeouts for CreateSIPParticipant
1 parent 102ddcc commit a9f711e

16 files changed

Lines changed: 1134 additions & 215 deletions

.github/workflows/test-api.yml

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2026 LiveKit, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
name: Test API
16+
17+
permissions:
18+
contents: read
19+
20+
on:
21+
workflow_dispatch:
22+
push:
23+
branches: [main]
24+
pull_request:
25+
branches: [main]
26+
27+
jobs:
28+
failover:
29+
runs-on: ubuntu-latest
30+
services:
31+
mock-server:
32+
image: livekit/test-server:latest
33+
ports:
34+
- 9999:9999
35+
- 10000:10000
36+
- 10001:10001
37+
- 10002:10002
38+
steps:
39+
- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6
40+
41+
- name: Set up Opus
42+
run: |
43+
sudo apt-get update
44+
sudo apt-get install -y libsoxr-dev libopus-dev libopusfile-dev
45+
46+
- name: Set up Go
47+
uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6
48+
with:
49+
go-version: 1.26.0
50+
51+
- name: Wait for mock server
52+
run: |
53+
for i in $(seq 1 30); do
54+
curl -sf http://127.0.0.1:9999/settings/regions >/dev/null && exit 0
55+
sleep 1
56+
done
57+
echo "mock server did not become ready" && exit 1
58+
59+
- name: Run API tests
60+
run: go test -v -count=1 -run '^TestAPI_' .

agent_client.go

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func NewAgentClient(url string, apiKey string, apiSecret string, opts ...AgentCl
2929
}
3030
c := &AgentClient{
3131
authBase: authBase{apiKey, apiSecret},
32-
httpClient: &http.Client{},
32+
httpClient: newAPIHTTPClient(),
3333
}
3434
for _, opt := range opts {
3535
opt(c)
@@ -53,143 +53,143 @@ func WithTwirpClientOptions(opts ...twirp.ClientOption) AgentClientOption {
5353
}
5454

5555
func (c *AgentClient) CreateAgent(ctx context.Context, req *livekit.CreateAgentRequest) (*livekit.CreateAgentResponse, error) {
56-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
56+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
5757
if err != nil {
5858
return nil, err
5959
}
6060
return c.agentClient.CreateAgent(ctx, req)
6161
}
6262

6363
func (c *AgentClient) CreateAgentV2(ctx context.Context, req *livekit.CreateAgentV2Request) (*livekit.CreateAgentV2Response, error) {
64-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
64+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
6565
if err != nil {
6666
return nil, err
6767
}
6868
return c.agentClient.CreateAgentV2(ctx, req)
6969
}
7070

7171
func (c *AgentClient) ListAgents(ctx context.Context, req *livekit.ListAgentsRequest) (*livekit.ListAgentsResponse, error) {
72-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
72+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
7373
if err != nil {
7474
return nil, err
7575
}
7676
return c.agentClient.ListAgents(ctx, req)
7777
}
7878

7979
func (c *AgentClient) ListAgentVersions(ctx context.Context, req *livekit.ListAgentVersionsRequest) (*livekit.ListAgentVersionsResponse, error) {
80-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
80+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
8181
if err != nil {
8282
return nil, err
8383
}
8484
return c.agentClient.ListAgentVersions(ctx, req)
8585
}
8686

8787
func (c *AgentClient) DeleteAgent(ctx context.Context, req *livekit.DeleteAgentRequest) (*livekit.DeleteAgentResponse, error) {
88-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
88+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
8989
if err != nil {
9090
return nil, err
9191
}
9292
return c.agentClient.DeleteAgent(ctx, req)
9393
}
9494

9595
func (c *AgentClient) UpdateAgent(ctx context.Context, req *livekit.UpdateAgentRequest) (*livekit.UpdateAgentResponse, error) {
96-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
96+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
9797
if err != nil {
9898
return nil, err
9999
}
100100
return c.agentClient.UpdateAgent(ctx, req)
101101
}
102102

103103
func (c *AgentClient) RestartAgent(ctx context.Context, req *livekit.RestartAgentRequest) (*livekit.RestartAgentResponse, error) {
104-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
104+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
105105
if err != nil {
106106
return nil, err
107107
}
108108
return c.agentClient.RestartAgent(ctx, req)
109109
}
110110

111111
func (c *AgentClient) RollbackAgent(ctx context.Context, req *livekit.RollbackAgentRequest) (*livekit.RollbackAgentResponse, error) {
112-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
112+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
113113
if err != nil {
114114
return nil, err
115115
}
116116
return c.agentClient.RollbackAgent(ctx, req)
117117
}
118118

119119
func (c *AgentClient) ListAgentSecrets(ctx context.Context, req *livekit.ListAgentSecretsRequest) (*livekit.ListAgentSecretsResponse, error) {
120-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
120+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
121121
if err != nil {
122122
return nil, err
123123
}
124124
return c.agentClient.ListAgentSecrets(ctx, req)
125125
}
126126

127127
func (c *AgentClient) UpdateAgentSecrets(ctx context.Context, req *livekit.UpdateAgentSecretsRequest) (*livekit.UpdateAgentSecretsResponse, error) {
128-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
128+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
129129
if err != nil {
130130
return nil, err
131131
}
132132
return c.agentClient.UpdateAgentSecrets(ctx, req)
133133
}
134134

135135
func (c *AgentClient) DeployAgent(ctx context.Context, req *livekit.DeployAgentRequest) (*livekit.DeployAgentResponse, error) {
136-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
136+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
137137
if err != nil {
138138
return nil, err
139139
}
140140
return c.agentClient.DeployAgent(ctx, req)
141141
}
142142

143143
func (c *AgentClient) DeployAgentV2(ctx context.Context, req *livekit.DeployAgentV2Request) (*livekit.DeployAgentV2Response, error) {
144-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
144+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
145145
if err != nil {
146146
return nil, err
147147
}
148148
return c.agentClient.DeployAgentV2(ctx, req)
149149
}
150150

151151
func (c *AgentClient) PromoteAgent(ctx context.Context, req *livekit.PromoteAgentRequest) (*livekit.PromoteAgentResponse, error) {
152-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
152+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
153153
if err != nil {
154154
return nil, err
155155
}
156156
return c.agentClient.PromoteAgent(ctx, req)
157157
}
158158

159159
func (c *AgentClient) GetClientSettings(ctx context.Context, req *livekit.ClientSettingsRequest) (*livekit.ClientSettingsResponse, error) {
160-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
160+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
161161
if err != nil {
162162
return nil, err
163163
}
164164
return c.agentClient.GetClientSettings(ctx, req)
165165
}
166166

167167
func (c *AgentClient) CreatePrivateLink(ctx context.Context, req *livekit.CreatePrivateLinkRequest) (*livekit.CreatePrivateLinkResponse, error) {
168-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
168+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
169169
if err != nil {
170170
return nil, err
171171
}
172172
return c.agentClient.CreatePrivateLink(ctx, req)
173173
}
174174

175175
func (c *AgentClient) DestroyPrivateLink(ctx context.Context, req *livekit.DestroyPrivateLinkRequest) (*livekit.DestroyPrivateLinkResponse, error) {
176-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
176+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
177177
if err != nil {
178178
return nil, err
179179
}
180180
return c.agentClient.DestroyPrivateLink(ctx, req)
181181
}
182182

183183
func (c *AgentClient) ListPrivateLinks(ctx context.Context, req *livekit.ListPrivateLinksRequest) (*livekit.ListPrivateLinksResponse, error) {
184-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
184+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
185185
if err != nil {
186186
return nil, err
187187
}
188188
return c.agentClient.ListPrivateLinks(ctx, req)
189189
}
190190

191191
func (c *AgentClient) GetPrivateLinkStatus(ctx context.Context, req *livekit.GetPrivateLinkStatusRequest) (*livekit.GetPrivateLinkStatusResponse, error) {
192-
ctx, err := c.withAuth(ctx, withAgentGrant{Admin: true})
192+
ctx, err := c.prepareContext(ctx, withAgentGrant{Admin: true})
193193
if err != nil {
194194
return nil, err
195195
}

agent_dispatch_client.go

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ package lksdk
1616

1717
import (
1818
"context"
19-
"net/http"
2019

2120
"github.com/twitchtv/twirp"
2221

@@ -31,7 +30,7 @@ type AgentDispatchClient struct {
3130

3231
func NewAgentDispatchServiceClient(url string, apiKey string, secretKey string, opts ...twirp.ClientOption) *AgentDispatchClient {
3332
url = signalling.ToHttpURL(url)
34-
client := livekit.NewAgentDispatchServiceProtobufClient(url, &http.Client{}, opts...)
33+
client := livekit.NewAgentDispatchServiceProtobufClient(url, newAPIHTTPClient(), opts...)
3534

3635
return &AgentDispatchClient{
3736
agentDispatchService: client,
@@ -43,7 +42,7 @@ func NewAgentDispatchServiceClient(url string, apiKey string, secretKey string,
4342
}
4443

4544
func (c *AgentDispatchClient) CreateDispatch(ctx context.Context, req *livekit.CreateAgentDispatchRequest) (*livekit.AgentDispatch, error) {
46-
ctx, err := c.withAuth(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
45+
ctx, err := c.prepareContext(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
4746
if err != nil {
4847
return nil, err
4948
}
@@ -52,7 +51,7 @@ func (c *AgentDispatchClient) CreateDispatch(ctx context.Context, req *livekit.C
5251
}
5352

5453
func (c *AgentDispatchClient) DeleteDispatch(ctx context.Context, req *livekit.DeleteAgentDispatchRequest) (*livekit.AgentDispatch, error) {
55-
ctx, err := c.withAuth(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
54+
ctx, err := c.prepareContext(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
5655
if err != nil {
5756
return nil, err
5857
}
@@ -61,7 +60,7 @@ func (c *AgentDispatchClient) DeleteDispatch(ctx context.Context, req *livekit.D
6160
}
6261

6362
func (c *AgentDispatchClient) ListDispatch(ctx context.Context, req *livekit.ListAgentDispatchRequest) (*livekit.ListAgentDispatchResponse, error) {
64-
ctx, err := c.withAuth(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
63+
ctx, err := c.prepareContext(ctx, withVideoGrant{RoomAdmin: true, Room: req.Room})
6564
if err != nil {
6665
return nil, err
6766
}

agent_simulation_client.go

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ package lksdk
1616

1717
import (
1818
"context"
19-
"net/http"
2019

2120
"github.com/twitchtv/twirp"
2221

@@ -29,55 +28,55 @@ type AgentSimulationClient struct {
2928
}
3029

3130
func NewAgentSimulationClient(url string, apiKey string, apiSecret string, opts ...twirp.ClientOption) *AgentSimulationClient {
32-
client := livekit.NewAgentSimulationProtobufClient(url, &http.Client{}, opts...)
31+
client := livekit.NewAgentSimulationProtobufClient(url, newAPIHTTPClient(), opts...)
3332
return &AgentSimulationClient{
3433
simulationClient: client,
3534
authBase: authBase{apiKey, apiSecret},
3635
}
3736
}
3837

3938
func (c *AgentSimulationClient) CreateSimulationRun(ctx context.Context, req *livekit.SimulationRun_Create_Request) (*livekit.SimulationRun_Create_Response, error) {
40-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
39+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
4140
if err != nil {
4241
return nil, err
4342
}
4443
return c.simulationClient.CreateSimulationRun(ctx, req)
4544
}
4645

4746
func (c *AgentSimulationClient) ConfirmSimulationSourceUpload(ctx context.Context, req *livekit.SimulationRun_ConfirmSourceUpload_Request) (*livekit.SimulationRun_ConfirmSourceUpload_Response, error) {
48-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
47+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
4948
if err != nil {
5049
return nil, err
5150
}
5251
return c.simulationClient.ConfirmSimulationSourceUpload(ctx, req)
5352
}
5453

5554
func (c *AgentSimulationClient) GetSimulationRun(ctx context.Context, req *livekit.SimulationRun_Get_Request) (*livekit.SimulationRun_Get_Response, error) {
56-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
55+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
5756
if err != nil {
5857
return nil, err
5958
}
6059
return c.simulationClient.GetSimulationRun(ctx, req)
6160
}
6261

6362
func (c *AgentSimulationClient) ListSimulationRuns(ctx context.Context, req *livekit.SimulationRun_List_Request) (*livekit.SimulationRun_List_Response, error) {
64-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
63+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
6564
if err != nil {
6665
return nil, err
6766
}
6867
return c.simulationClient.ListSimulationRuns(ctx, req)
6968
}
7069

7170
func (c *AgentSimulationClient) CancelSimulationRun(ctx context.Context, req *livekit.SimulationRun_Cancel_Request) (*livekit.SimulationRun_Cancel_Response, error) {
72-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
71+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
7372
if err != nil {
7473
return nil, err
7574
}
7675
return c.simulationClient.CancelSimulationRun(ctx, req)
7776
}
7877

7978
func (c *AgentSimulationClient) CreateScenarioFromSession(ctx context.Context, req *livekit.Scenario_CreateFromSession_Request) (*livekit.Scenario_CreateFromSession_Response, error) {
80-
ctx, err := c.withAuth(ctx, withAgentGrant{SimulationAdmin: true})
79+
ctx, err := c.prepareContext(ctx, withAgentGrant{SimulationAdmin: true})
8180
if err != nil {
8281
return nil, err
8382
}

auth.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@ func (g withAgentGrant) Apply(t *auth.AccessToken) {
5252
t.SetAgentGrant((*auth.AgentGrant)(&g))
5353
}
5454

55-
func (b authBase) withAuth(ctx context.Context, opt authOption, options ...authOption) (context.Context, error) {
55+
// prepareContext builds the context for an outgoing API request: it signs an
56+
// access token for the given grants and attaches it as a request header, then
57+
// detaches a long-enough deadline so failover can reset it per attempt (see
58+
// withFailoverTimeout).
59+
func (b authBase) prepareContext(ctx context.Context, opt authOption, options ...authOption) (context.Context, error) {
5660
at := auth.NewAccessToken(b.apiKey, b.apiSecret)
5761
opt.Apply(at)
5862
for _, opt := range options {
@@ -78,5 +82,10 @@ func (b authBase) withAuth(ctx context.Context, opt authOption, options ...authO
7882
}
7983
}
8084

85+
// Detach a long-enough deadline so it isn't enforced across failover retries
86+
// (twirp re-checks the context after each request); the transport re-applies
87+
// the budget per attempt. A no-op for short or deadline-free requests.
88+
ctx = withFailoverTimeout(ctx)
89+
8190
return twirp.WithHTTPRequestHeaders(ctx, ctxH)
8291
}

0 commit comments

Comments
 (0)