Skip to content

Commit 9799c43

Browse files
User search gateway routing (#699)
Co-authored-by: Kent C. Dodds <me+github@kentcdodds.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com>
1 parent 9d83556 commit 9799c43

7 files changed

Lines changed: 162 additions & 12 deletions

File tree

.env.example

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,9 @@ CLOUDFLARE_API_TOKEN=MOCK_CLOUDFLARE_API_TOKEN
105105
CLOUDFLARE_VECTORIZE_INDEX=MOCK_CLOUDFLARE_VECTORIZE_INDEX
106106
# Route Workers AI requests through Cloudflare AI Gateway (gateway name/id)
107107
CLOUDFLARE_AI_GATEWAY_ID=MOCK_CLOUDFLARE_AI_GATEWAY_ID
108-
# Optional: route embeddings through a different AI Gateway (for example,
109-
# without guardrails). Falls back to CLOUDFLARE_AI_GATEWAY_ID when omitted.
108+
# Optional: indexing/batch embedding jobs can use a different AI Gateway (for
109+
# example, without guardrails). Runtime user search queries still use
110+
# CLOUDFLARE_AI_GATEWAY_ID. Falls back to CLOUDFLARE_AI_GATEWAY_ID when omitted.
110111
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID=MOCK_CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID
111112
# AI Gateway Authenticated Gateway token (sent as `cf-aig-authorization`)
112113
CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN=MOCK_CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { expect, test, vi } from 'vitest'
2+
import { withEnv } from '#tests/with-env.ts'
3+
4+
// Swap the cache module for inert stand-ins so every lookup is recomputed.
vi.mock('#app/utils/cache.server.ts', () => {
	const cache = {
		name: 'test-cache',
		get: () => null,
		set: async () => {},
		delete: async () => {},
	}

	// Bypasses caching: always delegates straight to `getFreshValue`.
	async function cachified(options: {
		getFreshValue: () => Promise<unknown>
	}) {
		const { getFreshValue } = options
		return getFreshValue()
	}

	return { cache, cachified }
})

// The presentation lookup is stubbed to resolve to an empty object.
vi.mock('#app/utils/semantic-search-presentation.server.ts', () => {
	const getSemanticSearchPresentation = async () => ({})
	return { getSemanticSearchPresentation }
})
21+
22+
import { semanticSearchKCD } from '../semantic-search.server.ts'
23+
24+
test('semanticSearchKCD routes user query embeddings through CLOUDFLARE_AI_GATEWAY_ID', async () => {
	// Normalizes the first argument passed to `fetch` into a plain URL string.
	const toUrl = (input: Parameters<typeof fetch>[0]) =>
		input instanceof Request ? input.url : String(input)

	// Builds a 200 JSON response carrying the given payload.
	const jsonResponse = (payload: unknown) =>
		new Response(JSON.stringify(payload), {
			status: 200,
			headers: { 'Content-Type': 'application/json' },
		})

	// Two distinct gateway ids so the assertions can tell which one was used.
	const gatewayOverrides = {
		CLOUDFLARE_AI_GATEWAY_ID: 'runtime-search-gateway',
		CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: 'indexing-only-gateway',
	}

	await withEnv(gatewayOverrides, async () => {
		const fetchSpy = vi.spyOn(globalThis, 'fetch')
		fetchSpy.mockImplementation(async (input) => {
			const requestedUrl = toUrl(input)

			// Embedding request: answer with a single three-dimensional vector.
			if (requestedUrl.includes('/workers-ai/')) {
				return jsonResponse({
					result: {
						shape: [1, 3],
						data: [[0.1, 0.2, 0.3]],
					},
				})
			}

			// Vector index query: answer with no matches.
			if (requestedUrl.includes('/vectorize/')) {
				return jsonResponse({ result: { count: 0, matches: [] } })
			}

			// Any other network call is unexpected and should fail the test loudly.
			throw new Error(
				`Unexpected fetch URL in semantic search test: ${requestedUrl}`,
			)
		})

		try {
			await semanticSearchKCD({
				query: `Gateway regression test ${Date.now()}`,
				topK: 1,
			})

			const requestedUrls = fetchSpy.mock.calls.map(([input]) => toUrl(input))
			const embeddingRequestUrl = requestedUrls.find((candidate) =>
				candidate.includes('/workers-ai/'),
			)

			expect(embeddingRequestUrl).toBeDefined()
			expect(embeddingRequestUrl).toContain('/runtime-search-gateway/')
			expect(embeddingRequestUrl).not.toContain('/indexing-only-gateway/')
		} finally {
			// Always put the real `fetch` back, even when an assertion fails.
			fetchSpy.mockRestore()
		}
	})
})

app/utils/env.server.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ const schemaBase = z.object({
6868
/** AI Gateway "id" is the gateway name you create in Cloudflare. */
6969
CLOUDFLARE_AI_GATEWAY_ID: nonEmptyString,
7070
/**
71-
* Optional embedding-specific AI Gateway id.
71+
* Optional indexing/batch embedding AI Gateway id.
72+
* Runtime user search queries continue to use `CLOUDFLARE_AI_GATEWAY_ID`.
7273
* Falls back to `CLOUDFLARE_AI_GATEWAY_ID` when omitted.
7374
*/
7475
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: z.string().trim().optional(),
@@ -183,8 +184,9 @@ export type Env = Omit<
183184
*/
184185
CLOUDFLARE_AI_CALL_KENT_TRANSCRIPT_FORMAT_MODEL: string
185186
/**
186-
* Embeddings can be routed through a separate gateway (for example, with
187-
* guardrails disabled) without affecting other AI routes.
187+
* Indexing/batch embedding jobs can be routed through a separate gateway
188+
* (for example, with guardrails disabled). Runtime user search queries
189+
* should use `CLOUDFLARE_AI_GATEWAY_ID`.
188190
*/
189191
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: string
190192
/** Derived from CLOUDFLARE_ACCOUNT_ID when not explicitly set. */

app/utils/semantic-search.server.ts

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ function getRequiredSemanticSearchEnv() {
178178
return {
179179
accountId: env.CLOUDFLARE_ACCOUNT_ID,
180180
apiToken: env.CLOUDFLARE_API_TOKEN,
181-
embeddingGatewayId: env.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID,
181+
gatewayId: env.CLOUDFLARE_AI_GATEWAY_ID,
182182
gatewayAuthToken: env.CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN,
183183
indexName: env.CLOUDFLARE_VECTORIZE_INDEX,
184184
embeddingModel: env.CLOUDFLARE_AI_EMBEDDING_MODEL,
@@ -221,21 +221,21 @@ async function cloudflareFetch(
221221
async function getEmbedding({
222222
accountId,
223223
apiToken,
224-
embeddingGatewayId,
224+
gatewayId,
225225
gatewayAuthToken,
226226
model,
227227
text,
228228
}: {
229229
accountId: string
230230
apiToken: string
231-
embeddingGatewayId: string
231+
gatewayId: string
232232
gatewayAuthToken: string
233233
model: string
234234
text: string
235235
}) {
236236
const url = getWorkersAiRunUrl({
237237
accountId,
238-
gatewayId: embeddingGatewayId,
238+
gatewayId,
239239
model,
240240
})
241241
const res = await fetch(url, {
@@ -496,7 +496,7 @@ export async function semanticSearchKCD({
496496
const {
497497
accountId,
498498
apiToken,
499-
embeddingGatewayId,
499+
gatewayId,
500500
gatewayAuthToken,
501501
indexName,
502502
embeddingModel,
@@ -533,7 +533,7 @@ export async function semanticSearchKCD({
533533
const vector = await getEmbedding({
534534
accountId,
535535
apiToken,
536-
embeddingGatewayId,
536+
gatewayId,
537537
gatewayAuthToken,
538538
model: embeddingModel,
539539
text: cleanedQuery,
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import { expect, test } from 'vitest'
2+
import { withEnv } from '#tests/with-env.ts'
3+
import { getCloudflareConfig } from '../cloudflare.ts'
4+
5+
// Settings shared by both scenarios; only the embedding gateway override differs.
const commonCloudflareEnv = {
	CLOUDFLARE_ACCOUNT_ID: 'cf-account',
	CLOUDFLARE_API_TOKEN: 'cf-token',
	CLOUDFLARE_AI_GATEWAY_ID: 'runtime-gateway',
	CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN: 'gateway-auth-token',
	CLOUDFLARE_VECTORIZE_INDEX: 'vector-index',
}

test('getCloudflareConfig prefers embedding gateway for indexing when configured', async () => {
	const overrides = {
		...commonCloudflareEnv,
		CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: 'indexing-gateway',
	}

	await withEnv(overrides, () => {
		const { gatewayId } = getCloudflareConfig()
		expect(gatewayId).toBe('indexing-gateway')
	})
})

test('getCloudflareConfig falls back to regular gateway when embedding override is unset', async () => {
	const overrides = {
		...commonCloudflareEnv,
		// `undefined` asks withEnv to remove the variable for the duration of the test.
		CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: undefined,
	}

	await withEnv(overrides, () => {
		const { gatewayId } = getCloudflareConfig()
		expect(gatewayId).toBe('runtime-gateway')
	})
})

other/semantic-search/readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ and shared utilities.
2424
- `CLOUDFLARE_API_TOKEN`
2525
- `CLOUDFLARE_VECTORIZE_INDEX`
2626
- `CLOUDFLARE_AI_EMBEDDING_MODEL` (optional; defaults in code)
27-
- `CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID` (optional; defaults to `CLOUDFLARE_AI_GATEWAY_ID`)
27+
- `CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID` (optional; indexers only; defaults to `CLOUDFLARE_AI_GATEWAY_ID`)
2828

2929
- `R2_BUCKET`
3030

tests/with-env.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/**
 * Runs `callback` with temporary `process.env` overrides, then restores the
 * previous values, even when the callback throws or rejects.
 *
 * @param overrides - Variables to apply while the callback runs. A value of
 *   `undefined` removes the variable for that duration.
 * @param callback - Sync or async function executed with the overrides applied.
 * @returns The value the callback returned or resolved to.
 */
export async function withEnv<T = void>(
	overrides: Record<string, string | undefined>,
	callback: () => Promise<T> | T,
): Promise<Awaited<T>> {
	const env = process.env as Record<string, string | undefined>

	// Unset variables are deleted rather than assigned: writing `undefined` to
	// `process.env` would store the string "undefined".
	const assign = (key: string, value: string | undefined) => {
		if (value === undefined) {
			delete env[key]
		} else {
			env[key] = value
		}
	}

	// Snapshot every overridden key before touching it; `undefined` in the
	// snapshot means "was not set".
	const previous = new Map<string, string | undefined>()
	for (const [key, value] of Object.entries(overrides)) {
		previous.set(key, env[key])
		assign(key, value)
	}

	try {
		return await callback()
	} finally {
		for (const [key, value] of previous) {
			assign(key, value)
		}
	}
}

0 commit comments

Comments
 (0)