Skip to content

Commit bd3270b

Browse files
committed
Refactor Cloudflare AI gateway handling
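Updated the Cloudflare AI utility functions and configuration to support a dedicated embedding gateway ID (`CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID`). The changes add a helper that detects whether the requested model is the embedding model (`embeddinggemma-300m`), so that `getWorkersAiRunUrl` routes those requests through the embedding gateway unless an explicit `gatewayId` is passed, and they expose the embedding gateway ID as a secret to the semantic-indexing workflow files. Tests were also updated to verify that `getCloudflareConfig` throws a "Missing required env var" error when the embedding gateway ID is not set, instead of falling back to the regular gateway.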
1 parent eed6527 commit bd3270b

7 files changed

Lines changed: 64 additions & 10 deletions

File tree

.github/workflows/index-semantic-content.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ jobs:
8585
CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
8686
CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
8787
CLOUDFLARE_AI_GATEWAY_ID: ${{ secrets.CLOUDFLARE_AI_GATEWAY_ID }}
88+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID:
89+
${{ secrets.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID }}
8890
CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN:
8991
${{ secrets.CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN }}
9092
CLOUDFLARE_VECTORIZE_INDEX: ${{ secrets.CLOUDFLARE_VECTORIZE_INDEX }}

.github/workflows/index-semantic-podcasts.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ jobs:
3131
CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
3232
CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
3333
CLOUDFLARE_AI_GATEWAY_ID: ${{ secrets.CLOUDFLARE_AI_GATEWAY_ID }}
34+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID:
35+
${{ secrets.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID }}
3436
CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN:
3537
${{ secrets.CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN }}
3638
CLOUDFLARE_VECTORIZE_INDEX: ${{ secrets.CLOUDFLARE_VECTORIZE_INDEX }}

.github/workflows/index-semantic-youtube.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ jobs:
4343
CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
4444
CLOUDFLARE_API_TOKEN: ${{ secrets.CLOUDFLARE_API_TOKEN }}
4545
CLOUDFLARE_AI_GATEWAY_ID: ${{ secrets.CLOUDFLARE_AI_GATEWAY_ID }}
46+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID:
47+
${{ secrets.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID }}
4648
CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN:
4749
${{ secrets.CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN }}
4850
CLOUDFLARE_VECTORIZE_INDEX: ${{ secrets.CLOUDFLARE_VECTORIZE_INDEX }}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { expect, test } from 'vitest'
2+
import { setEnv } from '#tests/env-disposable.ts'
3+
import { getWorkersAiRunUrl } from '../cloudflare-ai-utils.server.ts'
4+
5+
test('getWorkersAiRunUrl routes embeddinggemma requests through CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID', () => {
6+
using ignoredEnv = setEnv({
7+
CLOUDFLARE_ACCOUNT_ID: 'cf-account',
8+
CLOUDFLARE_AI_GATEWAY_ID: 'runtime-gateway',
9+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: 'embedding-gateway',
10+
})
11+
12+
const url = getWorkersAiRunUrl('@cf/google/embeddinggemma-300m')
13+
expect(url).toContain('/embedding-gateway/')
14+
expect(url).not.toContain('/runtime-gateway/')
15+
})
16+
17+
test('getWorkersAiRunUrl keeps non-embedding models on CLOUDFLARE_AI_GATEWAY_ID', () => {
18+
using ignoredEnv = setEnv({
19+
CLOUDFLARE_ACCOUNT_ID: 'cf-account',
20+
CLOUDFLARE_AI_GATEWAY_ID: 'runtime-gateway',
21+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: 'embedding-gateway',
22+
})
23+
24+
const url = getWorkersAiRunUrl('@cf/openai/whisper-large-v3-turbo')
25+
expect(url).toContain('/runtime-gateway/')
26+
expect(url).not.toContain('/embedding-gateway/')
27+
})
28+
29+
test('getWorkersAiRunUrl prefers explicit gatewayId overrides', () => {
30+
using ignoredEnv = setEnv({
31+
CLOUDFLARE_ACCOUNT_ID: 'cf-account',
32+
CLOUDFLARE_AI_GATEWAY_ID: 'runtime-gateway',
33+
CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID: 'embedding-gateway',
34+
})
35+
36+
const url = getWorkersAiRunUrl({
37+
model: '@cf/google/embeddinggemma-300m',
38+
gatewayId: 'explicit-gateway',
39+
})
40+
expect(url).toContain('/explicit-gateway/')
41+
expect(url).not.toContain('/embedding-gateway/')
42+
})

app/utils/cloudflare-ai-utils.server.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,24 @@ type WorkersAiRunUrlOptions = {
66
gatewayId?: string
77
}
88

9+
const EMBEDDING_GEMMA_MODEL_SEGMENT = 'embeddinggemma-300m'
10+
11+
function usesEmbeddingGemmaModel(model: string) {
12+
return model.includes(EMBEDDING_GEMMA_MODEL_SEGMENT)
13+
}
14+
915
export function getWorkersAiRunUrl(options: string | WorkersAiRunUrlOptions) {
1016
const { model, accountId, gatewayId } =
1117
typeof options === 'string' ? { model: options } : options
1218
// Cloudflare's REST route expects the model as path segments (with `/`), so do
1319
// not URL-encode the model string (encoding can yield "No route for that URI").
1420
const env = getEnv()
1521
const resolvedAccountId = accountId ?? env.CLOUDFLARE_ACCOUNT_ID
16-
const resolvedGatewayId = gatewayId ?? env.CLOUDFLARE_AI_GATEWAY_ID
22+
const resolvedGatewayId =
23+
gatewayId ??
24+
(usesEmbeddingGemmaModel(model)
25+
? env.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID
26+
: env.CLOUDFLARE_AI_GATEWAY_ID)
1727
return `https://gateway.ai.cloudflare.com/v1/${resolvedAccountId}/${resolvedGatewayId}/workers-ai/${model}`
1828
}
1929

other/semantic-search/__tests__/cloudflare-config.test.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ test('getCloudflareConfig prefers embedding gateway for indexing when configured
1616
expect(config.gatewayId).toBe('indexing-gateway')
1717
})
1818

19-
test('getCloudflareConfig falls back to regular gateway when embedding override is unset', () => {
19+
test('getCloudflareConfig throws when embedding gateway is unset', () => {
2020
using ignoredEnv = setEnv({
2121
CLOUDFLARE_ACCOUNT_ID: 'cf-account',
2222
CLOUDFLARE_API_TOKEN: 'cf-token',
@@ -26,6 +26,7 @@ test('getCloudflareConfig falls back to regular gateway when embedding override
2626
CLOUDFLARE_VECTORIZE_INDEX: 'vector-index',
2727
})
2828

29-
const config = getCloudflareConfig()
30-
expect(config.gatewayId).toBe('runtime-gateway')
29+
expect(() => getCloudflareConfig()).toThrow(
30+
'Missing required env var: CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID',
31+
)
3132
})

other/semantic-search/cloudflare.ts

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,10 @@ function getRequiredEnv(name: string) {
1414
}
1515

1616
export function getCloudflareConfig() {
17-
const defaultGatewayId = getRequiredEnv('CLOUDFLARE_AI_GATEWAY_ID')
18-
const embeddingGatewayId =
19-
process.env.CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID?.trim() || defaultGatewayId
20-
2117
return {
2218
accountId: getRequiredEnv('CLOUDFLARE_ACCOUNT_ID'),
2319
apiToken: getRequiredEnv('CLOUDFLARE_API_TOKEN'),
24-
// Embedding jobs can use a dedicated gateway without guardrails.
25-
gatewayId: embeddingGatewayId,
20+
gatewayId: getRequiredEnv('CLOUDFLARE_AI_EMBEDDING_GATEWAY_ID'),
2621
gatewayAuthToken: getRequiredEnv('CLOUDFLARE_AI_GATEWAY_AUTH_TOKEN'),
2722
vectorizeIndex: getRequiredEnv('CLOUDFLARE_VECTORIZE_INDEX'),
2823
embeddingModel:

0 commit comments

Comments
 (0)