Skip to content

Commit b9a8428

Browse files
committed
feat(api): add gemini ai models
1 parent 05f7d8c commit b9a8428

22 files changed

Lines changed: 3246 additions & 10 deletions

apps/api/.dev.vars.example

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,5 @@ SES_DEFAULT_FROM=CHANGE_ME
3434
OPENAI_API_KEY=CHANGE_ME
3535

3636
ANTHROPIC_API_KEY=CHANGE_ME
37+
38+
GEMINI_API_KEY=CHANGE_ME

apps/api/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
"@dafthunk/types": "workspace:*",
5252
"@dafthunk/utils": "workspace:*",
5353
"@gltf-transform/core": "^4.2.1",
54+
"@google/genai": "^1.16.0",
5455
"@hono-rate-limiter/cloudflare": "^0.2.2",
5556
"@hono/oauth-providers": "^0.7.1",
5657
"@hono/zod-validator": "^0.5.0",

apps/api/src/context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export interface Bindings {
3939
SES_DEFAULT_FROM?: string;
4040
OPENAI_API_KEY?: string;
4141
ANTHROPIC_API_KEY?: string;
42+
GEMINI_API_KEY?: string;
4243
}
4344

4445
export interface Variables {

apps/api/src/nodes/cloudflare-node-registry.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ import { ReceiveEmailNode } from "./email/receive-email-node";
2828
import { SendEmailSendgridNode } from "./email/send-emai-sendgrid-node";
2929
import { SendEmailResendNode } from "./email/send-email-resend-node";
3030
import { SendEmailSesNode } from "./email/send-email-ses-node";
31+
import { Gemini25FlashAudioUnderstandingNode } from "./gemini/gemini-2-5-flash-audio-understanding-node";
32+
import { Gemini25FlashImagePreviewNode } from "./gemini/gemini-2-5-flash-image-preview-node";
33+
import { Gemini25FlashImageUnderstandingNode } from "./gemini/gemini-2-5-flash-image-understanding-node";
34+
import { Gemini25FlashNode } from "./gemini/gemini-2-5-flash-node";
35+
import { Gemini25FlashTtsNode } from "./gemini/gemini-2-5-flash-tts-node";
36+
import { Gemini25ProNode } from "./gemini/gemini-2-5-pro-node";
37+
import { ImagenNode } from "./gemini/imagen-node";
3138
import { AlongNode } from "./geo/along-node";
3239
import { AngleNode } from "./geo/angle-node";
3340
import { AreaNode } from "./geo/area-node";
@@ -283,6 +290,7 @@ export class CloudflareNodeRegistry extends BaseNodeRegistry {
283290
);
284291
const hasOpenAI = !!this.env.OPENAI_API_KEY;
285292
const hasAnthropic = !!this.env.ANTHROPIC_API_KEY;
293+
const hasGemini = !!this.env.GEMINI_API_KEY;
286294

287295
// Register all core nodes
288296
this.registerImplementation(FormDataStringNode);
@@ -588,5 +596,16 @@ export class CloudflareNodeRegistry extends BaseNodeRegistry {
588596
this.registerImplementation(Claude35HaikuNode);
589597
this.registerImplementation(Claude3OpusNode);
590598
}
599+
600+
// Google Gemini nodes
601+
if (hasGemini) {
602+
this.registerImplementation(Gemini25FlashNode);
603+
this.registerImplementation(Gemini25ProNode);
604+
this.registerImplementation(Gemini25FlashImagePreviewNode);
605+
this.registerImplementation(Gemini25FlashTtsNode);
606+
this.registerImplementation(Gemini25FlashAudioUnderstandingNode);
607+
this.registerImplementation(Gemini25FlashImageUnderstandingNode);
608+
this.registerImplementation(ImagenNode);
609+
}
591610
}
592611
}
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
import { Node } from "@dafthunk/types";
2+
import { describe, expect, it, vi } from "vitest";
3+
4+
import { NodeContext } from "../types";
5+
import { Gemini25FlashAudioUnderstandingNode } from "./gemini-2-5-flash-audio-understanding-node";
6+
7+
// Replace the Gemini SDK with a canned client so the tests never reach the
// network. Vitest hoists `vi.mock` calls to the top of the module regardless of
// where they are written, so the mock is declared at the top level where its
// file-wide effect is visible.
vi.mock("@google/genai", () => ({
  GoogleGenAI: class MockGoogleGenAI {
    models = {
      // Every call resolves to the same single-candidate response.
      generateContent: vi.fn().mockResolvedValue({
        candidates: [
          {
            content: {
              parts: [
                {
                  text: "This is a transcript of the audio content. The speaker discusses various topics including technology and innovation.",
                },
              ],
            },
            finishReason: "STOP",
          },
        ],
        usageMetadata: {
          promptTokenCount: 150,
          candidatesTokenCount: 25,
          totalTokenCount: 175,
        },
      }),
    };
  },
}));

/**
 * Unit tests for Gemini25FlashAudioUnderstandingNode.
 *
 * The Gemini client is mocked above, so these tests cover the node's input
 * validation and how it maps the canned response onto `text` and
 * `finish_reason` outputs. They do not exercise the real API.
 */
describe("Gemini25FlashAudioUnderstandingNode", () => {
  const nodeId = "gemini-2-5-flash-audio-understanding";
  const node = new Gemini25FlashAudioUnderstandingNode({
    nodeId,
  } as unknown as Node);

  /**
   * Builds a minimal execution context around the given node inputs.
   *
   * @param inputs - Values exposed to the node as its inputs.
   * @param envOverrides - Environment entries that replace the stub defaults,
   *   e.g. to blank out the API key.
   */
  const createContext = (
    inputs: Record<string, unknown>,
    envOverrides: Partial<NodeContext["env"]> = {}
  ): NodeContext =>
    ({
      nodeId: "test",
      inputs,
      workflowId: "test",
      organizationId: "test-org",
      env: {
        DB: {} as any,
        AI: {} as any,
        AI_OPTIONS: {},
        RESSOURCES: {} as any,
        DATASETS: {} as any,
        DATASETS_AUTORAG: "",
        EMAIL_DOMAIN: "",
        CLOUDFLARE_ACCOUNT_ID: "",
        CLOUDFLARE_API_TOKEN: "",
        CLOUDFLARE_AI_GATEWAY_ID: "",
        TWILIO_ACCOUNT_SID: "",
        TWILIO_AUTH_TOKEN: "",
        TWILIO_PHONE_NUMBER: "",
        SENDGRID_API_KEY: "",
        SENDGRID_DEFAULT_FROM: "",
        RESEND_API_KEY: "",
        RESEND_DEFAULT_FROM: "",
        AWS_ACCESS_KEY_ID: "",
        AWS_SECRET_ACCESS_KEY: "",
        AWS_REGION: "",
        SES_DEFAULT_FROM: "",
        OPENAI_API_KEY: "",
        ANTHROPIC_API_KEY: "",
        GEMINI_API_KEY: "test",
        ...envOverrides,
      },
    }) as unknown as NodeContext;

  /** Returns a tiny audio payload tagged with the requested MIME type. */
  const createMockAudio = (mimeType = "audio/wav") => ({
    data: new Uint8Array([1, 2, 3, 4, 5]), // Mock audio data
    mimeType,
  });

  describe("execute", () => {
    // The prompt is always supplied: the node has no default prompt (see the
    // "prompt is missing" case below).
    it("should transcribe audio when given a transcription prompt", async () => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio(),
          prompt: "Transcribe this audio",
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
      expect(result.outputs?.text).toContain("This is a transcript");
      expect(result.outputs?.finish_reason).toBe("STOP");
    });

    it("should analyze audio with custom prompt", async () => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio("audio/mp3"),
          prompt: "Describe what you hear in this audio clip",
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
      expect(result.outputs?.text).toContain("speaker discusses");
    });

    it("should handle timestamp-based analysis", async () => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio(),
          prompt: "Provide a transcript from 02:30 to 03:29",
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
    });

    it("should handle thinking budget configuration", async () => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio(),
          prompt: "Analyze this audio content",
          thinking_budget: 500,
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
    });

    it("should return error when audio is missing", async () => {
      const result = await node.execute(
        createContext({
          prompt: "Transcribe this audio",
        })
      );

      expect(result.status).toBe("error");
      expect(result.error).toContain("Audio input is required");
    });

    it("should return error when prompt is missing", async () => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio(),
        })
      );

      expect(result.status).toBe("error");
      expect(result.error).toContain("Prompt is required");
    });

    it("should return error when API key is missing", async () => {
      // Blank the key through the factory instead of mutating the context
      // after it has been built.
      const result = await node.execute(
        createContext(
          {
            audio: createMockAudio(),
            prompt: "Transcribe this audio",
          },
          { GEMINI_API_KEY: "" }
        )
      );

      expect(result.status).toBe("error");
      expect(result.error).toContain("GEMINI_API_KEY is not configured");
    });

    // One test case per format, so a failure names the offending MIME type
    // and does not hide the formats that come after it.
    it.each([
      "audio/wav",
      "audio/mp3",
      "audio/aiff",
      "audio/aac",
      "audio/ogg",
      "audio/flac",
    ])("should handle %s audio", async (format) => {
      const result = await node.execute(
        createContext({
          audio: createMockAudio(format),
          prompt: "Transcribe this audio",
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
    });

    it("should handle large audio files without stack overflow", async () => {
      // 100KB payload to exercise the base64 conversion. The bytes follow a
      // fixed pattern rather than Math.random() so the run is reproducible.
      const largeAudioData = Uint8Array.from(
        { length: 100000 },
        (_, index) => index % 256
      );

      const result = await node.execute(
        createContext({
          audio: {
            data: largeAudioData,
            mimeType: "audio/wav",
          },
          prompt: "Transcribe this audio",
        })
      );

      expect(result.status).toBe("completed");
      expect(result.outputs?.text).toBeDefined();
    });
  });
});

0 commit comments

Comments
 (0)