dafthunk-com
diff --git a/apps/api/.dev.vars.example b/apps/api/.dev.vars.example
Lines changed: 2 additions & 0 deletions
diff --git a/apps/api/package.json b/apps/api/package.json
Lines changed: 1 addition & 0 deletions
diff --git a/apps/api/src/context.ts b/apps/api/src/context.ts
Lines changed: 1 addition & 0 deletions
diff --git a/apps/api/src/nodes/cloudflare-node-registry.ts b/apps/api/src/nodes/cloudflare-node-registry.ts
Lines changed: 19 additions & 0 deletions
diff --git a/apps/api/src/nodes/gemini/gemini-2-5-flash-audio-understanding-node.test.ts b/apps/api/src/nodes/gemini/gemini-2-5-flash-audio-understanding-node.test.ts
Lines changed: 211 additions & 0 deletions
@@ -34,3 +34,5 @@ SES_DEFAULT_FROM=CHANGE_ME
 OPENAI_API_KEY=CHANGE_ME
 
 ANTHROPIC_API_KEY=CHANGE_ME
+
+GEMINI_API_KEY=CHANGE_ME
@@ -51,6 +51,7 @@
     "@dafthunk/types": "workspace:*",
     "@dafthunk/utils": "workspace:*",
     "@gltf-transform/core": "^4.2.1",
+    "@google/genai": "^1.16.0",
     "@hono-rate-limiter/cloudflare": "^0.2.2",
     "@hono/oauth-providers": "^0.7.1",
     "@hono/zod-validator": "^0.5.0",
 
@@ -39,6 +39,7 @@ export interface Bindings {
   SES_DEFAULT_FROM?: string;
   OPENAI_API_KEY?: string;
   ANTHROPIC_API_KEY?: string;
+  GEMINI_API_KEY?: string;
 }
 
 export interface Variables {
 
@@ -28,6 +28,13 @@ import { ReceiveEmailNode } from "./email/receive-email-node";
 import { SendEmailSendgridNode } from "./email/send-emai-sendgrid-node";
 import { SendEmailResendNode } from "./email/send-email-resend-node";
 import { SendEmailSesNode } from "./email/send-email-ses-node";
+import { Gemini25FlashAudioUnderstandingNode } from "./gemini/gemini-2-5-flash-audio-understanding-node";
+import { Gemini25FlashImagePreviewNode } from "./gemini/gemini-2-5-flash-image-preview-node";
+import { Gemini25FlashImageUnderstandingNode } from "./gemini/gemini-2-5-flash-image-understanding-node";
+import { Gemini25FlashNode } from "./gemini/gemini-2-5-flash-node";
+import { Gemini25FlashTtsNode } from "./gemini/gemini-2-5-flash-tts-node";
+import { Gemini25ProNode } from "./gemini/gemini-2-5-pro-node";
+import { ImagenNode } from "./gemini/imagen-node";
 import { AlongNode } from "./geo/along-node";
 import { AngleNode } from "./geo/angle-node";
 import { AreaNode } from "./geo/area-node";
@@ -283,6 +290,7 @@ export class CloudflareNodeRegistry extends BaseNodeRegistry {
     );
     const hasOpenAI = !!this.env.OPENAI_API_KEY;
     const hasAnthropic = !!this.env.ANTHROPIC_API_KEY;
+    const hasGemini = !!this.env.GEMINI_API_KEY;
 
     // Register all core nodes
     this.registerImplementation(FormDataStringNode);
@@ -588,5 +596,16 @@ export class CloudflareNodeRegistry extends BaseNodeRegistry {
       this.registerImplementation(Claude35HaikuNode);
       this.registerImplementation(Claude3OpusNode);
     }
+
+    // Google Gemini nodes
+    if (hasGemini) {
+      this.registerImplementation(Gemini25FlashNode);
+      this.registerImplementation(Gemini25ProNode);
+      this.registerImplementation(Gemini25FlashImagePreviewNode);
+      this.registerImplementation(Gemini25FlashTtsNode);
+      this.registerImplementation(Gemini25FlashAudioUnderstandingNode);
+      this.registerImplementation(Gemini25FlashImageUnderstandingNode);
+      this.registerImplementation(ImagenNode);
+    }
   }
 }
@@ -0,0 +1,211 @@
+import { Node } from "@dafthunk/types";
+import { describe, expect, it, vi } from "vitest";
+
+import { NodeContext } from "../types";
+import { Gemini25FlashAudioUnderstandingNode } from "./gemini-2-5-flash-audio-understanding-node";
+
+describe("Gemini25FlashAudioUnderstandingNode", () => {
+  vi.mock("@google/genai", () => ({
+    GoogleGenAI: class MockGoogleGenAI {
+      models = {
+        generateContent: vi.fn().mockResolvedValue({
+          candidates: [
+            {
+              content: {
+                parts: [
+                  {
+                    text: "This is a transcript of the audio content. The speaker discusses various topics including technology and innovation.",
+                  },
+                ],
+              },
+              finishReason: "STOP",
+            },
+          ],
+          usageMetadata: {
+            promptTokenCount: 150,
+            candidatesTokenCount: 25,
+            totalTokenCount: 175,
+          },
+        }),
+      };
+      constructor() {}
+    },
+  }));
+
+  const nodeId = "gemini-2-5-flash-audio-understanding";
+  const node = new Gemini25FlashAudioUnderstandingNode({
+    nodeId,
+  } as unknown as Node);
+
+  const createContext = (inputs: Record<string, any>): NodeContext =>
+    ({
+      nodeId: "test",
+      inputs,
+      workflowId: "test",
+      organizationId: "test-org",
+      env: {
+        DB: {} as any,
+        AI: {} as any,
+        AI_OPTIONS: {},
+        RESSOURCES: {} as any,
+        DATASETS: {} as any,
+        DATASETS_AUTORAG: "",
+        EMAIL_DOMAIN: "",
+        CLOUDFLARE_ACCOUNT_ID: "",
+        CLOUDFLARE_API_TOKEN: "",
+        CLOUDFLARE_AI_GATEWAY_ID: "",
+        TWILIO_ACCOUNT_SID: "",
+        TWILIO_AUTH_TOKEN: "",
+        TWILIO_PHONE_NUMBER: "",
+        SENDGRID_API_KEY: "",
+        SENDGRID_DEFAULT_FROM: "",
+        RESEND_API_KEY: "",
+        RESEND_DEFAULT_FROM: "",
+        AWS_ACCESS_KEY_ID: "",
+        AWS_SECRET_ACCESS_KEY: "",
+        AWS_REGION: "",
+        SES_DEFAULT_FROM: "",
+        OPENAI_API_KEY: "",
+        ANTHROPIC_API_KEY: "",
+        GEMINI_API_KEY: "test",
+      },
+    }) as unknown as NodeContext;
+
+  const createMockAudio = (mimeType = "audio/wav") => ({
+    data: new Uint8Array([1, 2, 3, 4, 5]), // Mock audio data
+    mimeType,
+  });
+
+  describe("execute", () => {
+    it("should transcribe audio with default prompt", async () => {
+      const result = await node.execute(
+        createContext({
+          audio: createMockAudio(),
+          prompt: "Transcribe this audio",
+        })
+      );
+
+      expect(result.status).toBe("completed");
+      expect(result.outputs?.text).toBeDefined();
+      expect(result.outputs?.text).toContain("This is a transcript");
+      expect(result.outputs?.finish_reason).toBe("STOP");
+    });
+
+    it("should analyze audio with custom prompt", async () => {
+      const result = await node.execute(
+        createContext({
+          audio: createMockAudio("audio/mp3"),
+          prompt: "Describe what you hear in this audio clip",
+        })
+      );
+
+      expect(result.status).toBe("completed");
+      expect(result.outputs?.text).toBeDefined();
+      expect(result.outputs?.text).toContain("speaker discusses");
+    });
+
+    it("should handle timestamp-based analysis", async () => {
+      const result = await node.execute(
+        createContext({
+          audio: createMockAudio(),
+          prompt: "Provide a transcript from 02:30 to 03:29",
+        })
+      );
+
+      expect(result.status).toBe("completed");
+      expect(result.outputs?.text).toBeDefined();
+    });
+
+    it("should handle thinking budget configuration", async () => {
+      const result = await node.execute(
+        createContext({
+          audio: createMockAudio(),
+          prompt: "Analyze this audio content",
+          thinking_budget: 500,
+        })
+      );
+
+      expect(result.status).toBe("completed");
+      expect(result.outputs?.text).toBeDefined();
+    });
+
+    it("should return error when audio is missing", async () => {
+      const result = await node.execute(
+        createContext({
+          prompt: "Transcribe this audio",
+        })
+      );
+
+      expect(result.status).toBe("error");
+      expect(result.error).toContain("Audio input is required");
+    });
+
+    it("should return error when prompt is missing", async () => {
+      const result = await node.execute(
+        createContext({
+          audio: createMockAudio(),
+        })
+      );
+
+      expect(result.status).toBe("error");
+      expect(result.error).toContain("Prompt is required");
+    });
+
+    it("should return error when API key is missing", async () => {
+      const context = createContext({
+        audio: createMockAudio(),
+        prompt: "Transcribe this audio",
+      });
+      context.env.GEMINI_API_KEY = "";
+
+      const result = await node.execute(context);
+
+      expect(result.status).toBe("error");
+      expect(result.error).toContain("GEMINI_API_KEY is not configured");
+    });
+
+    it("should handle different audio formats", async () => {
+      const formats = [
+        "audio/wav",
+        "audio/mp3",
+        "audio/aiff",
+        "audio/aac",
+        "audio/ogg",
+        "audio/flac",
+      ];
+
+      for (const format of formats) {
+        const result = await node.execute(
+          createContext({
+            audio: createMockAudio(format),
+            prompt: "Transcribe this audio",
+          })
+        );
+
+        expect(result.status).toBe("completed");
+        expect(result.outputs?.text).toBeDefined();
+      }
+    });
+
+    it("should handle large audio files without stack overflow", async () => {
+      // Create a larger mock audio file to test the base64 conversion
+      const largeAudioData = new Uint8Array(100000); // 100KB of data
+      for (let i = 0; i < largeAudioData.length; i++) {
+        largeAudioData[i] = Math.floor(Math.random() * 256);
+      }
+
+      const result = await node.execute(
+        createContext({
+          audio: {
+            data: largeAudioData,
+            mimeType: "audio/wav",
+          },
+          prompt: "Transcribe this audio",
+        })
+      );
+
+      expect(result.status).toBe("completed");
+      expect(result.outputs?.text).toBeDefined();
+    });
+  });
+});
Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,7 @@ export interface Bindings {`
`39`	`39`	`SES_DEFAULT_FROM?: string;`
`40`	`40`	`OPENAI_API_KEY?: string;`
`41`	`41`	`ANTHROPIC_API_KEY?: string;`
	`42`	`+ GEMINI_API_KEY?: string;`
`42`	`43`	`}`
`43`	`44`
`44`	`45`	`export interface Variables {`