Skip to content

Commit c145105

Browse files
bchapuis and claude committed
Add Tavily search and extract workflow nodes
Integrate Tavily API for web search and content extraction with two new nodes gated on TAVILY_API_KEY env variable. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent cca4f53 commit c145105

8 files changed

Lines changed: 679 additions & 0 deletions

File tree

apps/api/.dev.vars.example

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ GOOGLE_API_KEY=CHANGE_ME
2727
# Replicate
2828
REPLICATE_API_TOKEN=CHANGE_ME
2929

30+
# Tavily
31+
TAVILY_API_KEY=CHANGE_ME
32+
3033
# Integration OAuth (workflow integrations)
3134
# Use separate OAuth apps from authentication for security isolation
3235
# Each integration requires different scopes and redirect URIs

apps/api/src/context.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ export interface Bindings {
6262
HUGGINGFACE_API_KEY?: string;
6363
REPLICATE_API_TOKEN?: string;
6464
GOOGLE_API_KEY?: string;
65+
TAVILY_API_KEY?: string;
6566
R2_ACCESS_KEY_ID?: string;
6667
R2_SECRET_ACCESS_KEY?: string;
6768
R2_BUCKET_NAME?: string;

apps/api/src/runtime/cloudflare-node-registry.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,8 @@ import { SubmitCommentRedditNode } from "@dafthunk/runtime/nodes/reddit/submit-c
407407
import { VoteRedditNode } from "@dafthunk/runtime/nodes/reddit/vote-reddit-node";
408408
import { ReplicateModelNode } from "@dafthunk/runtime/nodes/replicate/replicate-model-node";
409409
import { ReceiveScheduledTriggerNode } from "@dafthunk/runtime/nodes/scheduled/receive-scheduled-trigger-node";
410+
import { ExtractTavilyNode } from "@dafthunk/runtime/nodes/tavily/extract-tavily-node";
411+
import { SearchTavilyNode } from "@dafthunk/runtime/nodes/tavily/search-tavily-node";
410412
import { BotForwardMessageTelegramNode } from "@dafthunk/runtime/nodes/telegram/bot-forward-message-telegram-node";
411413
import { BotGetChatTelegramNode } from "@dafthunk/runtime/nodes/telegram/bot-get-chat-telegram-node";
412414
import { BotReceiveTelegramMessageNode } from "@dafthunk/runtime/nodes/telegram/bot-receive-telegram-message-node";
@@ -736,6 +738,12 @@ export class CloudflareNodeRegistry extends BaseNodeRegistry<Bindings> {
736738
this.registerImplementation(SearchWikipediaNode);
737739
this.registerImplementation(SearchMediaWikiNode);
738740

741+
// Tavily nodes
742+
if (this.env.TAVILY_API_KEY) {
743+
this.registerImplementation(SearchTavilyNode);
744+
this.registerImplementation(ExtractTavilyNode);
745+
}
746+
739747
// Google API nodes
740748
if (this.env.GOOGLE_API_KEY) {
741749
this.registerImplementation(AirQualityGoogleNode);

packages/runtime/src/node-types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ export interface NodeEnv {
217217
HUGGINGFACE_API_KEY?: string;
218218
REPLICATE_API_TOKEN?: string;
219219
GOOGLE_API_KEY?: string;
220+
TAVILY_API_KEY?: string;
220221
}
221222

222223
export interface NodeContext {
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
import type { NodeContext } from "@dafthunk/runtime";
2+
import type { Node } from "@dafthunk/types";
3+
import { beforeEach, describe, expect, it, vi } from "vitest";
4+
import { ExtractTavilyNode } from "./extract-tavily-node";
5+
6+
// Swap the global fetch for a vitest mock so the suite never touches the network.
global.fetch = vi.fn();

describe("ExtractTavilyNode", () => {
  const NODE_ID = "extract-tavily";
  const EXAMPLE_URL = "https://example.com";
  const BLOCKED_URL = "https://blocked.com";
  const VALID_ENV = { TAVILY_API_KEY: "tvly-test" };

  /** The mocked global fetch, typed so the mock helpers are reachable. */
  const mockedFetch = () => global.fetch as ReturnType<typeof vi.fn>;

  /** Makes the mocked fetch resolve with a successful response whose JSON body is `payload`. */
  const respondWithJson = (payload: unknown) =>
    mockedFetch().mockResolvedValue({
      ok: true,
      json: async () => payload,
    });

  /** Executes a freshly constructed node against a minimal context built from `inputs` and `env`. */
  function runNode(
    inputs: Record<string, unknown>,
    env: Record<string, string> = {}
  ) {
    const node = new ExtractTavilyNode({ nodeId: NODE_ID } as unknown as Node);
    const context = {
      nodeId: NODE_ID,
      inputs,
      organizationId: "test-org",
      env,
    } as unknown as NodeContext;
    return node.execute(context);
  }

  // Clear recorded calls between tests so `mock.calls[0]` always belongs to the current test.
  beforeEach(() => {
    vi.clearAllMocks();
  });

  it("should return error for missing urls", async () => {
    const outcome = await runNode({}, VALID_ENV);

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("Missing required input: urls");
  });

  it("should return error for invalid urls type", async () => {
    const outcome = await runNode({ urls: 123 }, VALID_ENV);

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("Invalid input type for urls");
  });

  it("should return error for missing API key", async () => {
    // No env passed: the helper falls back to an empty environment.
    const outcome = await runNode({ urls: [EXAMPLE_URL] });

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("TAVILY_API_KEY");
  });

  it("should extract content from URLs", async () => {
    const pageContent = "# Example\nPage content here.";
    respondWithJson({
      results: [{ url: EXAMPLE_URL, raw_content: pageContent }],
      failed_results: [],
      response_time: 1.2,
    });

    const outcome = await runNode({ urls: [EXAMPLE_URL] }, VALID_ENV);

    expect(outcome.status).toBe("completed");
    expect(outcome.outputs?.results).toEqual([
      { url: EXAMPLE_URL, content: pageContent },
    ]);
    expect(outcome.outputs?.count).toBe(1);
    expect(outcome.outputs?.failedResults).toEqual([]);
  });

  it("should accept a single URL string", async () => {
    respondWithJson({
      results: [{ url: EXAMPLE_URL, raw_content: "Content" }],
      failed_results: [],
      response_time: 0.5,
    });

    await runNode({ urls: EXAMPLE_URL }, VALID_ENV);

    // The bare string must have been wrapped into an array in the request body.
    const [, requestInit] = mockedFetch().mock.calls[0];
    const sentBody = JSON.parse(requestInit.body);
    expect(sentBody.urls).toEqual([EXAMPLE_URL]);
  });

  it("should report failed extractions", async () => {
    const failure = { url: BLOCKED_URL, error: "Access denied" };
    respondWithJson({
      results: [],
      failed_results: [failure],
      response_time: 0.8,
    });

    const outcome = await runNode({ urls: [BLOCKED_URL] }, VALID_ENV);

    expect(outcome.status).toBe("completed");
    expect(outcome.outputs?.results).toEqual([]);
    expect(outcome.outputs?.failedResults).toEqual([
      { url: BLOCKED_URL, error: "Access denied" },
    ]);
    expect(outcome.outputs?.count).toBe(0);
  });

  it("should handle API errors", async () => {
    mockedFetch().mockResolvedValue({
      ok: false,
      status: 401,
    });

    const outcome = await runNode(
      { urls: [EXAMPLE_URL] },
      { TAVILY_API_KEY: "tvly-bad-key" }
    );

    expect(outcome.status).toBe("error");
    expect(outcome.error).toContain("401");
  });
});
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
import { ExecutableNode, type NodeContext } from "@dafthunk/runtime";
2+
import type { NodeExecution, NodeType } from "@dafthunk/types";
3+
4+
/** One successfully extracted page in a Tavily Extract API response. */
interface TavilyExtractResult {
  url: string;
  raw_content: string;
  /**
   * Image URLs found on the page. Per the Tavily Extract API reference this
   * is populated when the request set `include_images`.
   */
  images?: string[];
}

/** One URL that Tavily could not extract, with the reason it reported. */
interface TavilyExtractFailedResult {
  url: string;
  error: string;
}

/** Shape of a successful Tavily Extract API response body. */
interface TavilyExtractResponse {
  results: TavilyExtractResult[];
  failed_results: TavilyExtractFailedResult[];
  response_time: number;
}

const TAVILY_EXTRACT_ENDPOINT = "https://api.tavily.com/extract";

/** Upper bound on how much of an error response body is echoed into the node error. */
const MAX_ERROR_DETAIL_LENGTH = 300;

/**
 * Normalizes the `urls` input into a list of URL strings.
 *
 * @param urls Raw input value: a single URL string or an array of URL strings.
 * @returns The URL list, or `undefined` when the input is not a string or a
 *   non-empty array made up exclusively of non-blank strings.
 */
function normalizeUrls(urls: unknown): string[] | undefined {
  const candidates: unknown = typeof urls === "string" ? [urls] : urls;
  if (!Array.isArray(candidates) || candidates.length === 0) {
    return undefined;
  }
  const allUsable = candidates.every(
    (entry) => typeof entry === "string" && entry.trim() !== ""
  );
  return allUsable ? (candidates as string[]) : undefined;
}

/** Human-readable description of a rejected `urls` value for the error message. */
function describeUrlsInput(urls: unknown): string {
  if (Array.isArray(urls)) {
    return urls.length === 0
      ? "empty array"
      : "array with non-string or blank entries";
  }
  if (typeof urls === "string") {
    return "blank string";
  }
  return typeof urls;
}

/** Returns `value` when it is an array, otherwise an empty array. */
function asArray<T>(value: T[] | null | undefined): T[] {
  return Array.isArray(value) ? value : [];
}

/**
 * Best-effort read of an error response body so the node error can carry the
 * API's own explanation. Never throws: any failure yields an empty string.
 */
async function readErrorDetail(response: Response): Promise<string> {
  try {
    const text = (await response.text()).trim();
    return text.length > MAX_ERROR_DETAIL_LENGTH
      ? `${text.slice(0, MAX_ERROR_DETAIL_LENGTH)}...`
      : text;
  } catch {
    return "";
  }
}

/**
 * Workflow node that extracts page content for one or more URLs through the
 * Tavily Extract API, authenticating with `TAVILY_API_KEY` from the node env.
 */
export class ExtractTavilyNode extends ExecutableNode {
  public static readonly nodeType: NodeType = {
    id: "extract-tavily",
    name: "Extract (Tavily)",
    type: "extract-tavily",
    description: "Extract content from web pages using Tavily",
    tags: ["Extract", "Web", "Tavily"],
    icon: "file-text",
    documentation:
      "Extract clean, structured content from one or more web pages using the Tavily Extract API. Returns the page content in markdown or text format.",
    usage: 10,
    subscription: true,
    asTool: true,
    inputs: [
      {
        name: "urls",
        type: "json",
        description: "URL string or array of URLs to extract content from",
        required: true,
      },
      {
        name: "extractDepth",
        type: "string",
        description:
          "Extraction depth: basic (1 credit per 5) or advanced (2 credits per 5). Defaults to basic.",
        required: false,
        hidden: true,
      },
      {
        name: "format",
        type: "string",
        description: "Output format: markdown or text. Defaults to markdown.",
        required: false,
        hidden: true,
      },
      {
        name: "includeImages",
        type: "boolean",
        description: "Include extracted image URLs. Defaults to false.",
        required: false,
        hidden: true,
      },
    ],
    outputs: [
      {
        name: "results",
        type: "json",
        description:
          "Array of extracted content objects with url and content (and images when includeImages is enabled)",
      },
      {
        name: "failedResults",
        type: "json",
        description: "Array of URLs that failed to extract with error messages",
        hidden: true,
      },
      {
        name: "count",
        type: "number",
        description: "Number of successfully extracted pages",
        hidden: true,
      },
    ],
  };

  /**
   * Validates the inputs, calls the Tavily Extract endpoint and maps the
   * response into the node outputs.
   *
   * @param context Execution context; reads `inputs.urls`, the optional
   *   `inputs.extractDepth`, `inputs.format`, `inputs.includeImages`, and
   *   `env.TAVILY_API_KEY`.
   * @returns A success result with `results`, `failedResults` and `count`, or
   *   an error result for invalid input, a missing API key, a non-OK HTTP
   *   status, or any exception raised while calling the API. Never rejects.
   */
  async execute(context: NodeContext): Promise<NodeExecution> {
    try {
      const { urls, extractDepth, format, includeImages } = context.inputs;

      if (urls === null || urls === undefined) {
        return this.createErrorResult("Missing required input: urls");
      }

      // Reject bad entries up front instead of forwarding them to the API.
      const urlList = normalizeUrls(urls);
      if (!urlList) {
        return this.createErrorResult(
          `Invalid input type for urls: expected string or non-empty array of strings, got ${describeUrlsInput(urls)}`
        );
      }

      const { TAVILY_API_KEY } = context.env;
      if (!TAVILY_API_KEY) {
        return this.createErrorResult(
          "TAVILY_API_KEY environment variable is not configured"
        );
      }

      // Optional settings are only sent when provided so the API defaults apply.
      const wantsImages = Boolean(includeImages);
      const body: Record<string, unknown> = { urls: urlList };
      if (extractDepth && typeof extractDepth === "string") {
        body.extract_depth = extractDepth;
      }
      if (format && typeof format === "string") {
        body.format = format;
      }
      if (wantsImages) {
        body.include_images = true;
      }

      const response = await fetch(TAVILY_EXTRACT_ENDPOINT, {
        method: "POST",
        headers: {
          Authorization: `Bearer ${TAVILY_API_KEY}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify(body),
      });

      if (!response.ok) {
        const detail = await readErrorDetail(response);
        return this.createErrorResult(
          `Tavily API request failed with status ${response.status}${detail ? `: ${detail}` : ""}`
        );
      }

      // The body is untrusted JSON: tolerate a null body or missing arrays.
      const data = (await response.json()) as
        | Partial<TavilyExtractResponse>
        | null;

      const results = asArray(data?.results).map((item) => ({
        url: item.url,
        content: item.raw_content,
        // Surface images only when the caller asked for them and the API sent some.
        ...(wantsImages && Array.isArray(item.images)
          ? { images: item.images }
          : {}),
      }));

      const failedResults = asArray(data?.failed_results).map((item) => ({
        url: item.url,
        error: item.error,
      }));

      return this.createSuccessResult({
        results,
        failedResults,
        count: results.length,
      });
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances.
      const message = err instanceof Error ? err.message : String(err);
      return this.createErrorResult(`Error in ExtractTavily: ${message}`);
    }
  }
}

0 commit comments

Comments
 (0)