Skip to content

Commit d6aa4fe

Browse files
Merge pull request #54 from ai-action/feat/web_fetch
2 parents 6969e17 + 95bd32b commit d6aa4fe

12 files changed

Lines changed: 270 additions & 13 deletions

File tree

src/constants/tool.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ export const LIST_DIR = 'list_dir';
66
export const GREP_SEARCH = 'grep_search';
77
export const VIEW_RANGE = 'view_range';
88
export const WEB_SEARCH = 'web_search';
9+
export const WEB_FETCH = 'web_fetch';

src/utils/tools/definitions.test.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { READ_TOOLS, TOOLS, WRITE_TOOLS } from './definitions';
33
describe('definitions', () => {
44
describe('TOOLS', () => {
55
it('exports tool definitions', () => {
6-
expect(TOOLS).toHaveLength(8);
6+
expect(TOOLS).toHaveLength(9);
77
expect(TOOLS.map((t) => t.function.name)).toContain('read_file');
88
expect(TOOLS.map((t) => t.function.name)).toContain('write_file');
99
expect(TOOLS.map((t) => t.function.name)).toContain('edit_file');
@@ -12,6 +12,7 @@ describe('definitions', () => {
1212
expect(TOOLS.map((t) => t.function.name)).toContain('grep_search');
1313
expect(TOOLS.map((t) => t.function.name)).toContain('view_range');
1414
expect(TOOLS.map((t) => t.function.name)).toContain('web_search');
15+
expect(TOOLS.map((t) => t.function.name)).toContain('web_fetch');
1516
});
1617
});
1718

src/utils/tools/definitions.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,15 @@ export const TOOLS = [
126126
},
127127
['query'],
128128
),
129+
130+
defineTool(
131+
TOOL.WEB_FETCH,
132+
'Fetch the readable content of a webpage at the given URL',
133+
{
134+
url: { type: 'string', description: 'The full URL of the page to fetch' },
135+
},
136+
['url'],
137+
),
129138
];
130139

131140
// tools that can be used during plan mode
@@ -135,6 +144,7 @@ export const READ_TOOLS = new Set<string>([
135144
TOOL.GREP_SEARCH,
136145
TOOL.VIEW_RANGE,
137146
TOOL.WEB_SEARCH,
147+
TOOL.WEB_FETCH,
138148
]);
139149

140150
// tools that require approval before execution (safe mode or plan approval)

src/utils/tools/dispatcher.test.ts

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,5 +158,18 @@ describe('dispatcher', () => {
158158
expect(result.content).toContain('Source');
159159
expect(result.error).toBeUndefined();
160160
});
161+
162+
it('executes web_fetch tool', async () => {
163+
mockFetch.mockResolvedValueOnce({
164+
ok: true,
165+
text: vi.fn().mockResolvedValue('# Fetched Page\n\nContent here.'),
166+
} as unknown as Response);
167+
168+
const result = await executeTool('web_fetch', {
169+
url: 'https://example.com',
170+
});
171+
expect(result.content).toContain('Fetched Page');
172+
expect(result.error).toBeUndefined();
173+
});
161174
});
162175
});

src/utils/tools/dispatcher.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import {
99
writeFile,
1010
} from './filesystem';
1111
import { runShell } from './shell';
12-
import { webSearch } from './web/search';
12+
import { webFetch, webSearch } from './web';
1313

1414
interface ToolOptions {
1515
allowedTools?: ReadonlySet<string>;
@@ -63,6 +63,9 @@ export async function executeTool(
6363
case TOOL.WEB_SEARCH:
6464
return await webSearch(args.query as string);
6565

66+
case TOOL.WEB_FETCH:
67+
return await webFetch(args.url as string);
68+
6669
default:
6770
return { content: '', error: `Unknown tool: ${name as string}` };
6871
}

src/utils/tools/web/fetch-page.test.ts

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
import { webFetch } from './fetch-page';
2+
3+
const mockFetch = vi.fn<typeof fetch>();
4+
5+
// Suite for the page-fetching tool: the Jina Reader success path, the
// direct-fetch fallback, and how failures are reported.
describe('fetch-page', () => {
  // Each test starts with cleared mock state and the global `fetch` replaced
  // by `mockFetch`.
  beforeEach(() => {
    vi.clearAllMocks();
    vi.stubGlobal('fetch', mockFetch);
  });

  describe('webFetch', () => {
    it('returns error for empty URL', async () => {
      const { error } = await webFetch(' ');
      expect(error).toBe('URL cannot be empty');
    });

    it('returns content from Jina Reader on success', async () => {
      const readerResponse = createFetchResponse(
        '# Page Title\n\nSome content.',
        200,
      );
      mockFetch.mockResolvedValueOnce(readerResponse);

      const { content, error } = await webFetch('https://example.com');
      expect(content).toBe('# Page Title\n\nSome content.');
      expect(error).toBeUndefined();

      // The first (and only) request must go through the reader proxy.
      const firstCall = mockFetch.mock.calls[0] as [string, ...unknown[]];
      expect(firstCall[0]).toBe('https://r.jina.ai/https://example.com');
    });

    it('falls back to direct fetch when Jina Reader fails', async () => {
      const directResponse = createFetchResponse(
        '<html><body><p>Direct content</p></body></html>',
        200,
      );
      mockFetch
        .mockRejectedValueOnce(new Error('rate limited'))
        .mockResolvedValueOnce(directResponse);

      const { content, error } = await webFetch('https://example.com');
      expect(content).toContain('Note: Jina Reader unavailable');
      expect(content).toContain('Direct content');
      expect(error).toBeUndefined();
    });

    it('returns error when both Jina and fallback fail', async () => {
      mockFetch.mockRejectedValueOnce(new Error('network error'));
      mockFetch.mockRejectedValueOnce(new Error('also failed'));

      const { content, error } = await webFetch('https://example.com');
      expect(error).toContain('Failed to fetch page: also failed');
      expect(content).toBe('');
    });

    it('returns error when both fail with non-Error rejection', async () => {
      mockFetch.mockRejectedValueOnce('jina string error');
      mockFetch.mockRejectedValueOnce('fallback string error');

      const { error } = await webFetch('https://example.com');
      expect(error).toContain('Failed to fetch page: fallback string error');
    });

    it('returns error when fallback responds with non-OK status', async () => {
      const badGateway = createFetchResponse('bad gateway', 502);
      mockFetch
        .mockRejectedValueOnce(new Error('jina down'))
        .mockResolvedValueOnce(badGateway);

      const { error } = await webFetch('https://example.com');
      expect(error).toContain('Failed to fetch page: HTTP 502');
    });

    it('strips HTML tags in fallback content', async () => {
      const htmlPage = createFetchResponse(
        '<html><head><title>Title</title></head><body><h1>Heading</h1><p>Paragraph text.</p></body></html>',
        200,
      );
      mockFetch
        .mockRejectedValueOnce(new Error('jina error'))
        .mockResolvedValueOnce(htmlPage);

      const { content } = await webFetch('https://example.com');
      expect(content).toContain('Heading');
      expect(content).toContain('Paragraph text.');
      expect(content).not.toContain('<h1>');
      expect(content).not.toContain('<p>');
    });
  });
});
93+
94+
/**
 * Build a minimal stand-in for a fetch `Response` for use with `mockFetch`.
 *
 * Only `ok`, `status` and `text()` are provided; `ok` is true for 2xx statuses
 * and `text()` resolves to `body`.
 */
function createFetchResponse(body: string, status: number): Response {
  const isSuccess = status >= 200 && status < 300;
  const text = vi.fn().mockResolvedValue(body);
  return { ok: isSuccess, status, text } as unknown as Response;
}

src/utils/tools/web/fetch-page.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import type { ToolResult } from '../../../types';
2+
import { fetchText } from './fetch';
3+
import { cleanText, stripTags } from './utils';
4+
5+
const JINA_READER_BASE_URL = 'https://r.jina.ai/';
6+
7+
/**
 * Fetch readable page content via Jina Reader, with fallback to direct fetch + HTML stripping.
 *
 * The URL is first requested through the Jina Reader endpoint as plain text.
 * If that request throws for any reason, the page is fetched directly and its
 * HTML is reduced to text with `stripTags` and `cleanText`; the result is then
 * prefixed with a note saying the fallback was used.
 *
 * @param url - URL of the page to fetch; surrounding whitespace is ignored.
 * @returns The page text in `content`. On failure `content` is empty and
 * `error` holds the reason: a missing/blank URL, or the message of the error
 * raised by the direct fetch (the Jina Reader error is not reported).
 */
export async function webFetch(url: string): Promise<ToolResult> {
  // Callers may pass a value cast from untyped tool arguments, so `url` is not
  // guaranteed to be a string at runtime. Treat anything else as an empty URL
  // instead of letting `.trim()` throw.
  const trimmedUrl = typeof url === 'string' ? url.trim() : '';
  if (!trimmedUrl) {
    return { content: '', error: 'URL cannot be empty' };
  }

  try {
    const content = await fetchText(`${JINA_READER_BASE_URL}${trimmedUrl}`, {
      Accept: 'text/plain',
    });
    return { content };
  } catch {
    // Deliberate best-effort: any Jina Reader failure falls through to the
    // direct fetch below.
  }

  try {
    const html = await fetchText(trimmedUrl, { Accept: 'text/html' });
    const content = cleanText(stripTags(html));
    return {
      content: `Note: Jina Reader unavailable, falling back to raw fetch.\n\n${content}`,
    };
  } catch (error) {
    return {
      content: '',
      error: `Failed to fetch page: ${error instanceof Error ? error.message : String(error)}`,
    };
  }
}

src/utils/tools/web/fetch.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import { fetchJSON, fetchText } from './fetch';
2+
3+
const mockFetch = vi.fn<typeof fetch>();
4+
5+
// Suite for the low-level fetch helpers: each helper is checked for its
// success value and for the error raised on a non-OK HTTP status.
describe('web/fetch', () => {
  // Each test starts with cleared mock state and the global `fetch` replaced
  // by `mockFetch`.
  beforeEach(() => {
    vi.clearAllMocks();
    vi.stubGlobal('fetch', mockFetch);
  });

  describe('fetchText', () => {
    it('returns response text on success', async () => {
      const okResponse = {
        ok: true,
        status: 200,
        text: vi.fn().mockResolvedValue('hello world'),
      } as unknown as Response;
      mockFetch.mockResolvedValueOnce(okResponse);

      const body = await fetchText('https://example.com', {
        Accept: 'text/plain',
      });
      expect(body).toBe('hello world');
    });

    it('throws on non-OK status', async () => {
      const notFound = {
        ok: false,
        status: 404,
        text: vi.fn(),
      } as unknown as Response;
      mockFetch.mockResolvedValueOnce(notFound);

      const pending = fetchText('https://example.com', {});
      await expect(pending).rejects.toThrow('HTTP 404');
    });
  });

  describe('fetchJSON', () => {
    it('returns parsed JSON on success', async () => {
      const okResponse = {
        ok: true,
        status: 200,
        json: vi.fn().mockResolvedValue({ key: 'value' }),
      } as unknown as Response;
      mockFetch.mockResolvedValueOnce(okResponse);

      const parsed = await fetchJSON<{ key: string }>('https://example.com');
      expect(parsed).toEqual({ key: 'value' });
    });

    it('throws on non-OK status', async () => {
      const serverError = {
        ok: false,
        status: 500,
        json: vi.fn(),
      } as unknown as Response;
      mockFetch.mockResolvedValueOnce(serverError);

      const pending = fetchJSON('https://example.com');
      await expect(pending).rejects.toThrow('HTTP 500');
    });
  });
});

src/utils/tools/web/fetch.ts

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,22 @@ import { PACKAGE } from '../../../constants';
22

33
const FETCH_TIMEOUT_MS = 10_000;
44

5+
const BASE_HEADERS = {
6+
'user-agent': `${PACKAGE.NAME}/${PACKAGE.VERSION}`,
7+
};
8+
9+
type Headers = Record<string, string>;
10+
511
/**
612
* Fetch text from URL with timeout and headers
713
*/
814
export async function fetchText(
915
url: string,
10-
headers: Record<string, string>,
16+
headers: Headers,
1117
): Promise<string> {
1218
const response = await fetch(url, {
1319
headers: {
14-
'user-agent': `${PACKAGE.NAME}/${PACKAGE.VERSION}`,
20+
...BASE_HEADERS,
1521
...headers,
1622
},
1723
signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
@@ -23,3 +29,26 @@ export async function fetchText(
2329

2430
return response.text();
2531
}
32+
33+
/**
 * Request a URL and decode the response body as JSON.
 *
 * Sends the shared base headers plus `Accept: application/json`; entries in
 * `headers` are spread last, so they take precedence over both. The request
 * is given an abort signal that times out after `FETCH_TIMEOUT_MS`.
 *
 * @param url - Address to request.
 * @param headers - Extra request headers (defaults to none).
 * @returns The parsed body, typed as `T` by assertion (no runtime validation).
 * @throws Error with message `HTTP <status>` when the response is not OK.
 */
export async function fetchJSON<T>(
  url: string,
  headers: Headers = {},
): Promise<T> {
  const requestHeaders = {
    ...BASE_HEADERS,
    Accept: 'application/json',
    ...headers,
  };
  const timeoutSignal = AbortSignal.timeout(FETCH_TIMEOUT_MS);
  const response = await fetch(url, {
    headers: requestHeaders,
    signal: timeoutSignal,
  });

  if (response.ok) {
    return response.json() as Promise<T>;
  }

  throw new Error(`HTTP ${response.status.toString()}`);
}

src/utils/tools/web/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
1+
export { webFetch } from './fetch-page';
12
export { webSearch } from './search';

0 commit comments

Comments
 (0)