Skip to content

Commit 2ab3bbd

Browse files
committed
feat: add discover command
1 parent f8a0fe1 commit 2ab3bbd

File tree

6 files changed

+567
-1
lines changed

6 files changed

+567
-1
lines changed

README.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
|---|---|
2525
| `brightdata scrape` | Scrape any URL — bypasses CAPTCHAs, JS rendering, anti-bot protections |
2626
| `brightdata search` | Google / Bing / Yandex search with structured JSON output |
27+
| `brightdata discover` | AI-powered web discovery — find and rank results by intent, with optional full-page content |
2728
| `brightdata pipelines` | Extract structured data from 40+ platforms (Amazon, LinkedIn, TikTok…) |
2829
| `brightdata browser` | Control a real browser via Bright Data's Scraping Browser — navigate, snapshot, click, type, and more |
2930
| `brightdata zones` | List and inspect your Bright Data proxy zones |
@@ -44,6 +45,7 @@
4445
- [init](#init)
4546
- [scrape](#scrape)
4647
- [search](#search)
48+
- [discover](#discover)
4749
- [pipelines](#pipelines)
4850
- [browser](#browser)
4951
- [status](#status)
@@ -246,6 +248,60 @@ brightdata search "bright data pricing" --engine bing
246248

247249
---
248250

251+
### `discover`
252+
253+
AI-powered web discovery. Submit a query with an optional intent, and Bright Data finds and ranks matching results, optionally extracting the full-page content of each one.
254+
255+
```bash
256+
brightdata discover <query> [options]
257+
```
258+
259+
| Flag | Description |
260+
|---|---|
261+
| `--intent <text>` | AI intent to evaluate and rank result relevance |
262+
| `--country <code>` | ISO country code (default: `US`) |
263+
| `--city <name>` | City for localized results (e.g. `"New York"`) |
264+
| `--language <code>` | Language code (default: `en`) |
265+
| `--num-results <n>` | Number of results to return |
266+
| `--filter-keywords <words>` | Comma-separated keywords that must appear in results |
267+
| `--include-content` | Include full-page content in each result |
268+
| `--no-remove-duplicates` | Keep duplicate results |
269+
| `--start-date <date>` | Only content updated from this date onward (`YYYY-MM-DD`) |
270+
| `--end-date <date>` | Only content updated up to this date (`YYYY-MM-DD`) |
271+
| `--timeout <seconds>` | Polling timeout (default: `600`) |
272+
| `-o, --output <path>` | Write output to file |
273+
| `--json` / `--pretty` | JSON output (raw / indented) |
274+
| `-k, --api-key <key>` | Override API key |
275+
276+
**Examples**
277+
278+
```bash
279+
# Basic discovery — table output
280+
brightdata discover "AI trends"
281+
282+
# With AI intent for relevance ranking
283+
brightdata discover "AI trends" \
284+
--intent "Prioritize institutional reports for VC research"
285+
286+
# Include full page content as markdown
287+
brightdata discover "AI trends" --include-content --num-results 5
288+
289+
# Geo-targeted with date range
290+
brightdata discover "best restaurants" --country US --city "New York" \
291+
--start-date 2025-01-01 --end-date 2025-12-31
292+
293+
# Filter results by keywords
294+
brightdata discover "generative AI SaaS" --filter-keywords "revenue,SaaS"
295+
296+
# JSON output to file
297+
brightdata discover "AI trends" --num-results 10 --pretty -o results.json
298+
299+
# Pipe-friendly — JSON is emitted automatically when stdout is redirected
300+
brightdata discover "AI trends" --include-content --num-results 3 > results.json
301+
```
302+
303+
---
304+
249305
### `pipelines`
250306

251307
Extract structured data from 40+ platforms using Bright Data's Web Scraper API. Triggers an async collection job, polls until ready, and returns results.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@brightdata/cli",
3-
"version": "0.1.6",
3+
"version": "0.1.7",
44
"description": "Command-line interface for Bright Data. Scrape, search, extract structured data, and automate browsers directly from your terminal.",
55
"main": "dist/index.js",
66
"bin": {
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
import {describe, it, expect, beforeEach, vi} from 'vitest';
2+
3+
const mocks = vi.hoisted(()=>({
4+
post: vi.fn(),
5+
get: vi.fn(),
6+
ensure_authenticated: vi.fn(),
7+
stop: vi.fn(),
8+
start: vi.fn(),
9+
print: vi.fn(),
10+
print_table: vi.fn(),
11+
fail: vi.fn((msg: string)=>{ throw new Error(`fail:${msg}`); }),
12+
dim: vi.fn((msg: string)=>msg),
13+
parse_timeout: vi.fn(),
14+
poll_until: vi.fn(),
15+
}));
16+
17+
vi.mock('../../utils/client', ()=>({
18+
post: mocks.post,
19+
get: mocks.get,
20+
}));
21+
22+
vi.mock('../../utils/auth', ()=>({
23+
ensure_authenticated: mocks.ensure_authenticated,
24+
}));
25+
26+
vi.mock('../../utils/spinner', ()=>({
27+
start: mocks.start,
28+
}));
29+
30+
vi.mock('../../utils/output', ()=>({
31+
print: mocks.print,
32+
print_table: mocks.print_table,
33+
fail: mocks.fail,
34+
dim: mocks.dim,
35+
}));
36+
37+
vi.mock('../../utils/polling', ()=>({
38+
parse_timeout: mocks.parse_timeout,
39+
poll_until: mocks.poll_until,
40+
}));
41+
42+
import {
43+
handle_discover,
44+
build_request,
45+
extract_status,
46+
format_markdown,
47+
print_discover_table,
48+
} from '../../commands/discover';
49+
50+
describe('commands/discover', ()=>{
51+
beforeEach(()=>{
52+
vi.clearAllMocks();
53+
mocks.ensure_authenticated.mockReturnValue('api_key');
54+
mocks.parse_timeout.mockReturnValue(600);
55+
mocks.start.mockReturnValue({stop: mocks.stop});
56+
});
57+
58+
describe('build_request', ()=>{
59+
it('builds minimal request with only query', ()=>{
60+
const req = build_request('AI trends', {});
61+
expect(req).toEqual({query: 'AI trends'});
62+
});
63+
64+
it('includes all optional params', ()=>{
65+
const req = build_request('AI trends', {
66+
intent: 'find research papers',
67+
city: 'New York',
68+
country: 'US',
69+
language: 'en',
70+
numResults: '10',
71+
filterKeywords: 'AI, machine learning',
72+
includeContent: true,
73+
startDate: '2025-01-01',
74+
endDate: '2025-12-31',
75+
});
76+
expect(req).toEqual({
77+
query: 'AI trends',
78+
intent: 'find research papers',
79+
city: 'New York',
80+
country: 'US',
81+
language: 'en',
82+
num_results: 10,
83+
filter_keywords: ['AI', 'machine learning'],
84+
include_content: true,
85+
start_date: '2025-01-01',
86+
end_date: '2025-12-31',
87+
});
88+
});
89+
90+
it('parses comma-separated filter keywords with whitespace', ()=>{
91+
const req = build_request('q', {filterKeywords: ' a , b , c '});
92+
expect(req.filter_keywords).toEqual(['a', 'b', 'c']);
93+
});
94+
95+
it('does not set format by default (API returns JSON)', ()=>{
96+
const req = build_request('test', {});
97+
expect(req.format).toBeUndefined();
98+
});
99+
100+
it('does not set format when include-content is used', ()=>{
101+
const req = build_request('test', {includeContent: true});
102+
expect(req.format).toBeUndefined();
103+
expect(req.include_content).toBe(true);
104+
});
105+
});
106+
107+
describe('extract_status', ()=>{
108+
it('returns status from valid response', ()=>{
109+
expect(extract_status({status: 'processing'})).toBe('processing');
110+
expect(extract_status({status: 'done'})).toBe('done');
111+
});
112+
113+
it('returns undefined for invalid input', ()=>{
114+
expect(extract_status(null as never)).toBeUndefined();
115+
expect(extract_status(undefined as never)).toBeUndefined();
116+
});
117+
});
118+
119+
describe('format_markdown', ()=>{
120+
it('formats results as markdown', ()=>{
121+
const md = format_markdown([
122+
{
123+
link: 'https://example.com',
124+
title: 'Example',
125+
description: 'A description',
126+
relevance_score: 0.95,
127+
},
128+
], 'test query');
129+
expect(md).toContain('# Discover results for "test query"');
130+
expect(md).toContain('**1. [Example](https://example.com)** (95.0%)');
131+
expect(md).toContain('A description');
132+
});
133+
134+
it('includes content when present', ()=>{
135+
const md = format_markdown([
136+
{
137+
link: 'https://example.com',
138+
title: 'Example',
139+
description: 'Desc',
140+
relevance_score: 0.5,
141+
content: '# Page content here',
142+
},
143+
], 'q');
144+
expect(md).toContain('# Page content here');
145+
});
146+
});
147+
148+
describe('print_discover_table', ()=>{
149+
it('calls print_table with formatted rows', ()=>{
150+
const results = [
151+
{
152+
link: 'https://example.com',
153+
title: 'Example Title',
154+
description: 'Desc',
155+
relevance_score: 0.98184747,
156+
},
157+
];
158+
print_discover_table(results);
159+
expect(mocks.print_table).toHaveBeenCalledWith(
160+
[{
161+
'#': '1',
162+
title: 'Example Title',
163+
score: '98.2%',
164+
url: 'https://example.com',
165+
}],
166+
['#', 'title', 'score', 'url']
167+
);
168+
});
169+
170+
it('prints dim message when no results', ()=>{
171+
const log = vi.spyOn(console, 'log').mockImplementation(()=>{});
172+
print_discover_table([]);
173+
expect(log).toHaveBeenCalled();
174+
expect(mocks.print_table).not.toHaveBeenCalled();
175+
log.mockRestore();
176+
});
177+
});
178+
179+
describe('handle_discover', ()=>{
180+
it('triggers and polls then prints table', async()=>{
181+
mocks.post.mockResolvedValue({status: 'ok', task_id: 'abc123'});
182+
mocks.poll_until.mockResolvedValue({
183+
result: {
184+
status: 'done',
185+
duration_seconds: 5,
186+
results: [
187+
{
188+
link: 'https://example.com',
189+
title: 'Result',
190+
description: 'Desc',
191+
relevance_score: 0.9,
192+
},
193+
],
194+
},
195+
attempts: 3,
196+
});
197+
await handle_discover('AI trends', {});
198+
expect(mocks.post).toHaveBeenCalledWith(
199+
'api_key',
200+
'/discover',
201+
{query: 'AI trends'},
202+
{timing: undefined}
203+
);
204+
expect(mocks.poll_until).toHaveBeenCalledTimes(1);
205+
expect(mocks.print_table).toHaveBeenCalledTimes(1);
206+
});
207+
208+
it('prints json when --json is set', async()=>{
209+
const response = {
210+
status: 'done',
211+
duration_seconds: 2,
212+
results: [{
213+
link: 'https://example.com',
214+
title: 'R',
215+
description: 'D',
216+
relevance_score: 0.8,
217+
}],
218+
};
219+
mocks.post.mockResolvedValue({status: 'ok', task_id: 't1'});
220+
mocks.poll_until.mockResolvedValue({result: response, attempts: 1});
221+
await handle_discover('q', {json: true});
222+
expect(mocks.print).toHaveBeenCalledWith(
223+
response,
224+
{json: true, pretty: undefined, output: undefined}
225+
);
226+
expect(mocks.print_table).not.toHaveBeenCalled();
227+
});
228+
229+
it('prints raw JSON when --output is set', async()=>{
230+
const response = {
231+
status: 'done',
232+
results: [{
233+
link: 'https://example.com',
234+
title: 'R',
235+
description: 'D',
236+
relevance_score: 0.7,
237+
}],
238+
};
239+
mocks.post.mockResolvedValue({status: 'ok', task_id: 't2'});
240+
mocks.poll_until.mockResolvedValue({result: response, attempts: 1});
241+
await handle_discover('q', {output: 'out.json'});
242+
expect(mocks.print).toHaveBeenCalledWith(
243+
response,
244+
{json: undefined, pretty: undefined, output: 'out.json'}
245+
);
246+
});
247+
248+
it('fails when trigger returns no task_id', async()=>{
249+
mocks.post.mockResolvedValue({status: 'ok'});
250+
const exit = vi.spyOn(process, 'exit')
251+
.mockImplementation(()=>undefined as never);
252+
const error = vi.spyOn(console, 'error')
253+
.mockImplementation(()=>{});
254+
await handle_discover('q', {});
255+
expect(mocks.fail).toHaveBeenCalled();
256+
exit.mockRestore();
257+
error.mockRestore();
258+
});
259+
});
260+
});

0 commit comments

Comments
 (0)