brightdata
diff --git a/‎README.md‎
Lines changed: 29 additions & 6 deletions b/‎README.md‎
Lines changed: 29 additions & 6 deletions
diff --git a/‎src/__tests__/commands/scraper.test.ts‎
Lines changed: 288 additions & 0 deletions b/‎src/__tests__/commands/scraper.test.ts‎
Lines changed: 288 additions & 0 deletions
@@ -410,27 +410,38 @@ brightdata scraper create https://example.com/product/1 \
 
 ### `scraper run`
 
-Run a scraper (built with `scraper create` or in the web UI) against a URL and get the extracted data.
+Run a scraper (built with `scraper create` or in the web UI) against one or more URLs and get the extracted data.
 
 ```bash
-brightdata scraper run <collector_id> <url> [options]
+brightdata scraper run <collector_id> [url] [options]
 ```
 
+Provide URLs in exactly one of three ways:
+
+- Positional `<url>` — single URL (legacy form, unchanged).
+- `--urls <u1,u2,...>` — comma-separated list.
+- `--input-file <path>` — file with one URL per line (blank lines and `#` comments are skipped), **or** a JSON array of URL strings, **or** a JSON array of `{"url": "..."}` objects.
+
 | Flag | Description |
 |---|---|
-| `--sync` | Use the synchronous `/dca/crawl` endpoint (server-side cap of 25–50s) |
+| `--urls <list>` | Comma-separated list of URLs (2+ entries use the batch path; a single entry runs as a single URL) |
+| `--input-file <path>` | File with URLs (txt one-per-line, or JSON array) |
+| `--sync` | Use the synchronous `/dca/crawl` endpoint (single-URL only, server-side cap of 25–50s) |
 | `--sync-timeout <seconds>` | Sync-mode server timeout, `25`–`50` (default: `50`) |
-| `--timeout <seconds>` | Async polling timeout (default: `600`) |
+| `--timeout <seconds>` | Polling timeout (default: `600` single-URL, `3600` batch) |
 | `--name <name>` | Human-readable job name |
 | `--version <version>` | Scraper version (e.g. `dev`) |
 | `-o, --output <path>` | Write output to file |
 | `--json` / `--pretty` | JSON output (raw / indented) |
 | `--timing` | Show request timing |
 | `-k, --api-key <key>` | Override API key |
 
-By default the command uses the async flow: it triggers `/dca/trigger_immediate`, gets back a `response_id`, and polls `/dca/get_result` until the data is ready. Use `--sync` for one-shot scrapes that you expect to finish within ~50 seconds; on a sync server-side timeout the command exits with the `response_id` so you can re-run without `--sync` to poll for the result.
+**Routing**
+
+- **Single URL** (positional, or one entry via `--urls` / `--input-file`) → async flow: `/dca/trigger_immediate` → poll `/dca/get_result`. Use `--sync` for `/dca/crawl` (one-shot, 25–50s).
+- **Multiple URLs** (`--urls` / `--input-file` with 2+ entries) → single POST to `/dca/trigger` with an array body, one `collection_id`, polled via `/dca/dataset`. This mirrors the canonical batch shape used by the reference SDKs ([`triggerWithUrls`](https://github.com/brightdata/bright-data-scraper-studio-nodejs-project) / [`trigger_with_urls`](https://github.com/brightdata/bright-data-scraper-studio-python-project)). `--sync` is incompatible with multi-URL — `/dca/crawl` accepts only a single URL.
 
-If a URL expands to more pages than the realtime job limit allows (e.g. paginated listings, infinite scroll), the CLI automatically falls back to the batch endpoint (`/dca/trigger` → poll `/dca/dataset`). The fallback prints a one-line notice and adjusts the poll interval and timeout for the longer batch wait. No flag required.
+If a single URL expands to more pages than the realtime job limit allows (paginated listings, infinite scroll), the CLI automatically falls back to the batch endpoint and prints a one-line notice. No flag required.
 
 **Examples**
 
@@ -448,6 +459,18 @@ brightdata scraper run c_mp3tuab31lswoxvpws https://example.com/p/1 --sync
 # Sync with a shorter server timeout and a job name
 brightdata scraper run c_mp3tuab31lswoxvpws https://example.com/p/1 \
     --sync --sync-timeout 30 --name first-test
+
+# Multi-URL batch — one API call, one collection_id, one merged result array
+brightdata scraper run c_mp3tuab31lswoxvpws \
+    --urls "https://example.com/p/1,https://example.com/p/2,https://example.com/p/3" \
+    --pretty -o products.json
+
+# Multi-URL from a file (one URL per line; # comments and blank lines skipped)
+brightdata scraper run c_mp3tuab31lswoxvpws --input-file urls.txt -o products.json
+
+# Multi-URL from a JSON array
+echo '["https://example.com/p/1","https://example.com/p/2"]' > urls.json
+brightdata scraper run c_mp3tuab31lswoxvpws --input-file urls.json
 ```
 
 ---
 
@@ -1,3 +1,6 @@
+import {writeFileSync, mkdtempSync, rmSync} from 'node:fs';
+import {tmpdir} from 'node:os';
+import {join} from 'node:path';
 import {describe, it, expect, beforeEach, afterEach, vi} from 'vitest';
 import {Command} from 'commander';
 import type {Scraper_create_opts} from '../../types/scraper';
@@ -68,6 +71,10 @@ import {
     AI_TRIGGER_DEFAULT_RETRIES,
     AI_TRIGGER_RETRY_BASE_MS,
     AI_TRIGGER_RETRY_MAX_MS,
+    parse_urls_arg,
+    read_input_file,
+    resolve_run_inputs,
+    is_valid_url,
 } from '../../commands/scraper';
 
 describe('commands/scraper', ()=>{
@@ -1163,4 +1170,285 @@ describe('commands/scraper', ()=>{
             error.mockRestore();
         });
     });
+
+    describe('is_valid_url', ()=>{ // pins is_valid_url: http/https URLs pass; free text, empty and blank strings fail
+        it('accepts http/https URLs', ()=>{
+            expect(is_valid_url('https://example.com')).toBe(true);
+            expect(is_valid_url('http://example.com/a/b?c=1')).toBe(true); // path + query string still valid
+        });
+
+        it('rejects garbage', ()=>{
+            expect(is_valid_url('not a url')).toBe(false);
+            expect(is_valid_url('')).toBe(false); // empty string
+            expect(is_valid_url('  ')).toBe(false); // whitespace-only string
+        });
+    });
+
+    describe('parse_urls_arg', ()=>{ // pins how the comma-separated --urls string becomes a string[]
+        it('splits, trims, and drops empties', ()=>{
+            expect(parse_urls_arg(
+                ' https://a.com , https://b.com ,, https://c.com')) // padding around entries plus an empty ",," slot
+                .toEqual(['https://a.com', 'https://b.com', 'https://c.com']);
+        });
+
+        it('returns single URL for a non-comma input', ()=>{
+            expect(parse_urls_arg('https://only.example.com'))
+                .toEqual(['https://only.example.com']);
+        });
+
+        it('returns empty array for blank input', ()=>{
+            expect(parse_urls_arg('')).toEqual([]);
+            expect(parse_urls_arg('  , ,  ')).toEqual([]); // separators and spaces only: nothing survives
+        });
+    });
+
+    describe('read_input_file', ()=>{ // pins file parsing: plain-text lines, JSON string array, JSON {url} array, and the error cases
+        let tmp_dir: string;
+
+        beforeEach(()=>{
+            tmp_dir = mkdtempSync(join(tmpdir(), 'bdata-test-')); // fresh scratch directory for every test
+        });
+
+        afterEach(()=>{
+            rmSync(tmp_dir, {recursive: true, force: true}); // remove the scratch directory and any fixtures in it
+        });
+
+        const write = (name: string, content: string): string=>{ // fixture helper: writes content under tmp_dir, returns the full path
+            const p = join(tmp_dir, name);
+            writeFileSync(p, content, 'utf8');
+            return p;
+        };
+
+        it('reads newline-separated URLs', ()=>{
+            const p = write('urls.txt',
+                'https://a.com\nhttps://b.com\nhttps://c.com');
+            expect(read_input_file(p)).toEqual([
+                'https://a.com', 'https://b.com', 'https://c.com']);
+        });
+
+        it('skips blank lines and # comments', ()=>{ // covers full-line comments, empty/blank lines and a trailing inline comment
+            const p = write('urls.txt',
+                '# top comment\n'
+                +'https://a.com\n'
+                +'\n'
+                +'   \n'
+                +'# section\n'
+                +'https://b.com    # inline comment ok\n'
+                +'https://c.com');
+            expect(read_input_file(p)).toEqual([
+                'https://a.com', 'https://b.com', 'https://c.com']);
+        });
+
+        it('reads JSON array of strings', ()=>{
+            const p = write('urls.json',
+                JSON.stringify(['https://a.com', 'https://b.com']));
+            expect(read_input_file(p)).toEqual([
+                'https://a.com', 'https://b.com']);
+        });
+
+        it('reads JSON array of {url} objects', ()=>{
+            const p = write('urls.json', JSON.stringify([
+                {url: 'https://a.com'},
+                {url: 'https://b.com', extra: 'ignored'}, // extra keys next to "url" do not appear in the result
+            ]));
+            expect(read_input_file(p)).toEqual([
+                'https://a.com', 'https://b.com']);
+        });
+
+        it('throws on missing file', ()=>{
+            expect(()=>read_input_file(join(tmp_dir, 'missing.txt'))) // path inside tmp_dir that was never written
+                .toThrow(/Cannot read --input-file/);
+        });
+
+        it('throws on malformed JSON', ()=>{
+            const p = write('bad.json', '[{not valid'); // begins like a JSON array but does not parse
+            expect(()=>read_input_file(p))
+                .toThrow(/failed to parse/);
+        });
+
+        it('throws on non-array JSON', ()=>{
+            const p = write('obj.json', '{"url": "https://a.com"}'); // valid JSON, but a bare object rather than an array
+            expect(()=>read_input_file(p))
+                .toThrow(/must be an array/);
+        });
+
+        it('throws on JSON entry with neither string nor {url}', ()=>{
+            const p = write('mixed.json',
+                JSON.stringify(['https://a.com', {wrong: 'field'}])); // second entry has no "url" key
+            expect(()=>read_input_file(p))
+                .toThrow(/must be a string or an object with a "url"/);
+        });
+
+        it('returns empty array for an empty file', ()=>{
+            const p = write('empty.txt', '   \n\n  '); // whitespace and newlines only
+            expect(read_input_file(p)).toEqual([]);
+        });
+    });
+
+    describe('resolve_run_inputs', ()=>{ // pins input-source resolution: positional URL, urls option, or inputFile option, exactly one of them
+        let tmp_dir: string;
+
+        beforeEach(()=>{
+            tmp_dir = mkdtempSync(join(tmpdir(), 'bdata-test-')); // fresh scratch directory for every test
+        });
+
+        afterEach(()=>{
+            rmSync(tmp_dir, {recursive: true, force: true});
+        });
+
+        it('returns the positional URL as a single-element list', ()=>{
+            expect(resolve_run_inputs('https://a.com', {}))
+                .toEqual(['https://a.com']);
+        });
+
+        it('parses --urls', ()=>{
+            expect(resolve_run_inputs(undefined,
+                {urls: 'https://a.com,https://b.com'}))
+                .toEqual(['https://a.com', 'https://b.com']);
+        });
+
+        it('reads --input-file', ()=>{
+            const p = join(tmp_dir, 'urls.txt');
+            writeFileSync(p, 'https://a.com\nhttps://b.com', 'utf8');
+            expect(resolve_run_inputs(undefined, {inputFile: p}))
+                .toEqual(['https://a.com', 'https://b.com']);
+        });
+
+        it('rejects when no source is provided', ()=>{
+            expect(()=>resolve_run_inputs(undefined, {})) // no positional URL and an empty options object
+                .toThrow(/requires one of: <url> positional, --urls/);
+        });
+
+        it('rejects when multiple sources are provided', ()=>{
+            expect(()=>resolve_run_inputs('https://a.com',
+                {urls: 'https://b.com'})) // positional + urls
+                .toThrow(/only one input source/);
+            expect(()=>resolve_run_inputs(undefined,
+                {urls: 'https://a.com', inputFile: '/tmp/x'})) // urls + inputFile; the error expected is the source-count one, not a file-read one
+                .toThrow(/only one input source/);
+        });
+
+        it('rejects when parsed list is empty', ()=>{
+            expect(()=>resolve_run_inputs(undefined, {urls: '  , ,  '})) // a source is given but yields zero URLs
+                .toThrow(/No URLs to scrape/);
+        });
+
+        it('rejects invalid URLs and names them', ()=>{
+            expect(()=>resolve_run_inputs(undefined,
+                {urls: 'https://a.com,not-a-url,also bad'}))
+                .toThrow(/Invalid URL\(s\):.*not-a-url/); // message must include the offending entry; only 'not-a-url' is asserted
+        });
+    });
+
+    describe('handle_run_scraper multi-URL', ()=>{ // end-to-end routing of handle_run_scraper with mocked post / poll_until / fetch
+        let fetch_spy: ReturnType<typeof vi.spyOn>;
+        let tmp_dir: string;
+
+        beforeEach(()=>{
+            fetch_spy = vi.spyOn(global, 'fetch') as never; // intercept global fetch so no real request is made
+            tmp_dir = mkdtempSync(join(tmpdir(), 'bdata-test-'));
+        });
+
+        afterEach(()=>{
+            fetch_spy.mockRestore(); // put the real fetch back
+            rmSync(tmp_dir, {recursive: true, force: true});
+        });
+
+        it('--urls posts an array body to /dca/trigger and polls /dca/dataset',
+            async()=>{
+            mocks.post.mockResolvedValueOnce({collection_id: 'd_batch'}); // canned reply for the trigger POST
+            fetch_spy.mockImplementation(()=>Promise.resolve({
+                status: 200,
+                text: ()=>Promise.resolve(
+                    '[{"title":"A"},{"title":"B"},{"title":"C"}]'),
+            } as unknown as Response)); // minimal Response stand-in: only status and text() are provided
+            mocks.poll_until.mockImplementationOnce(async(o: never)=>{ // stub poller: runs fetch_once a single time and reports it as ready
+                const cfg = o as {fetch_once: ()=>Promise<unknown>};
+                const r = await cfg.fetch_once();
+                return {result: r, attempts: 1, last_status: '__ready__'};
+            });
+            await handle_run_scraper('c_abc', undefined, {
+                urls: 'https://a.com,https://b.com,https://c.com',
+            });
+            expect(mocks.post).toHaveBeenCalledTimes(1); // one POST for all three URLs
+            const call = mocks.post.mock.calls[0];
+            expect(String(call[1])).toMatch(/\/dca\/trigger\?collector=c_abc/); // second post argument carries the endpoint
+            expect(call[2]).toEqual([ // third post argument is the body: one {url} object per input URL
+                {url: 'https://a.com'},
+                {url: 'https://b.com'},
+                {url: 'https://c.com'},
+            ]);
+            expect(mocks.print).toHaveBeenCalledWith(
+                [{title: 'A'}, {title: 'B'}, {title: 'C'}], // the parsed fetch body is what gets printed
+                {json: undefined, pretty: undefined, output: undefined} // no output flags were passed in
+            );
+        });
+
+        it('--input-file routes to the same batch path', async()=>{
+            const p = join(tmp_dir, 'urls.txt');
+            writeFileSync(p, 'https://a.com\nhttps://b.com', 'utf8');
+            mocks.post.mockResolvedValueOnce({collection_id: 'd_batch'});
+            fetch_spy.mockImplementation(()=>Promise.resolve({
+                status: 200,
+                text: ()=>Promise.resolve('[{"ok":1},{"ok":2}]'),
+            } as unknown as Response));
+            mocks.poll_until.mockImplementationOnce(async(o: never)=>{ // same single-shot poller stub as in the --urls test
+                const cfg = o as {fetch_once: ()=>Promise<unknown>};
+                const r = await cfg.fetch_once();
+                return {result: r, attempts: 1, last_status: '__ready__'};
+            });
+            await handle_run_scraper('c_abc', undefined, {inputFile: p});
+            expect(mocks.post.mock.calls[0][2]).toEqual([ // body built from the two lines of the file
+                {url: 'https://a.com'},
+                {url: 'https://b.com'},
+            ]);
+        });
+
+        it('rejects --sync combined with --urls', async()=>{
+            await expect(
+                handle_run_scraper('c_abc', undefined, {
+                    sync: true,
+                    urls: 'https://a.com,https://b.com',
+                })
+            ).rejects.toThrow(/--sync cannot be combined with --urls/);
+            expect(mocks.fail).toHaveBeenCalledWith( // the same message is also passed to the fail mock
+                expect.stringContaining(
+                    '--sync cannot be combined with --urls'));
+            expect(mocks.post).not.toHaveBeenCalled(); // rejected before any trigger request
+        });
+
+        it('rejects when no URL source is provided', async()=>{
+            await expect(
+                handle_run_scraper('c_abc', undefined, {})
+            ).rejects.toThrow(
+                /requires one of: <url> positional, --urls, or --input-file/);
+        });
+
+        it('rejects when positional and --urls are both set', async()=>{
+            await expect(
+                handle_run_scraper('c_abc', 'https://a.com',
+                    {urls: 'https://b.com'})
+            ).rejects.toThrow(/only one input source/);
+        });
+
+        it('single URL via --urls still takes the legacy single path',
+            async()=>{
+            mocks.post.mockResolvedValueOnce({response_id: 'r_xyz'}); // single-URL trigger replies with response_id rather than collection_id
+            fetch_spy.mockImplementation(()=>Promise.resolve({
+                status: 200,
+                text: ()=>Promise.resolve('{"title":"only"}'),
+            } as unknown as Response));
+            mocks.poll_until.mockImplementationOnce(async(o: never)=>{
+                const cfg = o as {fetch_once: ()=>Promise<unknown>};
+                const r = await cfg.fetch_once();
+                return {result: r, attempts: 1, last_status: '__ready__'};
+            });
+            await handle_run_scraper('c_abc', undefined,
+                {urls: 'https://only.com'});
+            expect(String(mocks.post.mock.calls[0][1])).toMatch(
+                /\/dca\/trigger_immediate\?collector=c_abc/); // realtime endpoint, not the batch /dca/trigger
+            expect(mocks.post.mock.calls[0][2]).toEqual(
+                {url: 'https://only.com'}); // body is a single object, not an array
+        });
+    });
 });