brightdata · anil-bd · May 18, 2026
diff --git a/src/__tests__/commands/scraper.test.ts b/src/__tests__/commands/scraper.test.ts
@@ -58,6 +58,7 @@ import {
     parse_sync_timeout,
     is_realtime_page_limit_error,
     classify_dataset,
+    SCRAPER_BODY_HINTS,
 } from '../../commands/scraper';
 
 describe('commands/scraper', ()=>{
@@ -68,6 +69,85 @@ describe('commands/scraper', ()=>{
         mocks.start.mockReturnValue({stop: mocks.stop});
     });
 
+    // PR-12: AI Scraper Studio specific hints live with this command,
+    // not in the shared HTTP client. These tests assert (a) the
+    // patterns are well-formed and (b) they actually travel to the
+    // HTTP layer on every API call this command makes.
+    describe('SCRAPER_BODY_HINTS', ()=>{
+        it('contains a hint for the stub-collector 403 response body',
+            ()=>{
+            const stub_hint = SCRAPER_BODY_HINTS.find(h=>
+                h.pattern.test('Collector does not have a template'));
+            expect(stub_hint).toBeDefined();
+            expect(stub_hint!.hint).toMatch(/AI generation has not completed/);
+            expect(stub_hint!.hint).toMatch(/bdata scraper create/);
+            // Must NOT echo the misleading generic 403 hint.
+            expect(stub_hint!.hint).not.toMatch(/zone permissions/);
+        });
+
+        it('contains a hint for the parallel-job cap 429 response body',
+            ()=>{
+            const cap_hint = SCRAPER_BODY_HINTS.find(h=>
+                h.pattern.test('Cannot run more than 3 jobs in parallel'));
+            expect(cap_hint).toBeDefined();
+            expect(cap_hint!.hint).toMatch(/concurrent-job cap/);
+            expect(cap_hint!.hint).toMatch(/serialise/);
+        });
+
+        it('cap pattern is case-insensitive and number-agnostic '
+            +'(future-proof if the cap changes)', ()=>{
+            const cap_pattern = SCRAPER_BODY_HINTS[1].pattern;
+            expect(cap_pattern.test('cannot run more than 5 jobs '
+                +'in parallel')).toBe(true);
+            expect(cap_pattern.test('CANNOT RUN MORE THAN 100 JOBS '
+                +'IN PARALLEL')).toBe(true);
+        });
+
+        it('is passed via `hints` opt to client.post on every '
+            +'AI-Flow call in handle_create_scraper', async()=>{
+            mocks.post
+                .mockResolvedValueOnce({id: 'c_abc', name: 'cli-scraper-1'})
+                .mockResolvedValueOnce({id: 'ia_xyz', queued: false});
+            mocks.poll_until.mockResolvedValue({
+                result: {status: 'done', completed_steps: []},
+                attempts: 1,
+            });
+            await handle_create_scraper(
+                'https://example.com/p/1',
+                'extract title',
+                {}
+            );
+            // Both `post` calls (template create + AI trigger) must
+            // include the scraper hints so any 4xx is translated
+            // with the right vocabulary.
+            for (const call of mocks.post.mock.calls)
+            {
+                const opts = call[3] as {hints?: unknown};
+                expect(opts).toBeDefined();
+                expect(opts.hints).toBe(SCRAPER_BODY_HINTS);
+            }
+        });
+
+        it('is passed via `hints` opt to client.post on the '
+            +'trigger_immediate call in handle_run_scraper', async()=>{
+            mocks.post.mockResolvedValueOnce({response_id: 'r_xyz'});
+            const fetch_spy = vi.spyOn(global, 'fetch')
+                .mockImplementation(()=>Promise.resolve({
+                    status: 200,
+                    text: ()=>Promise.resolve('{"title":"ok"}'),
+                } as unknown as Response));
+            mocks.poll_until.mockImplementation(async(o: never)=>{
+                const cfg = o as {fetch_once: ()=>Promise<unknown>};
+                const r = await cfg.fetch_once();
+                return {result: r, attempts: 1};
+            });
+            await handle_run_scraper('c_abc', 'https://x.com/p/1', {});
+            const opts = mocks.post.mock.calls[0][3] as {hints?: unknown};
+            expect(opts.hints).toBe(SCRAPER_BODY_HINTS);
+            fetch_spy.mockRestore();
+        });
+    });
+
     describe('build_template_request', ()=>{
         it('uses auto-generated name and stub webhook by default', ()=>{
             const before = Math.floor(Date.now()/1000);
@@ -176,15 +256,15 @@ describe('commands/scraper', ()=>{
                 expect.objectContaining({
                     deliver: expect.objectContaining({type: 'webhook'}),
                 }),
-                {timing: undefined}
+                expect.objectContaining({timing: undefined})
             );
             expect(mocks.post).toHaveBeenNthCalledWith(
                 2,
                 'api_key',
                 '/dca/collectors/c_abc/automate_template',
                 {description: 'extract title',
                     urls: ['https://example.com/p/1']},
-                {timing: undefined}
+                expect.objectContaining({timing: undefined})
             );
             expect(mocks.poll_until).toHaveBeenCalledTimes(1);
             expect(mocks.poll_until).toHaveBeenCalledWith(
@@ -404,7 +484,7 @@ describe('commands/scraper', ()=>{
                 expect.stringMatching(
                     /\/dca\/trigger_immediate\?collector=c_abc/),
                 {url: 'https://x.com/p/1'},
-                {timing: undefined}
+                expect.objectContaining({timing: undefined})
             );
             expect(mocks.print).toHaveBeenCalledWith(
                 {title: 'hello'},

diff --git a/src/__tests__/utils/client.test.ts b/src/__tests__/utils/client.test.ts
@@ -0,0 +1,79 @@
+import {describe, it, expect} from 'vitest';
+import {pick_hint, ERROR_HINTS, type Body_hint} from '../../utils/client';
+
+// pick_hint is the layer the per-command hint lists plug into.
+// The client itself ships ZERO command-specific patterns — every test
+// here uses generic mock patterns so the contract is clear.
+
+const MOCK_HINTS: Body_hint[] = [
+    {pattern: /quota.*exceeded/i,
+        hint: 'Your monthly quota is exhausted. Top up in the dashboard.'},
+    {pattern: /not allowed in region/i,
+        hint: 'This API is geo-restricted. Use --country to override.'},
+];
+
+describe('utils/client.pick_hint', ()=>{
+    it('returns the status-based hint when no extra hints are passed', ()=>{
+        expect(pick_hint(401, 'anything')).toBe(ERROR_HINTS[401]);
+        expect(pick_hint(403, 'anything')).toBe(ERROR_HINTS[403]);
+        expect(pick_hint(404, 'anything')).toBe(ERROR_HINTS[404]);
+        expect(pick_hint(429, 'anything')).toBe(ERROR_HINTS[429]);
+    });
+
+    it('returns the status-based hint when extra hints are passed '
+        +'but nothing matches the body', ()=>{
+        expect(pick_hint(403, 'plain unauthorized message', MOCK_HINTS))
+            .toBe(ERROR_HINTS[403]);
+    });
+
+    it('overrides with the first matching extra hint', ()=>{
+        expect(pick_hint(429, 'monthly quota exceeded', MOCK_HINTS))
+            .toMatch(/Top up in the dashboard/);
+        expect(pick_hint(403, 'not allowed in region: CN', MOCK_HINTS))
+            .toMatch(/geo-restricted/);
+    });
+
+    it('is case-insensitive on the caller\'s pattern', ()=>{
+        expect(pick_hint(429, 'MONTHLY QUOTA EXCEEDED', MOCK_HINTS))
+            .toMatch(/Top up in the dashboard/);
+    });
+
+    it('respects first-match-wins order in the caller\'s list', ()=>{
+        const ordered: Body_hint[] = [
+            {pattern: /generic/, hint: 'first wins'},
+            {pattern: /generic/, hint: 'second loses'},
+        ];
+        expect(pick_hint(500, 'generic error', ordered))
+            .toBe('first wins');
+    });
+
+    it('returns undefined for unknown statuses with no body match', ()=>{
+        expect(pick_hint(418, 'teapot')).toBeUndefined();
+        expect(pick_hint(599, 'mystery')).toBeUndefined();
+    });
+
+    it('does not consult ERROR_HINTS when an extra-hint pattern '
+        +'matches', ()=>{
+        // The override should win even when the generic status hint
+        // would also be applicable.
+        const matches_403: Body_hint[] = [
+            {pattern: /custom 403 case/i,
+                hint: 'override hint, not the zone-permissions one'},
+        ];
+        expect(pick_hint(403, 'this is a custom 403 case', matches_403))
+            .toBe('override hint, not the zone-permissions one');
+    });
+
+    it('does not leak any scraper-studio vocabulary in the default '
+        +'hints', ()=>{
+        // The shared client must stay generic. Catches accidental
+        // regressions where someone adds a domain-specific hint here.
+        for (const hint of Object.values(ERROR_HINTS))
+        {
+            expect(hint).not.toMatch(/scraper create/i);
+            expect(hint).not.toMatch(/collector/i);
+            expect(hint).not.toMatch(/AI[- ]Flow/i);
+            expect(hint).not.toMatch(/automate_template/i);
+        }
+    });
+});
diff --git a/src/commands/scraper.ts b/src/commands/scraper.ts
@@ -1,5 +1,5 @@
 import {Command} from 'commander';
-import {post, get} from '../utils/client';
+import {post, get, type Body_hint} from '../utils/client';
 import {load as load_config} from '../utils/config';
 import {ensure_authenticated} from '../utils/auth';
 import {start as start_spinner} from '../utils/spinner';
@@ -18,6 +18,37 @@ import type {
     Batch_trigger_response,
 } from '../types/scraper';
 
+// AI Scraper Studio specific error-body hints. Lives with the command
+// that owns the DCA endpoints — NOT in the shared HTTP client — so the
+// AI-Flow vocabulary doesn't leak into `scrape`, `search`, `discover`,
+// `pipelines`, or `browser`.
+//
+// Both patterns describe the same incident from two perspectives:
+//   * The 429 fires during `scraper create` when too many AI-Flow jobs
+//     are in flight at once (undocumented 3-job cap as of v0.2.0).
+//   * The 403 "Collector does not have a template" fires later when
+//     the user tries to `scraper run` against the half-built
+//     collector_id the failed create left behind.
+//
+// First match wins. Add more patterns here as we learn about other
+// scraper-API error bodies that deserve a better hint.
+const SCRAPER_BODY_HINTS: Body_hint[] = [
+    {
+        pattern: /collector does not have a template/i,
+        hint: 'AI generation has not completed for this collector. '
+            +'Re-run `bdata scraper create` (the previous attempt may '
+            +'have hit the AI-Flow parallel-job cap), or open '
+            +'https://brightdata.com/cp/scrapers to inspect / finish '
+            +'the half-built collector in the web UI.',
+    },
+    {
+        pattern: /cannot run more than \d+ jobs in parallel/i,
+        hint: 'You hit the AI-Flow concurrent-job cap. Wait for an '
+            +'in-flight `scraper create` to finish, or serialise '
+            +'your launches.',
+    },
+];
+
 const CREATE_TEMPLATE_ENDPOINT = '/dca/collector';
 const AI_TRIGGER_PATH = 'automate_template';
 const AI_PROGRESS_PATH = 'automate_template/progress';
@@ -105,7 +136,7 @@ const handle_create_scraper = async(
             api_key,
             CREATE_TEMPLATE_ENDPOINT,
             template_body,
-            {timing: opts.timing}
+            {timing: opts.timing, hints: SCRAPER_BODY_HINTS}
         );
         create_spinner.stop();
         if (!template.id)
@@ -129,7 +160,7 @@ const handle_create_scraper = async(
             api_key,
             `/dca/collectors/${collector_id}/${AI_TRIGGER_PATH}`,
             build_ai_request(url, description),
-            {timing: opts.timing}
+            {timing: opts.timing, hints: SCRAPER_BODY_HINTS}
         );
         trigger_spinner.stop();
     } catch(e) {
@@ -148,7 +179,7 @@ const handle_create_scraper = async(
             fetch_once: ()=>get<Ai_progress_response>(
                 api_key,
                 `/dca/collectors/${collector_id}/${AI_PROGRESS_PATH}`,
-                {timing: opts.timing}
+                {timing: opts.timing, hints: SCRAPER_BODY_HINTS}
             ),
             get_status: extract_progress_status,
             running_statuses: [RUNNING_SENTINEL],
@@ -338,7 +369,7 @@ const run_batch = async(
             api_key,
             `${BATCH_TRIGGER_ENDPOINT}?${query}`,
             [build_run_request(url)],
-            {timing: opts.timing}
+            {timing: opts.timing, hints: SCRAPER_BODY_HINTS}
         );
         trigger_spinner.stop();
         if (!trigger.collection_id)
@@ -474,7 +505,7 @@ const handle_run_scraper = async(
             api_key,
             `${TRIGGER_IMMEDIATE_ENDPOINT}?${trigger_query}`,
             build_run_request(url),
-            {timing: opts.timing}
+            {timing: opts.timing, hints: SCRAPER_BODY_HINTS}
         );
         trigger_spinner.stop();
         if (!trigger.response_id)
@@ -592,4 +623,5 @@ export {
     parse_sync_timeout,
     is_realtime_page_limit_error,
     classify_dataset,
+    SCRAPER_BODY_HINTS,
 };
diff --git a/src/utils/client.ts b/src/utils/client.ts
@@ -11,12 +11,35 @@ const ERROR_HINTS: Record<number, string> = {
     429: 'Rate limit exceeded. Wait a moment and try again.',
 };
 
+// Each command can supply its own ordered list of body-pattern overrides
+// via Request_opts.hints. The shared HTTP layer knows nothing about
+// per-command error semantics — it just consults the caller's hints
+// before falling back to the generic ERROR_HINTS map.
+type Body_hint = {pattern: RegExp; hint: string};
+
+const pick_hint = (
+    status: number,
+    body: string,
+    extra_hints?: Body_hint[]
+): string|undefined=>{
+    if (extra_hints)
+    {
+        for (const {pattern, hint} of extra_hints)
+        {
+            if (pattern.test(body))
+                return hint;
+        }
+    }
+    return ERROR_HINTS[status];
+};
+
 type Request_opts = {
     method?: string;
     body?: unknown;
     headers?: Record<string, string>;
     timing?: boolean;
     raw_buffer?: boolean;
+    hints?: Body_hint[];
 };
 
 type Api_error = {
@@ -27,10 +50,14 @@ type Api_error = {
 
 const sleep = (ms: number)=>new Promise(resolve=>setTimeout(resolve, ms));
 
-const format_error = (status: number, detail: string): Api_error=>({
+const format_error = (
+    status: number,
+    detail: string,
+    extra_hints?: Body_hint[]
+): Api_error=>({
     status,
     message: detail,
-    hint: ERROR_HINTS[status],
+    hint: pick_hint(status, detail, extra_hints),
 });
 
 const request = async<T = unknown>(
@@ -93,7 +120,7 @@ const request = async<T = unknown>(
                 if (err_body)
                     detail = err_body;
             } catch(_e) {}
-            const api_err = format_error(res.status, detail);
+            const api_err = format_error(res.status, detail, opts.hints);
             const msg = [
                 `Error: ${api_err.message}`,
                 `  Status: ${api_err.status}`,
@@ -133,5 +160,5 @@ const get = <T = unknown>(
     opts: Omit<Request_opts, 'method'> = {}
 ): Promise<T>=>request<T>(api_key, endpoint, {method: 'GET', ...opts});
 
-export {request, post, get};
-export type {Request_opts, Api_error};
+export {request, post, get, pick_hint, ERROR_HINTS};
+export type {Request_opts, Api_error, Body_hint};