Skip to content

Commit ca9d0b1

Browse files
VinciGit00 and claude committed
feat!: migrate CLI to scrapegraph-js v2 API
Align the CLI with ScrapeGraphAI/scrapegraph-js#11 (v2 SDK migration):

- Rename smart-scraper → extract, search-scraper → search
- Remove commands dropped from the API: agentic-scraper, generate-schema, sitemap, validate
- Add client factory (src/lib/client.ts) using the new scrapegraphai({ apiKey }) pattern
- Update scrape command with --format flag (markdown, html, screenshot, branding)
- Update crawl to use crawl.start/status polling lifecycle
- Update history to use v2 service names and parameters
- All commands now use try/catch (v2 throws on error) and self-timed elapsed

BREAKING CHANGE: CLI commands have been renamed and removed to match the v2 API surface.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 2dc4981 commit ca9d0b1

19 files changed

+407
-588
lines changed

README.md

Lines changed: 101 additions & 180 deletions
Large diffs are not rendered by default.

bun.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "just-scrape",
3-
"version": "0.2.1",
3+
"version": "0.3.0",
44
"description": "ScrapeGraph AI CLI tool",
55
"type": "module",
66
"main": "dist/cli.mjs",
@@ -28,7 +28,7 @@
2828
"chalk": "^5.4.1",
2929
"citty": "^0.1.6",
3030
"dotenv": "^17.2.4",
31-
"scrapegraph-js": "^1.0.0"
31+
"scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#feat/sdk-v2-migration"
3232
},
3333
"devDependencies": {
3434
"@biomejs/biome": "^1.9.4",

src/cli.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,13 @@ const main = defineCommand({
1212
description: "ScrapeGraph AI CLI tool",
1313
},
1414
subCommands: {
15-
"smart-scraper": () => import("./commands/smart-scraper.js").then((m) => m.default),
16-
"search-scraper": () => import("./commands/search-scraper.js").then((m) => m.default),
15+
extract: () => import("./commands/extract.js").then((m) => m.default),
16+
search: () => import("./commands/search.js").then((m) => m.default),
17+
scrape: () => import("./commands/scrape.js").then((m) => m.default),
1718
markdownify: () => import("./commands/markdownify.js").then((m) => m.default),
1819
crawl: () => import("./commands/crawl.js").then((m) => m.default),
19-
sitemap: () => import("./commands/sitemap.js").then((m) => m.default),
20-
scrape: () => import("./commands/scrape.js").then((m) => m.default),
21-
"agentic-scraper": () => import("./commands/agentic-scraper.js").then((m) => m.default),
22-
"generate-schema": () => import("./commands/generate-schema.js").then((m) => m.default),
2320
history: () => import("./commands/history.js").then((m) => m.default),
2421
credits: () => import("./commands/credits.js").then((m) => m.default),
25-
validate: () => import("./commands/validate.js").then((m) => m.default),
2622
},
2723
});
2824

src/commands/agentic-scraper.ts

Lines changed: 0 additions & 51 deletions
This file was deleted.

src/commands/crawl.ts

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import { defineCommand } from "citty";
2-
import * as scrapegraphai from "scrapegraph-js";
3-
import { resolveApiKey } from "../lib/folders.js";
2+
import { createClient } from "../lib/client.js";
43
import * as log from "../lib/log.js";
54

5+
const POLL_INTERVAL_MS = 3000;
6+
67
export default defineCommand({
78
meta: {
89
name: "crawl",
@@ -14,49 +15,54 @@ export default defineCommand({
1415
description: "Starting URL to crawl",
1516
required: true,
1617
},
17-
prompt: {
18-
type: "string",
19-
alias: "p",
20-
description: "Extraction prompt (required when extraction mode is on)",
21-
},
22-
"no-extraction": {
23-
type: "boolean",
24-
description: "Return markdown only (2 credits/page instead of 10)",
25-
},
26-
"max-pages": { type: "string", description: "Maximum pages to crawl (default 10)" },
27-
depth: { type: "string", description: "Crawl depth (default 1)" },
28-
schema: { type: "string", description: "Output JSON schema (as JSON string)" },
29-
rules: { type: "string", description: "Crawl rules as JSON object string" },
30-
"no-sitemap": { type: "boolean", description: "Disable sitemap-based URL discovery" },
18+
"max-pages": { type: "string", description: "Maximum pages to crawl (default 50)" },
19+
"max-depth": { type: "string", description: "Crawl depth (default 2)" },
20+
"max-links-per-page": { type: "string", description: "Max links per page (default 10)" },
21+
"allow-external": { type: "boolean", description: "Allow crawling external domains" },
3122
stealth: { type: "boolean", description: "Bypass bot detection (+4 credits)" },
3223
json: { type: "boolean", description: "Output raw JSON (pipeable)" },
3324
},
3425
run: async ({ args }) => {
3526
const out = log.create(!!args.json);
36-
out.docs("https://docs.scrapegraphai.com/services/smartcrawler");
37-
const key = await resolveApiKey(!!args.json);
27+
out.docs("https://docs.scrapegraphai.com/api-reference/crawl");
28+
const sgai = await createClient(!!args.json);
3829

39-
const base: Record<string, unknown> = { url: args.url };
40-
if (args["max-pages"]) base.max_pages = Number(args["max-pages"]);
41-
if (args.depth) base.depth = Number(args.depth);
42-
if (args.rules) base.rules = JSON.parse(args.rules);
43-
if (args["no-sitemap"]) base.sitemap = false;
44-
if (args.stealth) base.stealth = true;
30+
const crawlOptions: Record<string, unknown> = {};
31+
if (args["max-pages"]) crawlOptions.maxPages = Number(args["max-pages"]);
32+
if (args["max-depth"]) crawlOptions.maxDepth = Number(args["max-depth"]);
33+
if (args["max-links-per-page"])
34+
crawlOptions.maxLinksPerPage = Number(args["max-links-per-page"]);
35+
if (args["allow-external"]) crawlOptions.allowExternal = true;
36+
if (args.stealth) crawlOptions.fetchConfig = { stealth: true };
4537

46-
if (args["no-extraction"]) {
47-
base.extraction_mode = false;
48-
} else {
49-
if (args.prompt) base.prompt = args.prompt;
50-
if (args.schema) base.schema = JSON.parse(args.schema);
51-
}
38+
out.start("Crawling");
39+
const t0 = performance.now();
40+
try {
41+
const job = await sgai.crawl.start(args.url, crawlOptions as any);
42+
const jobId = (job.data as { id: string }).id;
5243

53-
const params = base as scrapegraphai.CrawlParams;
44+
if (!jobId) {
45+
out.stop(Math.round(performance.now() - t0));
46+
out.result(job.data);
47+
return;
48+
}
5449

55-
out.start("Crawling");
56-
const result = await scrapegraphai.crawl(key, params, out.poll);
57-
out.stop(result.elapsedMs);
50+
// Poll until the crawl completes
51+
while (true) {
52+
await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
53+
const status = await sgai.crawl.status(jobId);
54+
const statusData = status.data as { status: string; [key: string]: unknown };
55+
out.poll(statusData.status);
5856

59-
if (result.data) out.result(result.data);
60-
else out.error(result.error);
57+
if (statusData.status === "completed" || statusData.status === "failed" || statusData.status === "cancelled") {
58+
out.stop(Math.round(performance.now() - t0));
59+
out.result(status.data);
60+
return;
61+
}
62+
}
63+
} catch (err) {
64+
out.stop(Math.round(performance.now() - t0));
65+
out.error(err instanceof Error ? err.message : String(err));
66+
}
6167
},
6268
});

src/commands/credits.ts

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { defineCommand } from "citty";
2-
import * as scrapegraphai from "scrapegraph-js";
3-
import { resolveApiKey } from "../lib/folders.js";
2+
import { createClient } from "../lib/client.js";
43
import * as log from "../lib/log.js";
54

65
export default defineCommand({
@@ -13,13 +12,17 @@ export default defineCommand({
1312
},
1413
run: async ({ args }) => {
1514
const out = log.create(!!args.json);
16-
const key = await resolveApiKey(!!args.json);
15+
const sgai = await createClient(!!args.json);
1716

1817
out.start("Fetching credits");
19-
const result = await scrapegraphai.getCredits(key);
20-
out.stop(result.elapsedMs);
21-
22-
if (result.data) out.result(result.data);
23-
else out.error(result.error);
18+
const t0 = performance.now();
19+
try {
20+
const result = await sgai.credits();
21+
out.stop(Math.round(performance.now() - t0));
22+
out.result(result.data);
23+
} catch (err) {
24+
out.stop(Math.round(performance.now() - t0));
25+
out.error(err instanceof Error ? err.message : String(err));
26+
}
2427
},
2528
});

src/commands/extract.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
import { defineCommand } from "citty";
2+
import { createClient } from "../lib/client.js";
3+
import * as log from "../lib/log.js";
4+
5+
export default defineCommand({
6+
meta: {
7+
name: "extract",
8+
description: "Extract structured data from a URL using AI",
9+
},
10+
args: {
11+
url: {
12+
type: "positional",
13+
description: "Website URL to scrape",
14+
required: true,
15+
},
16+
prompt: {
17+
type: "string",
18+
alias: "p",
19+
description: "Extraction prompt",
20+
required: true,
21+
},
22+
schema: { type: "string", description: "Output JSON schema (as JSON string)" },
23+
scrolls: { type: "string", description: "Number of infinite scrolls (0-100)" },
24+
stealth: { type: "boolean", description: "Bypass bot detection (+4 credits)" },
25+
cookies: { type: "string", description: "Cookies as JSON object string" },
26+
headers: { type: "string", description: "Custom headers as JSON object string" },
27+
country: { type: "string", description: "ISO country code for geo-targeting" },
28+
json: { type: "boolean", description: "Output raw JSON (pipeable)" },
29+
},
30+
run: async ({ args }) => {
31+
const out = log.create(!!args.json);
32+
out.docs("https://docs.scrapegraphai.com/api-reference/extract");
33+
const sgai = await createClient(!!args.json);
34+
35+
const fetchConfig: Record<string, unknown> = {};
36+
if (args.scrolls) fetchConfig.scrolls = Number(args.scrolls);
37+
if (args.stealth) fetchConfig.stealth = true;
38+
if (args.cookies) fetchConfig.cookies = JSON.parse(args.cookies);
39+
if (args.headers) fetchConfig.headers = JSON.parse(args.headers);
40+
if (args.country) fetchConfig.country = args.country;
41+
42+
const extractOptions: Record<string, unknown> = { prompt: args.prompt };
43+
if (args.schema) extractOptions.schema = JSON.parse(args.schema);
44+
if (Object.keys(fetchConfig).length > 0) extractOptions.fetchConfig = fetchConfig;
45+
46+
out.start("Extracting");
47+
const t0 = performance.now();
48+
try {
49+
const result = await sgai.extract(args.url, extractOptions as any);
50+
out.stop(Math.round(performance.now() - t0));
51+
out.result(result.data);
52+
} catch (err) {
53+
out.stop(Math.round(performance.now() - t0));
54+
out.error(err instanceof Error ? err.message : String(err));
55+
}
56+
},
57+
});

src/commands/generate-schema.ts

Lines changed: 0 additions & 37 deletions
This file was deleted.

0 commit comments

Comments (0)