Skip to content

Commit fd4ad48

Browse files
VinciGit00 and claude committed
feat: align CLI with scrapegraph-js v2 PR #11 (016ae8b)
Split compound fetch modes (direct+stealth, js+stealth) into a separate --mode option (auto|fast|js) and a --stealth boolean flag.
Add --nationality param to search command.
Update SDK dependency to latest PR commit.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent dc766a8 commit fd4ad48

10 files changed

Lines changed: 48 additions & 21 deletions

File tree

README.md

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,7 @@ Extract structured data from any URL using AI (replaces `smart-scraper`). [docs]
104104
just-scrape extract <url> -p <prompt> # Extract data with AI
105105
just-scrape extract <url> -p <prompt> --schema <json> # Enforce output schema
106106
just-scrape extract <url> -p <prompt> --scrolls <n> # Infinite scroll (0-100)
107-
just-scrape extract <url> -p <prompt> --mode direct+stealth # Anti-bot bypass
107+
just-scrape extract <url> -p <prompt> --mode js --stealth # Anti-bot bypass
108108
just-scrape extract <url> -p <prompt> --cookies <json> --headers <json>
109109
just-scrape extract <url> -p <prompt> --country <iso> # Geo-targeting
110110
```
@@ -120,9 +120,9 @@ just-scrape extract https://news.example.com -p "Get all article headlines and d
120120
--schema '{"type":"object","properties":{"articles":{"type":"array","items":{"type":"object","properties":{"title":{"type":"string"},"date":{"type":"string"}}}}}}' \
121121
--scrolls 5
122122

123-
# Scrape a JS-heavy SPA behind anti-bot protection
123+
# Scrape a JS-heavy SPA with stealth mode
124124
just-scrape extract https://app.example.com/dashboard -p "Extract user stats" \
125-
--mode js+stealth
125+
--mode js --stealth
126126
```
127127

128128
## Search
@@ -139,6 +139,7 @@ just-scrape search <query> --schema <json> # Enforce output s
139139
just-scrape search <query> --location-geo-code <code> # Geo-target search (e.g. 'us', 'de', 'jp-tk')
140140
just-scrape search <query> --time-range <range> # past_hour | past_24_hours | past_week | past_month | past_year
141141
just-scrape search <query> --format <markdown|html> # Result format (default markdown)
142+
just-scrape search <query> --nationality <iso> # 2-letter ISO nationality code
142143
just-scrape search <query> --headers <json>
143144
```
144145

@@ -174,7 +175,7 @@ just-scrape scrape <url> -f json -p <prompt> # Structured JSON via
174175
just-scrape scrape <url> -f markdown,links,images # Multi-format (comma-separated)
175176
just-scrape scrape <url> --html-mode reader # normal (default), reader, or prune
176177
just-scrape scrape <url> --scrolls <n> # Infinite scroll (0-100)
177-
just-scrape scrape <url> -m direct+stealth # Anti-bot bypass
178+
just-scrape scrape <url> -m js --stealth # Anti-bot bypass
178179
just-scrape scrape <url> --country <iso> # Geo-targeting
179180
```
180181

@@ -193,8 +194,8 @@ just-scrape scrape https://example.com -f markdown,links,images
193194
# Structured JSON output with a prompt
194195
just-scrape scrape https://store.example.com -f json -p "Extract product name and price"
195196

196-
# Scrape with anti-bot bypass and geo-targeting
197-
just-scrape scrape https://store.example.com -m direct+stealth --country DE
197+
# Scrape with stealth mode and geo-targeting
198+
just-scrape scrape https://store.example.com --stealth --country DE
198199
```
199200

200201
## Markdownify
@@ -205,7 +206,7 @@ Convert any webpage to clean markdown (convenience wrapper for `scrape --format
205206

206207
```bash
207208
just-scrape markdownify <url> # Convert to markdown
208-
just-scrape markdownify <url> -m direct+stealth # Anti-bot bypass
209+
just-scrape markdownify <url> -m js --stealth # Anti-bot bypass
209210
just-scrape markdownify <url> --headers <json> # Custom headers
210211
```
211212

@@ -216,7 +217,7 @@ just-scrape markdownify <url> --headers <json> # Custom headers
216217
just-scrape markdownify https://blog.example.com/my-article
217218

218219
# Convert a JS-rendered page behind Cloudflare
219-
just-scrape markdownify https://protected.example.com -m js+stealth
220+
just-scrape markdownify https://protected.example.com -m js --stealth
220221

221222
# Pipe markdown to a file
222223
just-scrape markdownify https://docs.example.com/api --json | jq -r '.markdown' > api-docs.md
@@ -235,7 +236,7 @@ just-scrape crawl <url> --max-depth <n> # Crawl depth (default 2)
235236
just-scrape crawl <url> --max-links-per-page <n> # Links per page (default 10)
236237
just-scrape crawl <url> --allow-external # Allow external domains
237238
just-scrape crawl <url> -f html # Page format (default markdown)
238-
just-scrape crawl <url> -m direct+stealth # Anti-bot bypass
239+
just-scrape crawl <url> -m js --stealth # Anti-bot bypass
239240
```
240241

241242
### Examples
@@ -303,7 +304,7 @@ Commands have been renamed to match the v2 API:
303304
| `scrape` | `scrape` | Gains `--format` (markdown, html, screenshot, branding, links, images, summary, json), multi-format via comma, `--html-mode`, `--scrolls`, `--prompt`, `--schema` |
304305
| `crawl` | `crawl` | New options: `--max-depth`, `--max-links-per-page`, `--allow-external`, `--format` |
305306
| `search` | `search` | New options: `--location-geo-code`, `--time-range`, `--format` |
306-
| `--stealth` flag | `--mode direct+stealth` | Fetch mode enum replaces boolean (`auto`, `fast`, `js`, `direct+stealth`, `js+stealth`) |
307+
| `--stealth` flag | `--stealth` | Separate boolean flag; fetch mode is now `auto`, `fast`, or `js` |
307308
| `agentic-scraper` || Removed from API |
308309
| `generate-schema` || Removed from API |
309310
| `sitemap` || Removed from API |

bun.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
"chalk": "^5.4.1",
2929
"citty": "^0.1.6",
3030
"dotenv": "^17.2.4",
31-
"scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#c5bf757"
31+
"scrapegraph-js": "github:ScrapeGraphAI/scrapegraph-js#016ae8b"
3232
},
3333
"devDependencies": {
3434
"@biomejs/biome": "^1.9.4",

src/commands/crawl.ts

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,9 @@ export default defineCommand({
2828
mode: {
2929
type: "string",
3030
alias: "m",
31-
description: "Fetch mode: auto (default), fast, js, direct+stealth, js+stealth",
31+
description: "Fetch mode: auto (default), fast, js",
3232
},
33+
stealth: { type: "boolean", description: "Enable stealth mode" },
3334
json: { type: "boolean", description: "Output raw JSON (pipeable)" },
3435
},
3536
run: async ({ args }) => {
@@ -44,7 +45,10 @@ export default defineCommand({
4445
crawlOptions.maxLinksPerPage = Number(args["max-links-per-page"]);
4546
if (args["allow-external"]) crawlOptions.allowExternal = true;
4647
if (args.format) crawlOptions.format = args.format;
47-
if (args.mode) crawlOptions.fetchConfig = { mode: args.mode };
48+
const fetchConfig: Record<string, unknown> = {};
49+
if (args.mode) fetchConfig.mode = args.mode;
50+
if (args.stealth) fetchConfig.stealth = true;
51+
if (Object.keys(fetchConfig).length > 0) crawlOptions.fetchConfig = fetchConfig;
4852

4953
out.start("Crawling");
5054
const t0 = performance.now();

src/commands/extract.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,8 @@ export default defineCommand({
2121
},
2222
schema: { type: "string", description: "Output JSON schema (as JSON string)" },
2323
scrolls: { type: "string", description: "Number of infinite scrolls (0-100)" },
24-
mode: { type: "string", description: "Fetch mode: auto (default), fast, js, direct+stealth, js+stealth" },
24+
mode: { type: "string", description: "Fetch mode: auto (default), fast, js" },
25+
stealth: { type: "boolean", description: "Enable stealth mode" },
2526
cookies: { type: "string", description: "Cookies as JSON object string" },
2627
headers: { type: "string", description: "Custom headers as JSON object string" },
2728
country: { type: "string", description: "ISO country code for geo-targeting" },
@@ -35,6 +36,7 @@ export default defineCommand({
3536
const fetchConfig: Record<string, unknown> = {};
3637
if (args.scrolls) fetchConfig.scrolls = Number(args.scrolls);
3738
if (args.mode) fetchConfig.mode = args.mode;
39+
if (args.stealth) fetchConfig.stealth = true;
3840
if (args.cookies) fetchConfig.cookies = JSON.parse(args.cookies);
3941
if (args.headers) fetchConfig.headers = JSON.parse(args.headers);
4042
if (args.country) fetchConfig.country = args.country;

src/commands/history.ts

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,17 @@ export default defineCommand({
6464
const t0 = performance.now();
6565
const r = await sgai.history({ service, page: pg, limit });
6666
const ms = Math.round(performance.now() - t0);
67-
const d = r.data as { data?: Record<string, unknown>[]; requests?: Record<string, unknown>[]; next_key?: string; total?: number };
68-
return { rows: d.data ?? d.requests ?? [], hasMore: !!d.next_key || (d.total != null && pg * limit < d.total), ms };
67+
const d = r.data as {
68+
data?: Record<string, unknown>[];
69+
requests?: Record<string, unknown>[];
70+
next_key?: string;
71+
total?: number;
72+
};
73+
return {
74+
rows: d.data ?? d.requests ?? [],
75+
hasMore: !!d.next_key || (d.total != null && pg * limit < d.total),
76+
ms,
77+
};
6978
};
7079

7180
if (quiet || requestId) {

src/commands/markdownify.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,8 @@ export default defineCommand({
1313
description: "Website URL to convert",
1414
required: true,
1515
},
16-
mode: { type: "string", alias: "m", description: "Fetch mode: auto (default), fast, js, direct+stealth, js+stealth" },
16+
mode: { type: "string", alias: "m", description: "Fetch mode: auto (default), fast, js" },
17+
stealth: { type: "boolean", description: "Enable stealth mode" },
1718
headers: { type: "string", description: "Custom headers as JSON object string" },
1819
json: { type: "boolean", description: "Output raw JSON (pipeable)" },
1920
},
@@ -24,6 +25,7 @@ export default defineCommand({
2425

2526
const fetchConfig: Record<string, unknown> = {};
2627
if (args.mode) fetchConfig.mode = args.mode;
28+
if (args.stealth) fetchConfig.stealth = true;
2729
if (args.headers) fetchConfig.headers = JSON.parse(args.headers);
2830

2931
const scrapeOptions: Record<string, unknown> = { format: "markdown" };

src/commands/scrape.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,9 @@ export default defineCommand({
4343
mode: {
4444
type: "string",
4545
alias: "m",
46-
description: "Fetch mode: auto (default), fast, js, direct+stealth, js+stealth",
46+
description: "Fetch mode: auto (default), fast, js",
4747
},
48+
stealth: { type: "boolean", description: "Enable stealth mode" },
4849
"html-mode": {
4950
type: "string",
5051
description: "HTML/markdown extraction mode: normal (default), reader, prune",
@@ -60,6 +61,7 @@ export default defineCommand({
6061

6162
const fetchConfig: Record<string, unknown> = {};
6263
if (args.mode) fetchConfig.mode = args.mode;
64+
if (args.stealth) fetchConfig.stealth = true;
6365
if (args.scrolls) fetchConfig.scrolls = Number(args.scrolls);
6466
if (args.country) fetchConfig.country = args.country;
6567

src/commands/search.ts

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,10 @@ export default defineCommand({
3636
type: "string",
3737
description: "Result format: markdown (default) or html",
3838
},
39+
nationality: {
40+
type: "string",
41+
description: "2-letter ISO nationality code for search personalization",
42+
},
3943
headers: { type: "string", description: "Custom headers as JSON object string" },
4044
json: { type: "boolean", description: "Output raw JSON (pipeable)" },
4145
},
@@ -51,6 +55,7 @@ export default defineCommand({
5155
if (args["location-geo-code"]) searchOptions.locationGeoCode = args["location-geo-code"];
5256
if (args["time-range"]) searchOptions.timeRange = args["time-range"];
5357
if (args.format) searchOptions.format = args.format;
58+
if (args.nationality) searchOptions.nationality = args.nationality;
5459
if (args.headers) searchOptions.fetchConfig = { headers: JSON.parse(args.headers) };
5560

5661
out.start("Searching");

src/utils/banner.ts

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,9 @@ export function showBanner() {
3131
console.log(chalk.hex(BANNER_COLOR)(TAGLINE));
3232
console.log(chalk.hex(BANNER_COLOR)(`v${getVersion()}`));
3333
if (process.env.SGAI_API_URL || process.env.JUST_SCRAPE_API_URL) {
34-
console.log(chalk.yellow(`→ Custom API: ${process.env.SGAI_API_URL || process.env.JUST_SCRAPE_API_URL}`));
34+
console.log(
35+
chalk.yellow(`→ Custom API: ${process.env.SGAI_API_URL || process.env.JUST_SCRAPE_API_URL}`),
36+
);
3537
}
3638
console.log();
3739
}

0 commit comments

Comments (0)