-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathexamples.ts
More file actions
121 lines (107 loc) · 3.58 KB
/
examples.ts
File metadata and controls
121 lines (107 loc) · 3.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import { LLMCrawl } from "./src/index";
// Shared SDK client used by the examples below; replace the placeholder with a real API key.
const client = new LLMCrawl({ apiKey: "your-api-key-here" });
/**
 * Runs the four SDK walkthroughs in order: a simple scrape, an LLM-backed
 * extraction, a crawl with status polling, and a site map.
 *
 * Each outcome (success or failure) is written to the console. Anything
 * thrown inside the walkthroughs is caught and logged instead of being
 * propagated to the caller.
 */
async function examples(): Promise<void> {
  // Delay between crawl status checks and the maximum number of checks.
  // 120 checks at 5 s each bounds the polling loop to roughly ten minutes,
  // so a job stuck in "scraping" cannot hang the script forever.
  const pollIntervalMs = 5000;
  const maxStatusPolls = 120;

  try {
    // Example 1: Simple scraping
    console.log("=== Simple Scraping ===");
    const scrapeResult = await client.scrape("https://example.com", {
      formats: ["markdown", "html"],
    });
    if (scrapeResult.success) {
      console.log("Scraping successful!");
      console.log("Markdown length:", scrapeResult.data.markdown?.length);
    } else {
      console.error("Scraping failed:", scrapeResult.error);
    }

    // Example 2: AI-powered extraction
    console.log("\n=== AI-Powered Extraction ===");
    const extractResult = await client.scrape(
      "https://example-store.com/product",
      {
        formats: ["markdown"],
        extract: {
          mode: "llm" as const,
          systemPrompt:
            "Extract product information from this e-commerce page.",
          schema: {
            type: "object",
            properties: {
              title: { type: "string" },
              price: { type: "number" },
              description: { type: "string" },
              inStock: { type: "boolean" },
            },
            required: ["title", "price"],
          },
        },
      }
    );
    if (!extractResult.success) {
      console.error("Extraction failed:", extractResult.error);
    } else if (extractResult.data.extract) {
      // Parse in its own try/catch so a malformed payload is reported here
      // and does not abort the remaining examples via the outer catch.
      try {
        const productData: unknown = JSON.parse(extractResult.data.extract);
        console.log("Extraction successful!");
        console.log("Product data:", productData);
      } catch (parseError) {
        console.error("Extraction returned invalid JSON:", parseError);
      }
    } else {
      console.warn("Extraction returned no structured data.");
    }

    // Example 3: Website crawling
    console.log("\n=== Website Crawling ===");
    const crawlResult = await client.crawl("https://docs.example.com", {
      limit: 50,
      maxDepth: 3,
      includePaths: ["/docs/*"],
      excludePaths: ["/docs/internal/*"],
      scrapeOptions: {
        formats: ["markdown"],
        waitFor: 0,
        extract: {
          mode: "llm" as const,
          systemPrompt: "Extract documentation structure from this page.",
          schema: {
            type: "object",
            properties: {
              title: { type: "string" },
              section: { type: "string" },
              content: { type: "string" },
            },
          },
        },
      },
    });
    if (crawlResult.success) {
      console.log("Crawl started with ID:", crawlResult.id);

      // Poll for status
      let status = await client.getCrawlStatus(crawlResult.id);
      console.log("Initial status:", status.success ? status.status : "Error");

      // Keep polling while the job reports "scraping", up to maxStatusPolls checks.
      let polls = 0;
      while (
        status.success &&
        status.status === "scraping" &&
        polls < maxStatusPolls
      ) {
        console.log(`Progress: ${status.completed}/${status.total} pages`);
        await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
        status = await client.getCrawlStatus(crawlResult.id);
        polls += 1;
      }

      if (!status.success) {
        console.error("Could not retrieve crawl status:", status);
      } else if (status.status === "completed") {
        console.log("Crawl completed!");
        console.log("Total pages scraped:", status.data.length);
      } else {
        // Either the job ended in a non-completed state or the poll cap was hit.
        console.warn(
          `Crawl did not complete; last reported status: ${status.status}`
        );
      }
    } else {
      console.error("Crawl failed to start:", crawlResult);
    }

    // Example 4: Site mapping
    console.log("\n=== Site Mapping ===");
    const mapResult = await client.map("https://example.com", {
      limit: 100,
      includeSubdomains: false,
      search: "blog",
    });
    if (mapResult.success) {
      console.log("Site mapping successful!");
      console.log("Found links:", mapResult.links.length);
      console.log("Sample links:", mapResult.links.slice(0, 5));
    } else {
      console.error("Site mapping failed:", mapResult);
    }
  } catch (error) {
    console.error("Example error:", error);
  }
}
// Start the walkthroughs only when this file is the program entry point,
// not when it is imported by another module.
if (require.main === module) {
  // examples() catches and logs its own errors, so the promise is
  // intentionally left un-awaited.
  void examples();
}

export { examples };