Official JavaScript/TypeScript SDK for the ScrapeGraph AI API - Smart web scraping powered by AI.
- ✨ Smart web scraping with AI
- 🔄 Fully asynchronous design
- 🔍 Detailed error handling
- ⚡ Automatic retries and logging
- 🔐 Secure API authentication
Install the package using npm or yarn:
# Using npm
npm i scrapegraph-js
# Using yarn
yarn add scrapegraph-js
Note: Store your API keys securely in environment variables. Use
.env files and libraries like dotenv to load them into your app.
import { smartScraper } from 'scrapegraph-js';
import 'dotenv/config';
// Initialize variables
const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable
const websiteUrl = 'https://example.com';
const prompt = 'What does the company do?';
(async () => {
try {
const response = await smartScraper(apiKey, websiteUrl, prompt);
console.log(response.result);
} catch (error) {
console.error('Error:', error);
}
})();
import { smartScraper } from 'scrapegraph-js';
const apiKey = 'your-api-key';
const url = 'https://example.com';
const prompt = 'Extract the main heading and description.';
(async () => {
try {
const response = await smartScraper(apiKey, url, prompt);
console.log(response.result);
} catch (error) {
console.error('Error:', error);
}
})();
Note: To pass an output schema to smartScraper (the optional fourth argument), you need the Zod package to define the schema.
Here is a real-world example:
import { smartScraper } from 'scrapegraph-js';
import { z } from 'zod';
const apiKey = 'your-api-key';
const url = 'https://scrapegraphai.com/';
const prompt = 'What does the company do?';
const schema = z.object({
title: z.string().describe('The title of the webpage'),
description: z.string().describe('The description of the webpage'),
summary: z.string().describe('A brief summary of the webpage'),
});
(async () => {
try {
const response = await smartScraper(apiKey, url, prompt, schema);
console.log(response.result);
} catch (error) {
console.error('Error:', error);
}
})();
For websites that load content dynamically through infinite scrolling (like social media feeds), you can use the numberOfScrolls parameter:
import { smartScraper } from 'scrapegraph-js';
const apiKey = 'your-api-key';
const url = 'https://example.com/infinite-scroll-page';
const prompt = 'Extract all the posts from the feed';
const numberOfScrolls = 10; // Will scroll 10 times to load more content
(async () => {
try {
const response = await smartScraper(apiKey, url, prompt, null, numberOfScrolls);
console.log('Extracted data from scrolled page:', response);
} catch (error) {
console.error('Error:', error);
}
})();
The numberOfScrolls parameter accepts values between 0 and 100, allowing you to control how many times the page should be scrolled before extraction.
Search and extract information from multiple web sources using AI.
import { searchScraper } from 'scrapegraph-js';
const apiKey = 'your-api-key';
const prompt = 'What is the latest version of Python and what are its main features?';
(async () => {
try {
const response = await searchScraper(apiKey, prompt);
console.log(response.result);
} catch (error) {
console.error('Error:', error);
}
})();
Start a crawl job to extract structured data from a website and its linked pages, using a custom schema.
import { crawl, getCrawlRequest } from 'scrapegraph-js';
import 'dotenv/config';
const apiKey = process.env.SGAI_APIKEY;
const url = 'https://scrapegraphai.com/';
const prompt = 'What does the company do? and I need text content from their privacy and terms';
const schema = {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "ScrapeGraphAI Website Content",
"type": "object",
"properties": {
"company": {
"type": "object",
"properties": {
"name": { "type": "string" },
"description": { "type": "string" },
"features": { "type": "array", "items": { "type": "string" } },
"contact_email": { "type": "string", "format": "email" },
"social_links": {
"type": "object",
"properties": {
"github": { "type": "string", "format": "uri" },
"linkedin": { "type": "string", "format": "uri" },
"twitter": { "type": "string", "format": "uri" }
},
"additionalProperties": false
}
},
"required": ["name", "description"]
},
"services": {
"type": "array",
"items": {
"type": "object",
"properties": {
"service_name": { "type": "string" },
"description": { "type": "string" },
"features": { "type": "array", "items": { "type": "string" } }
},
"required": ["service_name", "description"]
}
},
"legal": {
"type": "object",
"properties": {
"privacy_policy": { "type": "string" },
"terms_of_service": { "type": "string" }
},
"required": ["privacy_policy", "terms_of_service"]
}
},
"required": ["company", "services", "legal"]
};
(async () => {
try {
// Start the crawl job
const crawlResponse = await crawl(apiKey, url, prompt, schema, {
cacheWebsite: true,
depth: 2,
maxPages: 2,
sameDomainOnly: true,
batchSize: 1,
});
console.log('Crawl job started. Response:', crawlResponse);
// If the crawl is asynchronous and returns an ID, fetch the result
const crawlId = crawlResponse.id || crawlResponse.task_id;
if (crawlId) {
for (let i = 0; i < 10; i++) {
await new Promise((resolve) => setTimeout(resolve, 5000));
const result = await getCrawlRequest(apiKey, crawlId);
if (result.status === 'success' && result.result) {
console.log('Crawl completed. Result:', result.result.llm_result);
break;
} else if (result.status === 'failed') {
console.log('Crawl failed. Result:', result);
break;
} else {
console.log(`Status: ${result.status}, waiting...`);
}
}
} else {
console.log('No crawl ID found in response. Synchronous result:', crawlResponse);
}
} catch (error) {
console.error('Error occurred:', error);
}
})();
You can use a plain JSON schema or a Zod schema for the schema parameter. The crawl API supports options for crawl depth, max pages, domain restriction, and batch size.
Extract structured data from local HTML content
import { localScraper } from 'scrapegraph-js';
const apiKey = 'your_api_key';
const prompt = 'What does the company do?';
const websiteHtml = `<html>
<body>
<h1>Company Name</h1>
<p>We are a technology company focused on AI solutions.</p>
<div class="contact">
<p>Email: contact@example.com</p>
</div>
</body>
</html>`;
(async () => {
try {
const response = await localScraper(apiKey, websiteHtml, prompt);
console.log(response);
} catch (error) {
console.error(error);
}
})();
Converts a webpage into clean, well-structured markdown format.
import { markdownify } from 'scrapegraph-js';
const apiKey = 'your_api_key';
const url = 'https://scrapegraphai.com/';
(async () => {
try {
const response = await markdownify(apiKey, url);
console.log(response);
} catch (error) {
console.error(error);
}
})();
import { getCredits } from 'scrapegraph-js';
const apiKey = 'your-api-key';
(async () => {
try {
const credits = await getCredits(apiKey);
console.log('Available credits:', credits);
} catch (error) {
console.error('Error fetching credits:', error);
}
})();
import { sendFeedback } from 'scrapegraph-js';
const apiKey = 'your-api-key';
const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b';
const rating = 5;
const feedbackText = 'This is a test feedback message.';
(async () => {
try {
const response = await sendFeedback(apiKey, requestId, rating, feedbackText);
console.log('Feedback response:', response);
} catch (error) {
console.error('Error sending feedback:', error);
}
})();
For detailed documentation, visit docs.scrapegraphai.com
- Clone the repository:
git clone https://github.com/ScrapeGraphAI/scrapegraph-sdk.git
cd scrapegraph-sdk/scrapegraph-js
- Install dependencies:
npm install
- Run linting and testing:
npm run lint
npm test
# Run all tests
npm test
# Run tests with coverage
npm run test:coverage
This project is licensed under the MIT License - see the LICENSE file for details.
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
- Fork the repository
- Create your feature branch (git checkout -b feature/AmazingFeature)
- Commit your changes (git commit -m 'Add some AmazingFeature')
- Push to the branch (git push origin feature/AmazingFeature)
- Open a Pull Request
- 📧 Email: support@scrapegraphai.com
- 💻 GitHub Issues: Create an issue
- 🌟 Feature Requests: Request a feature
Made with ❤️ by ScrapeGraph AI
