Skip to content
This repository was archived by the owner on Nov 10, 2025. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions crewai_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
MySQLSearchTool,
NL2SQLTool,
OCRTool,
OlostepGoogleSearchTool,
OlostepWebScraperTool,
OxylabsAmazonProductScraperTool,
OxylabsAmazonSearchScraperTool,
OxylabsGoogleSearchScraperTool,
Expand Down
6 changes: 6 additions & 0 deletions crewai_tools/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@
from .mysql_search_tool.mysql_search_tool import MySQLSearchTool
from .nl2sql.nl2sql_tool import NL2SQLTool
from .ocr_tool.ocr_tool import OCRTool
from .olostep_google_search_tool.olostep_google_search_tool import (
OlostepGoogleSearchTool,
)
from .olostep_web_scraper_tool.olostep_web_scraper_tool import (
OlostepWebScraperTool,
)
from .oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import (
OxylabsAmazonProductScraperTool,
)
Expand Down
39 changes: 39 additions & 0 deletions crewai_tools/tools/olostep_google_search_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# OlostepGoogleSearchTool

## Description

The `OlostepGoogleSearchTool` allows you to perform a Google search using the Olostep API and receive structured JSON results.

## Installation

- Get an API key from [olostep.com](https://olostep.com) and expose it through the `OLOSTEP_API_KEY` environment variable, or pass it directly when creating the tool: `OlostepGoogleSearchTool(api_key="...")`.
- Install the `requests` package if you don't have it already:

```shell
pip install requests
```

## Example

Here's how to use the `OlostepGoogleSearchTool` to perform a Google search:

```python
from crewai_tools import OlostepGoogleSearchTool

# Initialize the tool
tool = OlostepGoogleSearchTool()

# Perform a search
results = tool.run(search_query="latest news on AI")
print(results)

# Perform a search with different location and language
results_localized = tool.run(search_query="best restaurants near me", location="fr", language="fr")
print(results_localized)
```

## Arguments

- `search_query` (str): The search query for Google.
- `location` (Optional[str]): The country to search from. It must be a two-letter country code (ISO 3166-1 alpha-2). Defaults to `"us"`.
- `language` (Optional[str]): The language for the search. It must be a two-letter language code (ISO 639-1). Defaults to `"en"`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import os
import json
from typing import Any, Optional, Type
import requests
from pydantic import BaseModel, Field
from crewai.tools import BaseTool, EnvVar

class OlostepGoogleSearchToolSchema(BaseModel):
    """Input for Olostep Google Search."""

    # Required free-text query that is sent to Google.
    search_query: str = Field(
        ...,
        description="The search query for Google.",
    )

    # Country the search is issued from; falls back to "us" when omitted.
    location: Optional[str] = Field(
        default="us",
        description="The country to search from. It must be a two-letter country code. (ISO 3166-1 alpha-2)",
    )

    # Interface/result language; falls back to "en" when omitted.
    language: Optional[str] = Field(
        default="en",
        description="The language to search in. It must be a two-letter language code. (ISO 639-1)",
    )

class OlostepGoogleSearchTool(BaseTool):
    # Tool that runs a Google search through the Olostep scrape endpoint.
    # It builds a google.com/search URL, posts it to `base_url` with the
    # "@olostep/google-search" parser, and returns the decoded `json_content`
    # of the response.

    name: str = "Olostep Google Search"
    description: str = "A tool to perform a Google search using the Olostep API and get structured results."
    args_schema: Type[BaseModel] = OlostepGoogleSearchToolSchema

    env_vars: list[EnvVar] = [
        EnvVar(
            name="OLOSTEP_API_KEY",
            description="API key for Olostep API.",
            required=True,
        ),
    ]
    package_dependencies: list[str] = ["requests"]

    # Endpoint that receives the scrape request.
    base_url: str = "https://api.olostep.com/v1/scrapes"
    # Resolved in __init__ from the explicit argument or the environment.
    api_key: Optional[str] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any) -> None:
        """Create the tool and resolve the Olostep API key.

        Args:
            api_key: Key to use; when falsy, ``OLOSTEP_API_KEY`` is read from
                the environment instead.
            **kwargs: Forwarded unchanged to ``BaseTool.__init__``.

        Raises:
            ValueError: If no key is supplied and the environment variable is
                unset or empty.
        """
        super().__init__(**kwargs)
        self.api_key = api_key or os.environ.get("OLOSTEP_API_KEY")
        if not self.api_key:
            raise ValueError("OLOSTEP_API_KEY environment variable is required for OlostepGoogleSearchTool.")

    def _run(self, search_query: str, location: str = "us", language: str = "en", **_: Any) -> Any:
        """Execute the search synchronously.

        Args:
            search_query: Text to search for.
            location: Two-letter country code sent as Google's ``gl``
                parameter; ``None`` or empty falls back to ``"us"``.
            language: Two-letter language code sent as Google's ``hl``
                parameter; ``None`` or empty falls back to ``"en"``.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            The object decoded from the response's ``json_content`` on
            success, otherwise a human-readable message string describing
            what went wrong (missing content, timeout, HTTP error, or any
            other exception).
        """
        # Local import keeps this block self-contained.
        from urllib.parse import urlencode

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # Percent-encode every parameter so characters such as "&", "#", "="
        # or non-ASCII text in the query cannot corrupt or inject parameters.
        # The args schema permits None for location/language, so fall back to
        # the defaults rather than sending the literal string "None".
        query_string = urlencode(
            {
                "q": search_query,
                "gl": location or "us",
                "hl": language or "en",
            }
        )

        data = {
            "url_to_scrape": f"https://www.google.com/search?{query_string}",
            "formats": ["json"],
            "parser": {"id": "@olostep/google-search"},
        }

        try:
            response = requests.post(self.base_url, headers=headers, json=data, timeout=30)
            response.raise_for_status()
            result = response.json()

            # Tolerate a missing, null, or non-dict "result" instead of
            # surfacing an AttributeError as an "unexpected error".
            scrape_result = result.get("result") if isinstance(result, dict) else None
            json_content = scrape_result.get("json_content") if isinstance(scrape_result, dict) else None
            if not json_content:
                return "No JSON content found in the response."
            return json.loads(json_content)

        except requests.Timeout:
            return "Olostep API request timed out. Please try again later."
        except requests.HTTPError as e:
            return f"Olostep API request failed with status {e.response.status_code}: {e.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"
38 changes: 38 additions & 0 deletions crewai_tools/tools/olostep_web_scraper_tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# OlostepWebScraperTool

## Description

The `OlostepWebScraperTool` allows you to scrape web pages using the Olostep API and receive the content in either markdown or HTML format.

## Installation

- Get an API key from [olostep.com](https://olostep.com) and expose it through the `OLOSTEP_API_KEY` environment variable, or pass it directly when creating the tool: `OlostepWebScraperTool(api_key="...")`.
- Install the `requests` package if you don't have it already:

```shell
pip install requests
```

## Example

Here's how to use the `OlostepWebScraperTool` to scrape a website:

```python
from crewai_tools import OlostepWebScraperTool

# Initialize the tool
tool = OlostepWebScraperTool()

# Scrape a URL and get the content in markdown (default)
result_md = tool.run(url_to_scrape="https://www.example.com")
print(result_md)

# Scrape a URL and get the content in both HTML and markdown
result_both = tool.run(url_to_scrape="https://www.example.com", formats=["html", "markdown"])
print(result_both)
```

## Arguments

- `url_to_scrape` (str): The URL of the webpage to scrape.
- `formats` (Optional[List[str]]): A list of formats to return. Can be `["markdown"]`, `["html"]`, or `["markdown", "html"]`. Defaults to `["markdown"]`.
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from typing import Any, List, Optional, Type
import os
import requests
from pydantic import BaseModel, Field
from crewai.tools import BaseTool, EnvVar

class OlostepWebScraperToolInput(BaseModel):
    """Input schema for OlostepWebScraperTool."""

    # Required address of the page to fetch.
    url_to_scrape: str = Field(
        ...,
        description="The URL of the webpage to scrape.",
    )

    # Output formats requested from the API; markdown only when omitted.
    formats: Optional[List[str]] = Field(
        default=["markdown"],
        description="List of formats to return. Can be 'html', 'markdown' or both.",
    )

class OlostepWebScraperTool(BaseTool):
    # Tool that posts a URL to the Olostep scrape endpoint and returns the
    # page content as labelled markdown and/or HTML text sections.

    name: str = "Olostep Web Scraper"
    description: str = "Scrapes a webpage using Olostep API and returns the content in specified formats."
    args_schema: Type[BaseModel] = OlostepWebScraperToolInput

    env_vars: List[EnvVar] = [
        EnvVar(
            name="OLOSTEP_API_KEY",
            description="API key for Olostep API.",
            required=True,
        ),
    ]
    package_dependencies: List[str] = ["requests"]

    # Endpoint that receives the scrape request.
    base_url: str = "https://api.olostep.com/v1/scrapes"
    # Filled in by __init__ from the argument or the environment.
    api_key: Optional[str] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any) -> None:
        """Build the tool, taking the key from ``api_key`` or ``OLOSTEP_API_KEY``.

        Raises:
            ValueError: When neither source provides a non-empty key.
        """
        super().__init__(**kwargs)
        self.api_key = os.environ.get("OLOSTEP_API_KEY") if not api_key else api_key
        if self.api_key:
            return
        raise ValueError("OLOSTEP_API_KEY environment variable is required for OlostepWebScraperTool.")

    def _run(self, url_to_scrape: str, formats: Optional[List[str]] = None, **_: Any) -> str:
        """Scrape ``url_to_scrape`` and return the requested content as text.

        ``formats`` defaults to ``["markdown"]`` when ``None``. Each requested
        format that the response actually contains becomes one labelled
        section; sections are separated by a blank line. Failures are reported
        as message strings rather than raised.
        """
        requested = ["markdown"] if formats is None else formats

        auth_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "url_to_scrape": url_to_scrape,
            "formats": requested,
        }

        try:
            response = requests.post(self.base_url, headers=auth_headers, json=payload, timeout=30)
            response.raise_for_status()
            body = response.json()

            sections = []
            if "result" in body:
                scraped = body["result"]
                # Markdown is always emitted before HTML, regardless of the
                # order in which the caller listed the formats.
                if "markdown" in requested and scraped.get("markdown_content"):
                    sections.append(f"Markdown Content:\n{scraped['markdown_content']}")
                if "html" in requested and scraped.get("html_content"):
                    sections.append(f"HTML Content:\n{scraped['html_content']}")

            return "\n\n".join(sections) if sections else "No content found for the specified formats."

        except requests.Timeout:
            return "Olostep API request timed out. Please try again later."
        except requests.HTTPError as e:
            return f"Olostep API request failed with status {e.response.status_code}: {e.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"
90 changes: 90 additions & 0 deletions tests/tools/test_olostep_google_search_tool.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os
import json
import pytest
import requests
from unittest.mock import patch, MagicMock
from crewai_tools.tools.olostep_google_search_tool.olostep_google_search_tool import OlostepGoogleSearchTool

@pytest.fixture
def olostep_search_tool():
    """Yield a search tool built while a dummy OLOSTEP_API_KEY is set."""
    fake_env = {"OLOSTEP_API_KEY": "test_api_key"}
    with patch.dict(os.environ, fake_env):
        tool = OlostepGoogleSearchTool()
        yield tool

def test_tool_initialization_requires_api_key():
    """Constructing the tool with an empty environment must raise ValueError."""
    expected_error = pytest.raises(
        ValueError,
        match="OLOSTEP_API_KEY environment variable is required for OlostepGoogleSearchTool.",
    )
    with patch.dict(os.environ, {}, clear=True), expected_error:
        OlostepGoogleSearchTool()

@patch('requests.post')
def test_google_search(mock_post, olostep_search_tool):
    """A default search returns the decoded JSON and targets q/gl=us/hl=en.

    The scraped URL is checked by parsing its query string rather than by
    substring matching, so the test validates the parameter values whether or
    not the tool percent-encodes them.
    """
    from urllib.parse import parse_qs, urlsplit

    mock_json_content = {"organic": [{"title": "Test Search Result"}]}
    mock_response = MagicMock()
    mock_response.raise_for_status.return_value = None
    mock_response.json.return_value = {
        "result": {
            "json_content": json.dumps(mock_json_content)
        }
    }
    mock_post.return_value = mock_response

    result = olostep_search_tool._run(search_query="test query")

    assert result == mock_json_content
    mock_post.assert_called_once()

    _, called_kwargs = mock_post.call_args
    scraped_url = called_kwargs['json']['url_to_scrape']
    params = parse_qs(urlsplit(scraped_url).query)
    assert params["q"] == ["test query"]
    assert params["gl"] == ["us"]
    assert params["hl"] == ["en"]

@patch('requests.post')
def test_google_search_with_location_and_language(mock_post, olostep_search_tool):
    """Explicit location/language are forwarded as the gl and hl parameters.

    The non-ASCII query is compared after decoding the URL's query string, so
    the assertion holds for both raw and percent-encoded URLs.
    """
    from urllib.parse import parse_qs, urlsplit

    mock_json_content = {"organic": [{"title": "Résultat de recherche test"}]}
    mock_response = MagicMock()
    mock_response.raise_for_status.return_value = None
    mock_response.json.return_value = {
        "result": {
            "json_content": json.dumps(mock_json_content)
        }
    }
    mock_post.return_value = mock_response

    result = olostep_search_tool._run(search_query="requête de test", location="fr", language="fr")

    assert result == mock_json_content
    mock_post.assert_called_once()

    _, called_kwargs = mock_post.call_args
    scraped_url = called_kwargs['json']['url_to_scrape']
    params = parse_qs(urlsplit(scraped_url).query)
    assert params["q"] == ["requête de test"]
    assert params["gl"] == ["fr"]
    assert params["hl"] == ["fr"]

@patch('requests.post')
def test_api_timeout(mock_post, olostep_search_tool):
    """A requests.Timeout from the HTTP call is reported as a message string."""
    mock_post.configure_mock(side_effect=requests.Timeout)

    message = olostep_search_tool._run(search_query="test")

    assert "Olostep API request timed out." in message

@patch('requests.post')
def test_api_http_error(mock_post, olostep_search_tool):
    """An HTTP 500 from the API is turned into a status/text message."""
    failing_response = MagicMock(status_code=500, text="Internal Server Error")
    failing_response.raise_for_status.side_effect = requests.HTTPError(response=failing_response)
    mock_post.return_value = failing_response

    message = olostep_search_tool._run(search_query="test")

    assert "Olostep API request failed with status 500: Internal Server Error" in message

@patch('requests.post')
def test_no_json_content(mock_post, olostep_search_tool):
    """A response whose json_content is null yields the 'no content' message."""
    empty_payload = {
        "result": {
            "json_content": None
        }
    }
    api_response = MagicMock()
    api_response.json.return_value = empty_payload
    api_response.raise_for_status.return_value = None
    mock_post.return_value = api_response

    message = olostep_search_tool._run(search_query="test")

    assert "No JSON content found in the response." in message
Loading