diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py index f4c03ba0..340a88ec 100644 --- a/crewai_tools/__init__.py +++ b/crewai_tools/__init__.py @@ -52,6 +52,8 @@ MySQLSearchTool, NL2SQLTool, OCRTool, + OlostepGoogleSearchTool, + OlostepWebScraperTool, OxylabsAmazonProductScraperTool, OxylabsAmazonSearchScraperTool, OxylabsGoogleSearchScraperTool, diff --git a/crewai_tools/tools/__init__.py b/crewai_tools/tools/__init__.py index bf1a166d..5a4b1872 100644 --- a/crewai_tools/tools/__init__.py +++ b/crewai_tools/tools/__init__.py @@ -57,6 +57,12 @@ from .mysql_search_tool.mysql_search_tool import MySQLSearchTool from .nl2sql.nl2sql_tool import NL2SQLTool from .ocr_tool.ocr_tool import OCRTool +from .olostep_google_search_tool.olostep_google_search_tool import ( + OlostepGoogleSearchTool, +) +from .olostep_web_scraper_tool.olostep_web_scraper_tool import ( + OlostepWebScraperTool, +) from .oxylabs_amazon_product_scraper_tool.oxylabs_amazon_product_scraper_tool import ( OxylabsAmazonProductScraperTool, ) diff --git a/crewai_tools/tools/olostep_google_search_tool/README.md b/crewai_tools/tools/olostep_google_search_tool/README.md new file mode 100644 index 00000000..442eddec --- /dev/null +++ b/crewai_tools/tools/olostep_google_search_tool/README.md @@ -0,0 +1,39 @@ +# OlostepGoogleSearchTool + +## Description + +The `OlostepGoogleSearchTool` allows you to perform a Google search using the Olostep API and receive structured JSON results. + +## Installation + +- Get an API key from [olostep.com](https://olostep.com) and set it in your environment variables as `OLOSTEP_API_KEY`. 
import json
import os
from typing import Any, Optional, Type

import requests
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field

# Characters that would change the *structure* of the Google URL if they were
# interpolated verbatim ("&" starts a new parameter, "#" starts the fragment,
# "%" starts an escape sequence, "+" is decoded as a space). Spaces and
# non-ASCII text are deliberately passed through unchanged, as before.
_QUERY_ESCAPES = str.maketrans(
    {
        "%": "%25",
        "&": "%26",
        "#": "%23",
        "+": "%2B",
    }
)

_DEFAULT_LOCATION = "us"
_DEFAULT_LANGUAGE = "en"


class OlostepGoogleSearchToolSchema(BaseModel):
    """Input for Olostep Google Search."""

    search_query: str = Field(..., description="The search query for Google.")
    location: Optional[str] = Field(
        default="us",
        description="The country to search from. It must be a two-letter country code. (ISO 3166-1 alpha-2)",
    )
    language: Optional[str] = Field(
        default="en",
        description="The language to search in. It must be a two-letter language code. (ISO 639-1)",
    )


class OlostepGoogleSearchTool(BaseTool):
    """Run a Google search through the Olostep scrape endpoint.

    The tool builds a ``https://www.google.com/search`` URL, posts it to
    ``base_url`` with the ``@olostep/google-search`` parser and returns the
    parsed JSON found under ``result.json_content`` of the response.
    """

    name: str = "Olostep Google Search"
    description: str = "A tool to perform a Google search using the Olostep API and get structured results."
    args_schema: Type[BaseModel] = OlostepGoogleSearchToolSchema

    env_vars: list[EnvVar] = [
        EnvVar(
            name="OLOSTEP_API_KEY",
            description="API key for Olostep API.",
            required=True,
        ),
    ]
    package_dependencies: list[str] = ["requests"]

    base_url: str = "https://api.olostep.com/v1/scrapes"
    api_key: Optional[str] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any) -> None:
        """Create the tool.

        Args:
            api_key: Olostep API key. When omitted, the ``OLOSTEP_API_KEY``
                environment variable is used instead.
            **kwargs: Forwarded unchanged to ``BaseTool.__init__``.

        Raises:
            ValueError: If no key is passed and ``OLOSTEP_API_KEY`` is unset
                or empty.
        """
        super().__init__(**kwargs)
        self.api_key = api_key or os.environ.get("OLOSTEP_API_KEY")
        if not self.api_key:
            raise ValueError("OLOSTEP_API_KEY environment variable is required for OlostepGoogleSearchTool.")

    def _run(
        self,
        search_query: str,
        location: Optional[str] = "us",
        language: Optional[str] = "en",
        **_: Any,
    ) -> Any:
        """Synchronous execution.

        Args:
            search_query: Text to search for on Google.
            location: Two-letter country code sent as ``gl``. ``None`` or an
                empty string falls back to ``"us"``.
            language: Two-letter language code sent as ``hl``. ``None`` or an
                empty string falls back to ``"en"``.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            The decoded ``json_content`` of the Olostep response on success,
            otherwise a human-readable error string. Failures are reported as
            return values, not raised.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        # The schema declares both fields Optional, so an explicit None must
        # not be rendered as the literal text "None" in the URL.
        country = location or _DEFAULT_LOCATION
        lang = language or _DEFAULT_LANGUAGE

        # Escape only the delimiters that would otherwise truncate or split
        # the ``q`` parameter (e.g. "AT&T", "C++", "#hashtag").
        escaped_query = search_query.translate(_QUERY_ESCAPES)
        url = f"https://www.google.com/search?q={escaped_query}&gl={country}&hl={lang}"

        data = {
            "url_to_scrape": url,
            "formats": ["json"],
            "parser": {"id": "@olostep/google-search"},
        }

        try:
            response = requests.post(self.base_url, headers=headers, json=data, timeout=30)
            response.raise_for_status()
            body = response.json()

            # Be tolerant of a missing / null / non-object "result" instead of
            # surfacing an AttributeError message to the caller.
            payload = body.get("result") if isinstance(body, dict) else None
            json_content = payload.get("json_content") if isinstance(payload, dict) else None
            if json_content:
                return json.loads(json_content)
            return "No JSON content found in the response."

        except requests.Timeout:
            return "Olostep API request timed out. Please try again later."
        except requests.HTTPError as e:
            # ``HTTPError.response`` may be None when the exception was not
            # produced by ``raise_for_status``; do not fail inside the handler.
            if e.response is None:
                return f"Olostep API request failed: {e}"
            return f"Olostep API request failed with status {e.response.status_code}: {e.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"
import os
from typing import Any, List, Optional, Type

import requests
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field


class OlostepWebScraperToolInput(BaseModel):
    """Input schema for OlostepWebScraperTool."""

    url_to_scrape: str = Field(..., description="The URL of the webpage to scrape.")
    formats: Optional[List[str]] = Field(
        default=["markdown"],
        description="List of formats to return. Can be 'html', 'markdown' or both.",
    )


class OlostepWebScraperTool(BaseTool):
    """Scrape a single web page through the Olostep scrape endpoint.

    The tool posts the target URL and the requested formats to ``base_url``
    and returns the ``markdown_content`` and/or ``html_content`` found under
    ``result`` in the response, each prefixed with a short heading.
    """

    name: str = "Olostep Web Scraper"
    description: str = "Scrapes a webpage using Olostep API and returns the content in specified formats."
    args_schema: Type[BaseModel] = OlostepWebScraperToolInput

    env_vars: List[EnvVar] = [
        EnvVar(
            name="OLOSTEP_API_KEY",
            description="API key for Olostep API.",
            required=True,
        ),
    ]
    package_dependencies: List[str] = ["requests"]

    base_url: str = "https://api.olostep.com/v1/scrapes"
    api_key: Optional[str] = None

    def __init__(self, api_key: Optional[str] = None, **kwargs: Any) -> None:
        """Create the tool.

        Args:
            api_key: Olostep API key. When omitted, the ``OLOSTEP_API_KEY``
                environment variable is used instead.
            **kwargs: Forwarded unchanged to ``BaseTool.__init__``.

        Raises:
            ValueError: If no key is passed and ``OLOSTEP_API_KEY`` is unset
                or empty.
        """
        super().__init__(**kwargs)
        self.api_key = api_key or os.environ.get("OLOSTEP_API_KEY")
        if not self.api_key:
            raise ValueError("OLOSTEP_API_KEY environment variable is required for OlostepWebScraperTool.")

    def _run(self, url_to_scrape: str, formats: Optional[List[str]] = None, **_: Any) -> str:
        """Synchronous execution.

        Args:
            url_to_scrape: URL of the page to fetch.
            formats: Formats to request; ``"markdown"`` and ``"html"`` are the
                ones this method extracts from the response. ``None`` or an
                empty list falls back to ``["markdown"]``.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            The requested content sections joined by a blank line, or a
            human-readable message when nothing was returned or the request
            failed. Failures are reported as return values, not raised.
        """
        # An empty list would request nothing and always yield "no content";
        # treat it like the documented default instead.
        requested = formats or ["markdown"]

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        data = {
            "url_to_scrape": url_to_scrape,
            "formats": requested,
        }

        try:
            response = requests.post(self.base_url, headers=headers, json=data, timeout=30)
            response.raise_for_status()
            body = response.json()

            # Tolerate a missing / null / non-object "result" rather than
            # leaking an AttributeError message to the caller.
            payload = body.get("result") if isinstance(body, dict) else None
            if not isinstance(payload, dict):
                payload = {}

            content = []
            if "markdown" in requested and payload.get("markdown_content"):
                content.append(f"Markdown Content:\n{payload['markdown_content']}")
            if "html" in requested and payload.get("html_content"):
                content.append(f"HTML Content:\n{payload['html_content']}")

            if not content:
                return "No content found for the specified formats."

            return "\n\n".join(content)

        except requests.Timeout:
            return "Olostep API request timed out. Please try again later."
        except requests.HTTPError as e:
            # ``HTTPError.response`` may be None when the exception was not
            # produced by ``raise_for_status``; do not fail inside the handler.
            if e.response is None:
                return f"Olostep API request failed: {e}"
            return f"Olostep API request failed with status {e.response.status_code}: {e.response.text}"
        except Exception as e:
            return f"An unexpected error occurred: {e}"
json.dumps(mock_json_content) + } + } + mock_post.return_value = mock_response + + result = olostep_search_tool._run(search_query="test query") + + assert result == mock_json_content + mock_post.assert_called_once() + called_args, called_kwargs = mock_post.call_args + assert "q=test query" in called_kwargs['json']['url_to_scrape'] + assert "&gl=us" in called_kwargs['json']['url_to_scrape'] + assert "&hl=en" in called_kwargs['json']['url_to_scrape'] + +@patch('requests.post') +def test_google_search_with_location_and_language(mock_post, olostep_search_tool): + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_json_content = {"organic": [{"title": "Résultat de recherche test"}]} + mock_response.json.return_value = { + "result": { + "json_content": json.dumps(mock_json_content) + } + } + mock_post.return_value = mock_response + + result = olostep_search_tool._run(search_query="requête de test", location="fr", language="fr") + + assert result == mock_json_content + mock_post.assert_called_once() + called_args, called_kwargs = mock_post.call_args + assert "q=requête de test" in called_kwargs['json']['url_to_scrape'] + assert "&gl=fr" in called_kwargs['json']['url_to_scrape'] + assert "&hl=fr" in called_kwargs['json']['url_to_scrape'] + +@patch('requests.post') +def test_api_timeout(mock_post, olostep_search_tool): + mock_post.side_effect = requests.Timeout + result = olostep_search_tool._run(search_query="test") + assert "Olostep API request timed out." 
in result + +@patch('requests.post') +def test_api_http_error(mock_post, olostep_search_tool): + mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.text = "Internal Server Error" + http_error = requests.HTTPError(response=mock_response) + mock_response.raise_for_status.side_effect = http_error + mock_post.return_value = mock_response + + result = olostep_search_tool._run(search_query="test") + assert "Olostep API request failed with status 500: Internal Server Error" in result + +@patch('requests.post') +def test_no_json_content(mock_post, olostep_search_tool): + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = { + "result": { + "json_content": None + } + } + mock_post.return_value = mock_response + + result = olostep_search_tool._run(search_query="test") + assert "No JSON content found in the response." in result diff --git a/tests/tools/test_olostep_web_scraper_tool.py b/tests/tools/test_olostep_web_scraper_tool.py new file mode 100644 index 00000000..a19f3e66 --- /dev/null +++ b/tests/tools/test_olostep_web_scraper_tool.py @@ -0,0 +1,109 @@ +import os +import pytest +import requests +from unittest.mock import patch, MagicMock +from crewai_tools.tools.olostep_web_scraper_tool.olostep_web_scraper_tool import OlostepWebScraperTool + +@pytest.fixture +def olostep_tool(): + with patch.dict(os.environ, {"OLOSTEP_API_KEY": "test_api_key"}): + yield OlostepWebScraperTool() + +def test_tool_initialization_requires_api_key(): + with patch.dict(os.environ, {}, clear=True): + with pytest.raises(ValueError, match="OLOSTEP_API_KEY environment variable is required for OlostepWebScraperTool."): + OlostepWebScraperTool() + +@patch('requests.post') +def test_scrape_with_markdown_format(mock_post, olostep_tool): + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = { + "result": { + "markdown_content": "This is a test 
markdown content." + } + } + mock_post.return_value = mock_response + + result = olostep_tool._run(url_to_scrape="https://example.com", formats=["markdown"]) + + assert "Markdown Content:\nThis is a test markdown content." in result + mock_post.assert_called_once() + called_args, called_kwargs = mock_post.call_args + assert called_kwargs['json']['formats'] == ['markdown'] + + +@patch('requests.post') +def test_scrape_with_html_format(mock_post, olostep_tool): + mock_response = MagicMock() + mock_response.raise_for_status.return_value = None + mock_response.json.return_value = { + "result": { + "html_content": "