From 8dbcc5a222da778b24cc2f9d3e8ef22854be75da Mon Sep 17 00:00:00 2001
From: Mithun748
Date: Fri, 29 Aug 2025 17:53:15 +0530
Subject: [PATCH 01/10] Feat(VectorX): Add VectorXVectorSearchTool initial implementation

---
 .../vectorx_vector_search_tool/README.md     | 163 ++++++++++
 .../vectorx_vector_search_tool/__init__.py   |  11 +
 .../vectorx_search_tool.py                   | 285 ++++++++++++++++++
 3 files changed, 459 insertions(+)
 create mode 100644 crewai_tools/tools/vectorx_vector_search_tool/README.md
 create mode 100644 crewai_tools/tools/vectorx_vector_search_tool/__init__.py
 create mode 100644 crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py

diff --git a/crewai_tools/tools/vectorx_vector_search_tool/README.md b/crewai_tools/tools/vectorx_vector_search_tool/README.md
new file mode 100644
index 00000000..16f0b2c8
--- /dev/null
+++ b/crewai_tools/tools/vectorx_vector_search_tool/README.md
@@ -0,0 +1,163 @@
+# VectorX Search Tool for CrewAI
+
+This package provides a **CrewAI tool integration** for [VectorX](https://vectorxdb.ai), enabling **semantic search** and optional **hybrid (dense + sparse) retrieval** inside CrewAI workflows.
+It uses **Google Gemini embeddings** by default for dense vectors and supports **SPLADE** for sparse vectors.
+
+---
+
+## Features
+
+* 🔹 **Dense search** using Gemini (`models/embedding-001` by default, user-configurable)
+* 🔹 **Hybrid search** (dense + sparse) using [prithivida/Splade\_PP\_en\_v1](https://huggingface.co/prithivida/Splade_PP_en_v1) by default, with support for custom sparse models
+* 🔹 Seamless integration with **CrewAI Agents**
+* 🔹 Document upsert and query
+* 🔹 Custom encryption and collection support
+
+---
+
+## Installation
+
+You can install the required packages in one of two ways:
+
+### Option 1: Install manually via pip
+
+```bash
+pip install crewai vecx google-genai
+```
+
+> ⚠️ If you want to enable **sparse embeddings (SPLADE)**, also install:
+
+```bash
+pip install transformers torch
+```
+
+---
+
+### Option 2: Install everything from `requirements.txt`
+
+```bash
+pip install -r requirements.txt
+```
+
+---
+
+### `requirements.txt` contents:
+
+```txt
+crewai==0.175.0
+vecx==0.33.1b5
+google-genai==1.32.0
+torch==2.8.0
+transformers==4.45.2
+tokenizers==0.20.3
+numpy==2.2.4
+```
+---
+
+
+## Usage
+
+### 1. Import & Configure
+
+```python
+import os
+from crewai import Agent, Crew, LLM, Task, Process
+from crewai_tools import VectorXVectorSearchTool
+
+# Initialize the tool
+tool = VectorXVectorSearchTool(
+    api_token=os.getenv("VECTORX_TOKEN"),
+    collection_name="my_vectorx_collection",
+    encryption_key=os.getenv("ENCRYPTION_KEY"),
+    use_sparse=False,  # set True to enable hybrid SPLADE search
+    top_k=3,
+)
+```
+
+---
+
+### 2. Store Documents (Example Usage)
+
+```python
+tool.store_documents(
+    [
+        "Python is a versatile programming language.",
+        "JavaScript is widely used in web development.",
+        "Rust is known for safety and performance.",
+    ],
+    [
+        {"category": "language", "name": "Python"},
+        {"category": "language", "name": "JavaScript"},
+        {"category": "language", "name": "Rust"},
+    ],
+)
+```
+
+---
+
+### 3. 
Set Up CrewAI Agent
+
+```python
+llm = LLM(
+    model="gemini/gemini-1.5-flash",
+    api_key=os.getenv("GEMINI_API_KEY"),
+)
+
+agent = Agent(
+    role="Vector Search Agent",
+    goal="Answer user questions using VectorX search",
+    backstory="You're a helpful assistant that uses VectorX for semantic retrieval.",
+    llm=llm,
+    tools=[tool],
+)
+
+task = Task(
+    description="Answer the user's question using VectorX search. The user asked: {query}",
+    agent=agent,
+    expected_output="A concise, relevant answer based on documents.",
+)
+
+crew = Crew(agents=[agent], tasks=[task], process=Process.sequential)
+```
+
+---
+
+### 4. Run a Query (Example Usage)
+
+```python
+if __name__ == "__main__":
+    question = "Tell me about Python language features."
+    print(f"\nQuery: {question}")
+    result = crew.kickoff({"query": question})
+    print("\nAnswer:\n", result)
+```
+
+---
+
+## Hybrid Search with SPLADE
+
+Enable hybrid mode:
+
+```python
+tool = VectorXVectorSearchTool(
+    api_token=os.getenv("VECTORX_TOKEN"),
+    collection_name="my_vectorx_collection",
+    use_sparse=True,  # 🔹 enable SPLADE hybrid retrieval
+)
+```
+
+This will combine **dense Gemini embeddings** with **sparse lexical signals** from SPLADE, improving recall on keyword-heavy queries.
+
+---
+
+## Environment Variables (.env)
+
+| Variable         | Description                                   |
+| ---------------- | --------------------------------------------- |
+| `VECTORX_TOKEN`  | API token for your VectorX instance           |
+| `GEMINI_API_KEY` | Google Gemini API key for embeddings & LLM    |
+| `ENCRYPTION_KEY` | (Optional) Encryption key for secure storage  |
+| `GEMINI_MODEL`   | (Optional) Gemini embedding model ID. Defaults to `models/embedding-001` |
+| `SPLADE_MODEL`   | (Optional) SPLADE model name from Hugging Face. Defaults to `prithivida/Splade_PP_en_v1` |
+
+---
diff --git a/crewai_tools/tools/vectorx_vector_search_tool/__init__.py b/crewai_tools/tools/vectorx_vector_search_tool/__init__.py
new file mode 100644
index 00000000..46b9ac3f
--- /dev/null
+++ b/crewai_tools/tools/vectorx_vector_search_tool/__init__.py
@@ -0,0 +1,11 @@
+from .vectorx_search_tool import (
+    VectorXVectorSearchTool,
+    VectorXSearchArgs,
+    SpladeSparseEmbedder,
+)
+
+__all__ = [
+    "VectorXVectorSearchTool",
+    "VectorXSearchArgs",
+    "SpladeSparseEmbedder",
+]
diff --git a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py
new file mode 100644
index 00000000..d5e3fce3
--- /dev/null
+++ b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py
@@ -0,0 +1,285 @@
+import os
+import logging
+import uuid
+from typing import List, Dict, Callable, Optional, Any, Type
+
+from pydantic import BaseModel
+from crewai.tools import BaseTool
+
+# Try importing dependencies with optional installation notes
+try:
+    from vecx.vectorx import VectorX
+except ImportError:
+    raise ImportError("Install vecx package via 'pip install vecx' to use VectorX features.")
+
+try:
+    from google import genai  # Gemini client
+except ImportError:
+    genai = None
+
+try:
+    from transformers import AutoTokenizer, AutoModelForMaskedLM
+    import torch
+except ImportError:
+    AutoTokenizer = None
+    AutoModelForMaskedLM = None
+    torch = None
+
+_logger = logging.getLogger(__name__)
+
+
+# ---------------- Sparse SPLADE Wrapper ---------------- #
+class SpladeSparseEmbedder:
+    """Wrapper for SPLADE (prithivida/Splade_PP_en_v1) to generate sparse vectors.
+
+    This is used for hybrid search, combining dense and sparse representations.
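+
+    Example (a minimal illustrative sketch of direct usage; assumes
+    `transformers` and `torch` are installed and the model can be loaded):
+
+        embedder = SpladeSparseEmbedder()
+        sparse = embedder.encode_query("hybrid search with splade")[0]
+        # sparse is a dict: {"indices": [...], "values": [...]}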
+
+    """
+
+    def __init__(self, model_name: Optional[str] = None):
+        """Initializes the SPLADE model and tokenizer."""
+        self.model_name = model_name or os.environ.get("SPLADE_MODEL", "prithivida/Splade_PP_en_v1")
+        if AutoTokenizer is None or AutoModelForMaskedLM is None:
+            raise ImportError("transformers/torch not installed. Install with `pip install transformers torch`")
+
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
+        self.model = AutoModelForMaskedLM.from_pretrained(self.model_name)
+        self.model.eval()
+        self.vocab_size = self.model.config.vocab_size
+
+    def get_vocab_size(self) -> int:
+        """Returns vocabulary size of the model."""
+        return self.vocab_size
+
+    def encode_query(self, text: str, return_sparse: bool = True):
+        """Encodes a query into sparse format using SPLADE.
+
+        Args:
+            text: Input query text.
+            return_sparse: If True, returns indices and values.
+
+        Returns:
+            A single-element list containing a dict of indices and values if return_sparse is True, otherwise the raw score array.
+        """
+        inputs = self.tokenizer(text, return_tensors="pt")
+        with torch.no_grad():
+            logits = self.model(**inputs).logits.squeeze(0)  # shape: [seq_len, vocab]
+        max_logits, _ = torch.max(logits, dim=0)  # shape: [vocab]
+        scores = torch.log1p(torch.relu(max_logits)).cpu().numpy()
+
+        nz = scores.nonzero()[0]
+        values = scores[nz]
+
+        if return_sparse:
+            return [{"indices": nz.tolist(), "values": values.tolist()}]
+        return scores
+
+
+# ---------------- CrewAI Tool: VectorX Search ---------------- #
+class VectorXSearchArgs(BaseModel):
+    """Argument schema for VectorX search tool."""
+    query: str
+    top_k: Optional[int] = None
+
+
+class VectorXVectorSearchTool(BaseTool):
+    """
+    CrewAI Tool for semantic search using the VectorX vector database.
+
+    Supports both dense (semantic) and sparse (keyword-like via SPLADE) search.
+    The default embedding model is Gemini via `google-genai`.
+
+    Attributes:
+        api_token: API token for VectorX.
+        collection_name: Name of the index/collection in VectorX.
+        embed_fn: Custom embedding function (optional).
+        encryption_key: Encryption key for secure collections.
+        space_type: Vector distance metric (e.g., "cosine").
+        use_sparse: Whether to use sparse (SPLADE) embedding.
+        sparse_embedder: SPLADE embedder instance.
+        sparse_vocab_size: Vocabulary size for sparse encoder.
+        top_k: Number of results to retrieve.
+    """
+
+    name: str = "VectorXVectorSearchTool"
+    description: str = (
+        "Tool for semantic search using VectorX vector DB "
+        "with optional sparse embedding support (SPLADE)."
+ ) + args_schema: Type[BaseModel] = VectorXSearchArgs + + def __init__( + self, + api_token: str, + collection_name: str, + embed_fn: Optional[Callable[[str], List[float]]] = None, + encryption_key: Optional[str] = None, + space_type: str = "cosine", + use_sparse: bool = False, + sparse_embedder: Optional[Any] = None, + sparse_vocab_size: Optional[int] = None, + top_k: int = 3, + gemini_model: Optional[str] = None, + ): + """Initializes the VectorX search tool, sets up index and embedding model.""" + super().__init__() + object.__setattr__(self, "api_token", api_token) + object.__setattr__(self, "collection_name", collection_name) + object.__setattr__(self, "encryption_key", encryption_key) + object.__setattr__(self, "space_type", space_type) + object.__setattr__(self, "use_sparse", use_sparse) + object.__setattr__(self, "top_k", top_k) + + gemini_model = gemini_model or os.environ.get("GEMINI_MODEL", "models/embedding-001") + _logger.info(f"Using Gemini embedding model: {gemini_model}") + + # Setup sparse encoder + if use_sparse: + if sparse_embedder is None: + sparse_embedder = SpladeSparseEmbedder() + object.__setattr__(self, "sparse_embedder", sparse_embedder) + sparse_vocab_size = sparse_vocab_size or sparse_embedder.get_vocab_size() + else: + object.__setattr__(self, "sparse_embedder", None) + sparse_vocab_size = 0 + object.__setattr__(self, "sparse_vocab_size", sparse_vocab_size) + + # Dense embedding function setup (default: Gemini) + if embed_fn: + object.__setattr__(self, "embed_fn", embed_fn) + else: + if genai is None: + raise ImportError("google-genai not installed. Install with `pip install google-genai`") + + gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) + + def gemini_embed(text: str) -> List[float]: + """Uses Gemini to generate dense embeddings.""" + emb = gemini_client.models.embed_content( + model=gemini_model, + contents=text + ) + vector_obj = emb.embeddings[0].values + vec = [float(v) for v in (vector_obj.values() if isinstance(vector_obj, dict) else vector_obj)] + _logger.debug(f"Gemini embedding len={len(vec)}, sample={vec[:5]}") + return vec + + object.__setattr__(self, "embed_fn", gemini_embed) + + # Setup VectorX index + client = VectorX(token=api_token) + object.__setattr__(self, "client", client) + + # Determine embedding dimension + dim = len(self.embed_fn("test")) + try: + if use_sparse: + index = client.get_hybrid_index(name=collection_name, key=encryption_key) + else: + index = client.get_index(name=collection_name, key=encryption_key) + except Exception: + _logger.info(f"Creating new index {collection_name}") + if use_sparse: + client.create_hybrid_index( + name=collection_name, + dimension=dim, + space_type=space_type, + vocab_size=sparse_vocab_size, + key=encryption_key, + ) + index = client.get_hybrid_index(name=collection_name, key=encryption_key) + else: + client.create_index( + name=collection_name, + dimension=dim, + space_type=space_type, + key=encryption_key, + ) + index = client.get_index(name=collection_name, key=encryption_key) + + object.__setattr__(self, "index", index) + + def _prepare_sparse_vector(self, text: str) -> Dict[str, Any]: + """Generates sparse representation for given text using SPLADE.""" + sparse_vec = self.sparse_embedder.encode_query(text, return_sparse=True)[0] + return sparse_vec + + def _run(self, query: str, top_k: Optional[int] = None, **kwargs) -> Any: + """Performs a semantic or hybrid search on VectorX. + + Args: + query: The search query. + top_k: Number of top results to return. 
+ + Returns: + A list of search results or error messages. + """ + top_k = top_k or self.top_k + embedding = self.embed_fn(query) + results = [] + + try: + if self.use_sparse: + sparse_vec = self._prepare_sparse_vector(query) + search_results = self.index.search( + dense_vector=embedding, + sparse_vector=sparse_vec, + dense_top_k=top_k, + sparse_top_k=top_k, + filter_query={}, + ) + for r in search_results: + results.append({ + "text": r["meta"].get("value", ""), + "score": r.get("rrf_score", 0), + "metadata": r["meta"], + }) + else: + search_results = self.index.query( + vector=embedding, + top_k=top_k, + include_vectors=False, + ) + for r in search_results: + results.append({ + "text": r["meta"].get("value", ""), + "score": r.get("similarity", 0), + "metadata": r["meta"], + }) + except Exception as e: + _logger.error(f"VectorX Search Error: {e}") + return [{"error": "Search failed"}] + + return results or [{"message": "No results found"}] + + def store_documents(self, texts: List[str], metadatas: Optional[List[Dict]] = None): + """Stores a list of documents into the VectorX index. + + Args: + texts: List of documents to store. + metadatas: Optional metadata dicts corresponding to each document. + """ + metadatas = metadatas or [{} for _ in texts] + events = [] + + for text, meta in zip(texts, metadatas): + meta["value"] = text + embedding = self.embed_fn(text) + + event = { + "id": str(uuid.uuid4()), + "meta": meta, + } + + if self.use_sparse: + sparse_vec = self._prepare_sparse_vector(text) + event["dense_vector"] = embedding + event["sparse_vector"] = sparse_vec + else: + event["vector"] = embedding + + events.append(event) + + try: + self.index.upsert(events) + except Exception as e: + _logger.error(f"VectorX Upsert Error: {e}") From 7ea40c1ea5b60ff41666b1544ad89b2de3166c5d Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Fri, 29 Aug 2025 17:55:17 +0530 Subject: [PATCH 02/10] Modified __init__ files for VectorX --- crewai_tools/__init__.py | 3 +++ crewai_tools/tools/__init__.py | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/crewai_tools/__init__.py b/crewai_tools/__init__.py index 4886dbc5..596b45cd 100644 --- a/crewai_tools/__init__.py +++ b/crewai_tools/__init__.py @@ -81,10 +81,13 @@ SnowflakeConfig, SnowflakeSearchTool, SpiderTool, + SpladeSparseEmbedder, StagehandTool, TavilyExtractorTool, TavilySearchTool, TXTSearchTool, + VectorXVectorSearchTool, + VectorXSearchArgs, VisionTool, WeaviateVectorSearchTool, WebsiteSearchTool, diff --git a/crewai_tools/tools/__init__.py b/crewai_tools/tools/__init__.py index 886c27ad..dd6917d6 100644 --- a/crewai_tools/tools/__init__.py +++ b/crewai_tools/tools/__init__.py @@ -114,3 +114,8 @@ BrightDataWebUnlockerTool ) from .zapier_action_tool.zapier_action_tool import ZapierActionTools +from .vectorx_vector_search_tool.vectorx_search_tool import ( + VectorXVectorSearchTool, + VectorXSearchArgs, + SpladeSparseEmbedder, +) \ No newline at end of file From 0ca8bae497bfcb700f280b009bc822fa480909f9 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Fri, 29 Aug 2025 17:56:45 +0530 Subject: [PATCH 03/10] Added test scripts for VectorX implementation --- tests/tools/test_vectorx_search_tool.py | 105 ++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 tests/tools/test_vectorx_search_tool.py diff --git a/tests/tools/test_vectorx_search_tool.py b/tests/tools/test_vectorx_search_tool.py new file mode 100644 index 00000000..0c0f1ba5 --- /dev/null +++ b/tests/tools/test_vectorx_search_tool.py @@ -0,0 +1,105 @@ +import pytest + +# 
Import VectorX tool class +from crewai_tools import VectorXVectorSearchTool + +# ==== Utility Functions / Dummy Classes ==== + +def dummy_embed(text: str): + """Returns a fixed-size dense embedding for testing.""" + return [0.1] * 128 + +class DummyIndex: + """Simulates index behavior for upsert, query, and hybrid search.""" + def __init__(self): + self.docs = [] + + def upsert(self, events): + self.docs.extend(events) + + def query(self, vector, top_k, include_vectors): + return self.docs[:top_k] + + def search(self, dense_vector, sparse_vector, dense_top_k, sparse_top_k, filter_query): + return self.docs[:dense_top_k] + +class DummyClient: + """Simulates VectorX client with index lifecycle methods.""" + def get_index(self, name, key=None): + return DummyIndex() + + def create_index(self, name, dimension, space_type, key=None): + return True + + def get_hybrid_index(self, name, key=None): + return DummyIndex() + + def create_hybrid_index(self, name, dimension, space_type, vocab_size, key=None): + return True + +class DummySPLADE: + """Simulates SPLADE sparse embedder for hybrid search.""" + def get_vocab_size(self): + return 10 + + def encode_query(self, text, return_sparse=True): + return [{"indices": [0], "values": [1.0]}] + +# ==== Fixtures ==== + +@pytest.fixture +def vx_tool(monkeypatch): + """ + Fixture that provides a VectorXVectorSearchTool with + its VectorX client monkeypatched to the DummyClient. + """ + monkeypatch.setattr( + "crewai_tools.tools.vectorx_vector_search_tool.vectorx_search_tool.VectorX", + lambda token: DummyClient() + ) + return VectorXVectorSearchTool( + api_token="fake-token", + collection_name="test_collection", + embed_fn=dummy_embed, + use_sparse=False + ) + +# ==== Tests ==== + +def test_store_and_search_dense(vx_tool): + """ + Tests dense-only mode: + - Documents are stored via store_documents() + - Search returns results with `text` and `score` fields + """ + vx_tool.store_documents(["doc1", "doc2"], [{"id": "1"}, {"id": "2"}]) + results = vx_tool._run("query") + assert isinstance(results, list) + assert "text" in results[0] + assert results[0]["text"] == "doc1" + assert "score" in results[0] + +def test_hybrid_search(monkeypatch): + """ + Tests hybrid mode (dense + sparse): + - SPLADE embedder is replaced with DummySPLADE + - Documents are stored and search returns expected results + """ + monkeypatch.setattr( + "crewai_tools.tools.vectorx_vector_search_tool.vectorx_search_tool.VectorX", + lambda token: DummyClient() + ) + monkeypatch.setattr( + "crewai_tools.tools.vectorx_vector_search_tool.vectorx_search_tool.SpladeSparseEmbedder", + lambda *args, **kwargs: DummySPLADE() + ) + tool = VectorXVectorSearchTool( + api_token="tok", + collection_name="hybrid_col", + embed_fn=dummy_embed, + use_sparse=True + ) + tool.store_documents(["doc_hybrid"], [{"id": "h1"}]) + results = tool._run("query") + assert isinstance(results, list) + assert results[0]["text"] == "doc_hybrid" \ No newline at end of file From 7705d634be8de0ffc218a9e42b3aff36a90b8e4b Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Fri, 29 Aug 2025 17:58:05 +0530 Subject: [PATCH 04/10] Added VectorX extra to pyproject.toml --- pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index e3ece2f9..6aaa559d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ dependencies = [ "tiktoken>=0.8.0", "stagehand>=0.4.1", "portalocker==2.7.0", + "google-genai>=1.32.0" ] [project.urls] @@ -141,6 +142,9 @@ contextual = [ "contextual-client>=0.1.0", 
"nest-asyncio>=1.6.0", ] +vectorx = [ + "vecx>=0.33.1b5" +] [tool.pytest.ini_options] pythonpath = ["."] From cd398cab4cddddbb2bea709787658c33f54f04d6 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Fri, 29 Aug 2025 17:59:43 +0530 Subject: [PATCH 05/10] Generated modified tool.specs.json and uv.lock --- tool.specs.json | 7676 +++++++++++++++++++++++++++++------------------ uv.lock | 116 +- 2 files changed, 4873 insertions(+), 2919 deletions(-) diff --git a/tool.specs.json b/tool.specs.json index c940d4b9..d47492ac 100644 --- a/tool.specs.json +++ b/tool.specs.json @@ -114,6 +114,80 @@ "type": "object" } }, + { + "description": "Fetches metadata from Arxiv based on a search query and optionally downloads PDFs.", + "env_vars": [], + "humanized_name": "Arxiv Paper Fetcher and Downloader", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "additionalProperties": true, + "properties": {}, + "title": "ArxivPaperTool", + "type": "object" + }, + "name": "ArxivPaperTool", + "package_dependencies": [ + "pydantic" + ], + "run_params_schema": { + "properties": { + "max_results": { + "default": 5, + "description": "Max results to fetch; must be between 1 and 100", + "maximum": 100, + "minimum": 1, + "title": "Max Results", + "type": "integer" + }, + "search_query": { + "description": "Search query for Arxiv, e.g., 'transformer neural network'", + "title": "Search Query", + "type": "string" + } + }, + "required": [ + "search_query" + ], + "title": "ArxivToolInput", + "type": "object" + } + }, { "description": "A tool that can be used to search the internet with a search_query.", "env_vars": [ @@ -215,22 +289,9 @@ } }, { - "description": "Load webpages url in a headless browser using Browserbase and return the contents", - "env_vars": [ - { - "default": null, - "description": "API key for Browserbase services", - "name": "BROWSERBASE_API_KEY", - "required": false - }, - { - "default": null, - "description": "Project ID for Browserbase services", - "name": "BROWSERBASE_PROJECT_ID", - "required": false - } - ], - "humanized_name": "Browserbase web load tool", + "description": "Scrapes structured data using Bright Data Dataset API from a URL and optional input parameters", + "env_vars": [], + "humanized_name": "Bright Data Dataset Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -269,30 +330,39 @@ "type": "object" } }, + "description": "CrewAI-compatible tool for scraping structured data using Bright Data Datasets.\n\nAttributes:\n name (str): Tool name displayed in the CrewAI environment.\n description (str): Tool description shown to agents or users.\n args_schema (Type[BaseModel]): Pydantic schema for validating input arguments.", "properties": { - "api_key": { + "additional_params": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Api Key" + "title": "Additional Params" }, - "browserbase": { + "dataset_type": { "anyOf": [ - {}, + { + "type": "string" + }, { "type": "null" } ], "default": null, - "title": "Browserbase" + "title": "Dataset Type" }, - 
"project_id": { + "format": { + "default": "json", + "title": "Format", + "type": "string" + }, + "url": { "anyOf": [ { "type": "string" @@ -302,78 +372,94 @@ } ], "default": null, - "title": "Project Id" + "title": "Url" }, - "proxy": { + "zipcode": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Proxy" - }, - "session_id": { + "title": "Zipcode" + } + }, + "title": "BrightDataDatasetTool", + "type": "object" + }, + "name": "BrightDataDatasetTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Schema for validating input parameters for the BrightDataDatasetTool.\n\nAttributes:\n dataset_type (str): Required Bright Data Dataset Type used to specify which dataset to access.\n format (str): Response format (json by default). Multiple formats exist - json, ndjson, jsonl, csv\n url (str): The URL from which structured data needs to be extracted.\n zipcode (Optional[str]): An optional ZIP code to narrow down the data geographically.\n additional_params (Optional[Dict]): Extra parameters for the Bright Data API call.", + "properties": { + "additional_params": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Session Id" + "description": "Additional params if any", + "title": "Additional Params" }, - "text_content": { + "dataset_type": { + "description": "The Bright Data Dataset Type", + "title": "Dataset Type", + "type": "string" + }, + "format": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "default": false, - "title": "Text Content" - } - }, - "title": "BrowserbaseLoadTool", - "type": "object" - }, - "name": "BrowserbaseLoadTool", - "package_dependencies": [ - "browserbase" - ], - "run_params_schema": { - "properties": { + "default": "json", + "description": "Response format (json by default)", + "title": "Format" + }, "url": { - "description": "Website URL", + "description": "The URL to extract data from", "title": "Url", "type": "string" + }, + "zipcode": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional zipcode", + "title": "Zipcode" } }, "required": [ + "dataset_type", "url" ], - "title": "BrowserbaseLoadToolSchema", + "title": "BrightDataDatasetToolSchema", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a CSV's content.", + "description": "Tool to perform web search using Bright Data SERP API.", "env_vars": [], - "humanized_name": "Search a CSV's content", + "humanized_name": "Bright Data SERP Search", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -410,157 +496,176 @@ "type": "object" } }, + "description": "A web search tool that utilizes Bright Data's SERP API to perform queries and return either structured results\nor raw page content from search engines like Google or Bing.\n\nAttributes:\n name (str): Tool name used by the agent.\n description (str): A brief explanation of what the tool does.\n args_schema (Type[BaseModel]): Schema class for validating tool arguments.\n base_url (str): The Bright Data API endpoint used for making the POST request.\n api_key (str): Bright Data API key loaded from the environment variable 'BRIGHT_DATA_API_KEY'.\n zone (str): Zone identifier from Bright Data, loaded from the environment variable 
'BRIGHT_DATA_ZONE'.\n\nRaises:\n ValueError: If API key or zone environment variables are not set.", "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "api_key": { + "default": "", + "title": "Api Key", + "type": "string" }, - "config": { + "base_url": { + "default": "", + "title": "Base Url", + "type": "string" + }, + "country": { + "default": "us", + "title": "Country", + "type": "string" + }, + "device_type": { + "default": "desktop", + "title": "Device Type", + "type": "string" + }, + "language": { + "default": "en", + "title": "Language", + "type": "string" + }, + "parse_results": { + "default": true, + "title": "Parse Results", + "type": "boolean" + }, + "query": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Config" + "title": "Query" }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" + "search_engine": { + "default": "google", + "title": "Search Engine", + "type": "string" + }, + "search_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Search Type" + }, + "zone": { + "default": "", + "title": "Zone", + "type": "string" } }, - "title": "CSVSearchTool", + "title": "BrightDataSearchTool", "type": "object" }, - "name": "CSVSearchTool", + "name": "BrightDataSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for CSVSearchTool.", + "description": "Schema that defines the input arguments for the BrightDataSearchToolSchema.\n\nAttributes:\n query (str): The search query to be executed (e.g., \"latest AI news\").\n search_engine (Optional[str]): The search engine to use (\"google\", \"bing\", \"yandex\"). Default is \"google\".\n country (Optional[str]): Two-letter country code for geo-targeting (e.g., \"us\", \"in\"). Default is \"us\".\n language (Optional[str]): Language code for search results (e.g., \"en\", \"es\"). Default is \"en\".\n search_type (Optional[str]): Type of search, such as \"isch\" (images), \"nws\" (news), \"jobs\", etc.\n device_type (Optional[str]): Device type to simulate (\"desktop\", \"mobile\", \"ios\", \"android\"). Default is \"desktop\".\n parse_results (Optional[bool]): If True, results will be returned in structured JSON. If False, raw HTML. 
Default is True.", "properties": { - "csv": { - "description": "Mandatory csv path you want to search", - "title": "Csv", - "type": "string" - }, - "search_query": { - "description": "Mandatory search query you want to use to search the CSV's content", - "title": "Search Query", - "type": "string" - } - }, - "required": [ - "search_query", - "csv" - ], - "title": "CSVSearchToolSchema", - "type": "object" - } - }, - { - "description": "A tool that can be used to semantic search a query from a Code Docs content.", - "env_vars": [], - "humanized_name": "Search a Code Docs content", - "init_params_schema": { - "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, - "EnvVar": { - "properties": { - "default": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Default" - }, - "description": { - "title": "Description", + "country": { + "anyOf": [ + { "type": "string" }, - "name": { - "title": "Name", + { + "type": "null" + } + ], + "default": "us", + "description": "Two-letter country code for geo-targeting (e.g., 'us', 'gb')", + "title": "Country" + }, + "device_type": { + "anyOf": [ + { "type": "string" }, - "required": { - "default": true, - "title": "Required", - "type": "boolean" + { + "type": "null" } - }, - "required": [ - "name", - "description" ], - "title": "EnvVar", - "type": "object" - } - }, - "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "default": "desktop", + "description": "Device type to simulate (e.g., 'mobile', 'desktop', 'ios')", + "title": "Device Type" }, - "config": { + "language": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "default": null, - "title": "Config" + "default": "en", + "description": "Language code (e.g., 'en', 'es') used in the query URL", + "title": "Language" }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" - } - }, - "title": "CodeDocsSearchTool", - "type": "object" - }, - "name": "CodeDocsSearchTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input for CodeDocsSearchTool.", - "properties": { - "docs_url": { - "description": "Mandatory docs_url path you want to search", - "title": "Docs Url", - "type": "string" + "parse_results": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": true, + "description": "Whether to parse and return JSON (True) or raw HTML/text (False)", + "title": "Parse Results" }, - "search_query": { - "description": "Mandatory search query you want to use to search the Code Docs content", - "title": "Search Query", + "query": { + "description": "Search query to perform", + "title": "Query", "type": "string" + }, + "search_engine": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "google", + "description": "Search engine domain (e.g., 'google', 'bing', 'yandex')", + "title": "Search Engine" + }, + "search_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Type of search (e.g., 'isch' for images, 'nws' for news)", + "title": "Search Type" } }, "required": [ - "search_query", - "docs_url" + "query" ], - "title": "CodeDocsSearchToolSchema", + "title": "BrightDataSearchToolSchema", "type": "object" } }, { - "description": "Interprets Python3 code strings with a final print statement.", + "description": "Tool to perform web scraping using Bright Data Web Unlocker", 
"env_vars": [], - "humanized_name": "Code Interpreter", + "humanized_name": "Bright Data Web Unlocker Scraping", "init_params_schema": { "$defs": { "EnvVar": { @@ -599,9 +704,29 @@ "type": "object" } }, - "description": "A tool for executing Python code in isolated environments.\n\nThis tool provides functionality to run Python code either in a Docker container\nfor safe isolation or directly in a restricted sandbox. It can handle installing\nPython packages and executing arbitrary Python code.", + "description": "A tool for performing web scraping using the Bright Data Web Unlocker API.\n\nThis tool allows automated and programmatic access to web pages by routing requests\nthrough Bright Data's unlocking and proxy infrastructure, which can bypass bot\nprotection mechanisms like CAPTCHA, geo-restrictions, and anti-bot detection.\n\nAttributes:\n name (str): Name of the tool.\n description (str): Description of what the tool does.\n args_schema (Type[BaseModel]): Pydantic model schema for expected input arguments.\n base_url (str): Base URL of the Bright Data Web Unlocker API.\n api_key (str): Bright Data API key (must be set in the BRIGHT_DATA_API_KEY environment variable).\n zone (str): Bright Data zone identifier (must be set in the BRIGHT_DATA_ZONE environment variable).\n\nMethods:\n _run(**kwargs: Any) -> Any:\n Sends a scraping request to Bright Data's Web Unlocker API and returns the result.", "properties": { - "code": { + "api_key": { + "default": "", + "title": "Api Key", + "type": "string" + }, + "base_url": { + "default": "", + "title": "Base Url", + "type": "string" + }, + "data_format": { + "default": "markdown", + "title": "Data Format", + "type": "string" + }, + "format": { + "default": "raw", + "title": "Format", + "type": "string" + }, + "url": { "anyOf": [ { "type": "string" @@ -611,19 +736,23 @@ } ], "default": null, - "title": "Code" + "title": "Url" }, - "default_image_tag": { - "default": "code-interpreter:latest", - "title": "Default Image Tag", + "zone": { + "default": "", + "title": "Zone", "type": "string" - }, - "unsafe_mode": { - "default": false, - "title": "Unsafe Mode", - "type": "boolean" - }, - "user_docker_base_url": { + } + }, + "title": "BrightDataWebUnlockerTool", + "type": "object" + }, + "name": "BrightDataWebUnlockerTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Pydantic schema for input parameters used by the BrightDataWebUnlockerTool.\n\nThis schema defines the structure and validation for parameters passed when performing\na web scraping request using Bright Data's Web Unlocker.\n\nAttributes:\n url (str): The target URL to scrape.\n format (Optional[str]): Format of the response returned by Bright Data. Default 'raw' format.\n data_format (Optional[str]): Response data format (html by default). 
markdown is one more option.", + "properties": { + "data_format": { "anyOf": [ { "type": "string" @@ -632,10 +761,11 @@ "type": "null" } ], - "default": null, - "title": "User Docker Base Url" + "default": "markdown", + "description": "Response data format (html by default)", + "title": "Data Format" }, - "user_dockerfile_path": { + "format": { "anyOf": [ { "type": "string" @@ -644,106 +774,40 @@ "type": "null" } ], - "default": null, - "title": "User Dockerfile Path" - } - }, - "title": "CodeInterpreterTool", - "type": "object" - }, - "name": "CodeInterpreterTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Schema for defining inputs to the CodeInterpreterTool.\n\nThis schema defines the required parameters for code execution,\nincluding the code to run and any libraries that need to be installed.", - "properties": { - "code": { - "description": "Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code", - "title": "Code", - "type": "string" + "default": "raw", + "description": "Response format (raw is standard)", + "title": "Format" }, - "libraries_used": { - "description": "List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4", - "items": { - "type": "string" - }, - "title": "Libraries Used", - "type": "array" + "url": { + "description": "URL to perform the web scraping", + "title": "Url", + "type": "string" } }, "required": [ - "code", - "libraries_used" + "url" ], - "title": "CodeInterpreterSchema", + "title": "BrightDataUnlockerToolSchema", "type": "object" } }, { - "description": "", + "description": "Load webpages url in a headless browser using Browserbase and return the contents", "env_vars": [ { "default": null, - "description": "API key for Composio services", - "name": "COMPOSIO_API_KEY", - "required": true + "description": "API key for Browserbase services", + "name": "BROWSERBASE_API_KEY", + "required": false + }, + { + "default": null, + "description": "Project ID for Browserbase services", + "name": "BROWSERBASE_PROJECT_ID", + "required": false } ], - "humanized_name": "ComposioTool", - "init_params_schema": { - "$defs": { - "EnvVar": { - "properties": { - "default": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Default" - }, - "description": { - "title": "Description", - "type": "string" - }, - "name": { - "title": "Name", - "type": "string" - }, - "required": { - "default": true, - "title": "Required", - "type": "boolean" - } - }, - "required": [ - "name", - "description" - ], - "title": "EnvVar", - "type": "object" - } - }, - "description": "Wrapper for composio tools.", - "properties": {}, - "required": [ - "name", - "description" - ], - "title": "ComposioTool", - "type": "object" - }, - "name": "ComposioTool", - "package_dependencies": [], - "run_params_schema": {} - }, - { - "description": "A tool to search the Couchbase database for relevant information on internal documents.", - "env_vars": [], - "humanized_name": "CouchbaseFTSVectorSearchTool", + "humanized_name": "Browserbase web load tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -782,9 +846,8 @@ "type": "object" } }, - "description": "Tool to search the Couchbase database", "properties": { - "bucket_name": { + "api_key": { "anyOf": [ { "type": "string" @@ -793,39 +856,20 @@ "type": "null" } ], - "default": [ - null - ], - "title": "Bucket Name" + "default": null, + "title": "Api Key" }, 
- "collection_name": { + "browserbase": { "anyOf": [ - { - "type": "string" - }, + {}, { "type": "null" } ], - "default": [ - null - ], - "title": "Collection Name" - }, - "embedding_key": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": "embedding", - "description": "Name of the field in the search index that stores the vector", - "title": "Embedding Key" + "default": null, + "title": "Browserbase" }, - "index_name": { + "project_id": { "anyOf": [ { "type": "string" @@ -834,24 +878,22 @@ "type": "null" } ], - "default": [ - null - ], - "title": "Index Name" + "default": null, + "title": "Project Id" }, - "limit": { + "proxy": { "anyOf": [ { - "type": "integer" + "type": "boolean" }, { "type": "null" } ], - "default": 3, - "title": "Limit" + "default": null, + "title": "Proxy" }, - "scope_name": { + "session_id": { "anyOf": [ { "type": "string" @@ -860,12 +902,10 @@ "type": "null" } ], - "default": [ - null - ], - "title": "Scope Name" + "default": null, + "title": "Session Id" }, - "scoped_index": { + "text_content": { "anyOf": [ { "type": "boolean" @@ -874,34 +914,36 @@ "type": "null" } ], - "title": "Scoped Index" + "default": false, + "title": "Text Content" } }, - "title": "CouchbaseFTSVectorSearchTool", + "title": "BrowserbaseLoadTool", "type": "object" }, - "name": "CouchbaseFTSVectorSearchTool", - "package_dependencies": [], + "name": "BrowserbaseLoadTool", + "package_dependencies": [ + "browserbase" + ], "run_params_schema": { - "description": "Input for CouchbaseTool.", "properties": { - "query": { - "description": "The query to search retrieve relevant information from the Couchbase database. Pass only the query, not the question.", - "title": "Query", + "url": { + "description": "Website URL", + "title": "Url", "type": "string" } }, "required": [ - "query" + "url" ], - "title": "CouchbaseToolSchema", + "title": "BrowserbaseLoadToolSchema", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a DOCX's content.", + "description": "A tool that can be used to semantic search a query from a CSV's content.", "env_vars": [], - "humanized_name": "Search a DOCX's content", + "humanized_name": "Search a CSV's content", "init_params_schema": { "$defs": { "Adapter": { @@ -968,53 +1010,44 @@ "type": "boolean" } }, - "title": "DOCXSearchTool", + "title": "CSVSearchTool", "type": "object" }, - "name": "DOCXSearchTool", + "name": "CSVSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for DOCXSearchTool.", + "description": "Input for CSVSearchTool.", "properties": { - "docx": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "Mandatory docx path you want to search", - "title": "Docx" + "csv": { + "description": "Mandatory csv path you want to search", + "title": "Csv", + "type": "string" }, "search_query": { - "description": "Mandatory search query you want to use to search the DOCX's content", + "description": "Mandatory search query you want to use to search the CSV's content", "title": "Search Query", "type": "string" } }, "required": [ - "docx", - "search_query" + "search_query", + "csv" ], - "title": "DOCXSearchToolSchema", + "title": "CSVSearchToolSchema", "type": "object" } }, { - "description": "Generates images using OpenAI's Dall-E model.", - "env_vars": [ - { - "default": null, - "description": "API key for OpenAI services", - "name": "OPENAI_API_KEY", - "required": true - } - ], - "humanized_name": "Dall-E Tool", + 
"description": "A tool that can be used to semantic search a query from a Code Docs content.", + "env_vars": [], + "humanized_name": "Search a Code Docs content", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -1052,52 +1085,59 @@ } }, "properties": { - "model": { - "default": "dall-e-3", - "title": "Model", - "type": "string" - }, - "n": { - "default": 1, - "title": "N", - "type": "integer" + "adapter": { + "$ref": "#/$defs/Adapter" }, - "quality": { - "default": "standard", - "title": "Quality", - "type": "string" + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" }, - "size": { - "default": "1024x1024", - "title": "Size", - "type": "string" + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "title": "DallETool", + "title": "CodeDocsSearchTool", "type": "object" }, - "name": "DallETool", + "name": "CodeDocsSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for Dall-E Tool.", + "description": "Input for CodeDocsSearchTool.", "properties": { - "image_description": { - "description": "Description of the image to be generated by Dall-E.", - "title": "Image Description", + "docs_url": { + "description": "Mandatory docs_url path you want to search", + "title": "Docs Url", + "type": "string" + }, + "search_query": { + "description": "Mandatory search query you want to use to search the Code Docs content", + "title": "Search Query", "type": "string" } }, "required": [ - "image_description" + "search_query", + "docs_url" ], - "title": "ImagePromptSchema", + "title": "CodeDocsSearchToolSchema", "type": "object" } }, { - "description": "Execute SQL queries against Databricks workspace tables and return the results. Provide a 'query' parameter with the SQL query to execute.", + "description": "Interprets Python3 code strings with a final print statement.", "env_vars": [], - "humanized_name": "Databricks SQL Query", + "humanized_name": "Code Interpreter", "init_params_schema": { "$defs": { "EnvVar": { @@ -1136,9 +1176,9 @@ "type": "object" } }, - "description": "A tool for querying Databricks workspace tables using SQL.\n\nThis tool executes SQL queries against Databricks tables and returns the results.\nIt requires Databricks authentication credentials to be set as environment variables.\n\nAuthentication can be provided via:\n- Databricks CLI profile: Set DATABRICKS_CONFIG_PROFILE environment variable\n- Direct credentials: Set DATABRICKS_HOST and DATABRICKS_TOKEN environment variables\n\nExample:\n >>> tool = DatabricksQueryTool()\n >>> results = tool.run(query=\"SELECT * FROM my_table LIMIT 10\")", + "description": "A tool for executing Python code in isolated environments.\n\nThis tool provides functionality to run Python code either in a Docker container\nfor safe isolation or directly in a restricted sandbox. 
It can handle installing\nPython packages and executing arbitrary Python code.", "properties": { - "default_catalog": { + "code": { "anyOf": [ { "type": "string" @@ -1148,9 +1188,19 @@ } ], "default": null, - "title": "Default Catalog" + "title": "Code" }, - "default_schema": { + "default_image_tag": { + "default": "code-interpreter:latest", + "title": "Default Image Tag", + "type": "string" + }, + "unsafe_mode": { + "default": false, + "title": "Unsafe Mode", + "type": "boolean" + }, + "user_docker_base_url": { "anyOf": [ { "type": "string" @@ -1160,9 +1210,9 @@ } ], "default": null, - "title": "Default Schema" + "title": "User Docker Base Url" }, - "default_warehouse_id": { + "user_dockerfile_path": { "anyOf": [ { "type": "string" @@ -1172,88 +1222,50 @@ } ], "default": null, - "title": "Default Warehouse Id" + "title": "User Dockerfile Path" } }, - "title": "DatabricksQueryTool", + "title": "CodeInterpreterTool", "type": "object" }, - "name": "DatabricksQueryTool", - "package_dependencies": [ - "databricks-sdk" - ], + "name": "CodeInterpreterTool", + "package_dependencies": [], "run_params_schema": { - "description": "Input schema for DatabricksQueryTool.", + "description": "Schema for defining inputs to the CodeInterpreterTool.\n\nThis schema defines the required parameters for code execution,\nincluding the code to run and any libraries that need to be installed.", "properties": { - "catalog": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Databricks catalog name (optional, defaults to configured catalog)", - "title": "Catalog" - }, - "db_schema": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Databricks schema name (optional, defaults to configured schema)", - "title": "Db Schema" - }, - "query": { - "description": "SQL query to execute against the Databricks workspace table", - "title": "Query", + "code": { + "description": "Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code", + "title": "Code", "type": "string" }, - "row_limit": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 1000, - "description": "Maximum number of rows to return (default: 1000)", - "title": "Row Limit" - }, - "warehouse_id": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Databricks SQL warehouse ID (optional, defaults to configured warehouse)", - "title": "Warehouse Id" + "libraries_used": { + "description": "List of libraries used in the code with proper installing names separated by commas. 
Example: numpy,pandas,beautifulsoup4", + "items": { + "type": "string" + }, + "title": "Libraries Used", + "type": "array" } }, "required": [ - "query" + "code", + "libraries_used" ], - "title": "DatabricksQueryToolSchema", + "title": "CodeInterpreterSchema", "type": "object" } }, { - "description": "A tool that can be used to recursively list a directory's content.", - "env_vars": [], - "humanized_name": "List files in directory", + "description": "", + "env_vars": [ + { + "default": null, + "description": "API key for Composio services", + "name": "COMPOSIO_API_KEY", + "required": true + } + ], + "humanized_name": "ComposioTool", "init_params_schema": { "$defs": { "EnvVar": { @@ -1292,52 +1304,25 @@ "type": "object" } }, - "properties": { - "directory": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Directory" - } - }, - "title": "DirectoryReadTool", - "type": "object" - }, - "name": "DirectoryReadTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input for DirectoryReadTool.", - "properties": { - "directory": { - "description": "Mandatory directory to list content", - "title": "Directory", - "type": "string" - } - }, + "description": "Wrapper for composio tools.", + "properties": {}, "required": [ - "directory" + "name", + "description" ], - "title": "DirectoryReadToolSchema", + "title": "ComposioTool", "type": "object" - } + }, + "name": "ComposioTool", + "package_dependencies": [], + "run_params_schema": {} }, { - "description": "A tool that can be used to semantic search a query from a directory's content.", + "description": "Create a new Contextual AI RAG agent with documents and datastore", "env_vars": [], - "humanized_name": "Search a directory's content", + "humanized_name": "Contextual AI Create Agent Tool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -1374,67 +1359,68 @@ "type": "object" } }, + "description": "Tool to create Contextual AI RAG agents with documents.", "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "api_key": { + "title": "Api Key", + "type": "string" }, - "config": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], + "contextual_client": { "default": null, - "title": "Config" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" + "title": "Contextual Client" } }, - "title": "DirectorySearchTool", + "required": [ + "api_key" + ], + "title": "ContextualAICreateAgentTool", "type": "object" }, - "name": "DirectorySearchTool", - "package_dependencies": [], + "name": "ContextualAICreateAgentTool", + "package_dependencies": [ + "contextual-client" + ], "run_params_schema": { - "description": "Input for DirectorySearchTool.", + "description": "Schema for contextual create agent tool.", "properties": { - "directory": { - "description": "Mandatory directory you want to search", - "title": "Directory", + "agent_description": { + "description": "Description for the new agent", + "title": "Agent Description", "type": "string" }, - "search_query": { - "description": "Mandatory search query you want to use to search the directory's content", - "title": "Search Query", + "agent_name": { + "description": "Name for the new agent", + "title": "Agent Name", "type": "string" + }, + "datastore_name": { + "description": "Name for the new datastore", + "title": "Datastore Name", + "type": 
"string" + }, + "document_paths": { + "description": "List of file paths to upload", + "items": { + "type": "string" + }, + "title": "Document Paths", + "type": "array" } }, "required": [ - "search_query", - "directory" + "agent_name", + "agent_description", + "datastore_name", + "document_paths" ], - "title": "DirectorySearchToolSchema", + "title": "ContextualAICreateAgentSchema", "type": "object" } }, { - "description": "Search the internet using Exa", - "env_vars": [ - { - "default": null, - "description": "API key for Exa services", - "name": "EXA_API_KEY", - "required": false - } - ], - "humanized_name": "EXASearchTool", + "description": "Parse documents using Contextual AI's advanced document parser", + "env_vars": [], + "humanized_name": "Contextual AI Document Parser", "init_params_schema": { "$defs": { "EnvVar": { @@ -1473,101 +1459,55 @@ "type": "object" } }, + "description": "Tool to parse documents using Contextual AI's parser.", "properties": { "api_key": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "description": "API key for Exa services", - "required": false, - "title": "Api Key" - }, - "content": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": false, - "title": "Content" - }, - "summary": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": false, - "title": "Summary" - }, - "type": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": "auto", - "title": "Type" + "title": "Api Key", + "type": "string" } }, - "title": "EXASearchTool", + "required": [ + "api_key" + ], + "title": "ContextualAIParseTool", "type": "object" }, - "name": "EXASearchTool", + "name": "ContextualAIParseTool", "package_dependencies": [ - "exa_py" + "contextual-client" ], "run_params_schema": { + "description": "Schema for contextual parse tool.", "properties": { - "end_published_date": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "End date for the search", - "title": "End Published Date" + "enable_document_hierarchy": { + "default": true, + "description": "Enable document hierarchy", + "title": "Enable Document Hierarchy", + "type": "boolean" }, - "include_domains": { - "anyOf": [ - { - "items": { - "type": "string" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "List of domains to include in the search", - "title": "Include Domains" + "figure_caption_mode": { + "default": "concise", + "description": "Figure caption mode", + "title": "Figure Caption Mode", + "type": "string" }, - "search_query": { - "description": "Mandatory search query you want to use to search the internet", - "title": "Search Query", + "file_path": { + "description": "Path to the document to parse", + "title": "File Path", "type": "string" }, - "start_published_date": { + "output_types": { + "default": [ + "markdown-per-page" + ], + "description": "List of output types", + "items": { + "type": "string" + }, + "title": "Output Types", + "type": "array" + }, + "page_range": { "anyOf": [ { "type": "string" @@ -1577,21 +1517,27 @@ } ], "default": null, - "description": "Start date for the search", - "title": "Start Published Date" + "description": "Page range to parse (e.g., '0-5')", + "title": "Page Range" + }, + "parse_mode": { + "default": "standard", + "description": "Parsing mode", + "title": "Parse Mode", + "type": "string" } }, "required": [ - "search_query" + "file_path" ], - "title": 
"EXABaseToolSchema", + "title": "ContextualAIParseSchema", "type": "object" } }, { - "description": "Compresses a file or directory into an archive (.zip currently supported). Useful for archiving logs, documents, or backups.", + "description": "Use this tool to query a Contextual AI RAG agent with access to your documents", "env_vars": [], - "humanized_name": "File Compressor Tool", + "humanized_name": "Contextual AI Query Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -1630,27 +1576,36 @@ "type": "object" } }, - "properties": {}, - "title": "FileCompressorTool", - "type": "object" - }, - "name": "FileCompressorTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input schema for FileCompressorTool.", + "description": "Tool to query Contextual AI RAG agents.", "properties": { - "format": { - "default": "zip", - "description": "Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').", - "title": "Format", + "api_key": { + "title": "Api Key", "type": "string" }, - "input_path": { - "description": "Path to the file or directory to compress.", - "title": "Input Path", + "contextual_client": { + "default": null, + "title": "Contextual Client" + } + }, + "required": [ + "api_key" + ], + "title": "ContextualAIQueryTool", + "type": "object" + }, + "name": "ContextualAIQueryTool", + "package_dependencies": [ + "contextual-client" + ], + "run_params_schema": { + "description": "Schema for contextual query tool.", + "properties": { + "agent_id": { + "description": "ID of the Contextual AI agent to query", + "title": "Agent Id", "type": "string" }, - "output_path": { + "datastore_id": { "anyOf": [ { "type": "string" @@ -1660,27 +1615,27 @@ } ], "default": null, - "description": "Optional output archive filename.", - "title": "Output Path" + "description": "Optional datastore ID for document readiness verification", + "title": "Datastore Id" }, - "overwrite": { - "default": false, - "description": "Whether to overwrite the archive if it already exists.", - "title": "Overwrite", - "type": "boolean" + "query": { + "description": "Query to send to the Contextual AI agent.", + "title": "Query", + "type": "string" } }, "required": [ - "input_path" + "query", + "agent_id" ], - "title": "FileCompressorToolInput", + "title": "ContextualAIQuerySchema", "type": "object" } }, { - "description": "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read.", + "description": "Rerank documents using Contextual AI's instruction-following reranker", "env_vars": [], - "humanized_name": "Read a file's content", + "humanized_name": "Contextual AI Document Reranker", "init_params_schema": { "$defs": { "EnvVar": { @@ -1719,72 +1674,87 @@ "type": "object" } }, - "description": "A tool for reading file contents.\n\nThis tool inherits its schema handling from BaseTool to avoid recursive schema\ndefinition issues. The args_schema is set to FileReadToolSchema which defines\nthe required file_path parameter. The schema should not be overridden in the\nconstructor as it would break the inheritance chain and cause infinite loops.\n\nThe tool supports two ways of specifying the file path:\n1. At construction time via the file_path parameter\n2. At runtime via the file_path parameter in the tool's input\n\nArgs:\n file_path (Optional[str]): Path to the file to be read. 
If provided,\n this becomes the default file path for the tool.\n **kwargs: Additional keyword arguments passed to BaseTool.\n\nExample:\n >>> tool = FileReadTool(file_path=\"/path/to/file.txt\")\n >>> content = tool.run() # Reads /path/to/file.txt\n >>> content = tool.run(file_path=\"/path/to/other.txt\") # Reads other.txt\n >>> content = tool.run(file_path=\"/path/to/file.txt\", start_line=100, line_count=50) # Reads lines 100-149", + "description": "Tool to rerank documents using Contextual AI's instruction-following reranker.", "properties": { - "file_path": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "File Path" + "api_key": { + "title": "Api Key", + "type": "string" } }, - "title": "FileReadTool", + "required": [ + "api_key" + ], + "title": "ContextualAIRerankTool", "type": "object" }, - "name": "FileReadTool", - "package_dependencies": [], + "name": "ContextualAIRerankTool", + "package_dependencies": [ + "contextual-client" + ], "run_params_schema": { - "description": "Input for FileReadTool.", + "description": "Schema for contextual rerank tool.", "properties": { - "file_path": { - "description": "Mandatory file full path to read the file", - "title": "File Path", - "type": "string" + "documents": { + "description": "List of document texts to rerank", + "items": { + "type": "string" + }, + "title": "Documents", + "type": "array" }, - "line_count": { + "instruction": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], "default": null, - "description": "Number of lines to read. If None, reads the entire file", - "title": "Line Count" + "description": "Optional instruction for reranking behavior", + "title": "Instruction" }, - "start_line": { + "metadata": { "anyOf": [ { - "type": "integer" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "default": 1, - "description": "Line number to start reading from (1-indexed)", - "title": "Start Line" + "default": null, + "description": "Optional metadata for each document", + "title": "Metadata" + }, + "model": { + "default": "ctxl-rerank-en-v1-instruct", + "description": "Reranker model to use", + "title": "Model", + "type": "string" + }, + "query": { + "description": "The search query to rerank documents against", + "title": "Query", + "type": "string" } }, "required": [ - "file_path" + "query", + "documents" ], - "title": "FileReadToolSchema", + "title": "ContextualAIRerankSchema", "type": "object" } }, { - "description": "A tool to write content to a specified file. 
Accepts filename, content, and optionally a directory path and overwrite flag as input.", + "description": "A tool to search the Couchbase database for relevant information on internal documents.", "env_vars": [], - "humanized_name": "File Writer Tool", + "humanized_name": "CouchbaseFTSVectorSearchTool", "init_params_schema": { "$defs": { "EnvVar": { @@ -1823,19 +1793,33 @@ "type": "object" } }, - "properties": {}, - "title": "FileWriterTool", - "type": "object" - }, - "name": "FileWriterTool", - "package_dependencies": [], - "run_params_schema": { + "description": "Tool to search the Couchbase database", "properties": { - "content": { - "title": "Content", - "type": "string" + "bucket_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": [ + null + ], + "title": "Bucket Name" }, - "directory": { + "cluster": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Cluster" + }, + "collection_name": { "anyOf": [ { "type": "string" @@ -1844,47 +1828,108 @@ "type": "null" } ], - "default": "./", - "title": "Directory" + "default": [ + null + ], + "title": "Collection Name" }, - "filename": { - "title": "Filename", - "type": "string" + "embedding_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "embedding", + "description": "Name of the field in the search index that stores the vector", + "title": "Embedding Key" }, - "overwrite": { + "index_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": [ + null + ], + "title": "Index Name" + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 3, + "title": "Limit" + }, + "scope_name": { "anyOf": [ { "type": "string" }, + { + "type": "null" + } + ], + "default": [ + null + ], + "title": "Scope Name" + }, + "scoped_index": { + "anyOf": [ { "type": "boolean" + }, + { + "type": "null" } ], - "default": false, - "title": "Overwrite" + "title": "Scoped Index" + } + }, + "title": "CouchbaseFTSVectorSearchTool", + "type": "object" + }, + "name": "CouchbaseFTSVectorSearchTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for CouchbaseTool.", + "properties": { + "query": { + "description": "The query to search retrieve relevant information from the Couchbase database. Pass only the query, not the question.", + "title": "Query", + "type": "string" } }, "required": [ - "filename", - "content" + "query" ], - "title": "FileWriterToolInput", + "title": "CouchbaseToolSchema", "type": "object" } }, { - "description": "Crawl webpages using Firecrawl and return the contents", - "env_vars": [ - { - "default": null, - "description": "API key for Firecrawl services", - "name": "FIRECRAWL_API_KEY", - "required": true - } - ], - "humanized_name": "Firecrawl web crawl tool", + "description": "A tool that can be used to semantic search a query from a DOCX's content.", + "env_vars": [], + "humanized_name": "Search a DOCX's content", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -1921,19 +1966,9 @@ "type": "object" } }, - "description": "Tool for crawling websites using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n max_depth (int): Maximum depth to crawl. 
Default: 2\n ignore_sitemap (bool): Whether to ignore sitemap. Default: True\n limit (int): Maximum number of pages to crawl. Default: 100\n allow_backward_links (bool): Allow crawling backward links. Default: False\n allow_external_links (bool): Allow crawling external links. Default: False\n scrape_options (ScrapeOptions): Options for scraping content\n - formats (list[str]): Content formats to return. Default: [\"markdown\", \"screenshot\", \"links\"]\n - only_main_content (bool): Only return main content. Default: True\n - timeout (int): Timeout in milliseconds. Default: 30000", "properties": { - "api_key": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Api Key" + "adapter": { + "$ref": "#/$defs/Adapter" }, "config": { "anyOf": [ @@ -1945,42 +1980,60 @@ "type": "null" } ], + "default": null, "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "title": "FirecrawlCrawlWebsiteTool", + "title": "DOCXSearchTool", "type": "object" }, - "name": "FirecrawlCrawlWebsiteTool", - "package_dependencies": [ - "firecrawl-py" - ], + "name": "DOCXSearchTool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for DOCXSearchTool.", "properties": { - "url": { - "description": "Website URL", - "title": "Url", + "docx": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Mandatory docx path you want to search", + "title": "Docx" + }, + "search_query": { + "description": "Mandatory search query you want to use to search the DOCX's content", + "title": "Search Query", "type": "string" } }, "required": [ - "url" + "docx", + "search_query" ], - "title": "FirecrawlCrawlWebsiteToolSchema", + "title": "DOCXSearchToolSchema", "type": "object" } }, { - "description": "Scrape webpages using Firecrawl and return the contents", + "description": "Generates images using OpenAI's Dall-E model.", "env_vars": [ { "default": null, - "description": "API key for Firecrawl services", - "name": "FIRECRAWL_API_KEY", + "description": "API key for OpenAI services", + "name": "OPENAI_API_KEY", "required": true } ], - "humanized_name": "Firecrawl web scrape tool", + "humanized_name": "Dall-E Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -2019,59 +2072,53 @@ "type": "object" } }, - "description": "Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n formats (list[str]): Content formats to return. Default: [\"markdown\"]\n onlyMainContent (bool): Only return main content. Default: True\n includeTags (list[str]): Tags to include. Default: []\n excludeTags (list[str]): Tags to exclude. Default: []\n headers (dict): Headers to include. Default: {}\n waitFor (int): Time to wait for page to load in ms. Default: 0\n json_options (dict): Options for JSON extraction. 
Default: None", "properties": { - "api_key": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Api Key" + "model": { + "default": "dall-e-3", + "title": "Model", + "type": "string" }, - "config": { - "additionalProperties": true, - "title": "Config", - "type": "object" + "n": { + "default": 1, + "title": "N", + "type": "integer" + }, + "quality": { + "default": "standard", + "title": "Quality", + "type": "string" + }, + "size": { + "default": "1024x1024", + "title": "Size", + "type": "string" } }, - "title": "FirecrawlScrapeWebsiteTool", + "title": "DallETool", "type": "object" }, - "name": "FirecrawlScrapeWebsiteTool", - "package_dependencies": [ - "firecrawl-py" - ], + "name": "DallETool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for Dall-E Tool.", "properties": { - "url": { - "description": "Website URL", - "title": "Url", + "image_description": { + "description": "Description of the image to be generated by Dall-E.", + "title": "Image Description", "type": "string" } }, "required": [ - "url" + "image_description" ], - "title": "FirecrawlScrapeWebsiteToolSchema", + "title": "ImagePromptSchema", "type": "object" } }, { - "description": "Search webpages using Firecrawl and return the results", - "env_vars": [ - { - "default": null, - "description": "API key for Firecrawl services", - "name": "FIRECRAWL_API_KEY", - "required": true - } - ], - "humanized_name": "Firecrawl web search tool", + "description": "Execute SQL queries against Databricks workspace tables and return the results. Provide a 'query' parameter with the SQL query to execute.", + "env_vars": [], + "humanized_name": "Databricks SQL Query", "init_params_schema": { "$defs": { "EnvVar": { @@ -2110,9 +2157,9 @@ "type": "object" } }, - "description": "Tool for searching webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n limit (int): Maximum number of pages to crawl. Default: 5\n tbs (str): Time before search. Default: None\n lang (str): Language. Default: \"en\"\n country (str): Country. Default: \"us\"\n location (str): Location. Default: None\n timeout (int): Timeout in milliseconds. 
Default: 60000", + "description": "A tool for querying Databricks workspace tables using SQL.\n\nThis tool executes SQL queries against Databricks tables and returns the results.\nIt requires Databricks authentication credentials to be set as environment variables.\n\nAuthentication can be provided via:\n- Databricks CLI profile: Set DATABRICKS_CONFIG_PROFILE environment variable\n- Direct credentials: Set DATABRICKS_HOST and DATABRICKS_TOKEN environment variables\n\nExample:\n >>> tool = DatabricksQueryTool()\n >>> results = tool.run(query=\"SELECT * FROM my_table LIMIT 10\")", "properties": { - "api_key": { + "default_catalog": { "anyOf": [ { "type": "string" @@ -2122,54 +2169,114 @@ } ], "default": null, - "title": "Api Key" + "title": "Default Catalog" }, - "config": { + "default_schema": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "title": "Config" + "default": null, + "title": "Default Schema" + }, + "default_warehouse_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default Warehouse Id" } }, - "title": "FirecrawlSearchTool", + "title": "DatabricksQueryTool", "type": "object" }, - "name": "FirecrawlSearchTool", + "name": "DatabricksQueryTool", "package_dependencies": [ - "firecrawl-py" + "databricks-sdk" ], "run_params_schema": { + "description": "Input schema for DatabricksQueryTool.", "properties": { + "catalog": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Databricks catalog name (optional, defaults to configured catalog)", + "title": "Catalog" + }, + "db_schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Databricks schema name (optional, defaults to configured schema)", + "title": "Db Schema" + }, "query": { - "description": "Search query", + "description": "SQL query to execute against the Databricks workspace table", "title": "Query", "type": "string" + }, + "row_limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 1000, + "description": "Maximum number of rows to return (default: 1000)", + "title": "Row Limit" + }, + "warehouse_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Databricks SQL warehouse ID (optional, defaults to configured warehouse)", + "title": "Warehouse Id" } }, "required": [ "query" ], - "title": "FirecrawlSearchToolSchema", + "title": "DatabricksQueryToolSchema", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a github repo's content. 
This is not the GitHub API, but instead a tool that can provide semantic search capabilities.", + "description": "A tool that can be used to recursively list a directory's content.", "env_vars": [], - "humanized_name": "Search a github repo's content", + "humanized_name": "List files in directory", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -2207,92 +2314,51 @@ } }, "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" - }, - "config": { + "directory": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Config" - }, - "content_types": { - "description": "Content types you want to be included search, options: [code, repo, pr, issue]", - "items": { - "type": "string" - }, - "title": "Content Types", - "type": "array" - }, - "gh_token": { - "title": "Gh Token", - "type": "string" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" + "title": "Directory" } }, - "required": [ - "gh_token" - ], - "title": "GithubSearchTool", + "title": "DirectoryReadTool", "type": "object" }, - "name": "GithubSearchTool", + "name": "DirectoryReadTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for GithubSearchTool.", + "description": "Input for DirectoryReadTool.", "properties": { - "content_types": { - "description": "Mandatory content types you want to be included search, options: [code, repo, pr, issue]", - "items": { - "type": "string" - }, - "title": "Content Types", - "type": "array" - }, - "github_repo": { - "description": "Mandatory github you want to search", - "title": "Github Repo", - "type": "string" - }, - "search_query": { - "description": "Mandatory search query you want to use to search the github repo's content", - "title": "Search Query", + "directory": { + "description": "Mandatory directory to list content", + "title": "Directory", "type": "string" } }, "required": [ - "search_query", - "github_repo", - "content_types" + "directory" ], - "title": "GithubSearchToolSchema", + "title": "DirectoryReadToolSchema", "type": "object" } }, { - "description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html", - "env_vars": [ - { - "default": null, - "description": "API key for Hyperbrowser services", - "name": "HYPERBROWSER_API_KEY", - "required": false - } - ], - "humanized_name": "Hyperbrowser web load tool", + "description": "A tool that can be used to semantic search a query from a directory's content.", + "env_vars": [], + "humanized_name": "Search a directory's content", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -2329,88 +2395,69 @@ "type": "object" } }, - "description": "HyperbrowserLoadTool.\n\nScrape or crawl web pages and load the contents with optional parameters for configuring content extraction.\nRequires the `hyperbrowser` package.\nGet your API Key from https://app.hyperbrowser.ai/\n\nArgs:\n api_key: The Hyperbrowser API key, can be set as an environment variable `HYPERBROWSER_API_KEY` or passed directly", "properties": { - "api_key": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, + "config": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Api Key" 
+ "title": "Config" }, - "hyperbrowser": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "default": null, - "title": "Hyperbrowser" + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "title": "HyperbrowserLoadTool", + "title": "DirectorySearchTool", "type": "object" }, - "name": "HyperbrowserLoadTool", - "package_dependencies": [ - "hyperbrowser" - ], + "name": "DirectorySearchTool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for DirectorySearchTool.", "properties": { - "operation": { - "description": "Operation to perform on the website. Either 'scrape' or 'crawl'", - "enum": [ - "scrape", - "crawl" - ], - "title": "Operation", + "directory": { + "description": "Mandatory directory you want to search", + "title": "Directory", "type": "string" }, - "params": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "description": "Optional params for scrape or crawl. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait", - "title": "Params" - }, - "url": { - "description": "Website URL", - "title": "Url", + "search_query": { + "description": "Mandatory search query you want to use to search the directory's content", + "title": "Search Query", "type": "string" } }, "required": [ - "url", - "operation", - "params" + "search_query", + "directory" ], - "title": "HyperbrowserLoadToolSchema", + "title": "DirectorySearchToolSchema", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a JSON's content.", - "env_vars": [], - "humanized_name": "Search a JSON's content", + "description": "Search the internet using Exa", + "env_vars": [ + { + "default": null, + "description": "API key for Exa services", + "name": "EXA_API_KEY", + "required": false + } + ], + "humanized_name": "EXASearchTool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -2448,66 +2495,134 @@ } }, "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" - }, - "config": { + "api_key": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], + "description": "API key for Exa services", + "required": false, + "title": "Api Key" + }, + "client": { + "anyOf": [ + {}, + { + "type": "null" + } + ], "default": null, - "title": "Config" + "title": "Client" }, - "summarize": { + "content": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], "default": false, - "title": "Summarize", - "type": "boolean" + "title": "Content" + }, + "summary": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": false, + "title": "Summary" + }, + "type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "auto", + "title": "Type" } }, - "title": "JSONSearchTool", + "title": "EXASearchTool", "type": "object" }, - "name": "JSONSearchTool", - "package_dependencies": [], + "name": "EXASearchTool", + "package_dependencies": [ + "exa_py" + ], "run_params_schema": { - "description": "Input for JSONSearchTool.", "properties": { - "json_path": { - "description": "Mandatory json path you want to search", - "title": "Json Path", - "type": "string" + "end_published_date": { + "anyOf": [ + 
{ + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "End date for the search", + "title": "End Published Date" + }, + "include_domains": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "List of domains to include in the search", + "title": "Include Domains" }, "search_query": { - "description": "Mandatory search query you want to use to search the JSON's content", + "description": "Mandatory search query you want to use to search the internet", "title": "Search Query", "type": "string" + }, + "start_published_date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Start date for the search", + "title": "Start Published Date" } }, "required": [ - "search_query", - "json_path" + "search_query" ], - "title": "JSONSearchToolSchema", + "title": "EXABaseToolSchema", "type": "object" } }, { - "description": "Performs an API call to Linkup to retrieve contextual information.", - "env_vars": [ - { - "default": null, - "description": "API key for Linkup", - "name": "LINKUP_API_KEY", - "required": true - } - ], - "humanized_name": "Linkup Search Tool", + "description": "Compresses a file or directory into an archive (.zip currently supported). Useful for archiving logs, documents, or backups.", + "env_vars": [], + "humanized_name": "File Compressor Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -2547,19 +2662,56 @@ } }, "properties": {}, - "title": "LinkupSearchTool", + "title": "FileCompressorTool", "type": "object" }, - "name": "LinkupSearchTool", - "package_dependencies": [ - "linkup-sdk" - ], - "run_params_schema": {} - }, - { - "description": "", - "env_vars": [], - "humanized_name": "LlamaIndexTool", + "name": "FileCompressorTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input schema for FileCompressorTool.", + "properties": { + "format": { + "default": "zip", + "description": "Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').", + "title": "Format", + "type": "string" + }, + "input_path": { + "description": "Path to the file or directory to compress.", + "title": "Input Path", + "type": "string" + }, + "output_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Optional output archive filename.", + "title": "Output Path" + }, + "overwrite": { + "default": false, + "description": "Whether to overwrite the archive if it already exists.", + "title": "Overwrite", + "type": "boolean" + } + }, + "required": [ + "input_path" + ], + "title": "FileCompressorToolInput", + "type": "object" + } + }, + { + "description": "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read.", + "env_vars": [], + "humanized_name": "Read a file's content", "init_params_schema": { "$defs": { "EnvVar": { @@ -2598,35 +2750,74 @@ "type": "object" } }, - "description": "Tool to wrap LlamaIndex tools/query engines.", + "description": "A tool for reading file contents.\n\nThis tool inherits its schema handling from BaseTool to avoid recursive schema\ndefinition issues. The args_schema is set to FileReadToolSchema which defines\nthe required file_path parameter. 
The schema should not be overridden in the\nconstructor as it would break the inheritance chain and cause infinite loops.\n\nThe tool supports two ways of specifying the file path:\n1. At construction time via the file_path parameter\n2. At runtime via the file_path parameter in the tool's input\n\nArgs:\n file_path (Optional[str]): Path to the file to be read. If provided,\n this becomes the default file path for the tool.\n **kwargs: Additional keyword arguments passed to BaseTool.\n\nExample:\n >>> tool = FileReadTool(file_path=\"/path/to/file.txt\")\n >>> content = tool.run() # Reads /path/to/file.txt\n >>> content = tool.run(file_path=\"/path/to/other.txt\") # Reads other.txt\n >>> content = tool.run(file_path=\"/path/to/file.txt\", start_line=100, line_count=50) # Reads lines 100-149", "properties": { - "llama_index_tool": { - "title": "Llama Index Tool" + "file_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "File Path" } }, - "required": [ - "name", - "description", - "llama_index_tool" - ], - "title": "LlamaIndexTool", + "title": "FileReadTool", "type": "object" }, - "name": "LlamaIndexTool", + "name": "FileReadTool", "package_dependencies": [], - "run_params_schema": {} + "run_params_schema": { + "description": "Input for FileReadTool.", + "properties": { + "file_path": { + "description": "Mandatory file full path to read the file", + "title": "File Path", + "type": "string" + }, + "line_count": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of lines to read. If None, reads the entire file", + "title": "Line Count" + }, + "start_line": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 1, + "description": "Line number to start reading from (1-indexed)", + "title": "Start Line" + } + }, + "required": [ + "file_path" + ], + "title": "FileReadToolSchema", + "type": "object" + } }, { - "description": "A tool that can be used to semantic search a query from a MDX's content.", + "description": "A tool to write content to a specified file. 
Accepts filename, content, and optionally a directory path and overwrite flag as input.", "env_vars": [], - "humanized_name": "Search a MDX's content", + "humanized_name": "File Writer Tool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -2663,73 +2854,66 @@ "type": "object" } }, + "properties": {}, + "title": "FileWriterTool", + "type": "object" + }, + "name": "FileWriterTool", + "package_dependencies": [], + "run_params_schema": { "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "content": { + "title": "Content", + "type": "string" }, - "config": { + "directory": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "default": null, - "title": "Config" + "default": "./", + "title": "Directory" }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" - } - }, - "title": "MDXSearchTool", - "type": "object" - }, - "name": "MDXSearchTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input for MDXSearchTool.", - "properties": { - "mdx": { - "description": "Mandatory mdx path you want to search", - "title": "Mdx", + "filename": { + "title": "Filename", "type": "string" }, - "search_query": { - "description": "Mandatory search query you want to use to search the MDX's content", - "title": "Search Query", - "type": "string" + "overwrite": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "boolean" + } + ], + "default": false, + "title": "Overwrite" } }, "required": [ - "search_query", - "mdx" + "filename", + "content" ], - "title": "MDXSearchToolSchema", + "title": "FileWriterToolInput", "type": "object" } }, { - "description": "A tool to perfrom a vector search on a MongoDB database for relevant information on internal documents.", + "description": "Crawl webpages using Firecrawl and return the contents", "env_vars": [ { "default": null, - "description": "API key for Browserbase services", - "name": "BROWSERBASE_API_KEY", - "required": false - }, - { - "default": null, - "description": "Project ID for Browserbase services", - "name": "BROWSERBASE_PROJECT_ID", - "required": false + "description": "API key for Firecrawl services", + "name": "FIRECRAWL_API_KEY", + "required": true } ], - "humanized_name": "MongoDBVectorSearchTool", + "humanized_name": "Firecrawl web crawl tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -2766,226 +2950,68 @@ ], "title": "EnvVar", "type": "object" - }, - "MongoDBVectorSearchConfig": { - "description": "Configuration for MongoDB vector search queries.", - "properties": { - "include_embeddings": { - "default": false, - "description": "Whether to include the embedding vector of each result in metadata.", - "title": "Include Embeddings", - "type": "boolean" - }, - "limit": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 4, - "description": "number of documents to return.", - "title": "Limit" + } + }, + "description": "Tool for crawling websites using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n max_depth (int): Maximum depth to crawl. Default: 2\n ignore_sitemap (bool): Whether to ignore sitemap. Default: True\n limit (int): Maximum number of pages to crawl. 
Default: 100\n allow_backward_links (bool): Allow crawling backward links. Default: False\n allow_external_links (bool): Allow crawling external links. Default: False\n scrape_options (ScrapeOptions): Options for scraping content\n - formats (list[str]): Content formats to return. Default: [\"markdown\", \"screenshot\", \"links\"]\n - only_main_content (bool): Only return main content. Default: True\n - timeout (int): Timeout in milliseconds. Default: 30000", + "properties": { + "api_key": { + "anyOf": [ + { + "type": "string" }, - "oversampling_factor": { - "default": 10, - "description": "Multiple of limit used when generating number of candidates at each step in the HNSW Vector Search", - "title": "Oversampling Factor", - "type": "integer" - }, - "post_filter_pipeline": { - "anyOf": [ - { - "items": { - "additionalProperties": true, - "type": "object" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Pipeline of MongoDB aggregation stages to filter/process results after $vectorSearch.", - "title": "Post Filter Pipeline" - }, - "pre_filter": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "List of MQL match expressions comparing an indexed field", - "title": "Pre Filter" - } - }, - "title": "MongoDBVectorSearchConfig", - "type": "object" - } - }, - "description": "Tool to perfrom a vector search the MongoDB database", - "properties": { - "collection_name": { - "description": "The name of the MongoDB collection", - "title": "Collection Name", - "type": "string" - }, - "connection_string": { - "description": "The connection string of the MongoDB cluster", - "title": "Connection String", - "type": "string" - }, - "database_name": { - "description": "The name of the MongoDB database", - "title": "Database Name", - "type": "string" - }, - "dimensions": { - "default": 1536, - "description": "Number of dimensions in the embedding vector", - "title": "Dimensions", - "type": "integer" - }, - "embedding_key": { - "default": "embedding", - "description": "Field that will contain the embedding for each document", - "title": "Embedding Key", - "type": "string" - }, - "embedding_model": { - "default": "text-embedding-3-large", - "description": "Text OpenAI embedding model to use", - "title": "Embedding Model", - "type": "string" - }, - "query_config": { - "anyOf": [ - { - "$ref": "#/$defs/MongoDBVectorSearchConfig" + { + "type": "null" + } + ], + "default": null, + "title": "Api Key" + }, + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": null, - "description": "MongoDB Vector Search query configuration" - }, - "text_key": { - "default": "text", - "description": "MongoDB field that will contain the text for each document", - "title": "Text Key", - "type": "string" - }, - "vector_index_name": { - "default": "vector_index", - "description": "Name of the Atlas Search vector index", - "title": "Vector Index Name", - "type": "string" + "title": "Config" } }, - "required": [ - "database_name", - "collection_name", - "connection_string" - ], - "title": "MongoDBVectorSearchTool", + "title": "FirecrawlCrawlWebsiteTool", "type": "object" }, - "name": "MongoDBVectorSearchTool", + "name": "FirecrawlCrawlWebsiteTool", "package_dependencies": [ - "mongdb" + "firecrawl-py" ], "run_params_schema": { - "description": "Input for MongoDBTool.", "properties": { - "include_embeddings": { - "default": false, - 
"description": "Whether to include the embedding vector of each result in metadata.", - "title": "Include Embeddings", - "type": "boolean" - }, - "limit": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 4, - "description": "number of documents to return.", - "title": "Limit" - }, - "oversampling_factor": { - "default": 10, - "description": "Multiple of limit used when generating number of candidates at each step in the HNSW Vector Search", - "title": "Oversampling Factor", - "type": "integer" - }, - "post_filter_pipeline": { - "anyOf": [ - { - "items": { - "additionalProperties": true, - "type": "object" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Pipeline of MongoDB aggregation stages to filter/process results after $vectorSearch.", - "title": "Post Filter Pipeline" - }, - "pre_filter": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "List of MQL match expressions comparing an indexed field", - "title": "Pre Filter" - }, - "query": { - "description": "The query to search retrieve relevant information from the MongoDB database. Pass only the query, not the question.", - "title": "Query", + "url": { + "description": "Website URL", + "title": "Url", "type": "string" } }, "required": [ - "query" + "url" ], - "title": "MongoDBToolSchema", + "title": "FirecrawlCrawlWebsiteToolSchema", "type": "object" } }, { - "description": "Multion gives the ability for LLMs to control web browsers using natural language instructions.\n If the status is 'CONTINUE', reissue the same instruction to continue execution", + "description": "Scrape webpages using Firecrawl and return the contents", "env_vars": [ { "default": null, - "description": "API key for Multion", - "name": "MULTION_API_KEY", + "description": "API key for Firecrawl services", + "name": "FIRECRAWL_API_KEY", "required": true } ], - "humanized_name": "Multion Browse Tool", + "humanized_name": "Firecrawl web scrape tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -3024,29 +3050,9 @@ "type": "object" } }, - "description": "Tool to wrap MultiOn Browse Capabilities.", + "description": "Tool for scraping webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n formats (list[str]): Content formats to return. Default: [\"markdown\"]\n onlyMainContent (bool): Only return main content. Default: True\n includeTags (list[str]): Tags to include. Default: []\n excludeTags (list[str]): Tags to exclude. Default: []\n headers (dict): Headers to include. Default: {}\n waitFor (int): Time to wait for page to load in ms. Default: 0\n json_options (dict): Options for JSON extraction. 
Default: None", "properties": { - "local": { - "default": false, - "title": "Local", - "type": "boolean" - }, - "max_steps": { - "default": 3, - "title": "Max Steps", - "type": "integer" - }, - "multion": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "default": null, - "title": "Multion" - }, - "session_id": { + "api_key": { "anyOf": [ { "type": "string" @@ -3056,29 +3062,49 @@ } ], "default": null, - "title": "Session Id" + "title": "Api Key" + }, + "config": { + "additionalProperties": true, + "title": "Config", + "type": "object" } }, - "title": "MultiOnTool", + "title": "FirecrawlScrapeWebsiteTool", "type": "object" }, - "name": "MultiOnTool", + "name": "FirecrawlScrapeWebsiteTool", "package_dependencies": [ - "multion" + "firecrawl-py" ], - "run_params_schema": {} + "run_params_schema": { + "properties": { + "url": { + "description": "Website URL", + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "FirecrawlScrapeWebsiteToolSchema", + "type": "object" + } }, { - "description": "A tool that can be used to semantic search a query from a database table's content.", - "env_vars": [], - "humanized_name": "Search a database's table content", + "description": "Search webpages using Firecrawl and return the results", + "env_vars": [ + { + "default": null, + "description": "API key for Firecrawl services", + "name": "FIRECRAWL_API_KEY", + "required": true + } + ], + "humanized_name": "Firecrawl web search tool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -3115,9 +3141,19 @@ "type": "object" } }, + "description": "Tool for searching webpages using Firecrawl. To run this tool, you need to have a Firecrawl API key.\n\nArgs:\n api_key (str): Your Firecrawl API key.\n config (dict): Optional. It contains Firecrawl API parameters.\n\nDefault configuration options:\n limit (int): Maximum number of pages to crawl. Default: 5\n tbs (str): Time before search. Default: None\n lang (str): Language. Default: \"en\"\n country (str): Country. Default: \"us\"\n location (str): Location. Default: None\n timeout (int): Timeout in milliseconds. 
Default: 60000", "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "api_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Api Key" }, "config": { "anyOf": [ @@ -3129,50 +3165,42 @@ "type": "null" } ], - "default": null, "title": "Config" - }, - "db_uri": { - "description": "Mandatory database URI", - "title": "Db Uri", - "type": "string" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" } }, - "required": [ - "db_uri" - ], - "title": "MySQLSearchTool", + "title": "FirecrawlSearchTool", "type": "object" }, - "name": "MySQLSearchTool", - "package_dependencies": [], + "name": "FirecrawlSearchTool", + "package_dependencies": [ + "firecrawl-py" + ], "run_params_schema": { - "description": "Input for MySQLSearchTool.", "properties": { - "search_query": { - "description": "Mandatory semantic search query you want to use to search the database's content", - "title": "Search Query", + "query": { + "description": "Search query", + "title": "Query", "type": "string" } }, "required": [ - "search_query" + "query" ], - "title": "MySQLSearchToolSchema", + "title": "FirecrawlSearchToolSchema", "type": "object" } }, { - "description": "Converts natural language to SQL queries and executes them.", + "description": "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities.", "env_vars": [], - "humanized_name": "NL2SQLTool", + "humanized_name": "Search a github repo's content", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -3210,64 +3238,90 @@ } }, "properties": { - "columns": { - "additionalProperties": true, - "default": {}, - "title": "Columns", - "type": "object" + "adapter": { + "$ref": "#/$defs/Adapter" }, - "db_uri": { - "description": "The URI of the database to connect to.", - "title": "Database URI", - "type": "string" + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" }, - "tables": { - "default": [], - "items": {}, - "title": "Tables", + "content_types": { + "description": "Content types you want to be included search, options: [code, repo, pr, issue]", + "items": { + "type": "string" + }, + "title": "Content Types", "type": "array" + }, + "gh_token": { + "title": "Gh Token", + "type": "string" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, "required": [ - "db_uri" + "gh_token" ], - "title": "NL2SQLTool", + "title": "GithubSearchTool", "type": "object" }, - "name": "NL2SQLTool", + "name": "GithubSearchTool", "package_dependencies": [], "run_params_schema": { + "description": "Input for GithubSearchTool.", "properties": { - "sql_query": { - "description": "The SQL query to execute.", - "title": "SQL Query", + "content_types": { + "description": "Mandatory content types you want to be included search, options: [code, repo, pr, issue]", + "items": { + "type": "string" + }, + "title": "Content Types", + "type": "array" + }, + "github_repo": { + "description": "Mandatory github you want to search", + "title": "Github Repo", + "type": "string" + }, + "search_query": { + "description": "Mandatory search query you want to use to search the github repo's content", + "title": "Search Query", "type": "string" } }, "required": [ - 
"sql_query" + "search_query", + "github_repo", + "content_types" ], - "title": "NL2SQLToolInput", + "title": "GithubSearchToolSchema", "type": "object" } }, { - "description": "Scrape Amazon product pages with Oxylabs Amazon Product Scraper", + "description": "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html", "env_vars": [ { "default": null, - "description": "Username for Oxylabs", - "name": "OXYLABS_USERNAME", - "required": true - }, - { - "default": null, - "description": "Password for Oxylabs", - "name": "OXYLABS_PASSWORD", - "required": true + "description": "API key for Hyperbrowser services", + "name": "HYPERBROWSER_API_KEY", + "required": false } ], - "humanized_name": "Oxylabs Amazon Product Scraper tool", + "humanized_name": "Hyperbrowser web load tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -3304,38 +3358,88 @@ ], "title": "EnvVar", "type": "object" - }, - "OxylabsAmazonProductScraperConfig": { - "description": "Amazon Product Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product", - "properties": { - "callback_url": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "URL to your callback endpoint.", - "title": "Callback Url" + } + }, + "description": "HyperbrowserLoadTool.\n\nScrape or crawl web pages and load the contents with optional parameters for configuring content extraction.\nRequires the `hyperbrowser` package.\nGet your API Key from https://app.hyperbrowser.ai/\n\nArgs:\n api_key: The Hyperbrowser API key, can be set as an environment variable `HYPERBROWSER_API_KEY` or passed directly", + "properties": { + "api_key": { + "anyOf": [ + { + "type": "string" }, - "context": { - "anyOf": [ - { - "items": {}, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Additional advanced settings and controls for specialized requirements.", - "title": "Context" + { + "type": "null" + } + ], + "default": null, + "title": "Api Key" + }, + "hyperbrowser": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Hyperbrowser" + } + }, + "title": "HyperbrowserLoadTool", + "type": "object" + }, + "name": "HyperbrowserLoadTool", + "package_dependencies": [ + "hyperbrowser" + ], + "run_params_schema": { + "properties": { + "operation": { + "description": "Operation to perform on the website. Either 'scrape' or 'crawl'", + "enum": [ + "scrape", + "crawl" + ], + "title": "Operation", + "type": "string" + }, + "params": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" }, - "domain": { + { + "type": "null" + } + ], + "description": "Optional params for scrape or crawl. 
For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait", + "title": "Params" + }, + "url": { + "description": "Website URL", + "title": "Url", + "type": "string" + } + }, + "required": [ + "url", + "operation", + "params" + ], + "title": "HyperbrowserLoadToolSchema", + "type": "object" + } + }, + { + "description": "Invokes an CrewAI Platform Automation using API", + "env_vars": [], + "humanized_name": "invoke_amp_automation", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { "anyOf": [ { "type": "string" @@ -3345,50 +3449,85 @@ } ], "default": null, - "description": "The domain to limit the search results to.", - "title": "Domain" + "title": "Default" }, - "geo_location": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The Deliver to location.", - "title": "Geo Location" + "description": { + "title": "Description", + "type": "string" }, - "parse": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "True will return structured data.", - "title": "Parse" + "name": { + "title": "Name", + "type": "string" }, - "parsing_instructions": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Instructions for parsing the results.", - "title": "Parsing Instructions" - }, - "render": { + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "description": "A CrewAI tool for invoking external crew/flows APIs.\n\nThis tool provides CrewAI Platform API integration with external crew services, supporting:\n- Dynamic input schema configuration\n- Automatic polling for task completion\n- Bearer token authentication\n- Comprehensive error handling\n\nExample:\n Basic usage:\n >>> tool = InvokeCrewAIAutomationTool(\n ... crew_api_url=\"https://api.example.com\",\n ... crew_bearer_token=\"your_token\",\n ... crew_name=\"My Crew\",\n ... crew_description=\"Description of what the crew does\"\n ... )\n \n With custom inputs:\n >>> custom_inputs = {\n ... \"param1\": Field(..., description=\"Description of param1\"),\n ... \"param2\": Field(default=\"default_value\", description=\"Description of param2\")\n ... }\n >>> tool = InvokeCrewAIAutomationTool(\n ... crew_api_url=\"https://api.example.com\",\n ... crew_bearer_token=\"your_token\",\n ... crew_name=\"My Crew\",\n ... crew_description=\"Description of what the crew does\",\n ... crew_inputs=custom_inputs\n ... )\n \n Example:\n >>> tools=[\n ... InvokeCrewAIAutomationTool(\n ... crew_api_url=\"https://canary-crew-[...].crewai.com\",\n ... crew_bearer_token=\"[Your token: abcdef012345]\",\n ... crew_name=\"State of AI Report\",\n ... crew_description=\"Retrieves a report on state of AI for a given year.\",\n ... crew_inputs={\n ... \"year\": Field(..., description=\"Year to retrieve the report for (integer)\")\n ... }\n ... )\n ... 
]", + "properties": { + "crew_api_url": { + "title": "Crew Api Url", + "type": "string" + }, + "crew_bearer_token": { + "title": "Crew Bearer Token", + "type": "string" + }, + "max_polling_time": { + "default": 600, + "title": "Max Polling Time", + "type": "integer" + } + }, + "required": [ + "crew_api_url", + "crew_bearer_token" + ], + "title": "InvokeCrewAIAutomationTool", + "type": "object" + }, + "name": "InvokeCrewAIAutomationTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input schema for InvokeCrewAIAutomationTool.", + "properties": { + "prompt": { + "description": "The prompt or query to send to the crew", + "title": "Prompt", + "type": "string" + } + }, + "required": [ + "prompt" + ], + "title": "InvokeCrewAIAutomationInput", + "type": "object" + } + }, + { + "description": "A tool that can be used to semantic search a query from a JSON's content.", + "env_vars": [], + "humanized_name": "Search a JSON's content", + "init_params_schema": { + "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, + "EnvVar": { + "properties": { + "default": { "anyOf": [ { "type": "string" @@ -3398,75 +3537,91 @@ } ], "default": null, - "description": "Enables JavaScript rendering.", - "title": "Render" + "title": "Default" }, - "user_agent_type": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Device type and browser.", - "title": "User Agent Type" + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" } }, - "title": "OxylabsAmazonProductScraperConfig", + "required": [ + "name", + "description" + ], + "title": "EnvVar", "type": "object" } }, - "description": "Scrape Amazon product pages with OxylabsAmazonProductScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. 
See ``OxylabsAmazonProductScraperConfig``", "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, "config": { - "$ref": "#/$defs/OxylabsAmazonProductScraperConfig" + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "required": [ - "config" - ], - "title": "OxylabsAmazonProductScraperTool", + "title": "JSONSearchTool", "type": "object" }, - "name": "OxylabsAmazonProductScraperTool", - "package_dependencies": [ - "oxylabs" - ], + "name": "JSONSearchTool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for JSONSearchTool.", "properties": { - "query": { - "description": "Amazon product ASIN", - "title": "Query", + "json_path": { + "description": "Mandatory json path you want to search", + "title": "Json Path", + "type": "string" + }, + "search_query": { + "description": "Mandatory search query you want to use to search the JSON's content", + "title": "Search Query", "type": "string" } }, "required": [ - "query" + "search_query", + "json_path" ], - "title": "OxylabsAmazonProductScraperArgs", + "title": "JSONSearchToolSchema", "type": "object" } }, { - "description": "Scrape Amazon search results with Oxylabs Amazon Search Scraper", + "description": "Performs an API call to Linkup to retrieve contextual information.", "env_vars": [ { "default": null, - "description": "Username for Oxylabs", - "name": "OXYLABS_USERNAME", - "required": true - }, - { - "default": null, - "description": "Password for Oxylabs", - "name": "OXYLABS_PASSWORD", + "description": "API key for Linkup", + "name": "LINKUP_API_KEY", "required": true } ], - "humanized_name": "Oxylabs Amazon Search Scraper tool", + "humanized_name": "Linkup Search Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -3503,11 +3658,27 @@ ], "title": "EnvVar", "type": "object" - }, - "OxylabsAmazonSearchScraperConfig": { - "description": "Amazon Search Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/search", + } + }, + "properties": {}, + "title": "LinkupSearchTool", + "type": "object" + }, + "name": "LinkupSearchTool", + "package_dependencies": [ + "linkup-sdk" + ], + "run_params_schema": {} + }, + { + "description": "", + "env_vars": [], + "humanized_name": "LlamaIndexTool", + "init_params_schema": { + "$defs": { + "EnvVar": { "properties": { - "callback_url": { + "default": { "anyOf": [ { "type": "string" @@ -3517,24 +3688,62 @@ } ], "default": null, - "description": "URL to your callback endpoint.", - "title": "Callback Url" + "title": "Default" }, - "context": { - "anyOf": [ - { - "items": {}, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Additional advanced settings and controls for specialized requirements.", - "title": "Context" + "description": { + "title": "Description", + "type": "string" }, - "domain": { + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "description": "Tool to wrap LlamaIndex tools/query engines.", + "properties": { + "llama_index_tool": { + "title": "Llama Index Tool" + } + }, + "required": [ + "name", + "description", + "llama_index_tool" + ], + "title": "LlamaIndexTool", + "type": "object" + }, + "name": 
"LlamaIndexTool", + "package_dependencies": [], + "run_params_schema": {} + }, + { + "description": "A tool that can be used to semantic search a query from a MDX's content.", + "env_vars": [], + "humanized_name": "Search a MDX's content", + "init_params_schema": { + "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, + "EnvVar": { + "properties": { + "default": { "anyOf": [ { "type": "string" @@ -3544,154 +3753,97 @@ } ], "default": null, - "description": "The domain to limit the search results to.", - "title": "Domain" - }, - "geo_location": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The Deliver to location.", - "title": "Geo Location" - }, - "pages": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The number of pages to scrape.", - "title": "Pages" - }, - "parse": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "True will return structured data.", - "title": "Parse" - }, - "parsing_instructions": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Instructions for parsing the results.", - "title": "Parsing Instructions" + "title": "Default" }, - "render": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Enables JavaScript rendering.", - "title": "Render" + "description": { + "title": "Description", + "type": "string" }, - "start_page": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The starting page number.", - "title": "Start Page" + "name": { + "title": "Name", + "type": "string" }, - "user_agent_type": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Device type and browser.", - "title": "User Agent Type" + "required": { + "default": true, + "title": "Required", + "type": "boolean" } }, - "title": "OxylabsAmazonSearchScraperConfig", + "required": [ + "name", + "description" + ], + "title": "EnvVar", "type": "object" } }, - "description": "Scrape Amazon search results with OxylabsAmazonSearchScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. 
See ``OxylabsAmazonSearchScraperConfig``", "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, "config": { - "$ref": "#/$defs/OxylabsAmazonSearchScraperConfig" + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "required": [ - "config" - ], - "title": "OxylabsAmazonSearchScraperTool", + "title": "MDXSearchTool", "type": "object" }, - "name": "OxylabsAmazonSearchScraperTool", - "package_dependencies": [ - "oxylabs" - ], + "name": "MDXSearchTool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for MDXSearchTool.", "properties": { - "query": { - "description": "Amazon search term", - "title": "Query", + "mdx": { + "description": "Mandatory mdx path you want to search", + "title": "Mdx", + "type": "string" + }, + "search_query": { + "description": "Mandatory search query you want to use to search the MDX's content", + "title": "Search Query", "type": "string" } }, "required": [ - "query" + "search_query", + "mdx" ], - "title": "OxylabsAmazonSearchScraperArgs", + "title": "MDXSearchToolSchema", "type": "object" } }, { - "description": "Scrape Google Search results with Oxylabs Google Search Scraper", + "description": "A tool to perfrom a vector search on a MongoDB database for relevant information on internal documents.", "env_vars": [ { "default": null, - "description": "Username for Oxylabs", - "name": "OXYLABS_USERNAME", - "required": true + "description": "API key for Browserbase services", + "name": "BROWSERBASE_API_KEY", + "required": false }, { "default": null, - "description": "Password for Oxylabs", - "name": "OXYLABS_PASSWORD", - "required": true + "description": "Project ID for Browserbase services", + "name": "BROWSERBASE_PROJECT_ID", + "required": false } ], - "humanized_name": "Oxylabs Google Search Scraper tool", + "humanized_name": "MongoDBVectorSearchTool", "init_params_schema": { "$defs": { "EnvVar": { @@ -3729,179 +3881,147 @@ "title": "EnvVar", "type": "object" }, - "OxylabsGoogleSearchScraperConfig": { - "description": "Google Search Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search", + "MongoDBVectorSearchConfig": { + "description": "Configuration for MongoDB vector search queries.", "properties": { - "callback_url": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "URL to your callback endpoint.", - "title": "Callback Url" - }, - "context": { - "anyOf": [ - { - "items": {}, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Additional advanced settings and controls for specialized requirements.", - "title": "Context" + "include_embeddings": { + "default": false, + "description": "Whether to include the embedding vector of each result in metadata.", + "title": "Include Embeddings", + "type": "boolean" }, - "domain": { + "limit": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], - "default": null, - "description": "The domain to limit the search results to.", - "title": "Domain" + "default": 4, + "description": "number of documents to return.", + "title": "Limit" }, - "geo_location": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The Deliver to location.", - "title": "Geo Location" + 
"oversampling_factor": { + "default": 10, + "description": "Multiple of limit used when generating number of candidates at each step in the HNSW Vector Search", + "title": "Oversampling Factor", + "type": "integer" }, - "limit": { + "post_filter_pipeline": { "anyOf": [ { - "type": "integer" + "items": { + "additionalProperties": true, + "type": "object" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Number of results to retrieve in each page.", - "title": "Limit" + "description": "Pipeline of MongoDB aggregation stages to filter/process results after $vectorSearch.", + "title": "Post Filter Pipeline" }, - "pages": { + "pre_filter": { "anyOf": [ { - "type": "integer" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "description": "The number of pages to scrape.", - "title": "Pages" - }, - "parse": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "True will return structured data.", - "title": "Parse" - }, - "parsing_instructions": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Instructions for parsing the results.", - "title": "Parsing Instructions" - }, - "render": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Enables JavaScript rendering.", - "title": "Render" - }, - "start_page": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The starting page number.", - "title": "Start Page" - }, - "user_agent_type": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Device type and browser.", - "title": "User Agent Type" + "description": "List of MQL match expressions comparing an indexed field", + "title": "Pre Filter" } }, - "title": "OxylabsGoogleSearchScraperConfig", + "title": "MongoDBVectorSearchConfig", "type": "object" } }, - "description": "Scrape Google Search results with OxylabsGoogleSearchScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. 
See ``OxylabsGoogleSearchScraperConfig``", + "description": "Tool to perfrom a vector search the MongoDB database", "properties": { - "config": { - "$ref": "#/$defs/OxylabsGoogleSearchScraperConfig" + "collection_name": { + "description": "The name of the MongoDB collection", + "title": "Collection Name", + "type": "string" + }, + "connection_string": { + "description": "The connection string of the MongoDB cluster", + "title": "Connection String", + "type": "string" + }, + "database_name": { + "description": "The name of the MongoDB database", + "title": "Database Name", + "type": "string" + }, + "dimensions": { + "default": 1536, + "description": "Number of dimensions in the embedding vector", + "title": "Dimensions", + "type": "integer" + }, + "embedding_key": { + "default": "embedding", + "description": "Field that will contain the embedding for each document", + "title": "Embedding Key", + "type": "string" + }, + "embedding_model": { + "default": "text-embedding-3-large", + "description": "Text OpenAI embedding model to use", + "title": "Embedding Model", + "type": "string" + }, + "query_config": { + "anyOf": [ + { + "$ref": "#/$defs/MongoDBVectorSearchConfig" + }, + { + "type": "null" + } + ], + "default": null, + "description": "MongoDB Vector Search query configuration" + }, + "text_key": { + "default": "text", + "description": "MongoDB field that will contain the text for each document", + "title": "Text Key", + "type": "string" + }, + "vector_index_name": { + "default": "vector_index", + "description": "Name of the Atlas Search vector index", + "title": "Vector Index Name", + "type": "string" } }, "required": [ - "config" + "database_name", + "collection_name", + "connection_string" ], - "title": "OxylabsGoogleSearchScraperTool", + "title": "MongoDBVectorSearchTool", "type": "object" }, - "name": "OxylabsGoogleSearchScraperTool", + "name": "MongoDBVectorSearchTool", "package_dependencies": [ - "oxylabs" + "mongdb" ], "run_params_schema": { + "description": "Input for MongoDBTool.", "properties": { "query": { - "description": "Search query", + "description": "The query to search retrieve relevant information from the MongoDB database. 
Pass only the query, not the question.", "title": "Query", "type": "string" } @@ -3909,27 +4029,21 @@ "required": [ "query" ], - "title": "OxylabsGoogleSearchScraperArgs", + "title": "MongoDBToolSchema", "type": "object" } }, { - "description": "Scrape any url with Oxylabs Universal Scraper", + "description": "Multion gives the ability for LLMs to control web browsers using natural language instructions.\n If the status is 'CONTINUE', reissue the same instruction to continue execution", "env_vars": [ { "default": null, - "description": "Username for Oxylabs", - "name": "OXYLABS_USERNAME", - "required": true - }, - { - "default": null, - "description": "Password for Oxylabs", - "name": "OXYLABS_PASSWORD", + "description": "API key for Multion", + "name": "MULTION_API_KEY", "required": true } ], - "humanized_name": "Oxylabs Universal Scraper tool", + "humanized_name": "Multion Browse Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -3966,11 +4080,66 @@ ], "title": "EnvVar", "type": "object" + } + }, + "description": "Tool to wrap MultiOn Browse Capabilities.", + "properties": { + "local": { + "default": false, + "title": "Local", + "type": "boolean" }, - "OxylabsUniversalScraperConfig": { - "description": "Universal Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/other-websites", + "max_steps": { + "default": 3, + "title": "Max Steps", + "type": "integer" + }, + "multion": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Multion" + }, + "session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Session Id" + } + }, + "title": "MultiOnTool", + "type": "object" + }, + "name": "MultiOnTool", + "package_dependencies": [ + "multion" + ], + "run_params_schema": {} + }, + { + "description": "A tool that can be used to semantic search a query from a database table's content.", + "env_vars": [], + "humanized_name": "Search a database's table content", + "init_params_schema": { + "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, + "EnvVar": { "properties": { - "callback_url": { + "default": { "anyOf": [ { "type": "string" @@ -3980,136 +4149,88 @@ } ], "default": null, - "description": "URL to your callback endpoint.", - "title": "Callback Url" + "title": "Default" }, - "context": { - "anyOf": [ - { - "items": {}, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Additional advanced settings and controls for specialized requirements.", - "title": "Context" + "description": { + "title": "Description", + "type": "string" }, - "geo_location": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The Deliver to location.", - "title": "Geo Location" + "name": { + "title": "Name", + "type": "string" }, - "parse": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "True will return structured data.", - "title": "Parse" - }, - "parsing_instructions": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Instructions for parsing the results.", - "title": "Parsing Instructions" - }, - "render": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Enables JavaScript rendering.", - "title": "Render" - }, - "user_agent_type": { - "anyOf": 
[ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Device type and browser.", - "title": "User Agent Type" + "required": { + "default": true, + "title": "Required", + "type": "boolean" } }, - "title": "OxylabsUniversalScraperConfig", + "required": [ + "name", + "description" + ], + "title": "EnvVar", "type": "object" } }, - "description": "Scrape any website with OxylabsUniversalScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. See ``OxylabsUniversalScraperConfig``", "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, "config": { - "$ref": "#/$defs/OxylabsUniversalScraperConfig" + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "db_uri": { + "description": "Mandatory database URI", + "title": "Db Uri", + "type": "string" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, "required": [ - "config" + "db_uri" ], - "title": "OxylabsUniversalScraperTool", + "title": "MySQLSearchTool", "type": "object" }, - "name": "OxylabsUniversalScraperTool", - "package_dependencies": [ - "oxylabs" - ], + "name": "MySQLSearchTool", + "package_dependencies": [], "run_params_schema": { + "description": "Input for MySQLSearchTool.", "properties": { - "url": { - "description": "Website URL", - "title": "Url", + "search_query": { + "description": "Mandatory semantic search query you want to use to search the database's content", + "title": "Search Query", "type": "string" } }, "required": [ - "url" + "search_query" ], - "title": "OxylabsUniversalScraperArgs", + "title": "MySQLSearchToolSchema", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a PDF's content.", + "description": "Converts natural language to SQL queries and executes them.", "env_vars": [], - "humanized_name": "Search a PDF's content", + "humanized_name": "NL2SQLTool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -4147,66 +4268,53 @@ } }, "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" + "columns": { + "additionalProperties": true, + "default": {}, + "title": "Columns", + "type": "object" }, - "config": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Config" + "db_uri": { + "description": "The URI of the database to connect to.", + "title": "Database URI", + "type": "string" }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" + "tables": { + "default": [], + "items": {}, + "title": "Tables", + "type": "array" } }, - "title": "PDFSearchTool", + "required": [ + "db_uri" + ], + "title": "NL2SQLTool", "type": "object" }, - "name": "PDFSearchTool", + "name": "NL2SQLTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for PDFSearchTool.", "properties": { - "pdf": { - "description": "Mandatory pdf path you want to search", - "title": "Pdf", - "type": "string" - }, - "query": { - "description": "Mandatory query you want to use to search the PDF's content", - "title": "Query", + "sql_query": { + "description": "The SQL query to execute.", + "title": "SQL Query", "type": "string" } }, "required": [ - "query", - "pdf" 
+ "sql_query" ], - "title": "PDFSearchToolSchema", + "title": "NL2SQLToolInput", "type": "object" } }, { - "description": "A tool that can be used to semantic search a query from a database table's content.", + "description": "This tool uses an LLM's API to extract text from an image file.", "env_vars": [], - "humanized_name": "Search a database's table content", + "humanized_name": "Optical Character Recognition Tool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -4243,69 +4351,43 @@ "type": "object" } }, - "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" - }, - "config": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Config" - }, - "db_uri": { - "description": "Mandatory database URI", - "title": "Db Uri", - "type": "string" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" - } - }, - "required": [ - "db_uri" - ], - "title": "PGSearchTool", + "description": "A tool for performing Optical Character Recognition on images.\n\nThis tool leverages LLMs to extract text from images. It can process\nboth local image files and images available via URLs.\n\nAttributes:\n name (str): Name of the tool.\n description (str): Description of the tool's functionality.\n args_schema (Type[BaseModel]): Pydantic schema for input validation.\n\nPrivate Attributes:\n _llm (Optional[LLM]): Language model instance for making API calls.", + "properties": {}, + "title": "OCRTool", "type": "object" }, - "name": "PGSearchTool", + "name": "OCRTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for PGSearchTool.", + "description": "Input schema for Optical Character Recognition Tool.\n\nAttributes:\n image_path_url (str): Path to a local image file or URL of an image.\n For local files, provide the absolute or relative path.\n For remote images, provide the complete URL starting with 'http' or 'https'.", "properties": { - "search_query": { - "description": "Mandatory semantic search query you want to use to search the database's content", - "title": "Search Query", + "image_path_url": { + "default": "The image path or URL.", + "title": "Image Path Url", "type": "string" } }, - "required": [ - "search_query" - ], - "title": "PGSearchToolSchema", + "title": "OCRToolSchema", "type": "object" } }, { - "description": "", + "description": "Scrape Amazon product pages with Oxylabs Amazon Product Scraper", "env_vars": [ { "default": null, - "description": "API key for Patronus evaluation services", - "name": "PATRONUS_API_KEY", + "description": "Username for Oxylabs", + "name": "OXYLABS_USERNAME", + "required": true + }, + { + "default": null, + "description": "Password for Oxylabs", + "name": "OXYLABS_PASSWORD", "required": true } ], - "humanized_name": "Patronus Evaluation Tool", + "humanized_name": "Oxylabs Amazon Product Scraper tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -4342,28 +4424,1150 @@ ], "title": "EnvVar", "type": "object" - } - }, - "properties": { - "criteria": { - "default": [], - "items": { - "additionalProperties": { - "type": "string" - }, - "type": "object" - }, - "title": "Criteria", - "type": "array" }, - "evaluate_url": { - "default": "https://api.patronus.ai/v1/evaluate", - "title": "Evaluate Url", - "type": "string" - }, - "evaluators": { - "default": [], - "items": { + "OxylabsAmazonProductScraperConfig": { + 
"description": "Amazon Product Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/product", + "properties": { + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "URL to your callback endpoint.", + "title": "Callback Url" + }, + "context": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional advanced settings and controls for specialized requirements.", + "title": "Context" + }, + "domain": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The domain to limit the search results to.", + "title": "Domain" + }, + "geo_location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Deliver to location.", + "title": "Geo Location" + }, + "parse": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "True will return structured data.", + "title": "Parse" + }, + "parsing_instructions": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Instructions for parsing the results.", + "title": "Parsing Instructions" + }, + "render": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Enables JavaScript rendering.", + "title": "Render" + }, + "user_agent_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Device type and browser.", + "title": "User Agent Type" + } + }, + "title": "OxylabsAmazonProductScraperConfig", + "type": "object" + } + }, + "description": "Scrape Amazon product pages with OxylabsAmazonProductScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. 
See ``OxylabsAmazonProductScraperConfig``", + "properties": { + "config": { + "$ref": "#/$defs/OxylabsAmazonProductScraperConfig" + }, + "oxylabs_api": { + "title": "Oxylabs Api" + } + }, + "required": [ + "oxylabs_api", + "config" + ], + "title": "OxylabsAmazonProductScraperTool", + "type": "object" + }, + "name": "OxylabsAmazonProductScraperTool", + "package_dependencies": [ + "oxylabs" + ], + "run_params_schema": { + "properties": { + "query": { + "description": "Amazon product ASIN", + "title": "Query", + "type": "string" + } + }, + "required": [ + "query" + ], + "title": "OxylabsAmazonProductScraperArgs", + "type": "object" + } + }, + { + "description": "Scrape Amazon search results with Oxylabs Amazon Search Scraper", + "env_vars": [ + { + "default": null, + "description": "Username for Oxylabs", + "name": "OXYLABS_USERNAME", + "required": true + }, + { + "default": null, + "description": "Password for Oxylabs", + "name": "OXYLABS_PASSWORD", + "required": true + } + ], + "humanized_name": "Oxylabs Amazon Search Scraper tool", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + }, + "OxylabsAmazonSearchScraperConfig": { + "description": "Amazon Search Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/amazon/search", + "properties": { + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "URL to your callback endpoint.", + "title": "Callback Url" + }, + "context": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional advanced settings and controls for specialized requirements.", + "title": "Context" + }, + "domain": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The domain to limit the search results to.", + "title": "Domain" + }, + "geo_location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Deliver to location.", + "title": "Geo Location" + }, + "pages": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The number of pages to scrape.", + "title": "Pages" + }, + "parse": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "True will return structured data.", + "title": "Parse" + }, + "parsing_instructions": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Instructions for parsing the results.", + "title": "Parsing Instructions" + }, + "render": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Enables JavaScript rendering.", + "title": "Render" + }, + "start_page": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The starting page number.", + "title": "Start Page" + }, + 
"user_agent_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Device type and browser.", + "title": "User Agent Type" + } + }, + "title": "OxylabsAmazonSearchScraperConfig", + "type": "object" + } + }, + "description": "Scrape Amazon search results with OxylabsAmazonSearchScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. See ``OxylabsAmazonSearchScraperConfig``", + "properties": { + "config": { + "$ref": "#/$defs/OxylabsAmazonSearchScraperConfig" + }, + "oxylabs_api": { + "title": "Oxylabs Api" + } + }, + "required": [ + "oxylabs_api", + "config" + ], + "title": "OxylabsAmazonSearchScraperTool", + "type": "object" + }, + "name": "OxylabsAmazonSearchScraperTool", + "package_dependencies": [ + "oxylabs" + ], + "run_params_schema": { + "properties": { + "query": { + "description": "Amazon search term", + "title": "Query", + "type": "string" + } + }, + "required": [ + "query" + ], + "title": "OxylabsAmazonSearchScraperArgs", + "type": "object" + } + }, + { + "description": "Scrape Google Search results with Oxylabs Google Search Scraper", + "env_vars": [ + { + "default": null, + "description": "Username for Oxylabs", + "name": "OXYLABS_USERNAME", + "required": true + }, + { + "default": null, + "description": "Password for Oxylabs", + "name": "OXYLABS_PASSWORD", + "required": true + } + ], + "humanized_name": "Oxylabs Google Search Scraper tool", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + }, + "OxylabsGoogleSearchScraperConfig": { + "description": "Google Search Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/targets/google/search/search", + "properties": { + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "URL to your callback endpoint.", + "title": "Callback Url" + }, + "context": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional advanced settings and controls for specialized requirements.", + "title": "Context" + }, + "domain": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The domain to limit the search results to.", + "title": "Domain" + }, + "geo_location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Deliver to location.", + "title": "Geo Location" + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Number of results to retrieve in each page.", + "title": "Limit" + }, + "pages": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The number of pages to scrape.", + "title": "Pages" + }, + "parse": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + 
"default": null, + "description": "True will return structured data.", + "title": "Parse" + }, + "parsing_instructions": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Instructions for parsing the results.", + "title": "Parsing Instructions" + }, + "render": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Enables JavaScript rendering.", + "title": "Render" + }, + "start_page": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The starting page number.", + "title": "Start Page" + }, + "user_agent_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Device type and browser.", + "title": "User Agent Type" + } + }, + "title": "OxylabsGoogleSearchScraperConfig", + "type": "object" + } + }, + "description": "Scrape Google Search results with OxylabsGoogleSearchScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. See ``OxylabsGoogleSearchScraperConfig``", + "properties": { + "config": { + "$ref": "#/$defs/OxylabsGoogleSearchScraperConfig" + }, + "oxylabs_api": { + "title": "Oxylabs Api" + } + }, + "required": [ + "oxylabs_api", + "config" + ], + "title": "OxylabsGoogleSearchScraperTool", + "type": "object" + }, + "name": "OxylabsGoogleSearchScraperTool", + "package_dependencies": [ + "oxylabs" + ], + "run_params_schema": { + "properties": { + "query": { + "description": "Search query", + "title": "Query", + "type": "string" + } + }, + "required": [ + "query" + ], + "title": "OxylabsGoogleSearchScraperArgs", + "type": "object" + } + }, + { + "description": "Scrape any url with Oxylabs Universal Scraper", + "env_vars": [ + { + "default": null, + "description": "Username for Oxylabs", + "name": "OXYLABS_USERNAME", + "required": true + }, + { + "default": null, + "description": "Password for Oxylabs", + "name": "OXYLABS_PASSWORD", + "required": true + } + ], + "humanized_name": "Oxylabs Universal Scraper tool", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + }, + "OxylabsUniversalScraperConfig": { + "description": "Universal Scraper configuration options:\nhttps://developers.oxylabs.io/scraper-apis/web-scraper-api/other-websites", + "properties": { + "callback_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "URL to your callback endpoint.", + "title": "Callback Url" + }, + "context": { + "anyOf": [ + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Additional advanced settings and controls for specialized requirements.", + "title": "Context" + }, + "geo_location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Deliver to location.", + "title": "Geo Location" + 
}, + "parse": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "default": null, + "description": "True will return structured data.", + "title": "Parse" + }, + "parsing_instructions": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Instructions for parsing the results.", + "title": "Parsing Instructions" + }, + "render": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Enables JavaScript rendering.", + "title": "Render" + }, + "user_agent_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Device type and browser.", + "title": "User Agent Type" + } + }, + "title": "OxylabsUniversalScraperConfig", + "type": "object" + } + }, + "description": "Scrape any website with OxylabsUniversalScraperTool.\n\nGet Oxylabs account:\nhttps://dashboard.oxylabs.io/en\n\nArgs:\n username (str): Oxylabs username.\n password (str): Oxylabs password.\n config: Configuration options. See ``OxylabsUniversalScraperConfig``", + "properties": { + "config": { + "$ref": "#/$defs/OxylabsUniversalScraperConfig" + }, + "oxylabs_api": { + "title": "Oxylabs Api" + } + }, + "required": [ + "oxylabs_api", + "config" + ], + "title": "OxylabsUniversalScraperTool", + "type": "object" + }, + "name": "OxylabsUniversalScraperTool", + "package_dependencies": [ + "oxylabs" + ], + "run_params_schema": { + "properties": { + "url": { + "description": "Website URL", + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "OxylabsUniversalScraperArgs", + "type": "object" + } + }, + { + "description": "A tool that can be used to semantic search a query from a PDF's content.", + "env_vars": [], + "humanized_name": "Search a PDF's content", + "init_params_schema": { + "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" + } + }, + "title": "PDFSearchTool", + "type": "object" + }, + "name": "PDFSearchTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for PDFSearchTool.", + "properties": { + "pdf": { + "description": "Mandatory pdf path you want to search", + "title": "Pdf", + "type": "string" + }, + "query": { + "description": "Mandatory query you want to use to search the PDF's content", + "title": "Query", + "type": "string" + } + }, + "required": [ + "query", + "pdf" + ], + "title": "PDFSearchToolSchema", + "type": "object" + } + }, + { + "description": "A tool that can be used to semantic search a query from a database table's content.", + "env_vars": [], + "humanized_name": "Search a database's table content", + 
"init_params_schema": { + "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "db_uri": { + "description": "Mandatory database URI", + "title": "Db Uri", + "type": "string" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" + } + }, + "required": [ + "db_uri" + ], + "title": "PGSearchTool", + "type": "object" + }, + "name": "PGSearchTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for PGSearchTool.", + "properties": { + "search_query": { + "description": "Mandatory semantic search query you want to use to search the database's content", + "title": "Search Query", + "type": "string" + } + }, + "required": [ + "search_query" + ], + "title": "PGSearchToolSchema", + "type": "object" + } + }, + { + "description": "", + "env_vars": [ + { + "default": null, + "description": "API key for Patronus evaluation services", + "name": "PATRONUS_API_KEY", + "required": true + } + ], + "humanized_name": "Patronus Evaluation Tool", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "properties": { + "criteria": { + "default": [], + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "title": "Criteria", + "type": "array" + }, + "evaluate_url": { + "default": "https://api.patronus.ai/v1/evaluate", + "title": "Evaluate Url", + "type": "string" + }, + "evaluators": { + "default": [], + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "title": "Evaluators", + "type": "array" + } + }, + "title": "PatronusEvalTool", + "type": "object" + }, + "name": "PatronusEvalTool", + "package_dependencies": [], + "run_params_schema": {} + }, + { + "description": "This tool calls the Patronus Evaluation API that takes the following arguments:", + "env_vars": [], + "humanized_name": "Call Patronus API tool for evaluation of model inputs and outputs", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + 
"title": "EnvVar", + "type": "object" + } + }, + "description": "PatronusEvalTool is a tool to automatically evaluate and score agent interactions.\n\nResults are logged to the Patronus platform at app.patronus.ai", + "properties": { + "evaluate_url": { + "default": "https://api.patronus.ai/v1/evaluate", + "title": "Evaluate Url", + "type": "string" + }, + "evaluators": { + "default": [], + "items": { "additionalProperties": { "type": "string" }, @@ -4373,19 +5577,251 @@ "type": "array" } }, - "title": "PatronusEvalTool", + "title": "PatronusPredefinedCriteriaEvalTool", + "type": "object" + }, + "name": "PatronusPredefinedCriteriaEvalTool", + "package_dependencies": [], + "run_params_schema": { + "properties": { + "evaluated_model_gold_answer": { + "additionalProperties": true, + "description": "The agent's gold answer only if available", + "title": "Evaluated Model Gold Answer", + "type": "object" + }, + "evaluated_model_input": { + "additionalProperties": true, + "description": "The agent's task description in simple text", + "title": "Evaluated Model Input", + "type": "object" + }, + "evaluated_model_output": { + "additionalProperties": true, + "description": "The agent's output of the task", + "title": "Evaluated Model Output", + "type": "object" + }, + "evaluated_model_retrieved_context": { + "additionalProperties": true, + "description": "The agent's context", + "title": "Evaluated Model Retrieved Context", + "type": "object" + }, + "evaluators": { + "description": "List of dictionaries containing the evaluator and criteria to evaluate the model input and output. An example input for this field: [{'evaluator': '[evaluator-from-user]', 'criteria': '[criteria-from-user]'}]", + "items": { + "additionalProperties": { + "type": "string" + }, + "type": "object" + }, + "title": "Evaluators", + "type": "array" + } + }, + "required": [ + "evaluated_model_input", + "evaluated_model_output", + "evaluated_model_retrieved_context", + "evaluated_model_gold_answer", + "evaluators" + ], + "title": "FixedBaseToolSchema", + "type": "object" + } + }, + { + "description": "A tool to search the Qdrant database for relevant information on internal documents.", + "env_vars": [], + "humanized_name": "QdrantVectorSearchTool", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "description": "Tool to query and filter results from a Qdrant database.\n\nThis tool enables vector similarity search on internal documents stored in Qdrant,\nwith optional filtering capabilities.\n\nAttributes:\n client: Configured QdrantClient instance\n collection_name: Name of the Qdrant collection to search\n limit: Maximum number of results to return\n score_threshold: Minimum similarity score threshold\n qdrant_url: Qdrant server URL\n qdrant_api_key: Authentication key for Qdrant", + "properties": { + "collection_name": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Collection Name" + }, + "filter_by": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Filter By" + }, 
+ "filter_value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Filter Value" + }, + "limit": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 3, + "title": "Limit" + }, + "qdrant_api_key": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The API key for the Qdrant server", + "title": "Qdrant Api Key" + }, + "qdrant_url": { + "description": "The URL of the Qdrant server", + "title": "Qdrant Url", + "type": "string" + }, + "query": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Query" + }, + "score_threshold": { + "default": 0.35, + "title": "Score Threshold", + "type": "number" + } + }, + "required": [ + "qdrant_url" + ], + "title": "QdrantVectorSearchTool", + "type": "object" + }, + "name": "QdrantVectorSearchTool", + "package_dependencies": [ + "qdrant-client" + ], + "run_params_schema": { + "description": "Input for QdrantTool.", + "properties": { + "filter_by": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Filter by properties. Pass only the properties, not the question.", + "title": "Filter By" + }, + "filter_value": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Filter by value. Pass only the value, not the question.", + "title": "Filter Value" + }, + "query": { + "description": "The query to search retrieve relevant information from the Qdrant database. Pass only the query, not the question.", + "title": "Query", + "type": "string" + } + }, + "required": [ + "query" + ], + "title": "QdrantToolSchema", "type": "object" - }, - "name": "PatronusEvalTool", - "package_dependencies": [], - "run_params_schema": {} + } }, { - "description": "This tool calls the Patronus Evaluation API that takes the following arguments:", + "description": "A knowledge base that can be used to answer questions.", "env_vars": [], - "humanized_name": "Call Patronus API tool for evaluation of model inputs and outputs", + "humanized_name": "Knowledge base", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -4422,83 +5858,40 @@ "type": "object" } }, - "description": "PatronusEvalTool is a tool to automatically evaluate and score agent interactions.\n\nResults are logged to the Patronus platform at app.patronus.ai", "properties": { - "evaluate_url": { - "default": "https://api.patronus.ai/v1/evaluate", - "title": "Evaluate Url", - "type": "string" + "adapter": { + "$ref": "#/$defs/Adapter" }, - "evaluators": { - "default": [], - "items": { - "additionalProperties": { - "type": "string" + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" }, - "type": "object" - }, - "title": "Evaluators", - "type": "array" + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "title": "PatronusPredefinedCriteriaEvalTool", + "title": "RagTool", "type": "object" }, - "name": "PatronusPredefinedCriteriaEvalTool", + "name": "RagTool", "package_dependencies": [], - "run_params_schema": { - "properties": { - "evaluated_model_gold_answer": { - "additionalProperties": true, - "description": "The agent's gold answer only if available", - "title": "Evaluated Model 
Gold Answer", - "type": "object" - }, - "evaluated_model_input": { - "additionalProperties": true, - "description": "The agent's task description in simple text", - "title": "Evaluated Model Input", - "type": "object" - }, - "evaluated_model_output": { - "additionalProperties": true, - "description": "The agent's output of the task", - "title": "Evaluated Model Output", - "type": "object" - }, - "evaluated_model_retrieved_context": { - "additionalProperties": true, - "description": "The agent's context", - "title": "Evaluated Model Retrieved Context", - "type": "object" - }, - "evaluators": { - "description": "List of dictionaries containing the evaluator and criteria to evaluate the model input and output. An example input for this field: [{'evaluator': '[evaluator-from-user]', 'criteria': '[criteria-from-user]'}]", - "items": { - "additionalProperties": { - "type": "string" - }, - "type": "object" - }, - "title": "Evaluators", - "type": "array" - } - }, - "required": [ - "evaluated_model_input", - "evaluated_model_output", - "evaluated_model_retrieved_context", - "evaluated_model_gold_answer", - "evaluators" - ], - "title": "FixedBaseToolSchema", - "type": "object" - } + "run_params_schema": {} }, { - "description": "A tool to search the Qdrant database for relevant information on internal documents.", + "description": "A tool that can be used to read a website content.", "env_vars": [], - "humanized_name": "QdrantVectorSearchTool", + "humanized_name": "Read a website content", "init_params_schema": { "$defs": { "EnvVar": { @@ -4537,21 +5930,21 @@ "type": "object" } }, - "description": "Tool to query and filter results from a Qdrant database.\n\nThis tool enables vector similarity search on internal documents stored in Qdrant,\nwith optional filtering capabilities.\n\nAttributes:\n client: Configured QdrantClient instance\n collection_name: Name of the Qdrant collection to search\n limit: Maximum number of results to return\n score_threshold: Minimum similarity score threshold\n qdrant_url: Qdrant server URL\n qdrant_api_key: Authentication key for Qdrant", "properties": { - "collection_name": { + "cookies": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Collection Name" + "title": "Cookies" }, - "filter_by": { + "css_element": { "anyOf": [ { "type": "string" @@ -4561,95 +5954,146 @@ } ], "default": null, - "title": "Filter By" + "title": "Css Element" }, - "filter_value": { + "headers": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": null, - "title": "Filter Value" + "default": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "Accept-Encoding": "gzip, deflate, br", + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive", + "Referer": "https://www.google.com/", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36" + }, + "title": "Headers" }, - "limit": { + "website_url": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], - "default": 3, - "title": "Limit" + "default": null, + "title": "Website Url" + } + }, + "title": "ScrapeElementFromWebsiteTool", + "type": "object" + }, + "name": "ScrapeElementFromWebsiteTool", + "package_dependencies": [], + "run_params_schema": 
{ + "description": "Input for ScrapeElementFromWebsiteTool.", + "properties": { + "css_element": { + "description": "Mandatory css reference for element to scrape from the website", + "title": "Css Element", + "type": "string" }, - "qdrant_api_key": { - "anyOf": [ - { + "website_url": { + "description": "Mandatory website url to read the file", + "title": "Website Url", + "type": "string" + } + }, + "required": [ + "website_url", + "css_element" + ], + "title": "ScrapeElementFromWebsiteToolSchema", + "type": "object" + } + }, + { + "description": "A tool that can be used to read a website content.", + "env_vars": [], + "humanized_name": "Read website content", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", "type": "string" }, - { - "type": "null" + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" } + }, + "required": [ + "name", + "description" ], - "default": null, - "description": "The API key for the Qdrant server", - "title": "Qdrant Api Key" - }, - "qdrant_url": { - "description": "The URL of the Qdrant server", - "title": "Qdrant Url", - "type": "string" - }, - "query": { + "title": "EnvVar", + "type": "object" + } + }, + "properties": { + "cookies": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Query" + "title": "Cookies" }, - "score_threshold": { - "default": 0.35, - "title": "Score Threshold", - "type": "number" - } - }, - "required": [ - "qdrant_url" - ], - "title": "QdrantVectorSearchTool", - "type": "object" - }, - "name": "QdrantVectorSearchTool", - "package_dependencies": [ - "qdrant-client" - ], - "run_params_schema": { - "description": "Input for QdrantTool.", - "properties": { - "filter_by": { + "headers": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": null, - "description": "Filter by properties. Pass only the properties, not the question.", - "title": "Filter By" + "default": { + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "Accept-Language": "en-US,en;q=0.9", + "Connection": "keep-alive", + "Referer": "https://www.google.com/", + "Upgrade-Insecure-Requests": "1", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36" + }, + "title": "Headers" }, - "filter_value": { + "website_url": { "anyOf": [ { "type": "string" @@ -4659,33 +6103,43 @@ } ], "default": null, - "description": "Filter by value. Pass only the value, not the question.", - "title": "Filter Value" - }, - "query": { - "description": "The query to search retrieve relevant information from the Qdrant database. 
Pass only the query, not the question.", - "title": "Query", + "title": "Website Url" + } + }, + "title": "ScrapeWebsiteTool", + "type": "object" + }, + "name": "ScrapeWebsiteTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for ScrapeWebsiteTool.", + "properties": { + "website_url": { + "description": "Mandatory website url to read the file", + "title": "Website Url", "type": "string" } }, "required": [ - "query" + "website_url" ], - "title": "QdrantToolSchema", + "title": "ScrapeWebsiteToolSchema", "type": "object" } }, { - "description": "A knowledge base that can be used to answer questions.", - "env_vars": [], - "humanized_name": "Knowledge base", + "description": "A tool that uses Scrapegraph AI to intelligently scrape website content.", + "env_vars": [ + { + "default": null, + "description": "API key for Scrapegraph AI services", + "name": "SCRAPEGRAPH_API_KEY", + "required": false + } + ], + "humanized_name": "Scrapegraph website scraper", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -4722,40 +6176,90 @@ "type": "object" } }, + "description": "A tool that uses Scrapegraph AI to intelligently scrape website content.\n\nRaises:\n ValueError: If API key is missing or URL format is invalid\n RateLimitError: If API rate limits are exceeded\n RuntimeError: If scraping operation fails", "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" - }, - "config": { + "api_key": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Config" + "title": "Api Key" }, - "summarize": { + "enable_logging": { "default": false, - "title": "Summarize", + "title": "Enable Logging", "type": "boolean" + }, + "user_prompt": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "User Prompt" + }, + "website_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Website Url" } }, - "title": "RagTool", + "title": "ScrapegraphScrapeTool", "type": "object" }, - "name": "RagTool", - "package_dependencies": [], - "run_params_schema": {} + "name": "ScrapegraphScrapeTool", + "package_dependencies": [ + "scrapegraph-py" + ], + "run_params_schema": { + "description": "Input for ScrapegraphScrapeTool.", + "properties": { + "user_prompt": { + "default": "Extract the main content of the webpage", + "description": "Prompt to guide the extraction of content", + "title": "User Prompt", + "type": "string" + }, + "website_url": { + "description": "Mandatory website url to scrape", + "title": "Website Url", + "type": "string" + } + }, + "required": [ + "website_url" + ], + "title": "ScrapegraphScrapeToolSchema", + "type": "object" + } }, { - "description": "A tool that can be used to read a website content.", - "env_vars": [], - "humanized_name": "Read a website content", + "description": "Scrape a webpage url using Scrapfly and return its content as markdown or text", + "env_vars": [ + { + "default": null, + "description": "API key for Scrapfly", + "name": "SCRAPFLY_API_KEY", + "required": true + } + ], + "humanized_name": "Scrapfly web scraping API tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -4795,32 +6299,45 @@ } }, "properties": { - "cookies": { + "api_key": { + "default": null, + "title": "Api Key", + "type": "string" + }, + "scrapfly": { "anyOf": [ - { - 
"additionalProperties": true, - "type": "object" - }, + {}, { "type": "null" } ], "default": null, - "title": "Cookies" - }, - "css_element": { + "title": "Scrapfly" + } + }, + "title": "ScrapflyScrapeWebsiteTool", + "type": "object" + }, + "name": "ScrapflyScrapeWebsiteTool", + "package_dependencies": [ + "scrapfly-sdk" + ], + "run_params_schema": { + "properties": { + "ignore_scrape_failures": { "anyOf": [ { - "type": "string" + "type": "boolean" }, { "type": "null" } ], "default": null, - "title": "Css Element" + "description": "whether to ignore failures", + "title": "Ignore Scrape Failures" }, - "headers": { + "scrape_config": { "anyOf": [ { "additionalProperties": true, @@ -4830,61 +6347,45 @@ "type": "null" } ], - "default": { - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", - "Accept-Encoding": "gzip, deflate, br", - "Accept-Language": "en-US,en;q=0.9", - "Connection": "keep-alive", - "Referer": "https://www.google.com/", - "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36" - }, - "title": "Headers" + "default": null, + "description": "Scrapfly request scrape config", + "title": "Scrape Config" }, - "website_url": { + "scrape_format": { "anyOf": [ { + "enum": [ + "raw", + "markdown", + "text" + ], "type": "string" }, { "type": "null" } ], - "default": null, - "title": "Website Url" - } - }, - "title": "ScrapeElementFromWebsiteTool", - "type": "object" - }, - "name": "ScrapeElementFromWebsiteTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input for ScrapeElementFromWebsiteTool.", - "properties": { - "css_element": { - "description": "Mandatory css reference for element to scrape from the website", - "title": "Css Element", - "type": "string" + "default": "markdown", + "description": "Webpage extraction format", + "title": "Scrape Format" }, - "website_url": { - "description": "Mandatory website url to read the file", - "title": "Website Url", + "url": { + "description": "Webpage URL", + "title": "Url", "type": "string" } }, "required": [ - "website_url", - "css_element" + "url" ], - "title": "ScrapeElementFromWebsiteToolSchema", + "title": "ScrapflyScrapeWebsiteToolSchema", "type": "object" } }, { "description": "A tool that can be used to read a website content.", "env_vars": [], - "humanized_name": "Read website content", + "humanized_name": "Read a website content", "init_params_schema": { "$defs": { "EnvVar": { @@ -4924,38 +6425,64 @@ } }, "properties": { - "cookies": { + "cookie": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Cookie" + }, + "css_element": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Css Element" + }, + "driver": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Driver" + }, + "return_html": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "boolean" }, { "type": "null" } ], - "default": null, - "title": "Cookies" + "default": false, + "title": "Return Html" }, - "headers": { + "wait_time": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "integer" }, { "type": "null" } ], - "default": { - "Accept": 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", - "Accept-Language": "en-US,en;q=0.9", - "Connection": "keep-alive", - "Referer": "https://www.google.com/", - "Upgrade-Insecure-Requests": "1", - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36" - }, - "title": "Headers" + "default": 3, + "title": "Wait Time" }, "website_url": { "anyOf": [ @@ -4970,38 +6497,47 @@ "title": "Website Url" } }, - "title": "ScrapeWebsiteTool", + "title": "SeleniumScrapingTool", "type": "object" }, - "name": "ScrapeWebsiteTool", - "package_dependencies": [], + "name": "SeleniumScrapingTool", + "package_dependencies": [ + "selenium", + "webdriver-manager" + ], "run_params_schema": { - "description": "Input for ScrapeWebsiteTool.", + "description": "Input for SeleniumScrapingTool.", "properties": { + "css_element": { + "description": "Mandatory css reference for element to scrape from the website", + "title": "Css Element", + "type": "string" + }, "website_url": { - "description": "Mandatory website url to read the file", + "description": "Mandatory website url to read the file. Must start with http:// or https://", "title": "Website Url", "type": "string" } }, "required": [ - "website_url" + "website_url", + "css_element" ], - "title": "ScrapeWebsiteToolSchema", + "title": "SeleniumScrapingToolSchema", "type": "object" } }, { - "description": "A tool that uses Scrapegraph AI to intelligently scrape website content.", + "description": "A tool to perform to perform a Google search with a search_query.", "env_vars": [ { "default": null, - "description": "API key for Scrapegraph AI services", - "name": "SCRAPEGRAPH_API_KEY", - "required": false + "description": "API key for SerpApi searches", + "name": "SERPAPI_API_KEY", + "required": true } ], - "humanized_name": "Scrapegraph website scraper", + "humanized_name": "Google Search", "init_params_schema": { "$defs": { "EnvVar": { @@ -5040,38 +6576,29 @@ "type": "object" } }, - "description": "A tool that uses Scrapegraph AI to intelligently scrape website content.\n\nRaises:\n ValueError: If API key is missing or URL format is invalid\n RateLimitError: If API rate limits are exceeded\n RuntimeError: If scraping operation fails", "properties": { - "api_key": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Api Key" - }, - "enable_logging": { - "default": false, - "title": "Enable Logging", - "type": "boolean" - }, - "user_prompt": { + "client": { "anyOf": [ - { - "type": "string" - }, + {}, { "type": "null" } ], "default": null, - "title": "User Prompt" - }, - "website_url": { + "title": "Client" + } + }, + "title": "SerpApiGoogleSearchTool", + "type": "object" + }, + "name": "SerpApiGoogleSearchTool", + "package_dependencies": [ + "serpapi" + ], + "run_params_schema": { + "description": "Input for Google Search.", + "properties": { + "location": { "anyOf": [ { "type": "string" @@ -5081,49 +6608,33 @@ } ], "default": null, - "title": "Website Url" - } - }, - "title": "ScrapegraphScrapeTool", - "type": "object" - }, - "name": "ScrapegraphScrapeTool", - "package_dependencies": [ - "scrapegraph-py" - ], - "run_params_schema": { - "description": "Input for ScrapegraphScrapeTool.", - "properties": { - "user_prompt": { - "default": "Extract the main content of the webpage", - "description": "Prompt to guide the extraction of content", - "title": "User 
Prompt", - "type": "string" + "description": "Location you want the search to be performed in.", + "title": "Location" }, - "website_url": { - "description": "Mandatory website url to scrape", - "title": "Website Url", + "search_query": { + "description": "Mandatory search query you want to use to Google search.", + "title": "Search Query", "type": "string" } }, "required": [ - "website_url" + "search_query" ], - "title": "ScrapegraphScrapeToolSchema", + "title": "SerpApiGoogleSearchToolSchema", "type": "object" } }, { - "description": "Scrape a webpage url using Scrapfly and return its content as markdown or text", + "description": "A tool to perform search on Google shopping with a search_query.", "env_vars": [ { "default": null, - "description": "API key for Scrapfly", - "name": "SCRAPFLY_API_KEY", + "description": "API key for SerpApi searches", + "name": "SERPAPI_API_KEY", "required": true } ], - "humanized_name": "Scrapfly web scraping API tool", + "humanized_name": "Google Shopping", "init_params_schema": { "$defs": { "EnvVar": { @@ -5163,12 +6674,7 @@ } }, "properties": { - "api_key": { - "default": null, - "title": "Api Key", - "type": "string" - }, - "scrapfly": { + "client": { "anyOf": [ {}, { @@ -5176,80 +6682,56 @@ } ], "default": null, - "title": "Scrapfly" + "title": "Client" } }, - "title": "ScrapflyScrapeWebsiteTool", + "title": "SerpApiGoogleShoppingTool", "type": "object" }, - "name": "ScrapflyScrapeWebsiteTool", + "name": "SerpApiGoogleShoppingTool", "package_dependencies": [ - "scrapfly-sdk" + "serpapi" ], "run_params_schema": { + "description": "Input for Google Shopping.", "properties": { - "ignore_scrape_failures": { - "anyOf": [ - { - "type": "boolean" - }, - { - "type": "null" - } - ], - "default": null, - "description": "whether to ignore failures", - "title": "Ignore Scrape Failures" - }, - "scrape_config": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Scrapfly request scrape config", - "title": "Scrape Config" - }, - "scrape_format": { + "location": { "anyOf": [ { - "enum": [ - "raw", - "markdown", - "text" - ], "type": "string" }, { "type": "null" } ], - "default": "markdown", - "description": "Webpage extraction format", - "title": "Scrape Format" + "default": null, + "description": "Location you want the search to be performed in.", + "title": "Location" }, - "url": { - "description": "Webpage URL", - "title": "Url", + "search_query": { + "description": "Mandatory search query you want to use to Google shopping.", + "title": "Search Query", "type": "string" } }, "required": [ - "url" + "search_query" ], - "title": "ScrapflyScrapeWebsiteToolSchema", + "title": "SerpApiGoogleShoppingToolSchema", "type": "object" } }, { - "description": "A tool that can be used to read a website content.", - "env_vars": [], - "humanized_name": "Read a website content", + "description": "A tool that can be used to search the internet with a search_query. 
Supports different search types: 'search' (default), 'news'", + "env_vars": [ + { + "default": null, + "description": "API key for Serper", + "name": "SERPER_API_KEY", + "required": true + } + ], + "humanized_name": "Search the internet with Serper", "init_params_schema": { "$defs": { "EnvVar": { @@ -5289,66 +6771,36 @@ } }, "properties": { - "cookie": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Cookie" - }, - "css_element": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Css Element" - }, - "driver": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "default": null, - "title": "Driver" + "base_url": { + "default": "https://google.serper.dev", + "title": "Base Url", + "type": "string" }, - "return_html": { + "country": { "anyOf": [ { - "type": "boolean" + "type": "string" }, { "type": "null" } ], - "default": false, - "title": "Return Html" + "default": "", + "title": "Country" }, - "wait_time": { + "locale": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], - "default": 3, - "title": "Wait Time" + "default": "", + "title": "Locale" }, - "website_url": { + "location": { "anyOf": [ { "type": "string" @@ -5357,51 +6809,57 @@ "type": "null" } ], - "default": null, - "title": "Website Url" + "default": "", + "title": "Location" + }, + "n_results": { + "default": 10, + "title": "N Results", + "type": "integer" + }, + "save_file": { + "default": false, + "title": "Save File", + "type": "boolean" + }, + "search_type": { + "default": "search", + "title": "Search Type", + "type": "string" } }, - "title": "SeleniumScrapingTool", + "title": "SerperDevTool", "type": "object" }, - "name": "SeleniumScrapingTool", - "package_dependencies": [ - "selenium", - "webdriver-manager" - ], + "name": "SerperDevTool", + "package_dependencies": [], "run_params_schema": { - "description": "Input for SeleniumScrapingTool.", + "description": "Input for SerperDevTool.", "properties": { - "css_element": { - "description": "Mandatory css reference for element to scrape from the website", - "title": "Css Element", - "type": "string" - }, - "website_url": { - "description": "Mandatory website url to read the file. Must start with http:// or https://", - "title": "Website Url", + "search_query": { + "description": "Mandatory search query you want to use to search the internet", + "title": "Search Query", "type": "string" } }, "required": [ - "website_url", - "css_element" + "search_query" ], - "title": "SeleniumScrapingToolSchema", + "title": "SerperDevToolSchema", "type": "object" } }, { - "description": "A tool to perform to perform a Google search with a search_query.", + "description": "Scrapes website content using Serper's scraping API. 
This tool can extract clean, readable content from any website URL, optionally including markdown formatting for better structure.", "env_vars": [ { "default": null, - "description": "API key for SerpApi searches", - "name": "SERPAPI_API_KEY", + "description": "API key for Serper", + "name": "SERPER_API_KEY", "required": true } ], - "humanized_name": "Google Search", + "humanized_name": "serper_scrape_website", "init_params_schema": { "$defs": { "EnvVar": { @@ -5440,67 +6898,52 @@ "type": "object" } }, - "properties": { - "client": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "default": null, - "title": "Client" - } - }, - "title": "SerpApiGoogleSearchTool", + "properties": {}, + "title": "SerperScrapeWebsiteTool", "type": "object" }, - "name": "SerpApiGoogleSearchTool", - "package_dependencies": [ - "serpapi" - ], + "name": "SerperScrapeWebsiteTool", + "package_dependencies": [], "run_params_schema": { - "description": "Input for Google Search.", + "description": "Input schema for SerperScrapeWebsite.", "properties": { - "location": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Location you want the search to be performed in.", - "title": "Location" + "include_markdown": { + "default": true, + "description": "Whether to include markdown formatting in the scraped content", + "title": "Include Markdown", + "type": "boolean" }, - "search_query": { - "description": "Mandatory search query you want to use to Google search.", - "title": "Search Query", + "url": { + "description": "The URL of the website to scrape", + "title": "Url", "type": "string" } }, "required": [ - "search_query" + "url" ], - "title": "SerpApiGoogleSearchToolSchema", + "title": "SerperScrapeWebsiteInput", "type": "object" } }, { - "description": "A tool to perform search on Google shopping with a search_query.", + "description": "A tool to perform to perform a job search in the US with a search_query.", "env_vars": [ { "default": null, - "description": "API key for SerpApi searches", - "name": "SERPAPI_API_KEY", + "description": "API key for Serply services", + "name": "SERPLY_API_KEY", "required": true } ], - "humanized_name": "Google Shopping", + "humanized_name": "Job Search", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -5538,28 +6981,36 @@ } }, "properties": { - "client": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, + "config": { "anyOf": [ - {}, + { + "additionalProperties": true, + "type": "object" + }, { "type": "null" } ], "default": null, - "title": "Client" - } - }, - "title": "SerpApiGoogleShoppingTool", - "type": "object" - }, - "name": "SerpApiGoogleShoppingTool", - "package_dependencies": [ - "serpapi" - ], - "run_params_schema": { - "description": "Input for Google Shopping.", - "properties": { - "location": { + "title": "Config" + }, + "headers": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": {}, + "title": "Headers" + }, + "proxy_location": { "anyOf": [ { "type": "string" @@ -5568,12 +7019,30 @@ "type": "null" } ], - "default": null, - "description": "Location you want the search to be performed in.", - "title": "Location" + "default": "US", + "title": "Proxy Location" + }, + "request_url": { + "default": "https://api.serply.io/v1/job/search/", + "title": "Request Url", + "type": "string" }, + "summarize": { + "default": false, + "title": 
"Summarize", + "type": "boolean" + } + }, + "title": "SerplyJobSearchTool", + "type": "object" + }, + "name": "SerplyJobSearchTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for Job Search.", + "properties": { "search_query": { - "description": "Mandatory search query you want to use to Google shopping.", + "description": "Mandatory search query you want to use to fetch jobs postings.", "title": "Search Query", "type": "string" } @@ -5581,21 +7050,21 @@ "required": [ "search_query" ], - "title": "SerpApiGoogleShoppingToolSchema", + "title": "SerplyJobSearchToolSchema", "type": "object" } }, { - "description": "A tool that can be used to search the internet with a search_query. Supports different search types: 'search' (default), 'news'", + "description": "A tool to perform News article search with a search_query.", "env_vars": [ { "default": null, - "description": "API key for Serper", - "name": "SERPER_API_KEY", + "description": "API key for Serply services", + "name": "SERPLY_API_KEY", "required": true } ], - "humanized_name": "Search the internet with Serper", + "humanized_name": "News Search", "init_params_schema": { "$defs": { "EnvVar": { @@ -5635,36 +7104,32 @@ } }, "properties": { - "base_url": { - "default": "https://google.serper.dev", - "title": "Base Url", - "type": "string" - }, - "country": { + "headers": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": "", - "title": "Country" + "default": {}, + "title": "Headers" }, - "locale": { + "limit": { "anyOf": [ { - "type": "string" + "type": "integer" }, { "type": "null" } ], - "default": "", - "title": "Locale" + "default": 10, + "title": "Limit" }, - "location": { + "proxy_location": { "anyOf": [ { "type": "string" @@ -5673,35 +7138,25 @@ "type": "null" } ], - "default": "", - "title": "Location" - }, - "n_results": { - "default": 10, - "title": "N Results", - "type": "integer" - }, - "save_file": { - "default": false, - "title": "Save File", - "type": "boolean" + "default": "US", + "title": "Proxy Location" }, - "search_type": { - "default": "search", - "title": "Search Type", + "search_url": { + "default": "https://api.serply.io/v1/news/", + "title": "Search Url", "type": "string" } }, - "title": "SerperDevTool", + "title": "SerplyNewsSearchTool", "type": "object" }, - "name": "SerperDevTool", + "name": "SerplyNewsSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for SerperDevTool.", + "description": "Input for Serply News Search.", "properties": { "search_query": { - "description": "Mandatory search query you want to use to search the internet", + "description": "Mandatory search query you want to use to fetch news articles", "title": "Search Query", "type": "string" } @@ -5709,12 +7164,12 @@ "required": [ "search_query" ], - "title": "SerperDevToolSchema", + "title": "SerplyNewsSearchToolSchema", "type": "object" } }, { - "description": "A tool to perform to perform a job search in the US with a search_query.", + "description": "A tool to perform scholarly literature search with a search_query.", "env_vars": [ { "default": null, @@ -5723,14 +7178,9 @@ "required": true } ], - "humanized_name": "Job Search", + "humanized_name": "Scholar Search", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -5768,10 +7218,7 @@ } }, "properties": { - "adapter": { - "$ref": 
"#/$defs/Adapter" - }, - "config": { + "headers": { "anyOf": [ { "additionalProperties": true, @@ -5781,21 +7228,20 @@ "type": "null" } ], - "default": null, - "title": "Config" + "default": {}, + "title": "Headers" }, - "headers": { + "hl": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "default": {}, - "title": "Headers" + "default": "us", + "title": "Hl" }, "proxy_location": { "anyOf": [ @@ -5809,27 +7255,22 @@ "default": "US", "title": "Proxy Location" }, - "request_url": { - "default": "https://api.serply.io/v1/job/search/", - "title": "Request Url", + "search_url": { + "default": "https://api.serply.io/v1/scholar/", + "title": "Search Url", "type": "string" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" } }, - "title": "SerplyJobSearchTool", + "title": "SerplyScholarSearchTool", "type": "object" }, - "name": "SerplyJobSearchTool", + "name": "SerplyScholarSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for Job Search.", + "description": "Input for Serply Scholar Search.", "properties": { "search_query": { - "description": "Mandatory search query you want to use to fetch jobs postings.", + "description": "Mandatory search query you want to use to fetch scholarly literature", "title": "Search Query", "type": "string" } @@ -5837,12 +7278,12 @@ "required": [ "search_query" ], - "title": "SerplyJobSearchToolSchema", + "title": "SerplyScholarSearchToolSchema", "type": "object" } }, { - "description": "A tool to perform News article search with a search_query.", + "description": "A tool to perform Google search with a search_query.", "env_vars": [ { "default": null, @@ -5851,7 +7292,7 @@ "required": true } ], - "humanized_name": "News Search", + "humanized_name": "Google Search", "init_params_schema": { "$defs": { "EnvVar": { @@ -5891,6 +7332,18 @@ } }, "properties": { + "device_type": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "desktop", + "title": "Device Type" + }, "headers": { "anyOf": [ { @@ -5904,6 +7357,18 @@ "default": {}, "title": "Headers" }, + "hl": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "us", + "title": "Hl" + }, "limit": { "anyOf": [ { @@ -5928,22 +7393,35 @@ "default": "US", "title": "Proxy Location" }, + "query_payload": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": {}, + "title": "Query Payload" + }, "search_url": { - "default": "https://api.serply.io/v1/news/", + "default": "https://api.serply.io/v1/search/", "title": "Search Url", "type": "string" } }, - "title": "SerplyNewsSearchTool", + "title": "SerplyWebSearchTool", "type": "object" }, - "name": "SerplyNewsSearchTool", + "name": "SerplyWebSearchTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for Serply News Search.", + "description": "Input for Serply Web Search.", "properties": { "search_query": { - "description": "Mandatory search query you want to use to fetch news articles", + "description": "Mandatory search query you want to use to Google search", "title": "Search Query", "type": "string" } @@ -5951,12 +7429,12 @@ "required": [ "search_query" ], - "title": "SerplyNewsSearchToolSchema", + "title": "SerplyWebSearchToolSchema", "type": "object" } }, { - "description": "A tool to perform scholarly literature search with a search_query.", + "description": "A tool to perform convert a 
webpage to markdown to make it easier for LLMs to understand", "env_vars": [ { "default": null, @@ -5965,9 +7443,14 @@ "required": true } ], - "humanized_name": "Scholar Search", + "humanized_name": "Webpage to Markdown", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -6005,7 +7488,10 @@ } }, "properties": { - "headers": { + "adapter": { + "$ref": "#/$defs/Adapter" + }, + "config": { "anyOf": [ { "additionalProperties": true, @@ -6015,20 +7501,21 @@ "type": "null" } ], - "default": {}, - "title": "Headers" + "default": null, + "title": "Config" }, - "hl": { + "headers": { "anyOf": [ { - "type": "string" + "additionalProperties": true, + "type": "object" }, { "type": "null" } ], - "default": "us", - "title": "Hl" + "default": {}, + "title": "Headers" }, "proxy_location": { "anyOf": [ @@ -6042,22 +7529,173 @@ "default": "US", "title": "Proxy Location" }, - "search_url": { - "default": "https://api.serply.io/v1/scholar/", - "title": "Search Url", + "request_url": { + "default": "https://api.serply.io/v1/request", + "title": "Request Url", "type": "string" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" } }, - "title": "SerplyScholarSearchTool", + "title": "SerplyWebpageToMarkdownTool", "type": "object" }, - "name": "SerplyScholarSearchTool", + "name": "SerplyWebpageToMarkdownTool", "package_dependencies": [], "run_params_schema": { - "description": "Input for Serply Scholar Search.", + "description": "Input for Serply Search.", + "properties": { + "url": { + "description": "Mandatory url you want to use to fetch and convert to markdown", + "title": "Url", + "type": "string" + } + }, + "required": [ + "url" + ], + "title": "SerplyWebpageToMarkdownToolSchema", + "type": "object" + } + }, + { + "description": "A tool that can be used to semantic search a query from a database.", + "env_vars": [ + { + "default": null, + "description": "A comprehensive URL string that can encapsulate host, port, username, password, and database information, often used in environments like SingleStore notebooks or specific frameworks. 
For example: 'me:p455w0rd@s2-host.com/my_db'", + "name": "SINGLESTOREDB_URL", + "required": false + }, + { + "default": null, + "description": "Specifies the hostname, IP address, or URL of the SingleStoreDB workspace or cluster", + "name": "SINGLESTOREDB_HOST", + "required": false + }, + { + "default": null, + "description": "Defines the port number on which the SingleStoreDB server is listening", + "name": "SINGLESTOREDB_PORT", + "required": false + }, + { + "default": null, + "description": "Specifies the database user name", + "name": "SINGLESTOREDB_USER", + "required": false + }, + { + "default": null, + "description": "Specifies the database user password", + "name": "SINGLESTOREDB_PASSWORD", + "required": false + }, + { + "default": null, + "description": "Name of the database to connect to", + "name": "SINGLESTOREDB_DATABASE", + "required": false + }, + { + "default": null, + "description": "File containing SSL key", + "name": "SINGLESTOREDB_SSL_KEY", + "required": false + }, + { + "default": null, + "description": "File containing SSL certificate", + "name": "SINGLESTOREDB_SSL_CERT", + "required": false + }, + { + "default": null, + "description": "File containing SSL certificate authority", + "name": "SINGLESTOREDB_SSL_CA", + "required": false + }, + { + "default": null, + "description": "The timeout for connecting to the database in seconds", + "name": "SINGLESTOREDB_CONNECT_TIMEOUT", + "required": false + } + ], + "humanized_name": "Search a database's table(s) content", + "init_params_schema": { + "$defs": { + "EnvVar": { + "properties": { + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" + }, + "description": { + "title": "Description", + "type": "string" + }, + "name": { + "title": "Name", + "type": "string" + }, + "required": { + "default": true, + "title": "Required", + "type": "boolean" + } + }, + "required": [ + "name", + "description" + ], + "title": "EnvVar", + "type": "object" + } + }, + "description": "A tool for performing semantic searches on SingleStore database tables.\n\nThis tool provides a safe interface for executing SELECT and SHOW queries\nagainst a SingleStore database with connection pooling for optimal performance.", + "properties": { + "connection_args": { + "additionalProperties": true, + "default": {}, + "title": "Connection Args", + "type": "object" + }, + "connection_pool": { + "anyOf": [ + {}, + { + "type": "null" + } + ], + "default": null, + "title": "Connection Pool" + } + }, + "title": "SingleStoreSearchTool", + "type": "object" + }, + "name": "SingleStoreSearchTool", + "package_dependencies": [ + "singlestoredb", + "SQLAlchemy" + ], + "run_params_schema": { + "description": "Input schema for SingleStoreSearchTool.\n\nThis schema defines the expected input format for the search tool,\nensuring that only valid SELECT and SHOW queries are accepted.", "properties": { "search_query": { - "description": "Mandatory search query you want to use to fetch scholarly literature", + "description": "Mandatory semantic search query you want to use to search the database's content. 
Only SELECT and SHOW queries are supported.", "title": "Search Query", "type": "string" } @@ -6065,21 +7703,14 @@ "required": [ "search_query" ], - "title": "SerplyScholarSearchToolSchema", + "title": "SingleStoreSearchToolSchema", "type": "object" } }, { - "description": "A tool to perform Google search with a search_query.", - "env_vars": [ - { - "default": null, - "description": "API key for Serply services", - "name": "SERPLY_API_KEY", - "required": true - } - ], - "humanized_name": "Google Search", + "description": "Execute SQL queries or semantic search on Snowflake data warehouse. Supports both raw SQL and natural language queries.", + "env_vars": [], + "humanized_name": "Snowflake Database Search", "init_params_schema": { "$defs": { "EnvVar": { @@ -6116,59 +7747,170 @@ ], "title": "EnvVar", "type": "object" - } - }, - "properties": { - "device_type": { - "anyOf": [ - { + }, + "SnowflakeConfig": { + "description": "Configuration for Snowflake connection.", + "properties": { + "account": { + "description": "Snowflake account identifier", + "pattern": "^[a-zA-Z0-9\\-_]+$", + "title": "Account", "type": "string" }, - { - "type": "null" - } - ], - "default": "desktop", - "title": "Device Type" - }, - "headers": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" + "database": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Default database", + "title": "Database" }, - { - "type": "null" - } - ], - "default": {}, - "title": "Headers" - }, - "hl": { - "anyOf": [ - { + "password": { + "anyOf": [ + { + "format": "password", + "type": "string", + "writeOnly": true + }, + { + "type": "null" + } + ], + "default": null, + "description": "Snowflake password", + "title": "Password" + }, + "private_key_path": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Path to private key file", + "title": "Private Key Path" + }, + "role": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Snowflake role", + "title": "Role" + }, + "session_parameters": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "description": "Session parameters", + "title": "Session Parameters" + }, + "snowflake_schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Default schema", + "title": "Snowflake Schema" + }, + "user": { + "description": "Snowflake username", + "title": "User", "type": "string" }, - { - "type": "null" + "warehouse": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Snowflake warehouse", + "title": "Warehouse" } + }, + "required": [ + "account", + "user" ], - "default": "us", - "title": "Hl" + "title": "SnowflakeConfig", + "type": "object" + } + }, + "description": "Tool for executing queries and semantic search on Snowflake.", + "properties": { + "config": { + "$ref": "#/$defs/SnowflakeConfig", + "description": "Snowflake connection configuration" }, - "limit": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": 10, - "title": "Limit" + "enable_caching": { + "default": true, + "description": "Enable query result caching", + "title": "Enable Caching", + "type": "boolean" }, - "proxy_location": { + "max_retries": { + "default": 3, + "description": "Maximum retry attempts", + "title": "Max Retries", + 
"type": "integer" + }, + "pool_size": { + "default": 5, + "description": "Size of connection pool", + "title": "Pool Size", + "type": "integer" + }, + "retry_delay": { + "default": 1.0, + "description": "Delay between retries in seconds", + "title": "Retry Delay", + "type": "number" + } + }, + "required": [ + "config" + ], + "title": "SnowflakeSearchTool", + "type": "object" + }, + "name": "SnowflakeSearchTool", + "package_dependencies": [ + "snowflake-connector-python", + "snowflake-sqlalchemy", + "cryptography" + ], + "run_params_schema": { + "description": "Input schema for SnowflakeSearchTool.", + "properties": { + "database": { "anyOf": [ { "type": "string" @@ -6177,67 +7919,62 @@ "type": "null" } ], - "default": "US", - "title": "Proxy Location" + "default": null, + "description": "Override default database", + "title": "Database" }, - "query_payload": { + "query": { + "description": "SQL query or semantic search query to execute", + "title": "Query", + "type": "string" + }, + "snowflake_schema": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], - "default": {}, - "title": "Query Payload" + "default": null, + "description": "Override default schema", + "title": "Snowflake Schema" }, - "search_url": { - "default": "https://api.serply.io/v1/search/", - "title": "Search Url", - "type": "string" - } - }, - "title": "SerplyWebSearchTool", - "type": "object" - }, - "name": "SerplyWebSearchTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Input for Serply Web Search.", - "properties": { - "search_query": { - "description": "Mandatory search query you want to use to Google search", - "title": "Search Query", - "type": "string" + "timeout": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": 300, + "description": "Query timeout in seconds", + "title": "Timeout" } }, "required": [ - "search_query" + "query" ], - "title": "SerplyWebSearchToolSchema", + "title": "SnowflakeSearchToolInput", "type": "object" } }, { - "description": "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand", + "description": "A tool to scrape or crawl a website and return LLM-ready content.", "env_vars": [ { "default": null, - "description": "API key for Serply services", - "name": "SERPLY_API_KEY", + "description": "API key for Spider.cloud", + "name": "SPIDER_API_KEY", "required": true } ], - "humanized_name": "Webpage to Markdown", + "humanized_name": "SpiderTool", "init_params_schema": { "$defs": { - "Adapter": { - "properties": {}, - "title": "Adapter", - "type": "object" - }, "EnvVar": { "properties": { "default": { @@ -6272,26 +8009,59 @@ ], "title": "EnvVar", "type": "object" + }, + "SpiderToolConfig": { + "description": "Configuration settings for SpiderTool.\n\nContains all default values and constants used by SpiderTool.\nCentralizes configuration management for easier maintenance.", + "properties": { + "DEFAULT_CRAWL_LIMIT": { + "default": 5, + "title": "Default Crawl Limit", + "type": "integer" + }, + "DEFAULT_REQUEST_MODE": { + "default": "smart", + "title": "Default Request Mode", + "type": "string" + }, + "DEFAULT_RETURN_FORMAT": { + "default": "markdown", + "title": "Default Return Format", + "type": "string" + }, + "FILTER_SVG": { + "default": true, + "title": "Filter Svg", + "type": "boolean" + } + }, + "title": "SpiderToolConfig", + "type": "object" } }, + "description": "Tool for scraping and crawling websites.\nThis tool provides 
functionality to either scrape a single webpage or crawl multiple\npages, returning content in a format suitable for LLM processing.", "properties": { - "adapter": { - "$ref": "#/$defs/Adapter" - }, - "config": { + "api_key": { "anyOf": [ { - "additionalProperties": true, - "type": "object" + "type": "string" }, { "type": "null" } ], "default": null, - "title": "Config" + "title": "Api Key" }, - "headers": { + "config": { + "$ref": "#/$defs/SpiderToolConfig", + "default": { + "DEFAULT_CRAWL_LIMIT": 5, + "DEFAULT_REQUEST_MODE": "smart", + "DEFAULT_RETURN_FORMAT": "markdown", + "FILTER_SVG": true + } + }, + "custom_params": { "anyOf": [ { "additionalProperties": true, @@ -6301,10 +8071,19 @@ "type": "null" } ], - "default": {}, - "title": "Headers" + "default": null, + "title": "Custom Params" }, - "proxy_location": { + "log_failures": { + "default": true, + "title": "Log Failures", + "type": "boolean" + }, + "spider": { + "default": null, + "title": "Spider" + }, + "website_url": { "anyOf": [ { "type": "string" @@ -6313,45 +8092,47 @@ "type": "null" } ], - "default": "US", - "title": "Proxy Location" - }, - "request_url": { - "default": "https://api.serply.io/v1/request", - "title": "Request Url", - "type": "string" - }, - "summarize": { - "default": false, - "title": "Summarize", - "type": "boolean" + "default": null, + "title": "Website Url" } }, - "title": "SerplyWebpageToMarkdownTool", + "title": "SpiderTool", "type": "object" }, - "name": "SerplyWebpageToMarkdownTool", - "package_dependencies": [], + "name": "SpiderTool", + "package_dependencies": [ + "spider-client" + ], "run_params_schema": { - "description": "Input for Serply Search.", + "description": "Input schema for SpiderTool.", "properties": { - "url": { - "description": "Mandatory url you want to use to fetch and convert to markdown", - "title": "Url", + "mode": { + "default": "scrape", + "description": "The mode of the SpiderTool. The only two allowed modes are `scrape` or `crawl`. Crawl mode will follow up to 5 links and return their content in markdown format.", + "enum": [ + "scrape", + "crawl" + ], + "title": "Mode", + "type": "string" + }, + "website_url": { + "description": "Mandatory website URL to scrape or crawl", + "title": "Website Url", "type": "string" } }, "required": [ - "url" + "website_url" ], - "title": "SerplyWebpageToMarkdownToolSchema", + "title": "SpiderToolSchema", "type": "object" } }, { - "description": "Execute SQL queries or semantic search on Snowflake data warehouse. 
Supports both raw SQL and natural language queries.", + "description": "Use this tool to control a web browser and interact with websites using natural language.\n\n Capabilities:\n - Navigate to websites and follow links\n - Click buttons, links, and other elements\n - Fill in forms and input fields\n - Search within websites\n - Extract information from web pages\n - Identify and analyze elements on a page\n\n To use this tool, provide a natural language instruction describing what you want to do.\n For reliability on complex pages, use specific, atomic instructions with location hints:\n - Good: \"Click the search box in the header\"\n - Good: \"Type 'Italy' in the focused field\"\n - Bad: \"Search for Italy and click the first result\"\n\n For different types of tasks, specify the command_type:\n - 'act': For performing one atomic action (default)\n - 'navigate': For navigating to a URL\n - 'extract': For getting data from a specific page section\n - 'observe': For finding elements in a specific area", "env_vars": [], - "humanized_name": "Snowflake Database Search", + "humanized_name": "Web Automation Tool", "init_params_schema": { "$defs": { "EnvVar": { @@ -6388,170 +8169,103 @@ ], "title": "EnvVar", "type": "object" - }, - "SnowflakeConfig": { - "description": "Configuration for Snowflake connection.", - "properties": { - "account": { - "description": "Snowflake account identifier", - "pattern": "^[a-zA-Z0-9\\-_]+$", - "title": "Account", + } + }, + "description": "A tool that uses Stagehand to automate web browser interactions using natural language with atomic action handling.\n\nStagehand allows AI agents to interact with websites through a browser,\nperforming actions like clicking buttons, filling forms, and extracting data.\n\nThe tool supports four main command types:\n1. act - Perform actions like clicking, typing, scrolling, or navigating\n2. navigate - Specifically navigate to a URL (shorthand for act with navigation)\n3. extract - Extract structured data from web pages\n4. observe - Identify and analyze elements on a page\n\nUsage examples:\n- Navigate to a website: instruction=\"Go to the homepage\", url=\"https://example.com\"\n- Click a button: instruction=\"Click the login button\"\n- Fill a form: instruction=\"Fill the login form with username 'user' and password 'pass'\"\n- Extract data: instruction=\"Extract all product prices and names\", command_type=\"extract\"\n- Observe elements: instruction=\"Find all navigation menu items\", command_type=\"observe\"\n- Complex tasks: instruction=\"Step 1: Navigate to https://example.com; Step 2: Scroll down to the 'Features' section; Step 3: Click 'Learn More'\", command_type=\"act\"\n\nExample of breaking down \"Search for OpenAI\" into multiple steps:\n1. First navigation: instruction=\"Go to Google\", url=\"https://google.com\", command_type=\"navigate\"\n2. Enter search term: instruction=\"Type 'OpenAI' in the search box\", command_type=\"act\"\n3. Submit search: instruction=\"Press the Enter key or click the search button\", command_type=\"act\"\n4. 
Click on result: instruction=\"Click on the OpenAI website link in the search results\", command_type=\"act\"", + "properties": { + "api_key": { + "anyOf": [ + { "type": "string" }, - "database": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Default database", - "title": "Database" - }, - "password": { - "anyOf": [ - { - "format": "password", - "type": "string", - "writeOnly": true - }, - { - "type": "null" - } - ], - "default": null, - "description": "Snowflake password", - "title": "Password" - }, - "private_key_path": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Path to private key file", - "title": "Private Key Path" - }, - "role": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Snowflake role", - "title": "Role" - }, - "session_parameters": { - "anyOf": [ - { - "additionalProperties": true, - "type": "object" - }, - { - "type": "null" - } - ], - "description": "Session parameters", - "title": "Session Parameters" - }, - "snowflake_schema": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Default schema", - "title": "Snowflake Schema" + { + "type": "null" + } + ], + "default": null, + "title": "Api Key" + }, + "dom_settle_timeout_ms": { + "default": 3000, + "title": "Dom Settle Timeout Ms", + "type": "integer" + }, + "headless": { + "default": false, + "title": "Headless", + "type": "boolean" + }, + "max_retries_on_token_limit": { + "default": 3, + "title": "Max Retries On Token Limit", + "type": "integer" + }, + "model_api_key": { + "anyOf": [ + { + "type": "string" }, - "user": { - "description": "Snowflake username", - "title": "User", + { + "type": "null" + } + ], + "default": null, + "title": "Model Api Key" + }, + "project_id": { + "anyOf": [ + { "type": "string" }, - "warehouse": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Snowflake warehouse", - "title": "Warehouse" + { + "type": "null" } - }, - "required": [ - "account", - "user" ], - "title": "SnowflakeConfig", - "type": "object" - } - }, - "description": "Tool for executing queries and semantic search on Snowflake.", - "properties": { - "config": { - "$ref": "#/$defs/SnowflakeConfig", - "description": "Snowflake connection configuration" + "default": null, + "title": "Project Id" }, - "enable_caching": { + "self_heal": { "default": true, - "description": "Enable query result caching", - "title": "Enable Caching", + "title": "Self Heal", "type": "boolean" }, - "max_retries": { - "default": 3, - "description": "Maximum retry attempts", - "title": "Max Retries", - "type": "integer" + "server_url": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": "https://api.stagehand.browserbase.com/v1", + "title": "Server Url" }, - "pool_size": { - "default": 5, - "description": "Size of connection pool", - "title": "Pool Size", + "use_simplified_dom": { + "default": true, + "title": "Use Simplified Dom", + "type": "boolean" + }, + "verbose": { + "default": 1, + "title": "Verbose", "type": "integer" }, - "retry_delay": { - "default": 1.0, - "description": "Delay between retries in seconds", - "title": "Retry Delay", - "type": "number" + "wait_for_captcha_solves": { + "default": true, + "title": "Wait For Captcha Solves", + "type": "boolean" } }, - "required": [ - "config" - ], - "title": 
"SnowflakeSearchTool", + "title": "StagehandTool", "type": "object" }, - "name": "SnowflakeSearchTool", - "package_dependencies": [ - "snowflake-connector-python", - "snowflake-sqlalchemy", - "cryptography" - ], + "name": "StagehandTool", + "package_dependencies": [], "run_params_schema": { - "description": "Input schema for SnowflakeSearchTool.", + "description": "Input for StagehandTool.", "properties": { - "database": { + "command_type": { "anyOf": [ { "type": "string" @@ -6560,16 +8274,11 @@ "type": "null" } ], - "default": null, - "description": "Override default database", - "title": "Database" - }, - "query": { - "description": "SQL query or semantic search query to execute", - "title": "Query", - "type": "string" + "default": "act", + "description": "The type of command to execute (choose one):\n - 'act': Perform an action like clicking buttons, filling forms, etc. (default)\n - 'navigate': Specifically navigate to a URL\n - 'extract': Extract structured data from the page\n - 'observe': Identify and analyze elements on the page\n ", + "title": "Command Type" }, - "snowflake_schema": { + "instruction": { "anyOf": [ { "type": "string" @@ -6579,43 +8288,38 @@ } ], "default": null, - "description": "Override default schema", - "title": "Snowflake Schema" + "description": "Single atomic action with location context. For reliability on complex pages, use ONE specific action with location hints. Good examples: 'Click the search input field in the header', 'Type Italy in the focused field', 'Press Enter', 'Click the first link in the results area'. Avoid combining multiple actions. For 'navigate' command type, this can be omitted if only URL is provided.", + "title": "Instruction" }, - "timeout": { + "url": { "anyOf": [ { - "type": "integer" + "type": "string" }, { "type": "null" } ], - "default": 300, - "description": "Query timeout in seconds", - "title": "Timeout" + "default": null, + "description": "The URL to navigate to before executing the instruction. MUST be used with 'navigate' command. 
", + "title": "Url" } }, - "required": [ - "query" - ], - "title": "SnowflakeSearchToolInput", + "title": "StagehandToolSchema", "type": "object" } }, { - "description": "A tool to scrape or crawl a website and return LLM-ready content.", - "env_vars": [ - { - "default": null, - "description": "API key for Spider.cloud", - "name": "SPIDER_API_KEY", - "required": true - } - ], - "humanized_name": "SpiderTool", + "description": "A tool that can be used to semantic search a query from a txt's content.", + "env_vars": [], + "humanized_name": "Search a txt's content", "init_params_schema": { "$defs": { + "Adapter": { + "properties": {}, + "title": "Adapter", + "type": "object" + }, "EnvVar": { "properties": { "default": { @@ -6650,36 +8354,107 @@ ], "title": "EnvVar", "type": "object" + } + }, + "properties": { + "adapter": { + "$ref": "#/$defs/Adapter" }, - "SpiderToolConfig": { - "description": "Configuration settings for SpiderTool.\n\nContains all default values and constants used by SpiderTool.\nCentralizes configuration management for easier maintenance.", + "config": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Config" + }, + "summarize": { + "default": false, + "title": "Summarize", + "type": "boolean" + } + }, + "title": "TXTSearchTool", + "type": "object" + }, + "name": "TXTSearchTool", + "package_dependencies": [], + "run_params_schema": { + "description": "Input for TXTSearchTool.", + "properties": { + "search_query": { + "description": "Mandatory search query you want to use to search the txt's content", + "title": "Search Query", + "type": "string" + }, + "txt": { + "description": "Mandatory txt path you want to search", + "title": "Txt", + "type": "string" + } + }, + "required": [ + "search_query", + "txt" + ], + "title": "TXTSearchToolSchema", + "type": "object" + } + }, + { + "description": "Extracts content from one or more web pages using the Tavily API. Returns structured data.", + "env_vars": [ + { + "default": null, + "description": "API key for Tavily extraction service", + "name": "TAVILY_API_KEY", + "required": true + } + ], + "humanized_name": "TavilyExtractorTool", + "init_params_schema": { + "$defs": { + "EnvVar": { "properties": { - "DEFAULT_CRAWL_LIMIT": { - "default": 5, - "title": "Default Crawl Limit", - "type": "integer" + "default": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Default" }, - "DEFAULT_REQUEST_MODE": { - "default": "smart", - "title": "Default Request Mode", + "description": { + "title": "Description", "type": "string" }, - "DEFAULT_RETURN_FORMAT": { - "default": "markdown", - "title": "Default Return Format", + "name": { + "title": "Name", "type": "string" }, - "FILTER_SVG": { + "required": { "default": true, - "title": "Filter Svg", + "title": "Required", "type": "boolean" } }, - "title": "SpiderToolConfig", + "required": [ + "name", + "description" + ], + "title": "EnvVar", "type": "object" } }, - "description": "Tool for scraping and crawling websites.\nThis tool provides functionality to either scrape a single webpage or crawl multiple\npages, returning content in a format suitable for LLM processing.", "properties": { "api_key": { "anyOf": [ @@ -6690,104 +8465,114 @@ "type": "null" } ], - "default": null, + "description": "The Tavily API key. 
If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", "title": "Api Key" }, - "config": { - "$ref": "#/$defs/SpiderToolConfig", - "default": { - "DEFAULT_CRAWL_LIMIT": 5, - "DEFAULT_REQUEST_MODE": "smart", - "DEFAULT_RETURN_FORMAT": "markdown", - "FILTER_SVG": true - } - }, - "custom_params": { + "async_client": { "anyOf": [ + {}, { - "additionalProperties": true, - "type": "object" - }, + "type": "null" + } + ], + "default": null, + "title": "Async Client" + }, + "client": { + "anyOf": [ + {}, { "type": "null" } ], "default": null, - "title": "Custom Params" + "title": "Client" }, - "log_failures": { - "default": true, - "title": "Log Failures", - "type": "boolean" + "extract_depth": { + "default": "basic", + "description": "The depth of extraction. 'basic' for basic extraction, 'advanced' for advanced extraction.", + "enum": [ + "basic", + "advanced" + ], + "title": "Extract Depth", + "type": "string" }, - "spider": { - "default": null, - "title": "Spider" + "include_images": { + "default": false, + "description": "Whether to include images in the extraction.", + "title": "Include Images", + "type": "boolean" }, - "website_url": { + "proxies": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], "default": null, - "title": "Website Url" + "description": "Optional proxies to use for the Tavily API requests.", + "title": "Proxies" + }, + "timeout": { + "default": 60, + "description": "The timeout for the extraction request in seconds.", + "title": "Timeout", + "type": "integer" } }, - "title": "SpiderTool", + "title": "TavilyExtractorTool", "type": "object" }, - "name": "SpiderTool", + "name": "TavilyExtractorTool", "package_dependencies": [ - "spider-client" + "tavily-python" ], "run_params_schema": { - "description": "Input schema for SpiderTool.", + "description": "Input schema for TavilyExtractorTool.", "properties": { - "mode": { - "default": "scrape", - "description": "The mode of the SpiderTool. The only two allowed modes are `scrape` or `crawl`. Crawl mode will follow up to 5 links and return their content in markdown format.", - "enum": [ - "scrape", - "crawl" + "urls": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "string" + } ], - "title": "Mode", - "type": "string" - }, - "website_url": { - "description": "Mandatory website URL to scrape or crawl", - "title": "Website Url", - "type": "string" + "description": "The URL(s) to extract data from. 
Can be a single URL or a list of URLs.", + "title": "Urls" } }, "required": [ - "website_url" + "urls" ], - "title": "SpiderToolSchema", + "title": "TavilyExtractorToolSchema", "type": "object" } }, { - "description": "Use this tool to control a web browser and interact with websites using natural language.\n \n Capabilities:\n - Navigate to websites and follow links\n - Click buttons, links, and other elements\n - Fill in forms and input fields\n - Search within websites\n - Extract information from web pages\n - Identify and analyze elements on a page\n \n To use this tool, provide a natural language instruction describing what you want to do.\n For different types of tasks, specify the command_type:\n - 'act': For performing actions (default)\n - 'navigate': For navigating to a URL (shorthand for act with navigation)\n - 'extract': For getting data from the page\n - 'observe': For finding and analyzing elements", - "env_vars": [], - "humanized_name": "Web Automation Tool", + "description": "A tool that performs web searches using the Tavily Search API. It returns a JSON object containing the search results.", + "env_vars": [ + { + "default": null, + "description": "API key for Tavily search service", + "name": "TAVILY_API_KEY", + "required": true + } + ], + "humanized_name": "Tavily Search", "init_params_schema": { "$defs": { - "AvailableModel": { - "enum": [ - "gpt-4o", - "gpt-4o-mini", - "claude-3-5-sonnet-latest", - "claude-3-7-sonnet-latest", - "computer-use-preview", - "gemini-2.0-flash" - ], - "title": "AvailableModel", - "type": "string" - }, "EnvVar": { "properties": { "default": { @@ -6824,6 +8609,7 @@ "type": "object" } }, + "description": "Tool that uses the Tavily Search API to perform web searches.\n\nAttributes:\n client: An instance of TavilyClient.\n async_client: An instance of AsyncTavilyClient.\n name: The name of the tool.\n description: A description of the tool's purpose.\n args_schema: The schema for the tool's arguments.\n api_key: The Tavily API key.\n proxies: Optional proxies for the API requests.\n search_depth: The depth of the search.\n topic: The topic to focus the search on.\n time_range: The time range for the search.\n days: The number of days to search back.\n max_results: The maximum number of results to return.\n include_domains: A list of domains to include in the search.\n exclude_domains: A list of domains to exclude from the search.\n include_answer: Whether to include a direct answer to the query.\n include_raw_content: Whether to include the raw content of the search results.\n include_images: Whether to include images in the search results.\n timeout: The timeout for the search request in seconds.\n max_content_length_per_result: Maximum length for the 'content' of each search result.", "properties": { "api_key": { "anyOf": [ @@ -6834,113 +8620,143 @@ "type": "null" } ], - "default": null, + "description": "The Tavily API key. 
If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", "title": "Api Key" }, - "dom_settle_timeout_ms": { - "default": 3000, - "title": "Dom Settle Timeout Ms", - "type": "integer" - }, - "headless": { - "default": false, - "title": "Headless", - "type": "boolean" - }, - "model_api_key": { + "async_client": { "anyOf": [ - { - "type": "string" - }, + {}, { "type": "null" } ], "default": null, - "title": "Model Api Key" + "title": "Async Client" }, - "model_name": { + "client": { "anyOf": [ - { - "$ref": "#/$defs/AvailableModel" - }, + {}, { "type": "null" } ], - "default": "claude-3-7-sonnet-latest" + "default": null, + "title": "Client" }, - "project_id": { + "days": { + "default": 7, + "description": "The number of days to search back.", + "title": "Days", + "type": "integer" + }, + "exclude_domains": { "anyOf": [ { - "type": "string" + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "title": "Project Id" - }, - "self_heal": { - "default": true, - "title": "Self Heal", - "type": "boolean" + "description": "A list of domains to exclude from the search.", + "title": "Exclude Domains" }, - "server_url": { + "include_answer": { "anyOf": [ { + "type": "boolean" + }, + { + "enum": [ + "basic", + "advanced" + ], "type": "string" + } + ], + "default": false, + "description": "Whether to include a direct answer to the query.", + "title": "Include Answer" + }, + "include_domains": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" }, { "type": "null" } ], - "default": "http://api.stagehand.browserbase.com/v1", - "title": "Server Url" + "default": null, + "description": "A list of domains to include in the search.", + "title": "Include Domains" }, - "verbose": { - "default": 1, - "title": "Verbose", - "type": "integer" + "include_images": { + "default": false, + "description": "Whether to include images in the search results.", + "title": "Include Images", + "type": "boolean" }, - "wait_for_captcha_solves": { - "default": true, - "title": "Wait For Captcha Solves", + "include_raw_content": { + "default": false, + "description": "Whether to include the raw content of the search results.", + "title": "Include Raw Content", "type": "boolean" - } - }, - "title": "StagehandTool", - "type": "object" - }, - "name": "StagehandTool", - "package_dependencies": [ - "stagehand" - ], - "run_params_schema": { - "description": "Input for StagehandTool.", - "properties": { - "command_type": { + }, + "max_content_length_per_result": { + "default": 1000, + "description": "Maximum length for the 'content' of each search result to avoid context window issues.", + "title": "Max Content Length Per Result", + "type": "integer" + }, + "max_results": { + "default": 5, + "description": "The maximum number of results to return.", + "title": "Max Results", + "type": "integer" + }, + "proxies": { "anyOf": [ { - "type": "string" + "additionalProperties": { + "type": "string" + }, + "type": "object" }, { "type": "null" } ], - "default": "act", - "description": "The type of command to execute (choose one): \n - 'act': Perform an action like clicking buttons, filling forms, etc. 
(default)\n    - 'navigate': Specifically navigate to a URL\n    - 'extract': Extract structured data from the page \n    - 'observe': Identify and analyze elements on the page\n    ",
-          "title": "Command Type"
+          "default": null,
+          "description": "Optional proxies to use for the Tavily API requests.",
+          "title": "Proxies"
         },
-        "instruction": {
-          "description": "Natural language instruction describing what you want to do on the website. Be specific about the action you want to perform, data to extract, or elements to observe. If your task is complex, break it down into simple, sequential steps. For example: 'Step 1: Navigate to https://example.com; Step 2: Click the login button; Step 3: Enter your credentials; Step 4: Submit the form.' Complex tasks like 'Search for OpenAI' should be broken down as: 'Step 1: Navigate to https://google.com; Step 2: Type OpenAI in the search box; Step 3: Press Enter or click the search button'.",
-          "title": "Instruction",
+        "search_depth": {
+          "default": "basic",
+          "description": "The depth of the search.",
+          "enum": [
+            "basic",
+            "advanced"
+          ],
+          "title": "Search Depth",
           "type": "string"
         },
-        "url": {
+        "time_range": {
           "anyOf": [
             {
+              "enum": [
+                "day",
+                "week",
+                "month",
+                "year"
+              ],
               "type": "string"
             },
             {
@@ -6948,28 +8764,56 @@
             }
           ],
           "default": null,
-          "description": "The URL to navigate to before executing the instruction. MUST be used with 'navigate' command. ",
-          "title": "Url"
+          "description": "The time range for the search.",
+          "title": "Time Range"
+        },
+        "timeout": {
+          "default": 60,
+          "description": "The timeout for the search request in seconds.",
+          "title": "Timeout",
+          "type": "integer"
+        },
+        "topic": {
+          "default": "general",
+          "description": "The topic to focus the search on.",
+          "enum": [
+            "general",
+            "news",
+            "finance"
+          ],
+          "title": "Topic",
+          "type": "string"
+        }
+      },
+      "title": "TavilySearchTool",
+      "type": "object"
+    },
+    "name": "TavilySearchTool",
+    "package_dependencies": [
+      "tavily-python"
+    ],
+    "run_params_schema": {
+      "description": "Input schema for TavilySearchTool.",
+      "properties": {
+        "query": {
+          "description": "The search query string.",
+          "title": "Query",
+          "type": "string"
         }
       },
       "required": [
-        "instruction"
+        "query"
       ],
-      "title": "StagehandToolSchema",
+      "title": "TavilySearchToolSchema",
       "type": "object"
     }
   },
   {
-    "description": "A tool that can be used to semantic search a query from a txt's content.",
+    "description": "Tool for semantic search using VectorX vector DB with optional sparse embedding support (SPLADE).",
     "env_vars": [],
-    "humanized_name": "Search a txt's content",
+    "humanized_name": "VectorXVectorSearchTool",
     "init_params_schema": {
       "$defs": {
-        "Adapter": {
-          "properties": {},
-          "title": "Adapter",
-          "type": "object"
-        },
         "EnvVar": {
           "properties": {
             "default": {
               "anyOf": [
                 {
                   "type": "string"
                 },
                 {
                   "type": "null"
                 }
               ],
               "default": null,
               "title": "Default"
             },
             "description": {
               "title": "Description",
               "type": "string"
             },
             "name": {
               "title": "Name",
               "type": "string"
             },
             "required": {
               "default": true,
               "title": "Required",
               "type": "boolean"
             }
           },
           "required": [
             "name",
             "description"
           ],
           "title": "EnvVar",
           "type": "object"
         }
       },
+      "description": "CrewAI Tool for semantic search using VectorX vector database.\n\nSupports both dense (semantic) and sparse (keyword-like via SPLADE) search.\nDefault embedding model is Gemini via `google-genai`.\n\nAttributes:\n    api_token: API token for VectorX.\n    collection_name: Name of the index/collection in VectorX.\n    embed_fn: Custom embedding function (optional).\n    encryption_key: Encryption key for secure collections.\n    space_type: Vector distance metric (e.g., \"cosine\").\n    use_sparse: Whether to use sparse (SPLADE) embedding.\n    sparse_embedder: SPLADE embedder instance.\n    sparse_vocab_size: Vocabulary size for sparse encoder.\n    top_k: Number of results to retrieve.",
+      "properties": {},
+      "title": "VectorXVectorSearchTool",
+      "type": "object"
+    },
+    "name": "VectorXVectorSearchTool",
+    "package_dependencies": [],
+    "run_params_schema": {
+      "description": "Argument schema for VectorX search tool.",
       "properties": {
-        "adapter": {
-          "$ref": "#/$defs/Adapter"
+        "query": {
+          "title": "Query",
+          "type": "string"
         },
-        "config": {
+        "top_k": {
           "anyOf": [
             {
-              "additionalProperties": true,
-              "type": "object"
+              "type": "integer"
             },
             {
               "type": "null"
             }
           ],
           "default": null,
-          "title": "Config"
-        },
-        "summarize": {
-          "default": false,
-          "title": "Summarize",
-          "type": "boolean"
-        }
-      },
-      "title": "TXTSearchTool",
-      "type": "object"
-    },
-    "name": "TXTSearchTool",
-    "package_dependencies": [],
-    "run_params_schema": {
-      "description": "Input for TXTSearchTool.",
-      "properties": {
-        "search_query": {
-          "description": "Mandatory search query you want to use to search the txt's content",
-          "title": "Search Query",
-          "type": "string"
-        },
-        "txt": {
-          "description": "Mandatory txt path you want to search",
-          "title": "Txt",
-          "type": "string"
+          "title": "Top K"
        }
      },
      "required": [
-        "search_query",
-        "txt"
+        "query"
      ],
-      "title": "TXTSearchToolSchema",
+      "title": "VectorXSearchArgs",
      "type": "object"
    }
  },
@@ -7185,6 +9013,7 @@
         "text2vec-gpt4all",
         "text2vec-huggingface",
         "text2vec-mistral",
+        "text2vec-model2vec",
         "text2vec-nvidia",
         "text2vec-ollama",
         "text2vec-openai",
@@ -7197,6 +9026,7 @@
         "multi2vec-clip",
         "multi2vec-cohere",
         "multi2vec-jinaai",
+        "multi2multivec-jinaai",
         "multi2vec-bind",
         "multi2vec-palm",
         "multi2vec-voyageai",
@@ -7209,6 +9039,18 @@
     },
     "description": "Tool to search the Weaviate database",
     "properties": {
+      "alpha": {
+        "anyOf": [
+          {
+            "type": "integer"
+          },
+          {
+            "type": "null"
+          }
+        ],
+        "default": 0.75,
+        "title": "Alpha"
+      },
       "collection_name": {
         "anyOf": [
           {
diff --git a/uv.lock b/uv.lock
index b836cf07..c54026db 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10, <3.14"
 resolution-markers = [
     "python_full_version >= '3.13' and platform_python_implementation == 'PyPy' and sys_platform == 'darwin'",
@@ -787,6 +787,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/dc/56/b4e2ccdda8bc7732c5616bdb3bb4cea6019fdbdbbb2ee435ca784055cb8e/composio_core-0.7.20-py3-none-any.whl", hash = "sha256:e1cfb9cfc68a4622bc15827143ddf726f429d281e8f9de5d4c0965e75d039f14", size = 501152, upload-time = "2025-07-03T08:48:52.058Z" },
 ]
 
+[[package]]
+name = "contextual-client"
+version = "0.8.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "anyio" },
+    { name = "distro" },
+    { name = "httpx" },
+    { name = "pydantic" },
+    { name = "sniffio" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/02/4d/1219b84a73551c1f70be465c8e4b496ebf788152f7b124a84cc3895d2390/contextual_client-0.8.0.tar.gz", hash = "sha256:e97c3e7c5d9b5a97f23fb7b4adfe34d8d9a42817415335b1b48f6d6774bc2747", size = 148896, upload-time = "2025-08-26T23:40:34.967Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/80/f1/336d9fe785004b38f3850367833be8c7d91a4a8f2ceefae5e1cfa5d08a05/contextual_client-0.8.0-py3-none-any.whl", hash = "sha256:41b6fba00e7bddd1ca06bbd3ddc7269c400e049f7c82b2bcc5302746c704dda3", size = 154607, upload-time = "2025-08-26T23:40:33.545Z" },
+]
+
 [[package]]
 name = "contourpy"
 version = "1.3.2"
@@ -1027,6 +1044,7 @@ dependencies = [
     { name = "crewai" },
     { name = "docker" },
     { name = "embedchain" },
+    { name = "google-genai" },
     { name = 
"lancedb" }, { name = "openai" }, { name = "portalocker" }, @@ -1057,6 +1075,10 @@ browserbase = [ composio-core = [ { name = "composio-core" }, ] +contextual = [ + { name = "contextual-client" }, + { name = "nest-asyncio" }, +] couchbase = [ { name = "couchbase" }, ] @@ -1137,6 +1159,9 @@ stagehand = [ tavily-python = [ { name = "tavily-python" }, ] +vectorx = [ + { name = "vecx" }, +] weaviate-client = [ { name = "weaviate-client" }, ] @@ -1160,6 +1185,7 @@ requires-dist = [ { name = "chromadb", specifier = "==0.5.23" }, { name = "click", specifier = ">=8.1.8" }, { name = "composio-core", marker = "extra == 'composio-core'", specifier = ">=0.6.11.post1" }, + { name = "contextual-client", marker = "extra == 'contextual'", specifier = ">=0.1.0" }, { name = "couchbase", marker = "extra == 'couchbase'", specifier = ">=4.3.5" }, { name = "crewai", specifier = ">=0.165.1" }, { name = "cryptography", marker = "extra == 'snowflake'", specifier = ">=43.0.3" }, @@ -1169,6 +1195,7 @@ requires-dist = [ { name = "exa-py", marker = "extra == 'exa-py'", specifier = ">=1.8.7" }, { name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" }, { name = "gitpython", marker = "extra == 'github'", specifier = "==3.1.38" }, + { name = "google-genai", specifier = ">=1.32.0" }, { name = "hyperbrowser", marker = "extra == 'hyperbrowser'", specifier = ">=0.18.0" }, { name = "lancedb", specifier = ">=0.5.4" }, { name = "langchain-apify", marker = "extra == 'apify'", specifier = ">=0.1.2,<1.0.0" }, @@ -1178,6 +1205,7 @@ requires-dist = [ { name = "mcpadapt", marker = "extra == 'mcp'", specifier = ">=0.1.9" }, { name = "multion", marker = "extra == 'multion'", specifier = ">=1.1.0" }, { name = "nest-asyncio", marker = "extra == 'bedrock'", specifier = ">=1.6.0" }, + { name = "nest-asyncio", marker = "extra == 'contextual'", specifier = ">=1.6.0" }, { name = "openai", specifier = ">=1.12.0" }, { name = "oxylabs", marker = "extra == 'oxylabs'", specifier = "==2.0.0" }, { name = "patronus", marker = "extra == 'patronus'", specifier = ">=0.0.16" }, @@ -1206,9 +1234,10 @@ requires-dist = [ { name = "tavily-python", marker = "extra == 'tavily-python'", specifier = ">=0.5.4" }, { name = "tiktoken", specifier = ">=0.8.0" }, { name = "unstructured", extras = ["local-inference", "all-docs"], marker = "extra == 'xml'", specifier = ">=0.17.2" }, + { name = "vecx", marker = "extra == 'vectorx'", specifier = ">=0.33.1b5" }, { name = "weaviate-client", marker = "extra == 'weaviate-client'", specifier = ">=4.10.2" }, ] -provides-extras = ["scrapfly-sdk", "sqlalchemy", "multion", "firecrawl-py", "composio-core", "browserbase", "weaviate-client", "patronus", "serpapi", "beautifulsoup4", "selenium", "spider-client", "scrapegraph-py", "linkup-sdk", "tavily-python", "hyperbrowser", "snowflake", "singlestore", "exa-py", "qdrant-client", "apify", "databricks-sdk", "couchbase", "mcp", "stagehand", "github", "rag", "xml", "oxylabs", "mongodb", "bedrock"] +provides-extras = ["scrapfly-sdk", "sqlalchemy", "multion", "firecrawl-py", "composio-core", "browserbase", "weaviate-client", "patronus", "serpapi", "beautifulsoup4", "selenium", "spider-client", "scrapegraph-py", "linkup-sdk", "tavily-python", "hyperbrowser", "snowflake", "singlestore", "exa-py", "qdrant-client", "apify", "databricks-sdk", "couchbase", "mcp", "stagehand", "github", "rag", "xml", "oxylabs", "mongodb", "bedrock", "contextual", "vectorx"] [package.metadata.requires-dev] dev = [ @@ -1815,6 +1844,25 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/39/aa/db9febba7b5bd9c9d772e935a5c495fb2b4ee05299e46c6c4b1e7c0b66b2/google_cloud_vision-3.10.2-py3-none-any.whl", hash = "sha256:42a17fbc2219b0a88e325e2c1df6664a8dafcbae66363fb37ebcb511b018fc87", size = 527877, upload-time = "2025-06-12T01:09:57.275Z" }, ] +[[package]] +name = "google-genai" +version = "1.32.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "google-auth" }, + { name = "httpx" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "tenacity" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/03/ab/e6cdd8fa957c647ef00c4da7c59d0e734354bd49ed8d98c860732d8e1944/google_genai-1.32.0.tar.gz", hash = "sha256:349da3f5ff0e981066bd508585fcdd308d28fc4646f318c8f6d1aa6041f4c7e3", size = 240802, upload-time = "2025-08-27T22:16:32.781Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/59/55/be09472f7a656af1208196d2ef9a3d2710f3cbcf695f51acbcbe28b9472b/google_genai-1.32.0-py3-none-any.whl", hash = "sha256:c0c4b1d45adf3aa99501050dd73da2f0dea09374002231052d81a6765d15e7f6", size = 241680, upload-time = "2025-08-27T22:16:31.409Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -3346,6 +3394,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] +[[package]] +name = "msgpack" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/52/f30da112c1dc92cf64f57d08a273ac771e7b29dea10b4b30369b2d7e8546/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed", size = 81799, upload-time = "2025-06-13T06:51:37.228Z" }, + { url = "https://files.pythonhosted.org/packages/e4/35/7bfc0def2f04ab4145f7f108e3563f9b4abae4ab0ed78a61f350518cc4d2/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8", size = 78278, upload-time = "2025-06-13T06:51:38.534Z" }, + { url = "https://files.pythonhosted.org/packages/e8/c5/df5d6c1c39856bc55f800bf82778fd4c11370667f9b9e9d51b2f5da88f20/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2", size = 402805, upload-time = "2025-06-13T06:51:39.538Z" }, + { url = "https://files.pythonhosted.org/packages/20/8e/0bb8c977efecfe6ea7116e2ed73a78a8d32a947f94d272586cf02a9757db/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4", size = 408642, upload-time = "2025-06-13T06:51:41.092Z" }, + { url = "https://files.pythonhosted.org/packages/59/a1/731d52c1aeec52006be6d1f8027c49fdc2cfc3ab7cbe7c28335b2910d7b6/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0", size = 395143, upload-time = "2025-06-13T06:51:42.575Z" }, + { url = "https://files.pythonhosted.org/packages/2b/92/b42911c52cda2ba67a6418ffa7d08969edf2e760b09015593c8a8a27a97d/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26", size = 395986, upload-time = "2025-06-13T06:51:43.807Z" }, + { url = "https://files.pythonhosted.org/packages/61/dc/8ae165337e70118d4dab651b8b562dd5066dd1e6dd57b038f32ebc3e2f07/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75", size = 402682, upload-time = "2025-06-13T06:51:45.534Z" }, + { url = "https://files.pythonhosted.org/packages/58/27/555851cb98dcbd6ce041df1eacb25ac30646575e9cd125681aa2f4b1b6f1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338", size = 406368, upload-time = "2025-06-13T06:51:46.97Z" }, + { url = "https://files.pythonhosted.org/packages/d4/64/39a26add4ce16f24e99eabb9005e44c663db00e3fce17d4ae1ae9d61df99/msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd", size = 65004, upload-time = "2025-06-13T06:51:48.582Z" }, + { url = "https://files.pythonhosted.org/packages/7d/18/73dfa3e9d5d7450d39debde5b0d848139f7de23bd637a4506e36c9800fd6/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8", size = 71548, upload-time = "2025-06-13T06:51:49.558Z" }, + { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728, upload-time = "2025-06-13T06:51:50.68Z" }, + { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279, upload-time = "2025-06-13T06:51:51.72Z" }, + { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859, upload-time = "2025-06-13T06:51:52.749Z" }, + { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975, upload-time = "2025-06-13T06:51:53.97Z" }, + { url = "https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528, upload-time = "2025-06-13T06:51:55.507Z" }, + { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = 
"sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338, upload-time = "2025-06-13T06:51:57.023Z" }, + { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658, upload-time = "2025-06-13T06:51:58.419Z" }, + { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124, upload-time = "2025-06-13T06:51:59.969Z" }, + { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016, upload-time = "2025-06-13T06:52:01.294Z" }, + { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267, upload-time = "2025-06-13T06:52:02.568Z" }, + { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359, upload-time = "2025-06-13T06:52:03.909Z" }, + { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172, upload-time = "2025-06-13T06:52:05.246Z" }, + { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013, upload-time = "2025-06-13T06:52:06.341Z" }, + { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905, upload-time = "2025-06-13T06:52:07.501Z" }, + { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336, upload-time = "2025-06-13T06:52:09.047Z" }, + { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485, upload-time = "2025-06-13T06:52:10.382Z" }, + { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182, upload-time = "2025-06-13T06:52:11.644Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883, upload-time = "2025-06-13T06:52:12.806Z" }, + { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406, upload-time = "2025-06-13T06:52:14.271Z" }, + { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558, upload-time = "2025-06-13T06:52:15.252Z" }, + { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677, upload-time = "2025-06-13T06:52:16.64Z" }, + { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603, upload-time = "2025-06-13T06:52:17.843Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504, upload-time = "2025-06-13T06:52:18.982Z" }, + { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749, upload-time = "2025-06-13T06:52:20.211Z" }, + { url = "https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458, upload-time = "2025-06-13T06:52:21.429Z" }, + { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976, upload-time = "2025-06-13T06:52:22.995Z" }, + { url = "https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607, upload-time = "2025-06-13T06:52:24.152Z" }, + { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172, upload-time = "2025-06-13T06:52:25.704Z" }, + { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347, upload-time = "2025-06-13T06:52:26.846Z" }, + { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, +] + [[package]] name = "msoffcrypto-tool" version = "5.4.2" @@ -7295,6 +7391,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/5d/1f15b252890c968d42b348d1e9b0aa12d5bf3e776704178ec37cceccdb63/vcrpy-7.0.0-py2.py3-none-any.whl", hash = "sha256:55791e26c18daa363435054d8b35bd41a4ac441b6676167635d1b37a71dbe124", size = 42321, upload-time = "2024-12-31T00:07:55.277Z" }, ] +[[package]] +name = "vecx" +version = "0.33.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, + { name = "msgpack" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7f/3c/4eb3319c1268f83a7ff4cb36031b9079a83b6ebaa318894eb3b24481495b/vecx-0.33.3.tar.gz", hash = "sha256:0fec1a6757d07f975e3f289b2b75b73923c5f91045c60e9879c89b5f2926ea9e", size = 4025260, upload-time = "2025-07-24T12:02:08.999Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/ca/e19477b93abeefff085bcfa3e14f8b6cceb189c195d77c51d168690586eb/vecx-0.33.3-py3-none-any.whl", hash = "sha256:a790fdfd67f37ba3ec94310822c6d1e858827421a1c5b1a4fcb9c87368cd7d0a", size = 4690515, upload-time = "2025-07-24T12:02:06.609Z" }, +] + [[package]] name = "watchfiles" version = "1.1.0" From 4707509a6c48a35408cfeaf467f3c2098d410aa3 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Wed, 3 Sep 2025 09:33:21 +0530 Subject: [PATCH 06/10] Changed tool.spec.json and uv.lock --- tool.specs.json | 223 ++++++++++++++++++++++++++++-------------------- uv.lock | 116 +------------------------ 2 files changed, 133 insertions(+), 206 deletions(-) diff --git a/tool.specs.json b/tool.specs.json index d47492ac..c16df5ee 100644 --- a/tool.specs.json +++ b/tool.specs.json @@ -2508,16 +2508,6 @@ "required": false, "title": "Api Key" }, - "client": { - "anyOf": [ - {}, - { - "type": "null" - } - ], - "default": null, - "title": "Client" - }, "content": { "anyOf": [ { @@ -3190,6 +3180,113 @@ "type": "object" } }, + { + "description": "A tool that leverages CrewAI Studio's capabilities to automatically generate complete CrewAI automations based on natural language descriptions. 
It translates high-level requirements into functional CrewAI implementations.",
+    "env_vars": [
+      {
+        "default": null,
+        "description": "Personal Access Token for CrewAI Enterprise API",
+        "name": "CREWAI_PERSONAL_ACCESS_TOKEN",
+        "required": true
+      },
+      {
+        "default": null,
+        "description": "Base URL for CrewAI Enterprise API",
+        "name": "CREWAI_PLUS_URL",
+        "required": false
+      }
+    ],
+    "humanized_name": "Generate CrewAI Automation",
+    "init_params_schema": {
+      "$defs": {
+        "EnvVar": {
+          "properties": {
+            "default": {
+              "anyOf": [
+                {
+                  "type": "string"
+                },
+                {
+                  "type": "null"
+                }
+              ],
+              "default": null,
+              "title": "Default"
+            },
+            "description": {
+              "title": "Description",
+              "type": "string"
+            },
+            "name": {
+              "title": "Name",
+              "type": "string"
+            },
+            "required": {
+              "default": true,
+              "title": "Required",
+              "type": "boolean"
+            }
+          },
+          "required": [
+            "name",
+            "description"
+          ],
+          "title": "EnvVar",
+          "type": "object"
+        }
+      },
+      "properties": {
+        "crewai_enterprise_url": {
+          "description": "The base URL of CrewAI Enterprise. If not provided, it will be loaded from the environment variable CREWAI_PLUS_URL with default https://app.crewai.com.",
+          "title": "Crewai Enterprise Url",
+          "type": "string"
+        },
+        "personal_access_token": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "description": "The user's Personal Access Token to access CrewAI Enterprise API. If not provided, it will be loaded from the environment variable CREWAI_PERSONAL_ACCESS_TOKEN.",
+          "title": "Personal Access Token"
+        }
+      },
+      "title": "GenerateCrewaiAutomationTool",
+      "type": "object"
+    },
+    "name": "GenerateCrewaiAutomationTool",
+    "package_dependencies": [],
+    "run_params_schema": {
+      "properties": {
+        "organization_id": {
+          "anyOf": [
+            {
+              "type": "string"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null,
+          "description": "The identifier for the CrewAI Enterprise organization. If not specified, a default organization will be used.",
+          "title": "Organization Id"
+        },
+        "prompt": {
+          "description": "The prompt to generate the CrewAI automation, e.g. 'Generate a CrewAI automation that will scrape the website and store the data in a database.'",
+          "title": "Prompt",
+          "type": "string"
+        }
+      },
+      "required": [
+        "prompt"
+      ],
+      "title": "GenerateCrewaiAutomationToolSchema",
+      "type": "object"
+    }
+  },
   {
     "description": "A tool that can be used to semantic search a query from a github repo's content.
This is not the GitHub API, but instead a tool that can provide semantic search capabilities.", "env_vars": [], @@ -8135,6 +8232,18 @@ "humanized_name": "Web Automation Tool", "init_params_schema": { "$defs": { + "AvailableModel": { + "enum": [ + "gpt-4o", + "gpt-4o-mini", + "claude-3-5-sonnet-latest", + "claude-3-7-sonnet-latest", + "computer-use-preview", + "gemini-2.0-flash" + ], + "title": "AvailableModel", + "type": "string" + }, "EnvVar": { "properties": { "default": { @@ -8212,6 +8321,17 @@ "default": null, "title": "Model Api Key" }, + "model_name": { + "anyOf": [ + { + "$ref": "#/$defs/AvailableModel" + }, + { + "type": "null" + } + ], + "default": "claude-3-7-sonnet-latest" + }, "project_id": { "anyOf": [ { @@ -8808,82 +8928,6 @@ "type": "object" } }, - { - "description": "Tool for semantic search using VectorX vector DB with optional sparse embedding support (SPLADE).", - "env_vars": [], - "humanized_name": "VectorXVectorSearchTool", - "init_params_schema": { - "$defs": { - "EnvVar": { - "properties": { - "default": { - "anyOf": [ - { - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Default" - }, - "description": { - "title": "Description", - "type": "string" - }, - "name": { - "title": "Name", - "type": "string" - }, - "required": { - "default": true, - "title": "Required", - "type": "boolean" - } - }, - "required": [ - "name", - "description" - ], - "title": "EnvVar", - "type": "object" - } - }, - "description": "CrewAI Tool for semantic search using VectorX vector database.\n\nSupports both dense (semantic) and sparse (keyword-like via SPLADE) search.\nDefault embedding model is Gemini via `google-genai`.\n\nAttributes:\n api_token: API token for VectorX.\n collection_name: Name of the index/collection in VectorX.\n embed_fn: Custom embedding function (optional).\n encryption_key: Encryption key for secure collections.\n space_type: Vector distance metric (e.g., \"cosine\").\n use_sparse: Whether to use sparse (SPLADE) embedding.\n sparse_embedder: SPLADE embedder instance.\n sparse_vocab_size: Vocabulary size for sparse encoder.\n top_k: Number of results to retrieve.", - "properties": {}, - "title": "VectorXVectorSearchTool", - "type": "object" - }, - "name": "VectorXVectorSearchTool", - "package_dependencies": [], - "run_params_schema": { - "description": "Argument schema for VectorX search tool.", - "properties": { - "query": { - "title": "Query", - "type": "string" - }, - "top_k": { - "anyOf": [ - { - "type": "integer" - }, - { - "type": "null" - } - ], - "default": null, - "title": "Top K" - } - }, - "required": [ - "query" - ], - "title": "VectorXSearchArgs", - "type": "object" - } - }, { "description": "This tool uses OpenAI's Vision API to describe the contents of an image.", "env_vars": [ @@ -9002,10 +9046,9 @@ "type": "object" }, "Vectorizers": { - "description": "The available vectorization modules in Weaviate.\n\nThese modules encode binary data into lists of floats called vectors.\nSee the [docs](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules) for more details.\n\nAttributes:\n NONE: No vectorizer.\n TEXT2VEC_AWS: Weaviate module backed by AWS text-based embedding models.\n TEXT2VEC_COHERE: Weaviate module backed by Cohere text-based embedding models.\n TEXT2VEC_CONTEXTIONARY: Weaviate module backed by Contextionary text-based embedding models.\n TEXT2VEC_GPT4ALL: Weaviate module backed by GPT-4-All text-based embedding models.\n TEXT2VEC_HUGGINGFACE: Weaviate module backed by HuggingFace 
text-based embedding models.\n TEXT2VEC_OPENAI: Weaviate module backed by OpenAI and Azure-OpenAI text-based embedding models.\n TEXT2VEC_PALM: Weaviate module backed by PaLM text-based embedding models.\n TEXT2VEC_TRANSFORMERS: Weaviate module backed by Transformers text-based embedding models.\n TEXT2VEC_JINAAI: Weaviate module backed by Jina AI text-based embedding models.\n TEXT2VEC_VOYAGEAI: Weaviate module backed by Voyage AI text-based embedding models.\n TEXT2VEC_NVIDIA: Weaviate module backed by NVIDIA text-based embedding models.\n TEXT2VEC_WEAVIATE: Weaviate module backed by Weaviate's self-hosted text-based embedding models.\n IMG2VEC_NEURAL: Weaviate module backed by a ResNet-50 neural network for images.\n MULTI2VEC_CLIP: Weaviate module backed by a Sentence-BERT CLIP model for images and text.\n MULTI2VEC_PALM: Weaviate module backed by a palm model for images and text.\n MULTI2VEC_BIND: Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.\n MULTI2VEC_VOYAGEAI: Weaviate module backed by a Voyage AI multimodal embedding models.\n MULTI2VEC_NVIDIA: Weaviate module backed by NVIDIA multimodal embedding models.\n REF2VEC_CENTROID: Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.", + "description": "The available vectorization modules in Weaviate.\n\nThese modules encode binary data into lists of floats called vectors.\nSee the [docs](https://weaviate.io/developers/weaviate/modules/retriever-vectorizer-modules) for more details.\n\nAttributes:\n `NONE`\n No vectorizer.\n `TEXT2VEC_AWS`\n Weaviate module backed by AWS text-based embedding models.\n `TEXT2VEC_COHERE`\n Weaviate module backed by Cohere text-based embedding models.\n `TEXT2VEC_CONTEXTIONARY`\n Weaviate module backed by Contextionary text-based embedding models.\n `TEXT2VEC_GPT4ALL`\n Weaviate module backed by GPT-4-All text-based embedding models.\n `TEXT2VEC_HUGGINGFACE`\n Weaviate module backed by HuggingFace text-based embedding models.\n `TEXT2VEC_OPENAI`\n Weaviate module backed by OpenAI and Azure-OpenAI text-based embedding models.\n `TEXT2VEC_PALM`\n Weaviate module backed by PaLM text-based embedding models.\n `TEXT2VEC_TRANSFORMERS`\n Weaviate module backed by Transformers text-based embedding models.\n `TEXT2VEC_JINAAI`\n Weaviate module backed by Jina AI text-based embedding models.\n `TEXT2VEC_VOYAGEAI`\n Weaviate module backed by Voyage AI text-based embedding models.\n `TEXT2VEC_WEAVIATE`\n Weaviate module backed by Weaviate's self-hosted text-based embedding models.\n `IMG2VEC_NEURAL`\n Weaviate module backed by a ResNet-50 neural network for images.\n `MULTI2VEC_CLIP`\n Weaviate module backed by a Sentence-BERT CLIP model for images and text.\n `MULTI2VEC_PALM`\n Weaviate module backed by a palm model for images and text.\n `MULTI2VEC_BIND`\n Weaviate module backed by the ImageBind model for images, text, audio, depth, IMU, thermal, and video.\n `MULTI2VEC_VOYAGEAI`\n Weaviate module backed by a Voyage AI multimodal embedding models.\n `REF2VEC_CENTROID`\n Weaviate module backed by a centroid-based model that calculates an object's vectors from its referenced vectors.", "enum": [ "none", - "text2colbert-jinaai", "text2vec-aws", "text2vec-cohere", "text2vec-contextionary", @@ -9013,8 +9056,6 @@ "text2vec-gpt4all", "text2vec-huggingface", "text2vec-mistral", - "text2vec-model2vec", - "text2vec-nvidia", "text2vec-ollama", "text2vec-openai", "text2vec-palm", @@ -9026,11 +9067,9 @@ 
"multi2vec-clip", "multi2vec-cohere", "multi2vec-jinaai", - "multi2multivec-jinaai", "multi2vec-bind", "multi2vec-palm", "multi2vec-voyageai", - "multi2vec-nvidia", "ref2vec-centroid" ], "title": "Vectorizers", diff --git a/uv.lock b/uv.lock index c54026db..b836cf07 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.10, <3.14" resolution-markers = [ "python_full_version >= '3.13' and platform_python_implementation == 'PyPy' and sys_platform == 'darwin'", @@ -787,23 +787,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/56/b4e2ccdda8bc7732c5616bdb3bb4cea6019fdbdbbb2ee435ca784055cb8e/composio_core-0.7.20-py3-none-any.whl", hash = "sha256:e1cfb9cfc68a4622bc15827143ddf726f429d281e8f9de5d4c0965e75d039f14", size = 501152, upload-time = "2025-07-03T08:48:52.058Z" }, ] -[[package]] -name = "contextual-client" -version = "0.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "pydantic" }, - { name = "sniffio" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/02/4d/1219b84a73551c1f70be465c8e4b496ebf788152f7b124a84cc3895d2390/contextual_client-0.8.0.tar.gz", hash = "sha256:e97c3e7c5d9b5a97f23fb7b4adfe34d8d9a42817415335b1b48f6d6774bc2747", size = 148896, upload-time = "2025-08-26T23:40:34.967Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/80/f1/336d9fe785004b38f3850367833be8c7d91a4a8f2ceefae5e1cfa5d08a05/contextual_client-0.8.0-py3-none-any.whl", hash = "sha256:41b6fba00e7bddd1ca06bbd3ddc7269c400e049f7c82b2bcc5302746c704dda3", size = 154607, upload-time = "2025-08-26T23:40:33.545Z" }, -] - [[package]] name = "contourpy" version = "1.3.2" @@ -1044,7 +1027,6 @@ dependencies = [ { name = "crewai" }, { name = "docker" }, { name = "embedchain" }, - { name = "google-genai" }, { name = "lancedb" }, { name = "openai" }, { name = "portalocker" }, @@ -1075,10 +1057,6 @@ browserbase = [ composio-core = [ { name = "composio-core" }, ] -contextual = [ - { name = "contextual-client" }, - { name = "nest-asyncio" }, -] couchbase = [ { name = "couchbase" }, ] @@ -1159,9 +1137,6 @@ stagehand = [ tavily-python = [ { name = "tavily-python" }, ] -vectorx = [ - { name = "vecx" }, -] weaviate-client = [ { name = "weaviate-client" }, ] @@ -1185,7 +1160,6 @@ requires-dist = [ { name = "chromadb", specifier = "==0.5.23" }, { name = "click", specifier = ">=8.1.8" }, { name = "composio-core", marker = "extra == 'composio-core'", specifier = ">=0.6.11.post1" }, - { name = "contextual-client", marker = "extra == 'contextual'", specifier = ">=0.1.0" }, { name = "couchbase", marker = "extra == 'couchbase'", specifier = ">=4.3.5" }, { name = "crewai", specifier = ">=0.165.1" }, { name = "cryptography", marker = "extra == 'snowflake'", specifier = ">=43.0.3" }, @@ -1195,7 +1169,6 @@ requires-dist = [ { name = "exa-py", marker = "extra == 'exa-py'", specifier = ">=1.8.7" }, { name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" }, { name = "gitpython", marker = "extra == 'github'", specifier = "==3.1.38" }, - { name = "google-genai", specifier = ">=1.32.0" }, { name = "hyperbrowser", marker = "extra == 'hyperbrowser'", specifier = ">=0.18.0" }, { name = "lancedb", specifier = ">=0.5.4" }, { name = "langchain-apify", marker = "extra == 'apify'", specifier = ">=0.1.2,<1.0.0" }, @@ -1205,7 +1178,6 @@ requires-dist = [ { name = "mcpadapt", marker = "extra == 'mcp'", 
specifier = ">=0.1.9" }, { name = "multion", marker = "extra == 'multion'", specifier = ">=1.1.0" }, { name = "nest-asyncio", marker = "extra == 'bedrock'", specifier = ">=1.6.0" }, - { name = "nest-asyncio", marker = "extra == 'contextual'", specifier = ">=1.6.0" }, { name = "openai", specifier = ">=1.12.0" }, { name = "oxylabs", marker = "extra == 'oxylabs'", specifier = "==2.0.0" }, { name = "patronus", marker = "extra == 'patronus'", specifier = ">=0.0.16" }, @@ -1234,10 +1206,9 @@ requires-dist = [ { name = "tavily-python", marker = "extra == 'tavily-python'", specifier = ">=0.5.4" }, { name = "tiktoken", specifier = ">=0.8.0" }, { name = "unstructured", extras = ["local-inference", "all-docs"], marker = "extra == 'xml'", specifier = ">=0.17.2" }, - { name = "vecx", marker = "extra == 'vectorx'", specifier = ">=0.33.1b5" }, { name = "weaviate-client", marker = "extra == 'weaviate-client'", specifier = ">=4.10.2" }, ] -provides-extras = ["scrapfly-sdk", "sqlalchemy", "multion", "firecrawl-py", "composio-core", "browserbase", "weaviate-client", "patronus", "serpapi", "beautifulsoup4", "selenium", "spider-client", "scrapegraph-py", "linkup-sdk", "tavily-python", "hyperbrowser", "snowflake", "singlestore", "exa-py", "qdrant-client", "apify", "databricks-sdk", "couchbase", "mcp", "stagehand", "github", "rag", "xml", "oxylabs", "mongodb", "bedrock", "contextual", "vectorx"] +provides-extras = ["scrapfly-sdk", "sqlalchemy", "multion", "firecrawl-py", "composio-core", "browserbase", "weaviate-client", "patronus", "serpapi", "beautifulsoup4", "selenium", "spider-client", "scrapegraph-py", "linkup-sdk", "tavily-python", "hyperbrowser", "snowflake", "singlestore", "exa-py", "qdrant-client", "apify", "databricks-sdk", "couchbase", "mcp", "stagehand", "github", "rag", "xml", "oxylabs", "mongodb", "bedrock"] [package.metadata.requires-dev] dev = [ @@ -1844,25 +1815,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/39/aa/db9febba7b5bd9c9d772e935a5c495fb2b4ee05299e46c6c4b1e7c0b66b2/google_cloud_vision-3.10.2-py3-none-any.whl", hash = "sha256:42a17fbc2219b0a88e325e2c1df6664a8dafcbae66363fb37ebcb511b018fc87", size = 527877, upload-time = "2025-06-12T01:09:57.275Z" }, ] -[[package]] -name = "google-genai" -version = "1.32.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "google-auth" }, - { name = "httpx" }, - { name = "pydantic" }, - { name = "requests" }, - { name = "tenacity" }, - { name = "typing-extensions" }, - { name = "websockets" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/03/ab/e6cdd8fa957c647ef00c4da7c59d0e734354bd49ed8d98c860732d8e1944/google_genai-1.32.0.tar.gz", hash = "sha256:349da3f5ff0e981066bd508585fcdd308d28fc4646f318c8f6d1aa6041f4c7e3", size = 240802, upload-time = "2025-08-27T22:16:32.781Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/59/55/be09472f7a656af1208196d2ef9a3d2710f3cbcf695f51acbcbe28b9472b/google_genai-1.32.0-py3-none-any.whl", hash = "sha256:c0c4b1d45adf3aa99501050dd73da2f0dea09374002231052d81a6765d15e7f6", size = 241680, upload-time = "2025-08-27T22:16:31.409Z" }, -] - [[package]] name = "googleapis-common-protos" version = "1.70.0" @@ -3394,54 +3346,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198, upload-time = "2023-03-07T16:47:09.197Z" }, ] 
-[[package]] -name = "msgpack" -version = "1.1.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555, upload-time = "2025-06-13T06:52:51.324Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/33/52/f30da112c1dc92cf64f57d08a273ac771e7b29dea10b4b30369b2d7e8546/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed", size = 81799, upload-time = "2025-06-13T06:51:37.228Z" }, - { url = "https://files.pythonhosted.org/packages/e4/35/7bfc0def2f04ab4145f7f108e3563f9b4abae4ab0ed78a61f350518cc4d2/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8", size = 78278, upload-time = "2025-06-13T06:51:38.534Z" }, - { url = "https://files.pythonhosted.org/packages/e8/c5/df5d6c1c39856bc55f800bf82778fd4c11370667f9b9e9d51b2f5da88f20/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2", size = 402805, upload-time = "2025-06-13T06:51:39.538Z" }, - { url = "https://files.pythonhosted.org/packages/20/8e/0bb8c977efecfe6ea7116e2ed73a78a8d32a947f94d272586cf02a9757db/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4", size = 408642, upload-time = "2025-06-13T06:51:41.092Z" }, - { url = "https://files.pythonhosted.org/packages/59/a1/731d52c1aeec52006be6d1f8027c49fdc2cfc3ab7cbe7c28335b2910d7b6/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0", size = 395143, upload-time = "2025-06-13T06:51:42.575Z" }, - { url = "https://files.pythonhosted.org/packages/2b/92/b42911c52cda2ba67a6418ffa7d08969edf2e760b09015593c8a8a27a97d/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26", size = 395986, upload-time = "2025-06-13T06:51:43.807Z" }, - { url = "https://files.pythonhosted.org/packages/61/dc/8ae165337e70118d4dab651b8b562dd5066dd1e6dd57b038f32ebc3e2f07/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75", size = 402682, upload-time = "2025-06-13T06:51:45.534Z" }, - { url = "https://files.pythonhosted.org/packages/58/27/555851cb98dcbd6ce041df1eacb25ac30646575e9cd125681aa2f4b1b6f1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338", size = 406368, upload-time = "2025-06-13T06:51:46.97Z" }, - { url = "https://files.pythonhosted.org/packages/d4/64/39a26add4ce16f24e99eabb9005e44c663db00e3fce17d4ae1ae9d61df99/msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd", size = 65004, upload-time = "2025-06-13T06:51:48.582Z" }, - { url = "https://files.pythonhosted.org/packages/7d/18/73dfa3e9d5d7450d39debde5b0d848139f7de23bd637a4506e36c9800fd6/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8", size = 71548, upload-time = 
"2025-06-13T06:51:49.558Z" }, - { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728, upload-time = "2025-06-13T06:51:50.68Z" }, - { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279, upload-time = "2025-06-13T06:51:51.72Z" }, - { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859, upload-time = "2025-06-13T06:51:52.749Z" }, - { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975, upload-time = "2025-06-13T06:51:53.97Z" }, - { url = "https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528, upload-time = "2025-06-13T06:51:55.507Z" }, - { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338, upload-time = "2025-06-13T06:51:57.023Z" }, - { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658, upload-time = "2025-06-13T06:51:58.419Z" }, - { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124, upload-time = "2025-06-13T06:51:59.969Z" }, - { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016, upload-time = "2025-06-13T06:52:01.294Z" }, - { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267, upload-time = "2025-06-13T06:52:02.568Z" }, - { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359, upload-time = "2025-06-13T06:52:03.909Z" }, - { url = 
"https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172, upload-time = "2025-06-13T06:52:05.246Z" }, - { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013, upload-time = "2025-06-13T06:52:06.341Z" }, - { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905, upload-time = "2025-06-13T06:52:07.501Z" }, - { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336, upload-time = "2025-06-13T06:52:09.047Z" }, - { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485, upload-time = "2025-06-13T06:52:10.382Z" }, - { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182, upload-time = "2025-06-13T06:52:11.644Z" }, - { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883, upload-time = "2025-06-13T06:52:12.806Z" }, - { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406, upload-time = "2025-06-13T06:52:14.271Z" }, - { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558, upload-time = "2025-06-13T06:52:15.252Z" }, - { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677, upload-time = "2025-06-13T06:52:16.64Z" }, - { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603, upload-time = "2025-06-13T06:52:17.843Z" }, - { url = 
"https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504, upload-time = "2025-06-13T06:52:18.982Z" }, - { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749, upload-time = "2025-06-13T06:52:20.211Z" }, - { url = "https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458, upload-time = "2025-06-13T06:52:21.429Z" }, - { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976, upload-time = "2025-06-13T06:52:22.995Z" }, - { url = "https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607, upload-time = "2025-06-13T06:52:24.152Z" }, - { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172, upload-time = "2025-06-13T06:52:25.704Z" }, - { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347, upload-time = "2025-06-13T06:52:26.846Z" }, - { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341, upload-time = "2025-06-13T06:52:27.835Z" }, -] - [[package]] name = "msoffcrypto-tool" version = "5.4.2" @@ -7391,22 +7295,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/5d/1f15b252890c968d42b348d1e9b0aa12d5bf3e776704178ec37cceccdb63/vcrpy-7.0.0-py2.py3-none-any.whl", hash = "sha256:55791e26c18daa363435054d8b35bd41a4ac441b6676167635d1b37a71dbe124", size = 42321, upload-time = "2024-12-31T00:07:55.277Z" }, ] -[[package]] -name = "vecx" -version = "0.33.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi" }, - { name = "msgpack" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7f/3c/4eb3319c1268f83a7ff4cb36031b9079a83b6ebaa318894eb3b24481495b/vecx-0.33.3.tar.gz", hash = 
"sha256:0fec1a6757d07f975e3f289b2b75b73923c5f91045c60e9879c89b5f2926ea9e", size = 4025260, upload-time = "2025-07-24T12:02:08.999Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/ca/e19477b93abeefff085bcfa3e14f8b6cceb189c195d77c51d168690586eb/vecx-0.33.3-py3-none-any.whl", hash = "sha256:a790fdfd67f37ba3ec94310822c6d1e858827421a1c5b1a4fcb9c87368cd7d0a", size = 4690515, upload-time = "2025-07-24T12:02:06.609Z" }, -] - [[package]] name = "watchfiles" version = "1.1.0" From 2fda6c61600589ed26137f9765cd1a3739e462e2 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Mon, 29 Sep 2025 11:27:16 +0530 Subject: [PATCH 07/10] Resolved Conflict - 2 --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3945a672..1cedb5bf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ dependencies = [ "lancedb>=0.5.4", "tiktoken>=0.8.0", "stagehand>=0.4.1", - "portalocker==2.7.0", "beautifulsoup4>=4.13.4", "pypdf>=5.9.0", "python-docx>=1.2.0", From 040fa71e8d15e87c9918c7d085c9ff56041b9eb1 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Mon, 29 Sep 2025 14:56:12 +0530 Subject: [PATCH 08/10] Fixed Missing metadata problem --- .../vectorx_vector_search_tool/vectorx_search_tool.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py index d5e3fce3..603941ac 100644 --- a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py +++ b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py @@ -229,9 +229,9 @@ def _run(self, query: str, top_k: Optional[int] = None, **kwargs) -> Any: ) for r in search_results: results.append({ - "text": r["meta"].get("value", ""), + "text": r.get("meta", {}).get("value", ""), "score": r.get("rrf_score", 0), - "metadata": r["meta"], + "metadata": r.get("meta", {}), }) else: search_results = self.index.query( @@ -241,9 +241,9 @@ def _run(self, query: str, top_k: Optional[int] = None, **kwargs) -> Any: ) for r in search_results: results.append({ - "text": r["meta"].get("value", ""), + "text": r.get("meta", {}).get("value", ""), "score": r.get("similarity", 0), - "metadata": r["meta"], + "metadata": r.get("meta", {}), }) except Exception as e: _logger.error(f"VectorX Search Error: {e}") From 1b9a6740eb37f54a65ea40aa3996416757342635 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Mon, 29 Sep 2025 15:19:38 +0530 Subject: [PATCH 09/10] Fixed metadata mutation and dimension issue --- .../vectorx_search_tool.py | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py index 603941ac..4853a89c 100644 --- a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py +++ b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py @@ -6,6 +6,9 @@ from pydantic import BaseModel from crewai.tools import BaseTool +from copy import deepcopy +from itertools import zip_longest + # Try importing dependencies with optional installation notes try: from vecx.vectorx import VectorX @@ -119,6 +122,7 @@ def __init__( sparse_vocab_size: Optional[int] = None, top_k: int = 3, gemini_model: Optional[str] = None, + embedding_dim: Optional[int] = None, ): """Initializes the VectorX search tool, sets up index and embedding model.""" super().__init__() @@ -128,6 +132,7 @@ def 
__init__( object.__setattr__(self, "space_type", space_type) object.__setattr__(self, "use_sparse", use_sparse) object.__setattr__(self, "top_k", top_k) + object.__setattr__(self, "embedding_dim", embedding_dim) gemini_model = gemini_model or os.environ.get("GEMINI_MODEL", "models/embedding-001") _logger.info(f"Using Gemini embedding model: {gemini_model}") @@ -170,7 +175,21 @@ def gemini_embed(text: str) -> List[float]: object.__setattr__(self, "client", client) # Determine embedding dimension - dim = len(self.embed_fn("test")) + # dim = len(self.embed_fn("test")) + + # --- CHANGED: Deferring embedding dimension API call + def _get_embedding_dim(): + if self.embedding_dim is not None: + return self.embedding_dim + try: + test_vec = self.embed_fn("test") + self.embedding_dim = len(test_vec) + except Exception: + _logger.warning("Failed to determine embedding dimension. Defaulting to 768") + self.embedding_dim = 768 + return self.embedding_dim + object.__setattr__(self, "_get_embedding_dim", _get_embedding_dim) + try: if use_sparse: index = client.get_hybrid_index(name=collection_name, key=encryption_key) @@ -181,7 +200,8 @@ def gemini_embed(text: str) -> List[float]: if use_sparse: client.create_hybrid_index( name=collection_name, - dimension=dim, + # dimension=dim, + dimension=self._get_embedding_dim(), space_type=space_type, vocab_size=sparse_vocab_size, key=encryption_key, @@ -190,7 +210,8 @@ def gemini_embed(text: str) -> List[float]: else: client.create_index( name=collection_name, - dimension=dim, + # dimension=dim, + dimension=self._get_embedding_dim(), space_type=space_type, key=encryption_key, ) @@ -261,8 +282,9 @@ def store_documents(self, texts: List[str], metadatas: Optional[List[Dict]] = No metadatas = metadatas or [{} for _ in texts] events = [] - for text, meta in zip(texts, metadatas): - meta["value"] = text + for text, meta in zip_longest(texts, metadatas, fillvalue={}): + meta_copy = deepcopy(meta) + meta_copy["value"] = text embedding = self.embed_fn(text) event = { From 6edf7264c4292826527ac9d2bc22b10cc9bbf156 Mon Sep 17 00:00:00 2001 From: Mithun748 Date: Mon, 29 Sep 2025 15:31:29 +0530 Subject: [PATCH 10/10] Fixed attribute assignment conflict --- .../tools/vectorx_vector_search_tool/vectorx_search_tool.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py index 4853a89c..b1ebfa90 100644 --- a/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py +++ b/crewai_tools/tools/vectorx_vector_search_tool/vectorx_search_tool.py @@ -183,10 +183,10 @@ def _get_embedding_dim(): return self.embedding_dim try: test_vec = self.embed_fn("test") - self.embedding_dim = len(test_vec) + object.__setattr__(self, "embedding_dim", len(test_vec)) except Exception: _logger.warning("Failed to determine embedding dimension. Defaulting to 768") - self.embedding_dim = 768 + object.__setattr__(self, "embedding_dim", 768) return self.embedding_dim object.__setattr__(self, "_get_embedding_dim", _get_embedding_dim) @@ -289,7 +289,7 @@ def store_documents(self, texts: List[str], metadatas: Optional[List[Dict]] = No event = { "id": str(uuid.uuid4()), - "meta": meta, + "meta": meta_copy, } if self.use_sparse: