diff --git a/docs/README-nodes.md b/docs/README-nodes.md index 60fb6d5ae..fdb99e959 100644 --- a/docs/README-nodes.md +++ b/docs/README-nodes.md @@ -22,6 +22,7 @@ For information on testing nodes, see [README-node-testing.md](README-node-testi | `llm_xai` | xAI (Grok) | | | `llm_vertex` | Google Vertex AI | | | `llm_ibm_watson` | IBM Watson | | +| `llm_nebius` | Nebius Token Factory (OpenAI-compatible) | [README](../nodes/src/nodes/llm_nebius/README.md) | | `llm_vision_mistral` | Mistral Vision (multimodal, image-to-text) | [README](../nodes/src/nodes/llm_vision_mistral/README.md) | ## Vector Databases @@ -99,6 +100,16 @@ The `core` module provides built-in connectors for OneDrive, SharePoint, Google | `text_output` | Text output | | `local_text_output` | Local text file output | +## Agent Tools + +Tool nodes (`classType: ["tool"]`) expose capabilities to agents via the control-plane invoke channel rather than data lanes. + +| Node | Description | Documentation | +| -------------------- | ------------------------------------ | ------------------------------------------------------------ | +| `tool_tavily` | Tavily real-time web search for agents | [README](../nodes/src/nodes/tool_tavily/README.md) | + +The `tool_tavily` node pairs with `llm_nebius` and `agent_deepagent` to build Nebius Agentic Search — see `examples/nebius-agentic-search.pipe`. + ## Internal | Node | Description | diff --git a/examples/nebius-agentic-search.pipe b/examples/nebius-agentic-search.pipe new file mode 100644 index 000000000..9ca7a93bd --- /dev/null +++ b/examples/nebius-agentic-search.pipe @@ -0,0 +1,153 @@ +{ + "name": "Nebius Agentic Search", + "description": "A web-research agent powered by Nebius (Llama 3.3 70B) and Tavily real-time search. Ask any question and the agent will search the web, refine its queries, and return a cited answer.", + "components": [ + { + "id": "chat_1", + "provider": "chat", + "name": "Trigger", + "config": { + "hideForm": true, + "mode": "Source", + "parameters": {}, + "type": "chat" + }, + "ui": { + "position": { + "x": 20, + "y": 200 + }, + "measured": { + "width": 150, + "height": 66 + }, + "nodeType": "default", + "formDataValid": true + } + }, + { + "id": "agent_deepagent_1", + "provider": "agent_deepagent", + "name": "Nebius Agentic Search", + "config": { + "instructions": [ + "You are an agentic web-research assistant.", + "Use the tavily tool to find current information; refine your query and search again when results are weak.", + "Cite the source URLs you used and answer concisely." + ], + "parameters": {} + }, + "ui": { + "position": { + "x": 240, + "y": 200 + }, + "measured": { + "width": 150, + "height": 86 + }, + "nodeType": "default", + "formDataValid": true + }, + "input": [ + { + "lane": "questions", + "from": "chat_1" + } + ] + }, + { + "id": "llm_nebius_1", + "provider": "llm_nebius", + "name": "Nebius LLM", + "config": { + "profile": "llama-3-3-70b", + "llama-3-3-70b": { + "apikey": "${NEBIUS_API_KEY}" + }, + "parameters": {} + }, + "ui": { + "position": { + "x": 130, + "y": 380 + }, + "measured": { + "width": 150, + "height": 66 + }, + "nodeType": "default", + "formDataValid": true + }, + "control": [ + { + "classType": "llm", + "from": "agent_deepagent_1" + } + ] + }, + { + "id": "tool_tavily_1", + "provider": "tool_tavily", + "name": "Tavily", + "config": { + "apikey": "${TAVILY_API_KEY}", + "maxResults": 5, + "searchDepth": "advanced", + "topic": "general" + }, + "ui": { + "position": { + "x": 350, + "y": 380 + }, + "measured": { + "width": 150, + "height": 37 + }, + "nodeType": "default", + "formDataValid": true + }, + "control": [ + { + "classType": "tool", + "from": "agent_deepagent_1" + } + ] + }, + { + "id": "response_answers_1", + "provider": "response_answers", + "config": { + "laneName": "answers" + }, + "ui": { + "position": { + "x": 447, + "y": 222 + }, + "measured": { + "width": 150, + "height": 66 + }, + "nodeType": "default", + "formDataValid": true + }, + "input": [ + { + "lane": "answers", + "from": "agent_deepagent_1" + } + ] + } + ], + "source": "chat_1", + "project_id": "c5ba857f-30ff-4f3d-b5bf-9ee90d3ee33b", + "viewport": { + "x": 15.5, + "y": -120.9, + "zoom": 1.04 + }, + "version": 1, + "docRevision": 1 +} \ No newline at end of file diff --git a/nodes/src/nodes/llm_nebius/IGlobal.py b/nodes/src/nodes/llm_nebius/IGlobal.py new file mode 100644 index 000000000..a4a78a16e --- /dev/null +++ b/nodes/src/nodes/llm_nebius/IGlobal.py @@ -0,0 +1,101 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +import os +from typing import Optional +from rocketlib import IGlobalBase, warning +from ai.common.config import Config +from ai.common.chat import ChatBase + + +class IGlobal(IGlobalBase): + """Global handler for the Nebius Token Factory LLM node.""" + + _chat: Optional[ChatBase] = None + + _VALIDATION_PROMPT = 'Hi' + _BASE_URL = 'https://api.tokenfactory.nebius.com/v1/' + + def _resolve_apikey(self, config) -> str: + return str(config.get('apikey') or os.environ.get('NEBIUS_API_KEY', '')).strip() + + def validateConfig(self): + """Probe the model with a 1-token request to validate key + model at save time.""" + from depends import depends # type: ignore + + requirements = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'requirements.txt') + depends(requirements) + + try: + from openai import ( + OpenAI, + APIStatusError, + OpenAIError, + AuthenticationError, + RateLimitError, + APIConnectionError, + ) + + config = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig) + apikey = self._resolve_apikey(config) + model = config.get('model') + if not model or not apikey: + return + try: + client = OpenAI(api_key=apikey, base_url=self._BASE_URL) + client.chat.completions.create( + model=model, + messages=[{'role': 'user', 'content': self._VALIDATION_PROMPT}], + max_tokens=1, + ) + except RateLimitError: + return + except APIStatusError as e: + status = getattr(e, 'status_code', None) or getattr(e, 'status', None) + if status == 429: + return + warning(f'Nebius validation error {status}: {e}') + return + except (AuthenticationError, APIConnectionError, OpenAIError) as e: + warning(str(e)) + return + except Exception as e: + warning(str(e)) + + def beginGlobal(self): + """Initialize the Nebius chat client.""" + from depends import depends # type: ignore + + requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt' + depends(requirements) + + from .nebius import Chat + + bag = self.IEndpoint.endpoint.bag + config = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig) + if not self._resolve_apikey(config): + raise ValueError('Nebius API key is required.') + self._chat = Chat(self.glb.logicalType, config, bag) + + def endGlobal(self): + self._chat = None diff --git a/nodes/src/nodes/llm_nebius/IInstance.py b/nodes/src/nodes/llm_nebius/IInstance.py new file mode 100644 index 000000000..77e200613 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/IInstance.py @@ -0,0 +1,28 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +from ai.common.llm_base import LLMBase + + +class IInstance(LLMBase): + pass diff --git a/nodes/src/nodes/llm_nebius/README.md b/nodes/src/nodes/llm_nebius/README.md new file mode 100644 index 000000000..e0c4a1fe0 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/README.md @@ -0,0 +1,44 @@ +--- +title: Nebius +date: 2026-06-01 +sidebar_position: 1 +--- + + + Nebius - RocketRide Documentation + + +## What it does + +Connects Nebius Token Factory-hosted models to your pipeline via an OpenAI-compatible API. The base URL is fixed at `https://api.tokenfactory.nebius.com/v1/` — no endpoint configuration needed. Used primarily as an `llm` invoke connection by agents (including Nebius Agentic Search) and other nodes that need an LLM. Can also be used directly via lanes. + +**Lanes:** + +| Lane in | Lane out | Description | +| ----------- | --------- | ---------------------------------------------------- | +| `questions` | `answers` | Send a question directly, receive a generated answer | + +## Configuration + +| Field | Description | +| ------- | ----------------------------------------------- | +| Model | Model profile or custom model ID (see below) | +| API Key | Nebius Token Factory API key (`NEBIUS_API_KEY`) | + +The API key can be supplied via the node's **API Key** field or the `NEBIUS_API_KEY` environment variable. + +## Model profiles + +| Profile | Model ID | Context | +| -------------------------- | --------------------------------------- | ------- | +| Llama 3.3 70B _(default)_ | `meta-llama/Llama-3.3-70B-Instruct` | 131,072 | +| Qwen3 235B | `Qwen/Qwen3-235B-A22B` | 131,072 | +| DeepSeek V3 | `deepseek-ai/DeepSeek-V3` | 131,072 | +| Custom | any Token Factory model ID | 131,072 | + +**Custom** — specify any Nebius Token Factory model ID and token limit directly. + +## Upstream docs + +- [Nebius Token Factory model catalogue](https://tokenfactory.nebius.com/models) +- [Nebius AI documentation](https://docs.nebius.com) diff --git a/nodes/src/nodes/llm_nebius/__init__.py b/nodes/src/nodes/llm_nebius/__init__.py new file mode 100644 index 000000000..f56614275 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/__init__.py @@ -0,0 +1,39 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +from .IGlobal import IGlobal +from .IInstance import IInstance + + +def getChat(): + """Get the Chat class from the module.""" + from .nebius import Chat + + return Chat + + +__all__ = [ + 'IGlobal', + 'IInstance', + 'getChat', +] diff --git a/nodes/src/nodes/llm_nebius/nebius.py b/nodes/src/nodes/llm_nebius/nebius.py new file mode 100644 index 000000000..70d3c1b93 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/nebius.py @@ -0,0 +1,74 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +"""Nebius Token Factory binding for the ChatLLM (OpenAI-compatible).""" + +import os +from typing import Any, Dict +from openai import AuthenticationError, APIError, RateLimitError, APIConnectionError +from ai.common.chat import ChatBase +from ai.common.config import Config +from langchain_openai import ChatOpenAI + +NEBIUS_BASE_URL = 'https://api.tokenfactory.nebius.com/v1/' + + +class Chat(ChatBase): + """Creates a Nebius Token Factory chat bot.""" + + _llm: ChatOpenAI + + def __init__(self, provider: str, connConfig: Dict[str, Any], bag: Dict[str, Any]): + super().__init__(provider, connConfig, bag) + + config = Config.getNodeConfig(provider, connConfig) + + # Resolve the key from config first, then the NEBIUS_API_KEY env var. + # The 'sk-dummy' placeholder lets the client initialise before a key is + # saved (e.g. during config-only open mode). + apikey = config.get('apikey') or os.environ.get('NEBIUS_API_KEY') or 'sk-dummy' + + self._llm = ChatOpenAI( + model=self._model, + base_url=NEBIUS_BASE_URL, + api_key=apikey, + temperature=0, + max_tokens=self._modelOutputTokens, + ) + + bag['chat'] = self + + def is_retryable_error(self, error): + return isinstance(error, (RateLimitError, APIConnectionError)) + + def map_exception(self, error): + if isinstance(error, AuthenticationError): + return ValueError('Invalid Nebius API key.') + elif isinstance(error, RateLimitError): + return ValueError(f'Nebius rate limit: {error}') + elif isinstance(error, APIConnectionError): + return ValueError('Failed to connect to the Nebius Token Factory API.') + elif isinstance(error, APIError): + return ValueError(f'Nebius API error: {error}') + else: + return super().map_exception(error) diff --git a/nodes/src/nodes/llm_nebius/nebius.svg b/nodes/src/nodes/llm_nebius/nebius.svg new file mode 100644 index 000000000..461837eaf --- /dev/null +++ b/nodes/src/nodes/llm_nebius/nebius.svg @@ -0,0 +1 @@ + diff --git a/nodes/src/nodes/llm_nebius/requirements.txt b/nodes/src/nodes/llm_nebius/requirements.txt new file mode 100644 index 000000000..cd7bae8f6 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/requirements.txt @@ -0,0 +1,2 @@ +langchain-openai +openai diff --git a/nodes/src/nodes/llm_nebius/services.json b/nodes/src/nodes/llm_nebius/services.json new file mode 100644 index 000000000..3493e0b15 --- /dev/null +++ b/nodes/src/nodes/llm_nebius/services.json @@ -0,0 +1,195 @@ +{ + // + // Required: + // The displayable name of this node + // + "title": "Nebius", + // + // Required: + // The protocol is the endpoint protocol + // + "protocol": "llm_nebius://", + // + // Required: + // Class type of the node - what it does + // + "classType": ["llm"], + // + // Required: + // Capabilities are flags that change the behavior of the underlying + // engine + // + "capabilities": ["invoke"], + // + // Optional: + // Register is either filter, endpoint or ignored if not specified. If the + // type is specified, a factory is registered of that given type + // + "register": "filter", + // + // Optional: + // The node is the actual physical node to instantiate - if + // not specified, the protocol will be used + // + "node": "python", + // + // Optional: + // The path is the executable/script code - it is node dependent + // and is optional for most node + // + "path": "nodes.llm_nebius", + // + // Required: + // The prefix map when added/removed when converting URLs <=> paths + // + "prefix": "llm", + // + // Optional: + // Description of this driver + // + "description": ["Connects to Nebius Token Factory's OpenAI-compatible inference API.", "Hosts open models (Llama, Qwen, DeepSeek) for reasoning, generation, and tool-calling. Used as an `llm` invoke connection by agents, including Nebius Agentic Search."], + // + // Optional: + // The icon is the icon to display in the UI for this node + // + "icon": "nebius.svg", + "documentation": "https://docs.rocketride.org", + // + // Optional: + // Rendering hints to the UI which indicate which fields of + // the configuration should be used to display information + // on the tile box + // + "tile": ["Model: ${parameters.llm_nebius.profile}"], + // + // Optional: + // As a pipe component, define what this pipe component takes + // and what it produces + // + "lanes": { + "questions": ["answers"] + }, + // + // Optional: + // Profile section are configuration options used by the driver + // itself + // + "preconfig": { + // Define the values that will be merged into any profile configuration + // specified, unless the profile is 'absolute' + "default": "llama-3-3-70b", + // Defines profiles used with the "profile": key + "profiles": { + "llama-3-3-70b": { + "title": "Llama 3.3 70B Instruct", + "model": "meta-llama/Llama-3.3-70B-Instruct", + "apikey": "", + "modelTotalTokens": 131072 + }, + "qwen3-235b": { + "title": "Qwen3 235B", + "model": "Qwen/Qwen3-235B-A22B", + "apikey": "", + "modelTotalTokens": 131072 + }, + "deepseek-v3": { + "title": "DeepSeek V3", + "model": "deepseek-ai/DeepSeek-V3", + "apikey": "", + "modelTotalTokens": 131072 + }, + "custom": { + "model": "", + "apikey": "", + "modelTotalTokens": 131072 + } + } + }, + // + // Optional: + // Local fields definitions - these define fields only for the + // current service. You may specify them here, or directly + // in the shape + // + "fields": { + "model": { + "type": "string", + "title": "Model", + "description": "Nebius Token Factory model id (e.g. meta-llama/Llama-3.3-70B-Instruct). Full list: https://tokenfactory.nebius.com/models" + }, + "modelTotalTokens": { + "type": "number", + "title": "Tokens", + "description": "Total Tokens" + }, + "llm_nebius.llama-3-3-70b": { + "object": "llama-3-3-70b", + "properties": ["llm.cloud.apikey"] + }, + "llm_nebius.qwen3-235b": { + "object": "qwen3-235b", + "properties": ["llm.cloud.apikey"] + }, + "llm_nebius.deepseek-v3": { + "object": "deepseek-v3", + "properties": ["llm.cloud.apikey"] + }, + "llm_nebius.custom": { + "object": "custom", + "properties": ["model", "modelTotalTokens", "llm.cloud.apikey"] + }, + "llm_nebius.profile": { + "title": "Model", + "description": "Nebius Token Factory model", + "type": "string", + "default": "llama-3-3-70b", + "enum": ["*>preconfig.profiles.*.title"], + "conditional": [ + { + "value": "llama-3-3-70b", + "properties": ["llm_nebius.llama-3-3-70b"] + }, + { + "value": "qwen3-235b", + "properties": ["llm_nebius.qwen3-235b"] + }, + { + "value": "deepseek-v3", + "properties": ["llm_nebius.deepseek-v3"] + }, + { + "value": "custom", + "properties": ["llm_nebius.custom"] + } + ] + } + }, + // + // Required: + // Defines the fields (shape) of the service. Either source or target + // may be specified, or both, but at least one is required + // + "shape": [ + { + "section": "Pipe", + "title": "Nebius", + "properties": ["llm_nebius.profile"] + } + ], + "test": { + "requires": ["NEBIUS_API_KEY"], + "profiles": ["llama-3-3-70b"], + "outputs": ["answers"], + "cases": [ + { + "name": "LLM returns mock response", + "text": "What is 2+2?", + "expect": { + "answers": { + "contains": "Mock LLM response" + } + } + } + ] + } +} diff --git a/nodes/src/nodes/tool_tavily/IGlobal.py b/nodes/src/nodes/tool_tavily/IGlobal.py new file mode 100644 index 000000000..10bb67c93 --- /dev/null +++ b/nodes/src/nodes/tool_tavily/IGlobal.py @@ -0,0 +1,81 @@ +# ============================================================================= +# RocketRide Engine +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +""" +Tavily tool node - global (shared) state. + +Reads the Tavily API key and search configuration from the node config. +Tool logic lives on IInstance via @tool_function. +""" + +from __future__ import annotations + +import os + +from ai.common.config import Config +from rocketlib import IGlobalBase, OPEN_MODE, error, warning + + +class IGlobal(IGlobalBase): + """Global state for tool_tavily.""" + + apikey: str = '' + max_results: int = 5 + search_depth: str = 'advanced' + topic: str = 'general' + + def beginGlobal(self) -> None: + if self.IEndpoint.endpoint.openMode == OPEN_MODE.CONFIG: + return + + cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig) + + apikey = str(cfg.get('apikey') or os.environ.get('TAVILY_API_KEY', '')).strip() + + if not apikey: + error('tool_tavily: apikey is required — set it in node config or TAVILY_API_KEY env var') + raise ValueError('tool_tavily: apikey is required') + + self.apikey = apikey + raw_max = cfg.get('maxResults', 5) + if raw_max is None: + raw_max = 5 + self.max_results = max(1, min(20, int(raw_max))) + search_depth = str(cfg.get('searchDepth') or 'advanced').strip() + self.search_depth = search_depth if search_depth in ('basic', 'advanced') else 'advanced' + topic = str(cfg.get('topic') or 'general').strip() + self.topic = topic if topic in ('general', 'news', 'finance') else 'general' + + def validateConfig(self) -> None: + try: + cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig) + apikey = str(cfg.get('apikey') or os.environ.get('TAVILY_API_KEY', '')).strip() + if not apikey: + warning('apikey is required') + except Exception as e: + warning(str(e)) + + def endGlobal(self) -> None: + self.apikey = '' diff --git a/nodes/src/nodes/tool_tavily/IInstance.py b/nodes/src/nodes/tool_tavily/IInstance.py new file mode 100644 index 000000000..0015fc28d --- /dev/null +++ b/nodes/src/nodes/tool_tavily/IInstance.py @@ -0,0 +1,258 @@ +# ============================================================================= +# RocketRide Engine +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +""" +Tavily tool node instance. + +Exposes ``tavily`` as a @tool_function for real-time web search via the Tavily API. +""" + +from __future__ import annotations + +import ipaddress +import socket +import time +from typing import Any, Dict +from urllib.parse import urlparse + +import requests + +from rocketlib import IInstanceBase, tool_function, debug + +from ai.common.utils import normalize_tool_input + +from .IGlobal import IGlobal + +TAVILY_API_URL = 'https://api.tavily.com/search' +VALID_SEARCH_DEPTHS = {'basic', 'advanced'} +VALID_TOPICS = {'general', 'news', 'finance'} +VALID_TIME_RANGES = {'day', 'week', 'month', 'year'} + + +class IInstance(IInstanceBase): + """Node instance exposing Tavily web search as an agent tool.""" + + IGlobal: IGlobal + + @tool_function( + input_schema={ + 'type': 'object', + 'required': ['query'], + 'properties': { + 'query': { + 'type': 'string', + 'description': 'The search query — a natural language question or keyword phrase.', + }, + 'max_results': { + 'type': 'integer', + 'description': 'Number of results to return (1-20). Defaults to the node config value.', + }, + 'search_depth': { + 'type': 'string', + 'enum': sorted(VALID_SEARCH_DEPTHS), + 'description': '"basic" (fast) or "advanced" (deeper). Defaults to node config.', + }, + 'topic': { + 'type': 'string', + 'enum': sorted(VALID_TOPICS), + 'description': 'Search category: "general", "news", or "finance".', + }, + 'time_range': { + 'type': 'string', + 'enum': ['day', 'week', 'month', 'year'], + 'description': 'Restrict results to a recent time window.', + }, + 'include_domains': { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': 'Only return results from these domains.', + }, + 'exclude_domains': { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': 'Exclude results from these domains.', + }, + }, + }, + output_schema={ + 'type': 'object', + 'properties': { + 'success': {'type': 'boolean'}, + 'query': {'type': 'string'}, + 'num_results': {'type': 'integer'}, + 'results': {'type': 'array', 'items': {'type': 'object'}}, + 'error': {'type': 'string'}, + }, + }, + description='Search the web in real time using Tavily. Provide a natural language query to find relevant, current web pages. Returns structured results with title, URL, content snippet, and relevance score.', + ) + def tavily(self, args): + """Search the web using the Tavily API.""" + args = normalize_tool_input(args, tool_name='tavily') + + query = (args.get('query') or '').strip() + if not query: + return { + 'success': False, + 'query': '', + 'num_results': 0, + 'results': [], + 'error': 'query is required and must be a non-empty string', + } + + cfg = self.IGlobal + + max_results = args.get('max_results', cfg.max_results) + if isinstance(max_results, bool) or not isinstance(max_results, int): + max_results = cfg.max_results + search_depth = args.get('search_depth', cfg.search_depth) + if search_depth not in VALID_SEARCH_DEPTHS: + search_depth = cfg.search_depth + topic = args.get('topic', cfg.topic) + if topic not in VALID_TOPICS: + topic = cfg.topic + + payload: Dict[str, Any] = { + 'query': query, + 'max_results': max(1, min(20, max_results)), + 'search_depth': search_depth, + 'topic': topic, + } + time_range = args.get('time_range') + if time_range in VALID_TIME_RANGES: + payload['time_range'] = time_range + include_domains = args.get('include_domains') + if include_domains and isinstance(include_domains, list): + payload['include_domains'] = include_domains + exclude_domains = args.get('exclude_domains') + if exclude_domains and isinstance(exclude_domains, list): + payload['exclude_domains'] = exclude_domains + + headers = { + 'accept': 'application/json', + 'content-type': 'application/json', + 'authorization': f'Bearer {cfg.apikey}', + } + + try: + body = _request_with_retry(url=TAVILY_API_URL, headers=headers, payload=payload) + except RuntimeError as exc: + return {'success': False, 'query': query, 'num_results': 0, 'results': [], 'error': str(exc)} + + return _shape_results(query, body) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _shape_results(query: str, body: Dict[str, Any]) -> Dict[str, Any]: + """Map a Tavily response body into the tool's output schema, dropping unsafe URLs.""" + results = [] + for item in body.get('results', []) or []: + url = item.get('url', '') + if not url: + continue + try: + url = _validate_public_url(url) + except ValueError: + continue + results.append( + { + 'title': item.get('title', ''), + 'url': url, + 'content': item.get('content', ''), + 'score': item.get('score'), + 'published_date': item.get('published_date'), + } + ) + return {'success': True, 'query': query, 'num_results': len(results), 'results': results} + + +def _validate_public_url(raw_url: str) -> str: + """Reject private/loopback/reserved hosts to prevent SSRF (clone of search_exa).""" + parsed = urlparse(raw_url) + if parsed.scheme not in ('http', 'https') or not parsed.hostname: + raise ValueError(f'Tavily returned an invalid URL: {raw_url}') + try: + addrinfo = socket.getaddrinfo(parsed.hostname, None, type=socket.SOCK_STREAM) + except socket.gaierror as e: + raise ValueError(f'Tavily returned an unresolved URL host: {parsed.hostname}') from e + for _, _, _, _, sockaddr in addrinfo: + ip = ipaddress.ip_address(sockaddr[0]) + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + or ip.is_unspecified + ): + raise ValueError(f'Tavily returned a blocked URL host: {parsed.hostname}') + return raw_url + + +def _request_with_retry( + *, url: str, headers: Dict[str, str], payload: Dict[str, Any], max_retries: int = 3, base_delay: float = 2.0 +) -> Dict[str, Any]: + """POST to the Tavily API with exponential-backoff retry on 429/5xx (clone of tool_exa_search).""" + for attempt in range(max_retries + 1): + try: + resp = requests.post(url, headers=headers, json=payload, timeout=30) + + if resp.status_code == 429: + if attempt < max_retries: + delay = base_delay * (2**attempt) + debug(f'Tavily rate limit hit (429), retrying in {delay}s (attempt {attempt + 1}/{max_retries})') + time.sleep(delay) + continue + resp.raise_for_status() + + if 500 <= resp.status_code < 600: + if attempt < max_retries: + delay = base_delay * (2**attempt) + debug( + f'Tavily server error ({resp.status_code}), retrying in {delay}s (attempt {attempt + 1}/{max_retries})' + ) + time.sleep(delay) + continue + resp.raise_for_status() + + resp.raise_for_status() + return resp.json() + + except requests.exceptions.Timeout: + if attempt < max_retries: + delay = base_delay * (2**attempt) + debug(f'Tavily request timeout, retrying in {delay}s ({attempt + 1}/{max_retries})') + time.sleep(delay) + continue + raise RuntimeError('Tavily: request timed out after all retries') from None + except requests.RequestException as exc: + status = getattr(getattr(exc, 'response', None), 'status_code', None) + detail = f' (HTTP {status})' if status else '' + raise RuntimeError(f'Tavily request failed{detail}: {type(exc).__name__}') from None + raise RuntimeError('Tavily: max retries exceeded') diff --git a/nodes/src/nodes/tool_tavily/README.md b/nodes/src/nodes/tool_tavily/README.md new file mode 100644 index 000000000..69871ae1b --- /dev/null +++ b/nodes/src/nodes/tool_tavily/README.md @@ -0,0 +1,26 @@ +# tool_tavily + +Exposes [Tavily](https://tavily.com) real-time web search as an agent tool node. + +## What it does + +Agents invoke this node via the tool invoke channel. The node performs a live web search using the Tavily API and returns structured results containing titles, URLs, content snippets, and relevance scores. + +Because `lanes` is empty (`{}`), this node has no pipeline input/output lanes — it is consumed exclusively by agent runtimes through the `invoke` capability. + +## Setup + +Set your Tavily API key via the node config field **API Key** or the environment variable: + +``` +TAVILY_API_KEY=tvly-... +``` + +## Config fields + +| Field | Default | Description | +| ------------ | ---------- | ------------------------------------------------ | +| API Key | *(empty)* | Tavily API key (from https://tavily.com). Encrypted at rest. | +| Max Results | `5` | Maximum number of results returned (1–20). | +| Search Depth | `advanced` | `basic` or `advanced` — controls result quality. | +| Topic | `general` | `general`, `news`, or `finance`. | diff --git a/nodes/src/nodes/tool_tavily/__init__.py b/nodes/src/nodes/tool_tavily/__init__.py new file mode 100644 index 000000000..37de11f28 --- /dev/null +++ b/nodes/src/nodes/tool_tavily/__init__.py @@ -0,0 +1,29 @@ +# ============================================================================= +# RocketRide Engine +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +from .IGlobal import IGlobal +from .IInstance import IInstance + +__all__ = ['IGlobal', 'IInstance'] diff --git a/nodes/src/nodes/tool_tavily/requirements.txt b/nodes/src/nodes/tool_tavily/requirements.txt new file mode 100644 index 000000000..f2293605c --- /dev/null +++ b/nodes/src/nodes/tool_tavily/requirements.txt @@ -0,0 +1 @@ +requests diff --git a/nodes/src/nodes/tool_tavily/services.json b/nodes/src/nodes/tool_tavily/services.json new file mode 100644 index 000000000..75b6dca7a --- /dev/null +++ b/nodes/src/nodes/tool_tavily/services.json @@ -0,0 +1,147 @@ +{ + // + // Required: + // The displayable name of this node + // + "title": "Tavily", + // + // Required: + // The protocol is the endpoint protocol + // + "protocol": "tool_tavily://", + // + // Required: + // Class type of the node - what it does + // + "classType": ["tool"], + // + // Required: + // Capabilities are flags that change the behavior of the underlying engine. + // "experimental" marks this node as not yet production-ready. + // + "capabilities": ["invoke", "experimental"], + // + // Optional: + // Register is either filter, endpoint or ignored if not specified. + // + "register": "filter", + // + // Optional: + // The node is the actual physical node to instantiate. + // + "node": "python", + // + // Optional: + // The path is the executable/script code. + // + "path": "nodes.tool_tavily", + // + // Required: + // The prefix map when converting URLs <=> paths. + // + "prefix": "tavily", + // + // Optional: + // The icon to display in the UI for this node. + // + "icon": "tavily.svg", + // + // Optional: + // Description of this node shown in the pipeline builder. + // + "description": ["Exposes Tavily real-time web search as an agent tool.", "Performs live web searches via the Tavily API and returns structured results with titles, URLs, content snippets, and relevance scores."], + "tile": [], + "lanes": {}, + // + // Optional: + // Profile section - configuration options used by the driver itself. + // The default profile is applied when the node is first added to a pipeline. + // + "preconfig": { + "default": "default", + "profiles": { + "default": { + "title": "Tavily", + "apikey": "", + "maxResults": 5, + "searchDepth": "advanced", + "topic": "general" + } + } + }, + // + // Optional: + // Local field definitions. These define the config fields exposed in the UI. + // "secure: true" fields are encrypted at rest and masked in the UI. + // + "fields": { + "tool_tavily.apikey": { + "type": "string", + "title": "API Key", + "description": "Tavily API key (from https://tavily.com)", + "default": "", + "secure": true, + "ui": { + "ui:widget": "ApiKeyWidget" + } + }, + "tool_tavily.maxResults": { + "type": "integer", + "title": "Max Results", + "description": "Maximum number of search results to return (1-20)", + "default": 5, + "minimum": 1, + "maximum": 20 + }, + "tool_tavily.searchDepth": { + "type": "string", + "title": "Search Depth", + "description": "Tavily search depth", + "default": "advanced", + "enum": [ + ["basic", "Basic"], + ["advanced", "Advanced"] + ] + }, + "tool_tavily.topic": { + "type": "string", + "title": "Topic", + "description": "Search topic category", + "default": "general", + "enum": [ + ["general", "General"], + ["news", "News"], + ["finance", "Finance"] + ] + } + }, + // + // Test configuration for automated node testing. + // Gated on TAVILY_API_KEY: the dynamic test builds a pipeline and runs the + // node, which calls beginGlobal() and therefore needs a key, so the case is + // skipped when the key is absent (e.g. CI without the secret). Node + // structure is still validated by the keyless contract tests. + // + "test": { + "requires": ["TAVILY_API_KEY"], + "profiles": ["default"], + "outputs": [], + "cases": [ + { + "name": "Tavily smoke test (requires key)", + "text": "test query" + } + ] + }, + // + // Required: + // Defines the fields (shape) shown in the pipeline builder side panel. + // + "shape": [ + { + "section": "Pipe", + "title": "Tavily", + "properties": ["type", "tool_tavily.apikey", "tool_tavily.maxResults", "tool_tavily.searchDepth", "tool_tavily.topic"] + } + ] +} diff --git a/nodes/src/nodes/tool_tavily/tavily.svg b/nodes/src/nodes/tool_tavily/tavily.svg new file mode 100644 index 000000000..642d33ee2 --- /dev/null +++ b/nodes/src/nodes/tool_tavily/tavily.svg @@ -0,0 +1 @@ + diff --git a/nodes/test/test_tool_tavily.py b/nodes/test/test_tool_tavily.py new file mode 100644 index 000000000..18d226f8a --- /dev/null +++ b/nodes/test/test_tool_tavily.py @@ -0,0 +1,136 @@ +# ============================================================================= +# RocketRide Engine +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +"""Unit tests for tool_tavily pure helpers (no network).""" + +from __future__ import annotations + +import sys +from pathlib import Path +from unittest.mock import MagicMock + +# --------------------------------------------------------------------------- +# Bootstrap: when run under a bare interpreter that lacks the engine runtime +# (rocketlib, ai.common, requests), inject lightweight stubs ONLY for modules +# that are not already present, import the module under test, then REMOVE the +# stubs we added. Restoring is essential: under the full `builder nodes:test-full` +# run these modules are real and shared across the whole pytest session, so a +# leaked MagicMock stub would break unrelated nodes' tests (e.g. tool_git, +# tool_filesystem, which rely on the real rocketlib schema helpers). The pure +# helpers under test (_shape_results, _validate_public_url) hold no runtime +# dependency on the stubbed modules, so dropping the stubs after import is safe. +# --------------------------------------------------------------------------- + +import importlib + +# Add nodes/src to sys.path so `nodes.tool_tavily.IInstance` is resolvable. +_NODES_SRC = Path(__file__).resolve().parents[1] / 'src' +if str(_NODES_SRC) not in sys.path: + sys.path.insert(0, str(_NODES_SRC)) + + +def _build_import_stubs(): + """Return {module_name: stub} for the deps needed only to import the module.""" + rocketlib = MagicMock() + rocketlib.IInstanceBase = object # must be a real class for inheritance + rocketlib.IGlobalBase = object + rocketlib.tool_function = lambda **kwargs: lambda f: f # pass-through decorator + rocketlib.debug = lambda *a, **kw: None + rocketlib.error = lambda *a, **kw: None + rocketlib.warning = lambda *a, **kw: None + rocketlib.OPEN_MODE = MagicMock() + + depends = MagicMock() + depends.depends = lambda *a, **kw: None + + ai_common_utils = MagicMock() + ai_common_utils.normalize_tool_input = lambda args, **kw: args if isinstance(args, dict) else {} + + requests = MagicMock() + requests.exceptions = MagicMock() + requests.exceptions.Timeout = TimeoutError + requests.exceptions.RequestException = Exception + + return { + 'rocketlib': rocketlib, + 'depends': depends, + 'ai': MagicMock(), + 'ai.common': MagicMock(), + 'ai.common.utils': ai_common_utils, + 'ai.common.config': MagicMock(), + 'requests': requests, + } + + +_added_stubs = [] +for _name, _stub in _build_import_stubs().items(): + if _name not in sys.modules: + sys.modules[_name] = _stub + _added_stubs.append(_name) + +mod = importlib.import_module('nodes.tool_tavily.IInstance') + +# Drop the stubs we injected so they never leak into the shared pytest session. +for _name in _added_stubs: + sys.modules.pop(_name, None) + + +def test_shape_results_maps_tavily_fields(monkeypatch): + import socket + + # _shape_results validates each URL via _validate_public_url -> socket.getaddrinfo; + # stub DNS so the test stays network-independent (offline CI safe). + monkeypatch.setattr( + socket, + 'getaddrinfo', + lambda *a, **k: [(socket.AF_INET, socket.SOCK_STREAM, 0, '', ('93.184.216.34', 0))], + ) + body = {'results': [{'title': 'T', 'url': 'https://example.com', 'content': 'snippet', 'score': 0.9}]} + shaped = mod._shape_results('q', body) + assert shaped['success'] is True + assert shaped['query'] == 'q' + assert shaped['num_results'] == 1 + assert shaped['results'][0]['url'] == 'https://example.com' + assert shaped['results'][0]['score'] == 0.9 + assert shaped['results'][0]['content'] == 'snippet' + assert shaped['results'][0]['published_date'] is None + + +def test_validate_public_url_rejects_loopback(): + import pytest + + with pytest.raises(ValueError): + mod._validate_public_url('http://127.0.0.1/secret') + + +def test_validate_public_url_allows_public_https(monkeypatch): + import socket + + monkeypatch.setattr( + socket, + 'getaddrinfo', + lambda *a, **k: [(socket.AF_INET, socket.SOCK_STREAM, 0, '', ('93.184.216.34', 0))], + ) + assert mod._validate_public_url('https://example.com/page') == 'https://example.com/page'