Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
282 changes: 282 additions & 0 deletions nodes/src/nodes/library/ssrf_protection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
# =============================================================================
# MIT License
# Copyright (c) 2024 RocketRide Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

"""
SSRF (Server-Side Request Forgery) protection utilities.

Validates URLs and resolved IP addresses to prevent requests to private,
loopback, link-local, and reserved IP ranges. Supports a configurable
allowlist so self-hosted operators can permit specific internal services.

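DNS resolution is performed before the IP check so that a hostname which
resolves to an internal address is rejected. Note that this check alone does
not fully prevent DNS rebinding attacks (where a hostname resolves to a
public IP during validation but to an internal one when the request is
sent): the validated URL is returned unchanged, so the HTTP client resolves
the hostname again when it connects.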

Usage::

    from library.ssrf_protection import validate_url, SSRFError

    # Block all private IPs (default)
    validate_url('http://192.168.1.1/api')  # raises SSRFError

    # Allow specific private ranges
    validate_url(
        'http://192.168.1.100/api',
        allowed_private=['192.168.1.0/24'],
    )
"""

from __future__ import annotations

import ipaddress
import logging
import os
import socket
from typing import List, Optional, Sequence
from urllib.parse import urlparse

# ---------------------------------------------------------------------------
# Blocked networks (RFC 1918, loopback, link-local, metadata, etc.)
# ---------------------------------------------------------------------------

# IPv4 ranges that must never be reached unless explicitly allowlisted.
_BLOCKED_IPV4 = [
    ipaddress.IPv4Network(cidr)
    for cidr in (
        '0.0.0.0/8',  # "This host" (RFC 1122)
        '10.0.0.0/8',  # Private (RFC 1918)
        '100.64.0.0/10',  # Shared address (RFC 6598)
        '127.0.0.0/8',  # Loopback (RFC 1122)
        '169.254.0.0/16',  # Link-local (RFC 3927) + cloud metadata
        '172.16.0.0/12',  # Private (RFC 1918)
        '192.0.0.0/24',  # IETF protocol assignments (RFC 6890)
        '192.0.2.0/24',  # Documentation (RFC 5737)
        '192.168.0.0/16',  # Private (RFC 1918)
        '198.18.0.0/15',  # Benchmarking (RFC 2544)
        '198.51.100.0/24',  # Documentation (RFC 5737)
        '203.0.113.0/24',  # Documentation (RFC 5737)
        '224.0.0.0/4',  # Multicast (RFC 5771)
        '240.0.0.0/4',  # Reserved (RFC 1112)
        '255.255.255.255/32',  # Broadcast
    )
]

# IPv6 ranges that must never be reached unless explicitly allowlisted.
_BLOCKED_IPV6 = [
    ipaddress.IPv6Network(cidr)
    for cidr in (
        '::1/128',  # Loopback
        '::/128',  # Unspecified
        '::ffff:0:0/96',  # IPv4-mapped (checked via mapped v4)
        '64:ff9b::/96',  # NAT64 (RFC 6052)
        '100::/64',  # Discard (RFC 6666)
        '2001:db8::/32',  # Documentation (RFC 3849)
        'fc00::/7',  # Unique local (RFC 4193)
        'fe80::/10',  # Link-local (RFC 4291)
        'ff00::/8',  # Multicast (RFC 4291)
    )
]

# Hostnames rejected by exact match before any DNS lookup is attempted.
_BLOCKED_HOSTNAMES = frozenset(('localhost', 'metadata.google.internal'))
Comment on lines +92 to +97
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Consider adding additional cloud metadata hostnames.

The current list blocks localhost and metadata.google.internal. For more comprehensive cloud protection, consider adding:

  • metadata.google.internal ✓ (present)
  • AWS: IP-based (169.254.169.254 ✓ blocked by range)
  • Azure: metadata.azure.com, management.azure.com (some instance metadata scenarios)
  • DigitalOcean, Oracle Cloud, etc.

This is optional since the IP-based blocking catches most metadata endpoints.

💡 Optional: Expand blocked hostnames
 _BLOCKED_HOSTNAMES = frozenset(
     {
         'localhost',
         'metadata.google.internal',
+        'metadata.azure.com',
+        'instance-data',  # some Linode/legacy
     }
 )
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
_BLOCKED_HOSTNAMES = frozenset(
{
'localhost',
'metadata.google.internal',
}
)
_BLOCKED_HOSTNAMES = frozenset(
{
'localhost',
'metadata.google.internal',
'metadata.azure.com',
'instance-data', # some Linode/legacy
}
)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/library/ssrf_protection.py` around lines 92 - 97, Update the
_BLOCKED_HOSTNAMES frozenset in ssrf_protection.py to include additional cloud
metadata hostnames (e.g., 'metadata.azure.com', 'management.azure.com', and
other provider metadata hosts you want to cover) so the SSRF protection covers
Azure and other cloud providers in addition to existing 'localhost' and
'metadata.google.internal'; modify the _BLOCKED_HOSTNAMES set literal (symbol:
_BLOCKED_HOSTNAMES) to add these hostnames and keep the existing IP-range-based
checks for AWS/other providers intact.


# Name of the environment variable holding the global allowlist
# (comma-separated CIDRs).
SSRF_ALLOWLIST_ENV = 'ROCKETRIDE_SSRF_ALLOWLIST'

# URL schemes accepted by validate_url; any other scheme is rejected.
_ALLOWED_SCHEMES = frozenset(('http', 'https'))


# ---------------------------------------------------------------------------
# Exceptions
# ---------------------------------------------------------------------------


class SSRFError(ValueError):
    """Signal that a URL or hostname was rejected by SSRF validation.

    Raised for a disallowed scheme, a missing or blocked hostname, a hostname
    that cannot be resolved, or an address inside a blocked (private/reserved)
    range. Subclasses ``ValueError``, so ``except ValueError`` also catches it.
    """


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def validate_url(
    url: str,
    *,
    allowed_private: Optional[Sequence[str]] = None,
) -> str:
    """Validate *url* against SSRF rules and return it unchanged.

    Parameters
    ----------
    url:
        The URL to validate (must use ``http`` or ``https`` scheme).
    allowed_private:
        An optional list of CIDR strings (e.g. ``['192.168.1.0/24']``) that
        should be permitted even though they fall within blocked ranges.
        This is merged with the global allowlist from the
        ``ROCKETRIDE_SSRF_ALLOWLIST`` environment variable.

    Returns
    -------
    str
        The original *url* unchanged, if validation passes.

    Raises
    ------
    SSRFError
        If the URL is malformed, uses a disallowed scheme, has no hostname,
        names a blocked hostname, cannot be resolved, or targets a blocked IP.
    """
    # urlparse() and the .hostname/.port accessors raise a plain ValueError
    # for malformed input (e.g. unbalanced IPv6 brackets, a non-numeric or
    # out-of-range port). Re-raise as SSRFError so callers only ever see the
    # documented exception type.
    try:
        parsed = urlparse(url)
        raw_hostname = parsed.hostname
        explicit_port = parsed.port
    except ValueError as exc:
        raise SSRFError(f'SSRF protection: malformed URL: {exc}') from exc

    # -- Scheme check -------------------------------------------------------
    scheme = (parsed.scheme or '').lower()
    if scheme not in _ALLOWED_SCHEMES:
        raise SSRFError(f'SSRF protection: scheme {scheme!r} is not allowed. Only {sorted(_ALLOWED_SCHEMES)} are permitted.')

    # -- Extract hostname ---------------------------------------------------
    # Surrounding dots are stripped so 'localhost.' matches the blocked-name set.
    hostname = (raw_hostname or '').lower().strip('.')
    if not hostname:
        raise SSRFError('SSRF protection: URL has no hostname.')

    # -- Blocked hostname check ---------------------------------------------
    if hostname in _BLOCKED_HOSTNAMES:
        raise SSRFError(f'SSRF protection: hostname {hostname!r} is blocked.')

    # -- Build combined allowlist -------------------------------------------
    allow_nets = _build_allowlist(allowed_private)

    # -- DNS resolution + IP check ------------------------------------------
    # Fall back to the scheme's default port when none is given explicitly.
    port = explicit_port or (443 if scheme == 'https' else 80)
    _resolve_and_check(hostname, port, allow_nets)

    return url


def resolve_and_validate(
    hostname: str,
    port: int = 80,
    *,
    allowed_private: Optional[Sequence[str]] = None,
) -> List[str]:
    """Resolve *hostname* and vet every address it maps to.

    The per-call *allowed_private* CIDRs are merged with the global allowlist
    before the check. Returns the resolved IP address strings; raises
    ``SSRFError`` if any resolved address is blocked.
    """
    return _resolve_and_check(hostname, port, _build_allowlist(allowed_private))


# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------


def _build_allowlist(
    extra: Optional[Sequence[str]] = None,
) -> list[ipaddress.IPv4Network | ipaddress.IPv6Network]:
    """Merge the per-call allowlist with the global env-var allowlist.

    Entries from the ``SSRF_ALLOWLIST_ENV`` environment variable
    (comma-separated) come first, followed by the entries in *extra*. Each
    entry is parsed with ``strict=False``, so host bits in a CIDR are
    tolerated. Blank entries are skipped; malformed entries are skipped with
    a logged warning so a broken allowlist is visible to the operator.

    A bare string passed as *extra* is treated as one CIDR rather than being
    iterated character by character.
    """
    logger = logging.getLogger(__name__)

    per_call = [extra] if isinstance(extra, str) else (extra or [])
    sources = (
        (f'environment variable {SSRF_ALLOWLIST_ENV}', os.environ.get(SSRF_ALLOWLIST_ENV, '').split(',')),
        ('per-call allowlist', per_call),
    )

    nets: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
    for source, entries in sources:
        for entry in entries:
            cidr = str(entry).strip()
            if not cidr:
                continue
            try:
                nets.append(ipaddress.ip_network(cidr, strict=False))
            except ValueError:
                # Keep the best-effort behaviour (skip the entry), but say so.
                logger.warning('Ignoring malformed SSRF allowlist entry %r from %s', cidr, source)

    return nets
Comment on lines +194 to +220
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Silent failure on malformed CIDRs is intentional but worth logging.

The code silently ignores malformed CIDR entries (lines 208-209, 217-218). While this is defensive, operators may not realize their allowlist is partially broken. Consider emitting a warning for malformed entries.

📝 Optional: Log malformed CIDR warnings
+import logging
+
+_logger = logging.getLogger(__name__)
+
 def _build_allowlist(
     extra: Optional[Sequence[str]] = None,
 ) -> list[ipaddress.IPv4Network | ipaddress.IPv6Network]:
     ...
                 try:
                     nets.append(ipaddress.ip_network(cidr, strict=False))
                 except ValueError:
-                    pass  # silently skip malformed entries
+                    _logger.warning('Ignoring malformed SSRF allowlist CIDR: %r', cidr)
🧰 Tools
🪛 Ruff (0.15.7)

[warning] 204-204: for loop variable cidr overwritten by assignment target

(PLW2901)


[warning] 206-209: Use contextlib.suppress(ValueError) instead of try-except-pass

(SIM105)


[warning] 215-218: Use contextlib.suppress(ValueError) instead of try-except-pass

Replace try-except-pass with with contextlib.suppress(ValueError): ...

(SIM105)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/library/ssrf_protection.py` around lines 194 - 220, The
_build_allowlist function currently swallows ValueError for malformed CIDR
strings; change the except blocks to log a warning instead of silently passing:
get a module logger via logging.getLogger(__name__) (or reuse an existing logger
if one is available), and in both exception handlers call logger.warning with
the malformed cidr string and its source (e.g., "SSRF_ALLOWLIST_ENV" vs
"per-call extra") and note that the entry will be skipped; keep the behavior of
skipping malformed entries but surface a clear warning including the bad value
and context.



def _resolve_and_check(
    hostname: str,
    port: int,
    allow_nets: list[ipaddress.IPv4Network | ipaddress.IPv6Network],
) -> List[str]:
    """Resolve *hostname* and check every resulting IP against the block rules.

    An IP literal is checked directly without a DNS lookup. Otherwise the
    name is resolved with ``socket.getaddrinfo`` and each returned address is
    passed to ``_check_ip``; the distinct address strings are collected in
    resolution order.

    Raises ``SSRFError`` if the name cannot be resolved, resolves to nothing,
    or any address is blocked and not covered by *allow_nets*.
    """
    # Keep the try body to the literal parse only: SSRFError subclasses
    # ValueError, so a wider try would swallow the rejection raised by
    # _check_ip and fall through to a pointless DNS lookup.
    try:
        literal = ipaddress.ip_address(hostname)
    except ValueError:
        pass  # not an IP literal — resolve via DNS
    else:
        _check_ip(literal, hostname, allow_nets)
        return [str(literal)]

    try:
        addrinfos = socket.getaddrinfo(hostname, port, proto=socket.IPPROTO_TCP)
    except (socket.gaierror, UnicodeError) as exc:
        # UnicodeError is raised when the name cannot be IDNA-encoded
        # (e.g. an empty or over-long label).
        raise SSRFError(f'SSRF protection: cannot resolve hostname {hostname!r}: {exc}') from exc

    if not addrinfos:
        raise SSRFError(f'SSRF protection: hostname {hostname!r} resolved to no addresses.')

    resolved_ips: List[str] = []
    for _family, _type, _proto, _canonname, sockaddr in addrinfos:
        ip_str = sockaddr[0]
        _check_ip(ipaddress.ip_address(ip_str), hostname, allow_nets)
        if ip_str not in resolved_ips:
            resolved_ips.append(ip_str)
    # `resolved_ips` is returned by the function's closing statement.
Comment on lines +246 to +251
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🧹 Nitpick | 🔵 Trivial

Rename unused loop variable family to _family.

Static analysis correctly flags that family is unpacked but not used. Prefix with underscore to indicate intentional discard.

♻️ Proposed fix
-    for family, _type, _proto, _canonname, sockaddr in addrinfos:
+    for _family, _type, _proto, _canonname, sockaddr in addrinfos:
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
for family, _type, _proto, _canonname, sockaddr in addrinfos:
ip_str = sockaddr[0]
addr = ipaddress.ip_address(ip_str)
_check_ip(addr, hostname, allow_nets)
if ip_str not in resolved_ips:
resolved_ips.append(ip_str)
for _family, _type, _proto, _canonname, sockaddr in addrinfos:
ip_str = sockaddr[0]
addr = ipaddress.ip_address(ip_str)
_check_ip(addr, hostname, allow_nets)
if ip_str not in resolved_ips:
resolved_ips.append(ip_str)
🧰 Tools
🪛 Ruff (0.15.7)

[warning] 246-246: Loop control variable family not used within loop body

Rename unused family to _family

(B007)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/library/ssrf_protection.py` around lines 246 - 251, Rename
the unused loop variable "family" to "_family" in the for-loop that unpacks
addrinfos (for family, _type, _proto, _canonname, sockaddr in addrinfos) inside
ssrf_protection.py so static analysis no longer flags an unused variable; update
the unpacking to (_family, _type, _proto, _canonname, sockaddr) and ensure there
are no other references to "family" in the surrounding function (e.g., _check_ip
usage remains unchanged).


return resolved_ips


def _check_ip(
    addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
    hostname: str,
    allow_nets: list[ipaddress.IPv4Network | ipaddress.IPv6Network],
) -> None:
    """Reject *addr* when it is blocked and not covered by *allow_nets*.

    An IPv4-mapped IPv6 address is checked twice: as the IPv6 address itself
    and as the IPv4 address embedded in it. *hostname* is only used in the
    error message. Raises ``SSRFError`` on the first rejected address.
    """
    candidates = [addr]
    if isinstance(addr, ipaddress.IPv6Address):
        embedded_v4 = addr.ipv4_mapped
        if embedded_v4:
            candidates.append(embedded_v4)

    for check_addr in candidates:
        # Blocked addresses are let through only when an allowlist network contains them.
        if _is_blocked(check_addr) and not any(check_addr in net for net in allow_nets):
            raise SSRFError(f'SSRF protection: request to {hostname!r} blocked — resolved IP {check_addr} is in a private/reserved range. If this is intentional, add the IP or CIDR to the ROCKETRIDE_SSRF_ALLOWLIST environment variable or the node-level allowlist.')


def _is_blocked(addr: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
    """Tell whether *addr* lies inside a blocked network of its own IP version."""
    blocked_nets = _BLOCKED_IPV4 if isinstance(addr, ipaddress.IPv4Address) else _BLOCKED_IPV6
    for net in blocked_nets:
        if addr in net:
            return True
    return False
28 changes: 25 additions & 3 deletions nodes/src/nodes/tool_http_request/IGlobal.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

from __future__ import annotations

import json as _json
import re
from typing import List, Set

Expand Down Expand Up @@ -65,12 +66,14 @@ def beginGlobal(self) -> None:
server_name = str((cfg.get('serverName') or 'http')).strip()

enabled_methods, url_patterns = self._build_guardrails(cfg)
ssrf_allowed_private = self._build_ssrf_allowlist(cfg)

try:
self.driver = HttpDriver(
server_name=server_name,
enabled_methods=enabled_methods,
url_patterns=url_patterns,
ssrf_allowed_private=ssrf_allowed_private,
)
except Exception as e:
warning(str(e))
Expand All @@ -86,10 +89,9 @@ def _build_guardrails(cfg: dict) -> tuple[Set[str], List[re.Pattern]]:

raw_whitelist = cfg.get('urlWhitelist') or []
if not isinstance(raw_whitelist, list):
import json
try:
raw_whitelist = json.loads(str(raw_whitelist))
except (json.JSONDecodeError, TypeError, ValueError):
raw_whitelist = _json.loads(str(raw_whitelist))
except (_json.JSONDecodeError, TypeError, ValueError):
raw_whitelist = []
patterns: List[re.Pattern] = []
for row in raw_whitelist:
Expand All @@ -104,6 +106,26 @@ def _build_guardrails(cfg: dict) -> tuple[Set[str], List[re.Pattern]]:

return enabled, patterns

@staticmethod
def _build_ssrf_allowlist(cfg: dict) -> List[str]:
    """Read the SSRF private-IP allowlist from the node config.

    ``cfg['ssrfAllowlist']`` is expected to be a list of CIDR strings, or a
    JSON-encoded array of them, e.g. ``["192.168.1.0/24", "10.0.0.5/32"]``.

    Returns the non-empty entries, stringified and stripped of surrounding
    whitespace. A missing key, undecodable JSON, or a value that is not a
    list yields ``[]``. Entries are not validated as CIDRs here.
    """
    raw = cfg.get('ssrfAllowlist') or []
    if not isinstance(raw, list):
        try:
            raw = _json.loads(str(raw))
        except (_json.JSONDecodeError, TypeError, ValueError):
            raw = []
    # json.loads may return a number, string, or object. Iterating those
    # would raise TypeError or yield single characters / dict keys, so
    # anything that is not a list is treated as "no allowlist".
    if not isinstance(raw, list):
        return []
    stripped = (str(entry).strip() for entry in raw if entry)
    return [val for val in stripped if val]
Comment on lines +109 to +127
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify ssrfAllowlist field is missing from services.json
rg -n 'ssrfAllowlist' nodes/src/nodes/tool_http_request/services.json || echo "ssrfAllowlist field NOT found in services.json"

Repository: rocketride-org/rocketride-server

Length of output: 122


🏁 Script executed:

# Check how services.json is used in the codebase
rg -n 'services\.json' nodes/src/nodes/tool_http_request/ | head -20

Repository: rocketride-org/rocketride-server

Length of output: 58


🏁 Script executed:

# Look for how cfg parameter is constructed/populated
rg -n 'cfg\[' nodes/src/nodes/tool_http_request/IGlobal.py | head -20

Repository: rocketride-org/rocketride-server

Length of output: 150


🏁 Script executed:

# Check for other optional fields in services.json and how they're handled
rg -n '"default"' nodes/src/nodes/tool_http_request/services.json | head -20

Repository: rocketride-org/rocketride-server

Length of output: 328


🏁 Script executed:

# Search for any test files or examples using ssrfAllowlist
find nodes/src/nodes/tool_http_request -name '*test*' -o -name '*example*' | xargs grep -l 'ssrfAllowlist' 2>/dev/null || echo "No test files found with ssrfAllowlist"

Repository: rocketride-org/rocketride-server

Length of output: 114


🏁 Script executed:

# Look at the overall structure of services.json to understand field patterns
head -60 nodes/src/nodes/tool_http_request/services.json

Repository: rocketride-org/rocketride-server

Length of output: 1515


🏁 Script executed:

# Find where _build_ssrf_allowlist is called and where cfg comes from
rg -n '_build_ssrf_allowlist' nodes/src/nodes/tool_http_request/

Repository: rocketride-org/rocketride-server

Length of output: 290


🏁 Script executed:

# Look for where IGlobal is instantiated and cfg is passed
rg -n 'IGlobal' nodes/src/nodes/tool_http_request/ -A 3

Repository: rocketride-org/rocketride-server

Length of output: 2164


🏁 Script executed:

# Search for config loading/initialization code
rg -n 'cfg.*=' nodes/src/nodes/tool_http_request/IGlobal.py | head -20

Repository: rocketride-org/rocketride-server

Length of output: 242


🏁 Script executed:

# Check if there's documentation about how config is populated
find nodes/src/nodes/tool_http_request -name '*.md' -o -name '*.txt' | xargs grep -l 'config\|cfg' 2>/dev/null || echo "No docs found"

Repository: rocketride-org/rocketride-server

Length of output: 89


🏁 Script executed:

# Look at the __init__ or setup methods to see how cfg is used
rg -n 'def __init__|def setup|def __new__' nodes/src/nodes/tool_http_request/IGlobal.py -A 10

Repository: rocketride-org/rocketride-server

Length of output: 58


🏁 Script executed:

# Find the Config class
rg -n 'class Config' nodes/src/nodes/tool_http_request/

Repository: rocketride-org/rocketride-server

Length of output: 58


🏁 Script executed:

# Find Config imports in IGlobal.py
rg -n 'import.*Config|from.*Config' nodes/src/nodes/tool_http_request/IGlobal.py

Repository: rocketride-org/rocketride-server

Length of output: 114


🏁 Script executed:

# Search in rocketlib for Config class
rg -n 'def getNodeConfig' --type py

Repository: rocketride-org/rocketride-server

Length of output: 548


🏁 Script executed:

# Check if connConfig contains raw/undeclared config values
rg -n 'connConfig' nodes/src/nodes/tool_http_request/ -B 2 -A 2

Repository: rocketride-org/rocketride-server

Length of output: 965


🏁 Script executed:

# Read the Config.getNodeConfig implementation
sed -n '71,150p' packages/ai/src/ai/common/config.py

Repository: rocketride-org/rocketride-server

Length of output: 3338


🏁 Script executed:

# Look for more context around getNodeConfig
sed -n '60,200p' packages/ai/src/ai/common/config.py

Repository: rocketride-org/rocketride-server

Length of output: 5272


🏁 Script executed:

# Look for cfg.get() calls with defaults in other node files, to see if undeclared optional params are common
rg -n 'cfg\.get\(' nodes/src/nodes/ --type py | head -30

Repository: rocketride-org/rocketride-server

Length of output: 3238


🏁 Script executed:

# Check another node's services.json to compare patterns
find nodes/src/nodes -name 'services.json' -type f | head -3 | xargs wc -l

Repository: rocketride-org/rocketride-server

Length of output: 247


🏁 Script executed:

# Compare fields declared in another node vs cfg.get calls
fd 'services.json' nodes/src/nodes/tool_anthropic/ | xargs cat | head -100

Repository: rocketride-org/rocketride-server

Length of output: 194


🏁 Script executed:

# Check tool_python services.json for timeout and allowedModules fields
rg -n 'timeout|allowedModules' nodes/src/nodes/tool_python/services.json

Repository: rocketride-org/rocketride-server

Length of output: 270


🏁 Script executed:

# Check tool_http_request services.json for urlWhitelist
rg -n 'urlWhitelist' nodes/src/nodes/tool_http_request/services.json

Repository: rocketride-org/rocketride-server

Length of output: 357


🏁 Script executed:

# Look at the complete tool_http_request services.json to see all declared fields
cat nodes/src/nodes/tool_http_request/services.json | jq '.fields | keys' 2>/dev/null || rg -n '".*".*:' nodes/src/nodes/tool_http_request/services.json | grep -E '^\s*"[^"]+":' | head -30

Repository: rocketride-org/rocketride-server

Length of output: 373


Add http_request.ssrfAllowlist field declaration to services.json.

The _build_ssrf_allowlist method reads cfg.get('ssrfAllowlist') (line 116), but this field is not declared in the node's services.json. Without this declaration, the configuration system will not expose the field to users via the UI, making the SSRF allowlist feature undiscoverable and non-functional. All other optional parameters in this node (urlWhitelist, serverName) are properly declared in services.json; ssrfAllowlist must be too.

Add to services.json fields section:
"http_request.ssrfAllowlist": {
    "type": "string",
    "title": "SSRF Allowlist",
    "description": "JSON array of CIDR strings to permit (e.g. [\"192.168.1.0/24\"])",
    "default": "[]"
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/tool_http_request/IGlobal.py` around lines 109 - 127, The
node reads cfg.get('ssrfAllowlist') in the _build_ssrf_allowlist method but that
configuration key isn't declared in services.json, so add a new entry for
"http_request.ssrfAllowlist" to the node's services.json fields section; declare
it as a string (or appropriate type) with a title like "SSRF Allowlist", a
description noting it expects a JSON array of CIDR strings (e.g.
["192.168.1.0/24"]), and a sensible default (e.g. "[]") so the UI exposes and
persists the ssrfAllowlist config consumed by _build_ssrf_allowlist.


def validateConfig(self) -> None:
try:
cfg = Config.getNodeConfig(self.glb.logicalType, self.glb.connConfig)
Expand Down
19 changes: 16 additions & 3 deletions nodes/src/nodes/tool_http_request/http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,13 @@

import re
import time
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

import requests
from requests.auth import HTTPBasicAuth

from library.ssrf_protection import validate_url

DEFAULT_TIMEOUT_SECONDS = 30
MAX_TIMEOUT_SECONDS = 300

Expand All @@ -51,14 +53,24 @@ def execute_request(
auth: Optional[Dict[str, Any]] = None,
body: Optional[Dict[str, Any]] = None,
timeout: Optional[float] = None,
ssrf_allowed_private: Optional[List[str]] = None,
) -> Dict[str, Any]:
"""Execute an HTTP request and return a structured response.

Raises ``requests.RequestException`` on transport-level failures.
"""
Parameters
----------
ssrf_allowed_private:
Optional list of CIDR strings that should be permitted even though
they fall within normally-blocked private/reserved IP ranges.

Raises ``requests.RequestException`` on transport-level failures and
``SSRFError`` if the URL targets a blocked IP range.
"""
resolved_url = _resolve_path_params(url, path_params)

# --- SSRF protection: validate the resolved URL before connecting ---
validate_url(resolved_url, allowed_private=ssrf_allowed_private)
Comment on lines +71 to +72
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Critical: SSRF protection bypassed via HTTP redirects.

The SSRF validation at line 72 only checks the initial URL. However, requests.request() follows redirects by default (allow_redirects=True). An attacker can bypass SSRF protection:

  1. Submit https://attacker.com/redirect (passes validation - public IP)
  2. Attacker's server returns 302 redirect to http://169.254.169.254/...
  3. requests follows the redirect without re-validation
  4. Cloud metadata or internal service accessed

Disable automatic redirects and manually validate each hop, or use a custom transport adapter.

🔒 Proposed fix: disable automatic redirects
     req_kwargs: Dict[str, Any] = {
         'method': method.upper(),
         'url': resolved_url,
         'headers': req_headers,
         'params': merged_params or None,
         'auth': req_auth,
+        'allow_redirects': False,
     }

Then handle redirects manually in the response, or document that redirects are intentionally not followed. If redirect-following is required, implement a session with a custom redirect hook that validates each Location header before following.

Also applies to: 99-100

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@nodes/src/nodes/tool_http_request/http_client.py` around lines 71 - 72, The
SSRF check only validates the initial resolved_url via
validate_url(resolved_url, allowed_private=ssrf_allowed_private) but
requests.request follows redirects by default, allowing redirect-based SSRF; fix
by disabling automatic redirects in the HTTP call (set allow_redirects=False on
requests.request used in this module) and implement manual redirect handling: on
3xx responses read the Location header, resolve the next URL, call
validate_url(next_resolved_url, allowed_private=ssrf_allowed_private) before
following, enforce a max redirect count, and repeat; alternatively, if you must
keep automatic redirects, implement a requests.Session with a custom redirect
hook that validates each redirected Location using validate_url before allowing
the redirect.


req_headers = dict(headers or {})
req_auth = None
extra_params: Dict[str, str] = {}
Expand Down Expand Up @@ -95,6 +107,7 @@ def execute_request(
# Internal helpers
# ---------------------------------------------------------------------------


def _resolve_path_params(url: str, path_params: Optional[Dict[str, str]]) -> str:
"""Replace ``:name`` placeholders in the URL with values from *path_params*."""
if not path_params:
Expand Down
Loading
Loading