Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions nodes/src/nodes/db_clickhouse/IGlobal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 Aparavi Software AG
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

import urllib.parse
from typing import Any, Dict

from ai.common.database import DatabaseGlobalBase


class IGlobal(DatabaseGlobalBase):
    """ClickHouse-specific global state.

    Implements the two abstract methods that carry ClickHouse knowledge:
    how to read connection params from the node config, and how to build a
    clickhouse-sqlalchemy DSN from those params. Everything else (schema
    reflection, type inference, session lifecycle) lives in the base.

    The DSN uses the native TCP interface (``clickhouse+native://``, default
    port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no
    foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a
    best-effort primary key, so the dialect-agnostic base works unchanged.
    """

    @staticmethod
    def _normalize_field(value: Any, default: str) -> str:
        """Coerce a config value to a stripped string, returning ``default`` when it is None or empty.

        Non-string values are coerced via ``str()`` first, so a stored null or a
        non-string (e.g. a number) can never raise ``AttributeError`` on ``.strip()``.

        Args:
            value: Raw value read from the node config (any type, may be None).
            default: Fallback used when ``value`` is None or blank after stripping.

        Returns:
            The stripped string form of ``value``, or ``default``.
        """
        if value is None:
            return default
        text = str(value).strip()
        return text or default

    def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Map the node's stored config to a flat ClickHouse connection-params dict.

        Args:
            config: The node config; keys read here are ``host``, ``user``,
                ``password``, ``database``, ``table`` and ``tls``.

        Returns:
            A dict whose values are all strings. ``tls`` is the flag string
            ``'true'`` when TLS is requested and ``''`` otherwise.
        """
        # Config.getNodeConfig() strips the node namespace prefix before returning;
        # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host').
        # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL
        # nodes). It is distinct from the field-level "secure": true attribute on the
        # password field — that attribute only marks the value as a masked secret and
        # is shared identically across all three database nodes.
        tls = config.get('tls', False)
        if isinstance(tls, str):
            # Config values may arrive as strings ('true'/'false'); 'false' must
            # not be truthy, so don't use bool() directly.
            tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'}

        # Do not strip — whitespace is valid in passwords. A truthy non-string
        # (e.g. a numeric password stored as a JSON number) is coerced with
        # str() so the Dict[str, str] contract holds and URL-quoting cannot
        # raise TypeError; None / empty still map to ''.
        raw_password = config.get('password')
        password = str(raw_password) if raw_password else ''

        return {
            'host': self._normalize_field(config.get('host'), 'localhost'),
            'user': self._normalize_field(config.get('user'), 'default'),
            'password': password,
            'database': self._normalize_field(config.get('database'), 'default'),
            'table': self._normalize_field(config.get('table'), 'table'),
            # Normalised to a flag string so the params dict stays Dict[str, str];
            # consumed by _build_connection_url below.
            'tls': 'true' if tls else '',
        }

    def _build_connection_url(self, params: Dict[str, str]) -> str:
        """Build a clickhouse-sqlalchemy native-TCP DSN, enabling TLS when requested.

        Args:
            params: Connection params as produced by ``_connection_params``;
                ``user``, ``password``, ``database`` and ``host`` are required,
                ``tls`` is optional (any non-empty value enables TLS).

        Returns:
            A ``clickhouse+native://`` URL. With TLS the URL carries
            ``?secure=true`` and the host gets port 9440 when it has no port.

        Raises:
            KeyError: If a required key is missing from ``params``.
        """
        # Percent-encode user / password so reserved characters (e.g. @, /, #, :)
        # can't break the SQLAlchemy connection string. quote(safe='') is used
        # rather than quote_plus: SQLAlchemy decodes these two fields with plain
        # percent-decoding, where '+' is a literal plus sign, so quote_plus's
        # ' ' -> '+' would silently change a password that contains a space.
        user = urllib.parse.quote(params['user'], safe='')
        password = urllib.parse.quote(params['password'], safe='')
        # The database segment keeps its original quote_plus encoding.
        # NOTE(review): how the path segment is decoded downstream is not
        # visible from this file — confirm before changing it.
        database = urllib.parse.quote_plus(params['database'])

        host = params['host']
        if not params.get('tls'):
            # Plaintext native (e.g. a local server); defaults to port 9000 when the
            # host carries no explicit port. SQLAlchemy handles host:port correctly.
            return f'clickhouse+native://{user}:{password}@{host}/{database}'

        # TLS is required by managed services such as ClickHouse Cloud, whose
        # native-protocol TLS port is 9440. Default to it when the user did
        # not pin an explicit port, so a bare cloud hostname just works.
        # Port detection is bracket-aware: a bracketed IPv6 literal (e.g.
        # [::1]) only carries a port when a ':' follows the closing ']'.
        if host.startswith('['):
            has_port = ']' in host and ':' in host.split(']', 1)[1]
        else:
            has_port = ':' in host
        if not has_port:
            host = f'{host}:9440'

        # ?secure=true is clickhouse-driver's own wire-level parameter name for
        # enabling TLS; it is unrelated to the node's "tls" config field.
        return f'clickhouse+native://{user}:{password}@{host}/{database}?secure=true'

    def _max_validation_attempts(self, config: Dict[str, Any]) -> int:
        """Return the EXPLAIN-validation retry count, clamped to the documented 1..20 range.

        Args:
            config: The node config; reads ``max_attempts`` (default 5).

        Returns:
            An int in ``1..20``; 5 when the stored value cannot be converted.
        """
        try:
            value = int(config.get('max_attempts', 5))
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers a non-finite float such as float('inf').
            return 5
        # Clamp to the documented 1..20 range (services.json minimum/maximum) so
        # a value supplied directly (bypassing UI validation) can't request 0,
        # negative, or excessive EXPLAIN-validation retries.
        return max(1, min(20, value))

    def _db_description(self, config: Dict[str, Any]) -> str:
        """Return the user-provided database description, always as a string.

        Args:
            config: The node config; reads ``db_description``.

        Returns:
            The stored description when it is a ``str``, otherwise ``''``.
        """
        # A stored null (or non-string) must not violate the -> str contract.
        value = config.get('db_description')
        return value if isinstance(value, str) else ''
24 changes: 24 additions & 0 deletions nodes/src/nodes/db_clickhouse/IInstance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 Aparavi Software AG
# =============================================================================

from ai.common.database import DatabaseInstanceBase
from .IGlobal import IGlobal


class IInstance(DatabaseInstanceBase):
    """Per-instance side of the ClickHouse node.

    Contributes only the ClickHouse naming below; every tool method and lane
    handler is inherited from DatabaseInstanceBase.
    """

    IGlobal: IGlobal

    def _db_dialect(self) -> str:
        """Machine-readable dialect identifier surfaced via QuestionType.DIALECT."""
        return 'clickhouse'

    def _db_display_name(self) -> str:
        """Human-readable database name used in tool descriptions."""
        return 'ClickHouse'
76 changes: 76 additions & 0 deletions nodes/src/nodes/db_clickhouse/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
---
title: ClickHouse
date: 2026-06-01
sidebar_position: 1
---

<head>
<title>ClickHouse - RocketRide Documentation</title>
</head>

## What it does

ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. This is a **query / read** node — it does not expose a pipeline ingestion (insert) lane (see [Ingestion](#ingestion)).

## Connections

| Connection | Required | Description |
| ---------- | -------- | ---------------------------------------------- |
| `llm` | yes | LLM used to generate SQL from natural language |

## As a pipeline node

**Lanes:**

| Lane in | Lane out | Description |
| ----------- | --------- | ----------------------------------------------------- |
| `questions` | `table` | Translate question → SQL → execute, return as table |
| `questions` | `text` | Translate question → SQL → execute, return as text |
| `questions` | `answers` | Translate question → SQL → execute, return as answers |

## As a tool

When connected to an agent, exposes three functions under the configured server name (default: `clickhouse`):

| Function | Description |
| ----------------------- | ------------------------------------------------------------------------ |
| `clickhouse.get_data` | Natural language → SQL → execute, returns rows (default 250, max 25 000) |
| `clickhouse.get_schema` | Returns tables, columns, types, and primary keys |
| `clickhouse.get_sql` | Natural language → SQL only — no execution |

Only `SELECT` is permitted for queries.

## Configuration

| Field | Default | Description |
| ----------------------- | ----------- | ------------------------------------------------------------------------------------ |
| Database Description | — | Plain-language description of the database, used to guide SQL generation |
| Host | `localhost` | ClickHouse server address, optionally `host:port` (native protocol; the port defaults to 9000, or to 9440 when **Use TLS** is on) |
| User | `default` | Database username |
| Password | — | Database password (empty for the stock `default` user) |
| Database | `default` | Database name |
| Use TLS | `false` | Connect over TLS. Turn ON for **ClickHouse Cloud** (assumes native TLS port 9440 when the host has no explicit port). ClickHouse-only — not present on the MySQL/PostgreSQL nodes |
| Table | `table` | Target table name |
| Max Validation Attempts | `5` | Retry limit for EXPLAIN-based SQL validation (range 1–20) |
| Allow direct execution | `false` | Permit raw `QuestionType.EXECUTE` SQL without LLM translation or safety checks |

## SQL validation

Generated SQL is validated by running `EXPLAIN` against the live database. If validation fails, the error is fed back to the LLM for a corrected query. This repeats up to **Max Validation Attempts** times before the node raises an error.

## ClickHouse Cloud

To connect to a ClickHouse Cloud service:

1. In the Cloud console, open your service → **Connect** and copy the **native** endpoint host (e.g. `abc123.us-east-1.aws.clickhouse.cloud`) and the `default` user password.
2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON.
3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing).

## Ingestion

Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose the ingestion/input `answers` lane (used for pipeline inserts). This removes only that input lane — **not** the `questions → answers` output lane used for querying, which still works. The shared auto-create-table helper builds tables with an auto-increment integer primary key and without a table engine. ClickHouse supports neither: it has no auto-increment primary keys, and every table requires an explicit engine such as `MergeTree`. The inherited insert/auto-create path therefore cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.)

## Notes

- ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships.
- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Raw SQL (`QuestionType.EXECUTE`) is gated behind the **Allow direct execution** toggle and is intended only for trusted callers.
37 changes: 37 additions & 0 deletions nodes/src/nodes/db_clickhouse/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# =============================================================================
# MIT License
# Copyright (c) 2026 Aparavi Software AG
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================

# ------------------------------------------------------------------------------
# Main module
# ------------------------------------------------------------------------------
import os
from depends import depends # type: ignore

# Load the requirements
# The requirements file is located next to this module, using the real
# (symlink-resolved) path of this file. depends() runs before the relative
# imports below — presumably so the packages listed in requirements.txt are
# available by the time IGlobal is imported; confirm against the `depends`
# helper.
requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt'
depends(requirements)

# These imports deliberately follow the depends() call, which is why the
# "module level import not at top of file" lint (E402) is suppressed.
from .IGlobal import IGlobal # noqa: E402
from .IInstance import IInstance # noqa: E402

# Public API of the package.
__all__ = ['IGlobal', 'IInstance']
11 changes: 11 additions & 0 deletions nodes/src/nodes/db_clickhouse/clickhouse.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 2 additions & 0 deletions nodes/src/nodes/db_clickhouse/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
clickhouse-sqlalchemy==0.3.2
clickhouse-driver==0.2.9
Loading
Loading