Skip to content

Commit d34d7fc

Browse files
mithileshgau and claude authored
feat(node): add ClickHouse database node (db_postgres clone) (#1064)
* feat(node): add ClickHouse database node (db_postgres clone) Adds a dedicated ClickHouse node, structured as a thin dialect clone of the existing db_mysql / db_postgres nodes — connection params + DSN builder are the only ClickHouse-specific code; schema reflection, NL->SQL, EXPLAIN validation, SELECT-only safety, and insertion are inherited unchanged from ai.common.database. Dual role (classType ["database","tool"]): - pipeline node: questions -> SQL -> execute; answers/table -> insert - agent tool: clickhouse.get_data / get_schema / get_sql Driver: clickhouse-sqlalchemy native TCP (clickhouse-driver), port 9000. ClickHouse-only extra: a `tls` toggle (distinct from the shared password-field "secure" attribute) that switches the DSN to TLS and assumes the ClickHouse Cloud native port 9440 — verified against a local server and ClickHouse Cloud. Fixes #1051 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 30af066 commit d34d7fc

11 files changed

Lines changed: 723 additions & 6 deletions

File tree

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# =============================================================================
23+
24+
import urllib.parse
25+
from typing import Any, Dict
26+
27+
from ai.common.database import DatabaseGlobalBase
28+
29+
30+
class IGlobal(DatabaseGlobalBase):
    """ClickHouse-specific global state.

    Implements the two abstract methods that carry ClickHouse knowledge:
    how to read connection params from the node config, and how to build a
    clickhouse-sqlalchemy DSN from those params. Everything else (schema
    reflection, type inference, session lifecycle) lives in the base.

    The DSN uses the native TCP interface (``clickhouse+native://``, default
    port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no
    foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a
    best-effort primary key, so the dialect-agnostic base works unchanged.
    """

    @staticmethod
    def _normalize_field(value: Any, default: str) -> str:
        """Coerce a config value to a stripped string, falling back to ``default``.

        Args:
            value: Raw config value; may be ``None`` or a non-string.
            default: Value returned when ``value`` is ``None`` or strips to ''.

        Returns:
            ``str(value).strip()`` when that is non-empty, otherwise ``default``.
        """
        if value is None:
            return default
        # str() first, so a non-string (e.g. a number) never hits .strip() directly.
        text = str(value).strip()
        return text or default

    def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Map the node's stored config to a flat ClickHouse connection-params dict.

        Args:
            config: Node config with unprefixed keys ('host', 'user',
                'password', 'database', 'table', 'tls').

        Returns:
            A dict of strings with keys ``host``, ``user``, ``password``,
            ``database``, ``table`` and ``tls`` ('true' when enabled, '' otherwise).
        """
        # Config.getNodeConfig() strips the node namespace prefix before returning;
        # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host').
        # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL
        # nodes). It is distinct from the field-level "secure": true attribute on the
        # password field — that attribute only marks the value as a masked secret and
        # is shared identically across all three database nodes.
        tls = config.get('tls', False)
        if isinstance(tls, str):
            # Config values may arrive as strings ('true'/'false'); 'false' must
            # not be truthy, so don't use bool() directly.
            tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'}

        # Do not strip — whitespace is valid in passwords. A truthy non-string
        # (e.g. a numeric value) is coerced so the Dict[str, str] contract holds
        # and the URL builder's percent-encoding cannot raise TypeError.
        password = config.get('password') or ''
        if not isinstance(password, str):
            password = str(password)

        return {
            'host': self._normalize_field(config.get('host'), 'localhost'),
            'user': self._normalize_field(config.get('user'), 'default'),
            'password': password,
            'database': self._normalize_field(config.get('database'), 'default'),
            'table': self._normalize_field(config.get('table'), 'table'),
            # Normalised to a flag string so the params dict stays Dict[str, str];
            # consumed by _build_connection_url below.
            'tls': 'true' if tls else '',
        }

    def _build_connection_url(self, params: Dict[str, str]) -> str:
        """Build a clickhouse-sqlalchemy native-TCP DSN, enabling TLS when requested.

        Args:
            params: Flat params dict as produced by ``_connection_params``; reads
                ``user``, ``password``, ``database``, ``host`` and the optional
                ``tls`` flag (any non-empty value enables TLS).

        Returns:
            A ``clickhouse+native://`` URL. With TLS it ends in ``?secure=true``
            and the host gets ``:9440`` appended when it carries no explicit port.
        """
        # Percent-encode user / password / database so reserved characters
        # (e.g. @, /, #, :) can't break the connection string. quote(safe='') is
        # used instead of quote_plus(): quote_plus turns a space into '+', which a
        # plain percent-decoder keeps as a literal '+', corrupting credentials that
        # contain whitespace. '%20' decodes to a space under both unquote() and
        # unquote_plus().
        # NOTE(review): SQLAlchemy's URL parser is understood to decode with
        # unquote() — confirm against the pinned SQLAlchemy version.
        user = urllib.parse.quote(params['user'], safe='')
        password = urllib.parse.quote(params['password'], safe='')
        database = urllib.parse.quote(params['database'], safe='')

        host = params['host']
        if not params.get('tls'):
            # Plaintext native (e.g. a local server); defaults to port 9000 when the
            # host carries no explicit port.
            return f'clickhouse+native://{user}:{password}@{host}/{database}'

        # TLS is required by managed services such as ClickHouse Cloud, whose
        # native-protocol TLS port is 9440. Default to it when the user did
        # not pin an explicit port, so a bare cloud hostname just works.
        # Port detection is bracket-aware: a bracketed IPv6 literal (e.g.
        # [::1]) only carries a port when a ':' follows the closing ']'.
        if host.startswith('['):
            has_port = ']' in host and ':' in host.split(']', 1)[1]
        else:
            has_port = ':' in host
        if not has_port:
            host = f'{host}:9440'

        # ?secure=true is clickhouse-driver's own wire-level parameter name for
        # enabling TLS; it is unrelated to the node's "tls" config field.
        return f'clickhouse+native://{user}:{password}@{host}/{database}?secure=true'

    def _max_validation_attempts(self, config: Dict[str, Any]) -> int:
        """Return the EXPLAIN-validation retry count, clamped to the documented 1..20 range.

        Args:
            config: Node config; reads ``max_attempts`` (default 5).

        Returns:
            The configured value clamped to 1..20, or 5 when it cannot be
            converted to an int.
        """
        try:
            value = int(config.get('max_attempts', 5))
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers int(float('inf')); treat it like any other
            # unparseable value instead of letting it escape.
            return 5
        # Clamp to the documented 1..20 range (services.json minimum/maximum) so
        # a value supplied directly (bypassing UI validation) can't request 0,
        # negative, or excessive EXPLAIN-validation retries.
        return max(1, min(20, value))

    def _db_description(self, config: Dict[str, Any]) -> str:
        """Return the user-provided database description, always as a string.

        Args:
            config: Node config; reads ``db_description``.

        Returns:
            The stored description when it is a ``str``, otherwise ''.
        """
        # A stored null (or non-string) must not violate the -> str contract.
        value = config.get('db_description')
        return value if isinstance(value, str) else ''
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
# =============================================================================
5+
6+
from ai.common.database import DatabaseInstanceBase
7+
from .IGlobal import IGlobal
8+
9+
10+
class IInstance(DatabaseInstanceBase):
    """Per-instance side of the ClickHouse node.

    Only the two naming hooks below are defined here; tool methods and lane
    handlers are inherited from ``DatabaseInstanceBase``.
    """

    IGlobal: IGlobal

    def _db_dialect(self) -> str:
        """Machine-readable dialect identifier (surfaced via QuestionType.DIALECT)."""
        return 'clickhouse'

    def _db_display_name(self) -> str:
        """Human-readable database name used in tool descriptions."""
        return 'ClickHouse'
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
---
2+
title: ClickHouse
3+
date: 2026-06-01
4+
sidebar_position: 1
5+
---
6+
7+
<head>
8+
<title>ClickHouse - RocketRide Documentation</title>
9+
</head>
10+
11+
## What it does
12+
13+
ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. This is a **query / read** node — it does not expose a pipeline ingestion (insert) lane (see [Ingestion](#ingestion)).
14+
15+
## Connections
16+
17+
| Connection | Required | Description |
18+
| ---------- | -------- | ---------------------------------------------- |
19+
| `llm` | yes | LLM used to generate SQL from natural language |
20+
21+
## As a pipeline node
22+
23+
**Lanes:**
24+
25+
| Lane in | Lane out | Description |
26+
| ----------- | --------- | ----------------------------------------------------- |
27+
| `questions` | `table` | Translate question → SQL → execute, return as table |
28+
| `questions` | `text` | Translate question → SQL → execute, return as text |
29+
| `questions` | `answers` | Translate question → SQL → execute, return as answers |
30+
31+
## As a tool
32+
33+
When connected to an agent, exposes three functions under the configured server name (default: `clickhouse`):
34+
35+
| Function | Description |
36+
| ----------------------- | ------------------------------------------------------------------------ |
37+
| `clickhouse.get_data` | Natural language → SQL → execute, returns rows (default 250, max 25 000) |
38+
| `clickhouse.get_schema` | Returns tables, columns, types, and primary keys |
39+
| `clickhouse.get_sql` | Natural language → SQL only — no execution |
40+
41+
Only `SELECT` is permitted for queries.
42+
43+
## Configuration
44+
45+
| Field | Default | Description |
46+
| ----------------------- | ----------- | ------------------------------------------------------------------------------------ |
47+
| Database Description | | Plain-language description of the database, used to guide SQL generation |
48+
| Host | `localhost` | ClickHouse server address, optionally `host:port` (native protocol, defaults to 9000) |
49+
| User | `default` | Database username |
50+
| Password | | Database password (empty for the stock `default` user) |
51+
| Database | `default` | Database name |
52+
| Use TLS | `false` | Connect over TLS. Turn ON for **ClickHouse Cloud** (assumes native TLS port 9440 when the host has no explicit port). ClickHouse-only — not present on the MySQL/PostgreSQL nodes |
53+
| Table | `table` | Target table name |
54+
| Max Validation Attempts | `5` | Retry limit for EXPLAIN-based SQL validation (range 1–20) |
55+
| Allow direct execution | `false` | Permit raw `QuestionType.EXECUTE` SQL without LLM translation or safety checks |
56+
57+
## SQL validation
58+
59+
Generated SQL is validated by running `EXPLAIN` against the live database. If validation fails, the error is fed back to the LLM for a corrected query. This repeats up to **Max Validation Attempts** times before the node raises an error.
60+
61+
## ClickHouse Cloud
62+
63+
To connect to a ClickHouse Cloud service:
64+
65+
1. In the Cloud console, open your service → **Connect** and copy the **native** endpoint host (e.g. `abc123.us-east-1.aws.clickhouse.cloud`) and the `default` user password.
66+
2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON.
67+
3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing).
68+
69+
## Ingestion
70+
71+
Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose the ingestion/input `answers` lane (used for pipeline inserts). This removes only that input lane — **not** the `questions → answers` output lane used for querying, which still works. The shared auto-create-table helper builds tables with an auto-increment integer primary key and without a table engine; ClickHouse has no auto-increment columns and requires every table to declare an explicit engine (such as `MergeTree`), so the inherited insert/auto-create path cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.)
72+
73+
## Notes
74+
75+
- ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships.
76+
- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Raw SQL (`QuestionType.EXECUTE`) is gated behind the **Allow direct execution** toggle and is intended only for trusted callers.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# =============================================================================
23+
24+
# ------------------------------------------------------------------------------
25+
# Main module
26+
# ------------------------------------------------------------------------------
27+
import os

from depends import depends  # type: ignore

# requirements.txt sits next to this file; realpath() resolves symlinks so the
# path does not depend on the current working directory.
requirements = f'{os.path.dirname(os.path.realpath(__file__))}/requirements.txt'
depends(requirements)

# Imported only after depends() has run (hence the E402 suppressions) —
# presumably because these modules need the packages listed in requirements.txt.
from .IGlobal import IGlobal  # noqa: E402
from .IInstance import IInstance  # noqa: E402

__all__ = ['IGlobal', 'IInstance']
Lines changed: 11 additions & 0 deletions
Loading
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
clickhouse-sqlalchemy==0.3.2
2+
clickhouse-driver==0.2.9

0 commit comments

Comments
 (0)