Skip to content

Commit 4186f3a

Browse files
Mithilesh Gauriharclaude
authored andcommitted
feat(node): add ClickHouse database node (db_postgres clone)
Adds a dedicated ClickHouse node, structured as a thin dialect clone of the existing db_mysql / db_postgres nodes — connection params + DSN builder are the only ClickHouse-specific code; schema reflection, NL->SQL, EXPLAIN validation, SELECT-only safety, and insertion are inherited unchanged from ai.common.database. Dual role (classType ["database","tool"]): - pipeline node: questions -> SQL -> execute; answers/table -> insert - agent tool: clickhouse.get_data / get_schema / get_sql Driver: clickhouse-sqlalchemy native TCP (clickhouse-driver), port 9000. ClickHouse-only extra: a `tls` toggle (distinct from the shared password-field "secure" attribute) that switches the DSN to TLS and assumes the ClickHouse Cloud native port 9440 — verified against a local server and ClickHouse Cloud. Read-only by default; raw SQL gated behind allow_execute, matching MySQL/PostgreSQL. Includes DSN unit tests (nodes/test/db_clickhouse). Fixes #1051 Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent e033856 commit 4186f3a

9 files changed

Lines changed: 604 additions & 0 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# =============================================================================
23+
24+
import urllib.parse
25+
from typing import Any, Dict
26+
27+
from ai.common.database import DatabaseGlobalBase
28+
29+
30+
class IGlobal(DatabaseGlobalBase):
    """ClickHouse-specific global state.

    Implements the two abstract methods that carry ClickHouse knowledge:
    how to read connection params from the node config, and how to build a
    clickhouse-sqlalchemy DSN from those params. Everything else (schema
    reflection, type inference, session lifecycle) lives in the base.

    The DSN uses the native TCP interface (``clickhouse+native://``, default
    port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no
    foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a
    best-effort primary key, so the dialect-agnostic base works unchanged.
    """

    def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Extract the ClickHouse connection settings from the node config.

        Args:
            config: Node configuration with unprefixed keys ('host', 'user',
                'password', 'database', 'table', 'tls').

        Returns:
            A dict of strings with keys 'host', 'user', 'password',
            'database', 'table' and 'tls'. 'tls' is 'true' when TLS is
            enabled and '' otherwise.
        """
        # Config.getNodeConfig() strips the node namespace prefix before returning;
        # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host').
        # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL
        # nodes). It is distinct from the field-level "secure": true attribute on the
        # password field — that attribute only marks the value as a masked secret and
        # is shared identically across all three database nodes.
        tls = config.get('tls', False)
        if isinstance(tls, str):
            # Config values may arrive as strings ('true'/'false'); 'false' must
            # not be truthy, so don't use bool() directly.
            tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'}
        return {
            'host': config.get('host', 'localhost').strip(),
            'user': config.get('user', 'default').strip(),
            'password': config.get('password', ''),  # Do not strip — whitespace is valid in passwords
            'database': config.get('database', 'default').strip(),
            'table': config.get('table', 'table').strip(),
            # Normalised to a flag string so the params dict stays Dict[str, str];
            # consumed by _build_connection_url below.
            'tls': 'true' if tls else '',
        }

    def _build_connection_url(self, params: Dict[str, str]) -> str:
        """Build the ``clickhouse+native://`` DSN from the connection params.

        Args:
            params: Dict with 'host', 'user', 'password' and 'database';
                an optional truthy 'tls' entry switches the DSN to TLS.

        Returns:
            ``clickhouse+native://user:password@host/database``, with
            ``?secure=true`` appended (and port 9440 added to a port-less
            host) when TLS is enabled.
        """
        # Percent-encode both credentials so special characters (e.g. @, /, #, :)
        # cannot break the connection string. quote(..., safe='') is used
        # instead of quote_plus(): quote_plus() encodes a space as '+', which
        # only round-trips through unquote_plus(), whereas '%20' (and '%2B'
        # for a literal '+') decode correctly under both unquote() and
        # unquote_plus(). Passwords may legitimately contain spaces.
        user = urllib.parse.quote(params['user'], safe='')
        password = urllib.parse.quote(params['password'], safe='')

        host = params['host']
        query = ''
        if params.get('tls'):
            # TLS is required by managed services such as ClickHouse Cloud, whose
            # native-protocol TLS port is 9440. Default to it when the user did
            # not pin an explicit port, so a bare cloud hostname just works.
            if ':' not in host:
                host = f'{host}:9440'
            # ?secure=true is clickhouse-driver's own wire-level parameter name for
            # enabling TLS; it is unrelated to the node's "tls" config field.
            query = '?secure=true'

        # Without TLS this is plaintext native (e.g. a local server); the port
        # defaults to 9000 when the host carries no explicit port.
        # The database name is interpolated as-is (not percent-encoded).
        return f'clickhouse+native://{user}:{password}@{host}/{params["database"]}{query}'

    def _max_validation_attempts(self, config: Dict[str, Any]) -> int:
        """Return the configured 'max_attempts' as an int, defaulting to 5.

        Falls back to 5 when the key is absent or the value cannot be
        converted to an int. No range clamping is applied here.
        """
        try:
            return int(config.get('max_attempts', 5))
        except (ValueError, TypeError):
            return 5

    def _db_description(self, config: Dict[str, Any]) -> str:
        """Return the configured 'db_description', or '' when not set."""
        return config.get('db_description', '')
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
# =============================================================================
5+
6+
from ai.common.database import DatabaseInstanceBase
7+
from .IGlobal import IGlobal
8+
9+
10+
class IInstance(DatabaseInstanceBase):
    """Per-instance side of the ClickHouse node.

    Only the ClickHouse naming hooks are defined here; all tool methods and
    lane handlers are inherited from DatabaseInstanceBase.
    """

    # Annotate the global-state attribute with the ClickHouse IGlobal class.
    IGlobal: IGlobal

    def _db_dialect(self) -> str:
        """Return the dialect string for this node: ``'clickhouse'``."""
        return 'clickhouse'

    def _db_display_name(self) -> str:
        """Return the display name for this node: ``'ClickHouse'``."""
        return 'ClickHouse'
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
---
2+
title: ClickHouse
3+
date: 2026-06-01
4+
sidebar_position: 1
5+
---
6+
7+
<head>
8+
<title>ClickHouse - RocketRide Documentation</title>
9+
</head>
10+
11+
## What it does
12+
13+
ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`.
14+
15+
## Connections
16+
17+
| Connection | Required | Description |
18+
| ---------- | -------- | ---------------------------------------------- |
19+
| `llm` | yes | LLM used to generate SQL from natural language |
20+
21+
## As a pipeline node
22+
23+
**Lanes:**
24+
25+
| Lane in | Lane out | Description |
26+
| ----------- | --------- | ----------------------------------------------------- |
27+
| `questions` | `table` | Translate question → SQL → execute, return as table |
28+
| `questions` | `text` | Translate question → SQL → execute, return as text |
29+
| `questions` | `answers` | Translate question → SQL → execute, return as answers |
30+
| `answers`   | —         | Parse structured data and insert into table           |
31+
32+
Auto-creates the target table on first insert if it doesn't exist.
33+
34+
## As a tool
35+
36+
When connected to an agent, exposes three functions under the configured server name (default: `clickhouse`):
37+
38+
| Function | Description |
39+
| ----------------------- | ------------------------------------------------------------------------ |
40+
| `clickhouse.get_data` | Natural language → SQL → execute, returns rows (default 250, max 25 000) |
41+
| `clickhouse.get_schema` | Returns tables, columns, types, and primary keys |
42+
| `clickhouse.get_sql` | Natural language → SQL only — no execution |
43+
44+
Only `SELECT` is permitted for queries. Insert operations use the `answers` lane.
45+
46+
## Configuration
47+
48+
| Field | Default | Description |
49+
| ----------------------- | ----------- | ------------------------------------------------------------------------------------ |
50+
| Database Description    | —           | Plain-language description of the database, used to guide SQL generation             |
51+
| Host | `localhost` | ClickHouse server address, optionally `host:port` (native protocol, defaults to 9000) |
52+
| User | `default` | Database username |
53+
| Password                | —           | Database password (empty for the stock `default` user)                               |
54+
| Database | `default` | Database name |
55+
| Use TLS | `false` | Connect over TLS. Turn ON for **ClickHouse Cloud** (assumes native TLS port 9440 when the host has no explicit port). ClickHouse-only — not present on the MySQL/PostgreSQL nodes |
56+
| Table | `table` | Target table name |
57+
| Max Validation Attempts | `5` | Retry limit for EXPLAIN-based SQL validation (range 1–20) |
58+
| Allow direct execution | `false` | Permit raw `QuestionType.EXECUTE` SQL without LLM translation or safety checks |
59+
60+
## SQL validation
61+
62+
Generated SQL is validated by running `EXPLAIN` against the live database. If validation fails, the error is fed back to the LLM for a corrected query. This repeats up to **Max Validation Attempts** times before the node raises an error.
63+
64+
## ClickHouse Cloud
65+
66+
To connect to a ClickHouse Cloud service:
67+
68+
1. In the Cloud console, open your service → **Connect** and copy the **native** endpoint host (e.g. `abc123.us-east-1.aws.clickhouse.cloud`) and the `default` user password.
69+
2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON.
70+
3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing).
71+
72+
## Notes
73+
74+
- ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships.
75+
- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Direct write/DDL statements require the **Allow direct execution** toggle.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# =============================================================================
2+
# MIT License
3+
# Copyright (c) 2026 Aparavi Software AG
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
# =============================================================================
23+
24+
# ------------------------------------------------------------------------------
25+
# Main module
26+
# ------------------------------------------------------------------------------
27+
import os
28+
from depends import depends # type: ignore
29+
30+
# Install/load this node's dependencies from the requirements.txt that sits
# next to this module (symlinks resolved via realpath) before importing the
# node classes below.
requirements = f'{os.path.dirname(os.path.realpath(__file__))}/requirements.txt'
depends(requirements)
33+
34+
from .IGlobal import IGlobal # noqa: E402
35+
from .IInstance import IInstance # noqa: E402
36+
37+
__all__ = ['IGlobal', 'IInstance']
Lines changed: 11 additions & 0 deletions
Loading
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
clickhouse-sqlalchemy==0.3.2
2+
clickhouse-driver==0.2.9

0 commit comments

Comments
 (0)