From 4186f3aee0fe6642b2587466317b63e5b3a1d7c2 Mon Sep 17 00:00:00 2001 From: Mithilesh Gaurihar Date: Mon, 1 Jun 2026 23:24:06 -0700 Subject: [PATCH 1/4] feat(node): add ClickHouse database node (db_postgres clone) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a dedicated ClickHouse node, structured as a thin dialect clone of the existing db_mysql / db_postgres nodes — connection params + DSN builder are the only ClickHouse-specific code; schema reflection, NL->SQL, EXPLAIN validation, SELECT-only safety, and insertion are inherited unchanged from ai.common.database. Dual role (classType ["database","tool"]): - pipeline node: questions -> SQL -> execute; answers/table -> insert - agent tool: clickhouse.get_data / get_schema / get_sql Driver: clickhouse-sqlalchemy native TCP (clickhouse-driver), port 9000. ClickHouse-only extra: a `tls` toggle (distinct from the shared password-field "secure" attribute) that switches the DSN to TLS and assumes the ClickHouse Cloud native port 9440 — verified against a local server and ClickHouse Cloud. Read-only by default; raw SQL gated behind allow_execute, matching MySQL/PostgreSQL. Includes DSN unit tests (nodes/test/db_clickhouse). Fixes #1051 Co-Authored-By: Claude Opus 4.8 (1M context) --- nodes/src/nodes/db_clickhouse/IGlobal.py | 94 +++++++++ nodes/src/nodes/db_clickhouse/IInstance.py | 22 ++ nodes/src/nodes/db_clickhouse/README.md | 75 +++++++ nodes/src/nodes/db_clickhouse/__init__.py | 37 ++++ nodes/src/nodes/db_clickhouse/clickhouse.svg | 11 + .../src/nodes/db_clickhouse/requirements.txt | 2 + nodes/src/nodes/db_clickhouse/services.json | 193 ++++++++++++++++++ nodes/test/db_clickhouse/__init__.py | 0 .../test/db_clickhouse/test_clickhouse_dsn.py | 170 +++++++++++++++ 9 files changed, 604 insertions(+) create mode 100644 nodes/src/nodes/db_clickhouse/IGlobal.py create mode 100644 nodes/src/nodes/db_clickhouse/IInstance.py create mode 100644 nodes/src/nodes/db_clickhouse/README.md create mode 100644 nodes/src/nodes/db_clickhouse/__init__.py create mode 100644 nodes/src/nodes/db_clickhouse/clickhouse.svg create mode 100644 nodes/src/nodes/db_clickhouse/requirements.txt create mode 100644 nodes/src/nodes/db_clickhouse/services.json create mode 100644 nodes/test/db_clickhouse/__init__.py create mode 100644 nodes/test/db_clickhouse/test_clickhouse_dsn.py diff --git a/nodes/src/nodes/db_clickhouse/IGlobal.py b/nodes/src/nodes/db_clickhouse/IGlobal.py new file mode 100644 index 000000000..0bd2bd049 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/IGlobal.py @@ -0,0 +1,94 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +import urllib.parse +from typing import Any, Dict + +from ai.common.database import DatabaseGlobalBase + + +class IGlobal(DatabaseGlobalBase): + """ClickHouse-specific global state. + + Implements the two abstract methods that carry ClickHouse knowledge: + how to read connection params from the node config, and how to build a + clickhouse-sqlalchemy DSN from those params. Everything else (schema + reflection, type inference, session lifecycle) lives in the base. + + The DSN uses the native TCP interface (``clickhouse+native://``, default + port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no + foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a + best-effort primary key, so the dialect-agnostic base works unchanged. + """ + + def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + # Config.getNodeConfig() strips the node namespace prefix before returning; + # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host'). + # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL + # nodes). It is distinct from the field-level "secure": true attribute on the + # password field — that attribute only marks the value as a masked secret and + # is shared identically across all three database nodes. + tls = config.get('tls', False) + if isinstance(tls, str): + # Config values may arrive as strings ('true'/'false'); 'false' must + # not be truthy, so don't use bool() directly. + tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'} + return { + 'host': config.get('host', 'localhost').strip(), + 'user': config.get('user', 'default').strip(), + 'password': config.get('password', ''), # Do not strip — whitespace is valid in passwords + 'database': config.get('database', 'default').strip(), + 'table': config.get('table', 'table').strip(), + # Normalised to a flag string so the params dict stays Dict[str, str]; + # consumed by _build_connection_url below. + 'tls': 'true' if tls else '', + } + + def _build_connection_url(self, params: Dict[str, str]) -> str: + # URL-encode the password so special characters (e.g. @, /, #) don't + # break the SQLAlchemy connection string. + password = urllib.parse.quote_plus(params['password']) + + host = params['host'] + if params.get('tls'): + # TLS is required by managed services such as ClickHouse Cloud, whose + # native-protocol TLS port is 9440. Default to it when the user did + # not pin an explicit port, so a bare cloud hostname just works. + if ':' not in host: + host = f'{host}:9440' + # ?secure=true is clickhouse-driver's own wire-level parameter name for + # enabling TLS; it is unrelated to the node's "tls" config field. + return f'clickhouse+native://{params["user"]}:{password}@{host}/{params["database"]}?secure=true' + + # Plaintext native (e.g. a local server); defaults to port 9000 when the + # host carries no explicit port. SQLAlchemy handles host:port correctly. + return f'clickhouse+native://{params["user"]}:{password}@{host}/{params["database"]}' + + def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + try: + return int(config.get('max_attempts', 5)) + except (ValueError, TypeError): + return 5 + + def _db_description(self, config: Dict[str, Any]) -> str: + return config.get('db_description', '') diff --git a/nodes/src/nodes/db_clickhouse/IInstance.py b/nodes/src/nodes/db_clickhouse/IInstance.py new file mode 100644 index 000000000..7a79a2b66 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/IInstance.py @@ -0,0 +1,22 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# ============================================================================= + +from ai.common.database import DatabaseInstanceBase +from .IGlobal import IGlobal + + +class IInstance(DatabaseInstanceBase): + """ClickHouse-specific instance. + + All tool methods and lane handlers are inherited from DatabaseInstanceBase. + """ + + IGlobal: IGlobal + + def _db_display_name(self) -> str: + return 'ClickHouse' + + def _db_dialect(self) -> str: + return 'clickhouse' diff --git a/nodes/src/nodes/db_clickhouse/README.md b/nodes/src/nodes/db_clickhouse/README.md new file mode 100644 index 000000000..bc3d2f625 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/README.md @@ -0,0 +1,75 @@ +--- +title: ClickHouse +date: 2026-06-01 +sidebar_position: 1 +--- + + + ClickHouse - RocketRide Documentation + + +## What it does + +ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. + +## Connections + +| Connection | Required | Description | +| ---------- | -------- | ---------------------------------------------- | +| `llm` | yes | LLM used to generate SQL from natural language | + +## As a pipeline node + +**Lanes:** + +| Lane in | Lane out | Description | +| ----------- | --------- | ----------------------------------------------------- | +| `questions` | `table` | Translate question → SQL → execute, return as table | +| `questions` | `text` | Translate question → SQL → execute, return as text | +| `questions` | `answers` | Translate question → SQL → execute, return as answers | +| `answers` | — | Parse structured data and insert into table | + +Auto-creates the target table on first insert if it doesn't exist. + +## As a tool + +When connected to an agent, exposes three functions under the configured server name (default: `clickhouse`): + +| Function | Description | +| ----------------------- | ------------------------------------------------------------------------ | +| `clickhouse.get_data` | Natural language → SQL → execute, returns rows (default 250, max 25 000) | +| `clickhouse.get_schema` | Returns tables, columns, types, and primary keys | +| `clickhouse.get_sql` | Natural language → SQL only — no execution | + +Only `SELECT` is permitted for queries. Insert operations use the `answers` lane. + +## Configuration + +| Field | Default | Description | +| ----------------------- | ----------- | ------------------------------------------------------------------------------------ | +| Database Description | — | Plain-language description of the database, used to guide SQL generation | +| Host | `localhost` | ClickHouse server address, optionally `host:port` (native protocol, defaults to 9000) | +| User | `default` | Database username | +| Password | — | Database password (empty for the stock `default` user) | +| Database | `default` | Database name | +| Use TLS | `false` | Connect over TLS. Turn ON for **ClickHouse Cloud** (assumes native TLS port 9440 when the host has no explicit port). ClickHouse-only — not present on the MySQL/PostgreSQL nodes | +| Table | `table` | Target table name | +| Max Validation Attempts | `5` | Retry limit for EXPLAIN-based SQL validation (range 1–20) | +| Allow direct execution | `false` | Permit raw `QuestionType.EXECUTE` SQL without LLM translation or safety checks | + +## SQL validation + +Generated SQL is validated by running `EXPLAIN` against the live database. If validation fails, the error is fed back to the LLM for a corrected query. This repeats up to **Max Validation Attempts** times before the node raises an error. + +## ClickHouse Cloud + +To connect to a ClickHouse Cloud service: + +1. In the Cloud console, open your service → **Connect** and copy the **native** endpoint host (e.g. `abc123.us-east-1.aws.clickhouse.cloud`) and the `default` user password. +2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON. +3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing). + +## Notes + +- ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships. +- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Direct write/DDL statements require the **Allow direct execution** toggle. diff --git a/nodes/src/nodes/db_clickhouse/__init__.py b/nodes/src/nodes/db_clickhouse/__init__.py new file mode 100644 index 000000000..0166206f4 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/__init__.py @@ -0,0 +1,37 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +# ------------------------------------------------------------------------------ +# Main module +# ------------------------------------------------------------------------------ +import os +from depends import depends # type: ignore + +# Load the requirements +requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt' +depends(requirements) + +from .IGlobal import IGlobal # noqa: E402 +from .IInstance import IInstance # noqa: E402 + +__all__ = ['IGlobal', 'IInstance'] diff --git a/nodes/src/nodes/db_clickhouse/clickhouse.svg b/nodes/src/nodes/db_clickhouse/clickhouse.svg new file mode 100644 index 000000000..5c44bdfc9 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/clickhouse.svg @@ -0,0 +1,11 @@ + + + ClickHouse + + + + + + + + diff --git a/nodes/src/nodes/db_clickhouse/requirements.txt b/nodes/src/nodes/db_clickhouse/requirements.txt new file mode 100644 index 000000000..abf8d9c4b --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/requirements.txt @@ -0,0 +1,2 @@ +clickhouse-sqlalchemy==0.3.2 +clickhouse-driver==0.2.9 diff --git a/nodes/src/nodes/db_clickhouse/services.json b/nodes/src/nodes/db_clickhouse/services.json new file mode 100644 index 000000000..f3ef35523 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/services.json @@ -0,0 +1,193 @@ +{ + // + // Required: + // The displayable name of this node + // + "title": "ClickHouse", + // + // Required: + // The protocol is the endpoint protocol + // + "protocol": "db_clickhouse://", + // + // Required: + // Class type of the node - what it does + // + "classType": ["database", "tool"], + // + // Required: + // Capabilities are flags that change the behavior of the underlying + // engine + // + "capabilities": ["noremote", "invoke"], + // + // Optional: + // Register is either filter, endpoint or ignored if not specified. If the + // type is specified, a factory is registered of that given type + // + "register": "filter", + // + // Optional: + // The node is the actual physical node to instantiate - if + // not specified, the protocol will be used + // + "node": "python", + // + // Optional: + // The path is the executable/script code - it is node dependent + // and is optional for most node + // + "path": "nodes.db_clickhouse", + // + // Required: + // The prefix map when added/removed when converting URLs <=> paths + // + "prefix": "clickhouse", + // + // Optional: + // Description of this driver + // + "description": ["A processing component that takes structured table data and inserts it ", "into a ClickHouse database. Designed for analytical data ingestion, it ", "supports field mapping and basic configuration of connection credentials ", "and target tables. Ideal for storing processed results or structured outputs, ", "this component ensures reliable insertion of records into columnar storage."], + // + // Optional: + // The icon is the icon to display in the UI for this node + // + "icon": "clickhouse.svg", + "documentation": "https://docs.rocketride.org", + // + // Optional: + // Defines the invoke connections that may/must be connected + // which are utilized by this driver + // + "invoke": { + "llm": { + "description": "LLM to use to craft SQL queries from question", + "min": 1 + } + }, + // + // Optional: + // As a pipe component, define what this pipe component takes + // and what it produces + // + "lanes": { + "answers": [], + "questions": ["table", "text", "answers"] + }, + // + // Optional: + // Profile section are configuration options used by the driver + // itself + // + "preconfig": { + // Define the values that will be merged into any profile configuration + // specified, unless the profile is 'absolute' + "default": "default", + // Defines profiles used with the "profile": key + "profiles": { + "default": { + "database": "default" + } + } + }, + // + // Optional: + // Local fields definitions - these define fields only for the + // current service. You may specify them here, or directly + // in the shape + // + "fields": { + "clickhouse.host": { + "type": "string", + "title": "ClickHouse host", + "default": "localhost", + "description": "Host name or IP address of the ClickHouse server, optionally including a native-protocol port (e.g. localhost:9440). Defaults to port 9000 when none is given." + }, + "clickhouse.user": { + "type": "string", + "title": "User", + "default": "default", + "description": "User to connect to the ClickHouse server" + }, + "clickhouse.password": { + "type": "string", + "title": "Password", + "description": "Password to connect to the ClickHouse server", + "secure": true, + "ui": { + "ui:widget": "password" + } + }, + "clickhouse.database": { + "type": "string", + "title": "Database name", + "default": "default", + "minLength": 1, + "maxLength": 63, + "description": "Name of database" + }, + "clickhouse.tls": { + "type": "boolean", + "title": "Use TLS", + "default": false, + "description": "Connect over TLS. Required for managed services such as ClickHouse Cloud (native TLS port 9440 is assumed when the host has no explicit port). Leave OFF for a plaintext local server on port 9000. ClickHouse-specific — MySQL/PostgreSQL nodes do not expose this." + }, + "clickhouse.table": { + "type": "string", + "title": "Table name", + "default": "table", + "description": "Name of table" + }, + "clickhouse.db_description": { + "type": "string", + "title": "Database description", + "default": "", + "description": "What is this database used for? Describe its content and purpose — this helps the LLM generate more accurate queries.", + "ui": { + "ui:widget": "textarea" + } + }, + "clickhouse.max_attempts": { + "type": "integer", + "title": "Max validation attempts", + "default": 5, + "minimum": 1, + "maximum": 20, + "description": "Maximum number of times to re-ask the LLM if EXPLAIN rejects the generated SQL" + }, + "clickhouse.allow_execute": { + "type": "boolean", + "title": "Allow direct query execution", + "default": false, + "description": "Permit QuestionType.EXECUTE callers to run raw SQL without LLM translation or safety checks. Leave OFF unless a trusted application explicitly needs to issue SQL directly." + }, + "clickhouse.default": { + "object": "default", + "properties": ["clickhouse.db_description", "clickhouse.host", "clickhouse.user", "clickhouse.password", "clickhouse.database", "clickhouse.tls", "clickhouse.table", "clickhouse.max_attempts", "clickhouse.allow_execute"] + }, + "clickhouse.profile": { + "hidden": true, + "type": "string", + "default": "default", + "enum": [["default", "Default"]], + "conditional": [ + { + "value": "default", + "properties": ["clickhouse.default"] + } + ] + } + }, + // + // Required: + // Defines the fields (shape) of the service. Either source or target + // may be specified, or both, but at least one is required + // + "shape": [ + { + "section": "Pipe", + "title": "ClickHouse", + "properties": ["clickhouse.profile"] + } + ] +} diff --git a/nodes/test/db_clickhouse/__init__.py b/nodes/test/db_clickhouse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nodes/test/db_clickhouse/test_clickhouse_dsn.py b/nodes/test/db_clickhouse/test_clickhouse_dsn.py new file mode 100644 index 000000000..83594d721 --- /dev/null +++ b/nodes/test/db_clickhouse/test_clickhouse_dsn.py @@ -0,0 +1,170 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# ============================================================================= + +""" +Unit tests for the ClickHouse node's only dialect-specific logic: +``IGlobal._connection_params`` and ``IGlobal._build_connection_url``. + +Everything else (schema reflection, query execution, insertion) is inherited +unchanged from ``ai.common.database`` and is covered by that package's tests +(``packages/ai/tests/ai/common/database/test_db_base.py``). The genuinely new +ClickHouse code is the native-protocol DSN builder and its TLS branch, so that +is what we pin here. + +The node module imports ``from ai.common.database import DatabaseGlobalBase``, +which would pull SQLAlchemy + rocketlib. We stub a trivial base into +``sys.modules`` and load ``IGlobal.py`` directly by file path so the test runs +without the full engine environment (mirroring how test_contracts mocks +engine libs). +""" + +from __future__ import annotations + +import importlib.util +import sys +import types +from pathlib import Path + +import pytest + +NODE_DIR = Path(__file__).resolve().parents[2] / 'src' / 'nodes' / 'db_clickhouse' + + +@pytest.fixture(scope='module') +def IGlobal(): + """Load db_clickhouse/IGlobal.py with a stubbed ai.common.database base.""" + # Stub the ai.common.database package so the node import resolves without + # SQLAlchemy/rocketlib. DatabaseGlobalBase only needs to be a plain base — + # the two methods under test do not touch any base machinery. + ai = types.ModuleType('ai') + ai_common = types.ModuleType('ai.common') + ai_db = types.ModuleType('ai.common.database') + + class _StubBase: # noqa: D401 - trivial stand-in for DatabaseGlobalBase + """Minimal stand-in so the concrete subclass is instantiable.""" + + ai_db.DatabaseGlobalBase = _StubBase + ai.common = ai_common + ai_common.database = ai_db + saved = {k: sys.modules.get(k) for k in ('ai', 'ai.common', 'ai.common.database')} + sys.modules.update({'ai': ai, 'ai.common': ai_common, 'ai.common.database': ai_db}) + + try: + spec = importlib.util.spec_from_file_location('db_clickhouse_iglobal', NODE_DIR / 'IGlobal.py') + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + yield mod.IGlobal + finally: + for k, v in saved.items(): + if v is None: + sys.modules.pop(k, None) + else: + sys.modules[k] = v + + +@pytest.fixture +def g(IGlobal): + """A bare IGlobal instance (no engine/lifecycle needed for these methods).""" + return IGlobal.__new__(IGlobal) + + +# --------------------------------------------------------------------------- +# _connection_params +# --------------------------------------------------------------------------- + + +def test_connection_params_defaults(g): + """Empty config yields ClickHouse-appropriate defaults; tls off.""" + p = g._connection_params({}) + assert p == { + 'host': 'localhost', + 'user': 'default', + 'password': '', + 'database': 'default', + 'table': 'table', + 'tls': '', + } + + +def test_connection_params_strips_but_keeps_password_whitespace(g): + """Host/user/db/table are stripped; password is preserved verbatim.""" + p = g._connection_params({'host': ' h ', 'user': ' u ', 'database': ' db ', 'table': ' t ', 'password': ' pw '}) + assert (p['host'], p['user'], p['database'], p['table']) == ('h', 'u', 'db', 't') + assert p['password'] == ' pw ' + + +@pytest.mark.parametrize( + 'value, expected', + [ + (True, 'true'), + ('true', 'true'), + ('True', 'true'), + ('1', 'true'), + ('yes', 'true'), + ('on', 'true'), + (False, ''), + ('false', ''), + ('0', ''), + ('', ''), + (None, ''), + ], +) +def test_connection_params_tls_parsing(g, value, expected): + """The tls flag accepts booleans and common truthy/falsey strings; 'false' is not truthy.""" + assert g._connection_params({'tls': value})['tls'] == expected + + +# --------------------------------------------------------------------------- +# _build_connection_url +# --------------------------------------------------------------------------- + + +def test_build_url_plaintext_local(g): + """Without tls: plain native DSN, no port forced, no secure param.""" + url = g._build_connection_url(g._connection_params({'host': 'localhost', 'user': 'u', 'password': 'p'})) + assert url == 'clickhouse+native://u:p@localhost/default' + + +def test_build_url_tls_bare_host_defaults_to_9440(g): + """With tls and no explicit port: assume the ClickHouse Cloud native TLS port 9440 and add ?secure=true.""" + url = g._build_connection_url( + g._connection_params({'host': 'cloud.example.com', 'user': 'default', 'password': 'pw', 'tls': True}) + ) + assert url == 'clickhouse+native://default:pw@cloud.example.com:9440/default?secure=true' + + +def test_build_url_tls_keeps_explicit_port(g): + """An explicit port is respected even when tls is on.""" + url = g._build_connection_url( + g._connection_params({'host': 'cloud.example.com:9000', 'password': 'pw', 'tls': 'true'}) + ) + assert url == 'clickhouse+native://default:pw@cloud.example.com:9000/default?secure=true' + + +def test_build_url_url_encodes_password(g): + """Special characters in the password are URL-encoded so the DSN stays valid.""" + url = g._build_connection_url(g._connection_params({'host': 'h', 'user': 'u', 'password': 'p@s/s#1'})) + assert 'p%40s%2Fs%231' in url + assert url == 'clickhouse+native://u:p%40s%2Fs%231@h/default' + + +# --------------------------------------------------------------------------- +# _max_validation_attempts +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 'cfg, expected', + [ + ({}, 5), + ({'max_attempts': 9}, 9), + ({'max_attempts': '7'}, 7), + ({'max_attempts': 'nope'}, 5), + ({'max_attempts': None}, 5), + ], +) +def test_max_validation_attempts(g, cfg, expected): + """max_attempts is read as int with a safe fallback of 5.""" + assert g._max_validation_attempts(cfg) == expected From 5f3431e0ef23ad73f4be405d471adb658af7307b Mon Sep 17 00:00:00 2001 From: Mithilesh Gaurihar Date: Tue, 2 Jun 2026 10:46:47 -0700 Subject: [PATCH 2/4] fix(db): encode DSN identifiers; harden ClickHouse config; drop unsupported insert lane MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on the ClickHouse node PR. DSN safety (db_clickhouse + db_mysql + db_postgres, for parity): - URL-encode user and database (not just password) in _build_connection_url so reserved characters can't break the SQLAlchemy URL. db_clickhouse hardening: - _connection_params: coerce explicit null host/user/database/table to their defaults before .strip() (was AttributeError on a stored null). - _build_connection_url: bracket-aware port detection for IPv6 literals so a TLS host like [::1] correctly gets :9440 and isn't mis-parsed. - _max_validation_attempts: clamp to the documented 1..20 range. Remove the broken / ungated insert path: - Drop the `answers` ingestion lane from the manifest. The inherited auto-create-table helper uses an auto-increment integer PK and no table engine — neither exists in ClickHouse — so the advertised insert/auto-create path could not work, and it also bypassed allow_execute. Removing the lane resolves both the "broken on ClickHouse" and "ungated write" review notes. - Rewrite the node description (query/agent-tool focus, not insert-only) and update the README (remove insert lane + auto-create claims, add an Ingestion section explaining the deliberate exclusion). Tests: add coverage for null coercion, IPv6+TLS, user/database encoding, and max_attempts clamping (32 DSN unit tests pass). Refs #1051 --- nodes/src/nodes/db_clickhouse/IGlobal.py | 37 +++++++++++----- nodes/src/nodes/db_clickhouse/README.md | 13 +++--- nodes/src/nodes/db_clickhouse/services.json | 3 +- nodes/src/nodes/db_mysql/IGlobal.py | 8 ++-- nodes/src/nodes/db_postgres/IGlobal.py | 8 ++-- .../test/db_clickhouse/test_clickhouse_dsn.py | 42 ++++++++++++++++++- 6 files changed, 85 insertions(+), 26 deletions(-) diff --git a/nodes/src/nodes/db_clickhouse/IGlobal.py b/nodes/src/nodes/db_clickhouse/IGlobal.py index 0bd2bd049..9509e0028 100644 --- a/nodes/src/nodes/db_clickhouse/IGlobal.py +++ b/nodes/src/nodes/db_clickhouse/IGlobal.py @@ -53,42 +53,57 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: # Config values may arrive as strings ('true'/'false'); 'false' must # not be truthy, so don't use bool() directly. tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'} + # `(config.get(k) or default)` coerces an explicit JSON null (or empty + # string) to the intended default before .strip(), so a stored null + # can't raise AttributeError. return { - 'host': config.get('host', 'localhost').strip(), - 'user': config.get('user', 'default').strip(), - 'password': config.get('password', ''), # Do not strip — whitespace is valid in passwords - 'database': config.get('database', 'default').strip(), - 'table': config.get('table', 'table').strip(), + 'host': (config.get('host') or 'localhost').strip(), + 'user': (config.get('user') or 'default').strip(), + 'password': config.get('password') or '', # Do not strip — whitespace is valid in passwords + 'database': (config.get('database') or 'default').strip(), + 'table': (config.get('table') or 'table').strip(), # Normalised to a flag string so the params dict stays Dict[str, str]; # consumed by _build_connection_url below. 'tls': 'true' if tls else '', } def _build_connection_url(self, params: Dict[str, str]) -> str: - # URL-encode the password so special characters (e.g. @, /, #) don't - # break the SQLAlchemy connection string. + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) can't break the SQLAlchemy connection string. + user = urllib.parse.quote_plus(params['user']) password = urllib.parse.quote_plus(params['password']) + database = urllib.parse.quote_plus(params['database']) host = params['host'] if params.get('tls'): # TLS is required by managed services such as ClickHouse Cloud, whose # native-protocol TLS port is 9440. Default to it when the user did # not pin an explicit port, so a bare cloud hostname just works. - if ':' not in host: + # Port detection is bracket-aware: a bracketed IPv6 literal (e.g. + # [::1]) only carries a port when a ':' follows the closing ']'. + if host.startswith('['): + has_port = ']' in host and ':' in host.split(']', 1)[1] + else: + has_port = ':' in host + if not has_port: host = f'{host}:9440' # ?secure=true is clickhouse-driver's own wire-level parameter name for # enabling TLS; it is unrelated to the node's "tls" config field. - return f'clickhouse+native://{params["user"]}:{password}@{host}/{params["database"]}?secure=true' + return f'clickhouse+native://{user}:{password}@{host}/{database}?secure=true' # Plaintext native (e.g. a local server); defaults to port 9000 when the # host carries no explicit port. SQLAlchemy handles host:port correctly. - return f'clickhouse+native://{params["user"]}:{password}@{host}/{params["database"]}' + return f'clickhouse+native://{user}:{password}@{host}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: try: - return int(config.get('max_attempts', 5)) + value = int(config.get('max_attempts', 5)) except (ValueError, TypeError): return 5 + # Clamp to the documented 1..20 range (services.json minimum/maximum) so + # a value supplied directly (bypassing UI validation) can't request 0, + # negative, or excessive EXPLAIN-validation retries. + return max(1, min(20, value)) def _db_description(self, config: Dict[str, Any]) -> str: return config.get('db_description', '') diff --git a/nodes/src/nodes/db_clickhouse/README.md b/nodes/src/nodes/db_clickhouse/README.md index bc3d2f625..b9d9a07fa 100644 --- a/nodes/src/nodes/db_clickhouse/README.md +++ b/nodes/src/nodes/db_clickhouse/README.md @@ -10,7 +10,7 @@ sidebar_position: 1 ## What it does -ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. +ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. This is a **query / read** node — it does not expose a pipeline ingestion (insert) lane (see [Ingestion](#ingestion)). ## Connections @@ -27,9 +27,6 @@ ClickHouse node with two roles: pipeline node (natural-language queries via lane | `questions` | `table` | Translate question → SQL → execute, return as table | | `questions` | `text` | Translate question → SQL → execute, return as text | | `questions` | `answers` | Translate question → SQL → execute, return as answers | -| `answers` | — | Parse structured data and insert into table | - -Auto-creates the target table on first insert if it doesn't exist. ## As a tool @@ -41,7 +38,7 @@ When connected to an agent, exposes three functions under the configured server | `clickhouse.get_schema` | Returns tables, columns, types, and primary keys | | `clickhouse.get_sql` | Natural language → SQL only — no execution | -Only `SELECT` is permitted for queries. Insert operations use the `answers` lane. +Only `SELECT` is permitted for queries. ## Configuration @@ -69,7 +66,11 @@ To connect to a ClickHouse Cloud service: 2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON. 3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing). +## Ingestion + +Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose a pipeline ingestion (`answers`) lane. The shared auto-create-table helper builds tables with an auto-increment integer primary key and no table engine — neither of which exists in ClickHouse (tables require an explicit engine such as `MergeTree`) — so the inherited insert/auto-create path cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.) + ## Notes - ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships. -- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Direct write/DDL statements require the **Allow direct execution** toggle. +- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Raw SQL (`QuestionType.EXECUTE`) is gated behind the **Allow direct execution** toggle and is intended only for trusted callers. diff --git a/nodes/src/nodes/db_clickhouse/services.json b/nodes/src/nodes/db_clickhouse/services.json index f3ef35523..42c70091c 100644 --- a/nodes/src/nodes/db_clickhouse/services.json +++ b/nodes/src/nodes/db_clickhouse/services.json @@ -47,7 +47,7 @@ // Optional: // Description of this driver // - "description": ["A processing component that takes structured table data and inserts it ", "into a ClickHouse database. Designed for analytical data ingestion, it ", "supports field mapping and basic configuration of connection credentials ", "and target tables. Ideal for storing processed results or structured outputs, ", "this component ensures reliable insertion of records into columnar storage."], + "description": ["A ClickHouse component that answers natural-language questions by translating ", "them into SQL and executing them against the database, returning rows as a table, ", "text, or structured answers. It also exposes an agent tool surface (get_data, ", "get_schema, get_sql) for question-to-SQL querying. Read-only by default; raw SQL ", "execution is available to trusted callers via the allow_execute option."], // // Optional: // The icon is the icon to display in the UI for this node @@ -71,7 +71,6 @@ // and what it produces // "lanes": { - "answers": [], "questions": ["table", "text", "answers"] }, // diff --git a/nodes/src/nodes/db_mysql/IGlobal.py b/nodes/src/nodes/db_mysql/IGlobal.py index ddb99a86e..341c9315f 100644 --- a/nodes/src/nodes/db_mysql/IGlobal.py +++ b/nodes/src/nodes/db_mysql/IGlobal.py @@ -48,10 +48,12 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: - # URL-encode the password so special characters (e.g. @, /, #) don't - # break the SQLAlchemy connection string. + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. + user = urllib.parse.quote_plus(params['user']) password = urllib.parse.quote_plus(params['password']) - return f'mysql+pymysql://{params["user"]}:{password}@{params["host"]}/{params["database"]}' + database = urllib.parse.quote_plus(params['database']) + return f'mysql+pymysql://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: try: diff --git a/nodes/src/nodes/db_postgres/IGlobal.py b/nodes/src/nodes/db_postgres/IGlobal.py index 55bef874e..2956a50ae 100644 --- a/nodes/src/nodes/db_postgres/IGlobal.py +++ b/nodes/src/nodes/db_postgres/IGlobal.py @@ -48,12 +48,14 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: - # URL-encode the password so special characters (e.g. @, /, #) don't - # break the SQLAlchemy connection string. + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. # Host may include an explicit port (e.g. localhost:5433); SQLAlchemy # handles host:port in the authority section correctly. + user = urllib.parse.quote_plus(params['user']) password = urllib.parse.quote_plus(params['password']) - return f'postgresql+psycopg2://{params["user"]}:{password}@{params["host"]}/{params["database"]}' + database = urllib.parse.quote_plus(params['database']) + return f'postgresql+psycopg2://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: try: diff --git a/nodes/test/db_clickhouse/test_clickhouse_dsn.py b/nodes/test/db_clickhouse/test_clickhouse_dsn.py index 83594d721..aee4fff14 100644 --- a/nodes/test/db_clickhouse/test_clickhouse_dsn.py +++ b/nodes/test/db_clickhouse/test_clickhouse_dsn.py @@ -95,6 +95,19 @@ def test_connection_params_strips_but_keeps_password_whitespace(g): assert p['password'] == ' pw ' +def test_connection_params_coerces_none_to_defaults(g): + """Explicit null values fall back to defaults instead of raising AttributeError.""" + p = g._connection_params({'host': None, 'user': None, 'password': None, 'database': None, 'table': None}) + assert p == { + 'host': 'localhost', + 'user': 'default', + 'password': '', + 'database': 'default', + 'table': 'table', + 'tls': '', + } + + @pytest.mark.parametrize( 'value, expected', [ @@ -150,6 +163,26 @@ def test_build_url_url_encodes_password(g): assert url == 'clickhouse+native://u:p%40s%2Fs%231@h/default' +def test_build_url_url_encodes_user_and_database(g): + """User and database with reserved characters are URL-encoded, not just the password.""" + url = g._build_connection_url( + g._connection_params({'host': 'h', 'user': 'a@b', 'password': 'p', 'database': 'db/1'}) + ) + assert url == 'clickhouse+native://a%40b:p@h/db%2F1' + + +def test_build_url_tls_ipv6_bare_defaults_to_9440(g): + """A bracketed IPv6 literal with no port gets :9440 appended (brackets preserved).""" + url = g._build_connection_url(g._connection_params({'host': '[::1]', 'password': 'pw', 'tls': True})) + assert url == 'clickhouse+native://default:pw@[::1]:9440/default?secure=true' + + +def test_build_url_tls_ipv6_keeps_explicit_port(g): + """A bracketed IPv6 literal that already has a port is left unchanged.""" + url = g._build_connection_url(g._connection_params({'host': '[::1]:9000', 'password': 'pw', 'tls': True})) + assert url == 'clickhouse+native://default:pw@[::1]:9000/default?secure=true' + + # --------------------------------------------------------------------------- # _max_validation_attempts # --------------------------------------------------------------------------- @@ -163,8 +196,15 @@ def test_build_url_url_encodes_password(g): ({'max_attempts': '7'}, 7), ({'max_attempts': 'nope'}, 5), ({'max_attempts': None}, 5), + # Out-of-range values are clamped to the documented 1..20 bounds. + ({'max_attempts': 0}, 1), + ({'max_attempts': -3}, 1), + ({'max_attempts': 1}, 1), + ({'max_attempts': 20}, 20), + ({'max_attempts': 100}, 20), + ({'max_attempts': '25'}, 20), ], ) def test_max_validation_attempts(g, cfg, expected): - """max_attempts is read as int with a safe fallback of 5.""" + """max_attempts is parsed as int, clamped to 1..20, with a safe fallback of 5.""" assert g._max_validation_attempts(cfg) == expected From 7cc4b8c6a50f2c4af10b232331d1490fa06d8b1e Mon Sep 17 00:00:00 2001 From: Mithilesh Gaurihar Date: Tue, 2 Jun 2026 10:54:16 -0700 Subject: [PATCH 3/4] fix(db_clickhouse): coerce _db_description to str; clarify README ingestion note - _db_description: a stored `"db_description": null` returned None, violating the -> str contract. Coerce null / non-string to '' (add regression tests). - README Ingestion: clarify that removing the `answers` lane drops only the ingestion/input lane (pipeline inserts), not the `questions -> answers` output lane used for querying. Refs #1051 --- nodes/src/nodes/db_clickhouse/IGlobal.py | 4 +++- nodes/src/nodes/db_clickhouse/README.md | 2 +- .../test/db_clickhouse/test_clickhouse_dsn.py | 21 +++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/nodes/src/nodes/db_clickhouse/IGlobal.py b/nodes/src/nodes/db_clickhouse/IGlobal.py index 9509e0028..661d61a37 100644 --- a/nodes/src/nodes/db_clickhouse/IGlobal.py +++ b/nodes/src/nodes/db_clickhouse/IGlobal.py @@ -106,4 +106,6 @@ def _max_validation_attempts(self, config: Dict[str, Any]) -> int: return max(1, min(20, value)) def _db_description(self, config: Dict[str, Any]) -> str: - return config.get('db_description', '') + # A stored null (or non-string) must not violate the -> str contract. + value = config.get('db_description') + return value if isinstance(value, str) else '' diff --git a/nodes/src/nodes/db_clickhouse/README.md b/nodes/src/nodes/db_clickhouse/README.md index b9d9a07fa..638df4b41 100644 --- a/nodes/src/nodes/db_clickhouse/README.md +++ b/nodes/src/nodes/db_clickhouse/README.md @@ -68,7 +68,7 @@ To connect to a ClickHouse Cloud service: ## Ingestion -Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose a pipeline ingestion (`answers`) lane. The shared auto-create-table helper builds tables with an auto-increment integer primary key and no table engine — neither of which exists in ClickHouse (tables require an explicit engine such as `MergeTree`) — so the inherited insert/auto-create path cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.) +Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose the ingestion/input `answers` lane (used for pipeline inserts). This removes only that input lane — **not** the `questions → answers` output lane used for querying, which still works. The shared auto-create-table helper builds tables with an auto-increment integer primary key and no table engine — neither of which exists in ClickHouse (tables require an explicit engine such as `MergeTree`) — so the inherited insert/auto-create path cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.) ## Notes diff --git a/nodes/test/db_clickhouse/test_clickhouse_dsn.py b/nodes/test/db_clickhouse/test_clickhouse_dsn.py index aee4fff14..2736fa950 100644 --- a/nodes/test/db_clickhouse/test_clickhouse_dsn.py +++ b/nodes/test/db_clickhouse/test_clickhouse_dsn.py @@ -208,3 +208,24 @@ def test_build_url_tls_ipv6_keeps_explicit_port(g): def test_max_validation_attempts(g, cfg, expected): """max_attempts is parsed as int, clamped to 1..20, with a safe fallback of 5.""" assert g._max_validation_attempts(cfg) == expected + + +# --------------------------------------------------------------------------- +# _db_description +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 'cfg, expected', + [ + ({}, ''), + ({'db_description': 'sales events'}, 'sales events'), + ({'db_description': None}, ''), + ({'db_description': 123}, ''), + ], +) +def test_db_description_always_returns_str(g, cfg, expected): + """_db_description honors its -> str contract, coercing null/non-string to ''.""" + result = g._db_description(cfg) + assert isinstance(result, str) + assert result == expected From 6f9b0ef1939173063b56618b2036160d8a62a02f Mon Sep 17 00:00:00 2001 From: Mithilesh Gaurihar Date: Tue, 2 Jun 2026 11:05:55 -0700 Subject: [PATCH 4/4] refactor(db_clickhouse): add _normalize_field helper and docstrings - Extract a _normalize_field(value, default) helper used for host/user/ database/table: coerces non-None values via str() before .strip() (so a non-string config value can't raise AttributeError) and falls back to the default when the result is empty/whitespace-only. Password stays uncoerced (whitespace preserved). Adds regression coverage. - Add docstrings to all overridden methods across the three DB nodes (db_clickhouse / db_mysql / db_postgres) to satisfy the docstring-coverage threshold (touched files now 100%). Refs #1051 --- nodes/src/nodes/db_clickhouse/IGlobal.py | 27 ++++++++++++++----- nodes/src/nodes/db_clickhouse/IInstance.py | 2 ++ nodes/src/nodes/db_mysql/IGlobal.py | 4 +++ nodes/src/nodes/db_postgres/IGlobal.py | 4 +++ .../test/db_clickhouse/test_clickhouse_dsn.py | 8 ++++++ 5 files changed, 38 insertions(+), 7 deletions(-) diff --git a/nodes/src/nodes/db_clickhouse/IGlobal.py b/nodes/src/nodes/db_clickhouse/IGlobal.py index 661d61a37..75b3870bf 100644 --- a/nodes/src/nodes/db_clickhouse/IGlobal.py +++ b/nodes/src/nodes/db_clickhouse/IGlobal.py @@ -41,7 +41,20 @@ class IGlobal(DatabaseGlobalBase): best-effort primary key, so the dialect-agnostic base works unchanged. """ + @staticmethod + def _normalize_field(value: Any, default: str) -> str: + """Coerce a config value to a stripped string, returning ``default`` when it is None or empty. + + Non-string values are coerced via ``str()`` first, so a stored null or a + non-string (e.g. a number) can never raise ``AttributeError`` on ``.strip()``. + """ + if value is None: + return default + text = str(value).strip() + return text or default + def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat ClickHouse connection-params dict.""" # Config.getNodeConfig() strips the node namespace prefix before returning; # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host'). # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL @@ -53,21 +66,19 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: # Config values may arrive as strings ('true'/'false'); 'false' must # not be truthy, so don't use bool() directly. tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'} - # `(config.get(k) or default)` coerces an explicit JSON null (or empty - # string) to the intended default before .strip(), so a stored null - # can't raise AttributeError. return { - 'host': (config.get('host') or 'localhost').strip(), - 'user': (config.get('user') or 'default').strip(), + 'host': self._normalize_field(config.get('host'), 'localhost'), + 'user': self._normalize_field(config.get('user'), 'default'), 'password': config.get('password') or '', # Do not strip — whitespace is valid in passwords - 'database': (config.get('database') or 'default').strip(), - 'table': (config.get('table') or 'table').strip(), + 'database': self._normalize_field(config.get('database'), 'default'), + 'table': self._normalize_field(config.get('table'), 'table'), # Normalised to a flag string so the params dict stays Dict[str, str]; # consumed by _build_connection_url below. 'tls': 'true' if tls else '', } def _build_connection_url(self, params: Dict[str, str]) -> str: + """Build a clickhouse-sqlalchemy native-TCP DSN, enabling TLS when requested.""" # URL-encode user / password / database so reserved characters # (e.g. @, /, #, :) can't break the SQLAlchemy connection string. user = urllib.parse.quote_plus(params['user']) @@ -96,6 +107,7 @@ def _build_connection_url(self, params: Dict[str, str]) -> str: return f'clickhouse+native://{user}:{password}@{host}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count, clamped to the documented 1..20 range.""" try: value = int(config.get('max_attempts', 5)) except (ValueError, TypeError): @@ -106,6 +118,7 @@ def _max_validation_attempts(self, config: Dict[str, Any]) -> int: return max(1, min(20, value)) def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description, always as a string.""" # A stored null (or non-string) must not violate the -> str contract. value = config.get('db_description') return value if isinstance(value, str) else '' diff --git a/nodes/src/nodes/db_clickhouse/IInstance.py b/nodes/src/nodes/db_clickhouse/IInstance.py index 7a79a2b66..08d593c48 100644 --- a/nodes/src/nodes/db_clickhouse/IInstance.py +++ b/nodes/src/nodes/db_clickhouse/IInstance.py @@ -16,7 +16,9 @@ class IInstance(DatabaseInstanceBase): IGlobal: IGlobal def _db_display_name(self) -> str: + """Return the human-readable database name used in tool descriptions.""" return 'ClickHouse' def _db_dialect(self) -> str: + """Return the machine-readable dialect identifier surfaced via QuestionType.DIALECT.""" return 'clickhouse' diff --git a/nodes/src/nodes/db_mysql/IGlobal.py b/nodes/src/nodes/db_mysql/IGlobal.py index 341c9315f..eb99e5d1b 100644 --- a/nodes/src/nodes/db_mysql/IGlobal.py +++ b/nodes/src/nodes/db_mysql/IGlobal.py @@ -37,6 +37,7 @@ class IGlobal(DatabaseGlobalBase): """ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat MySQL connection-params dict.""" # Config.getNodeConfig() strips the node namespace prefix before returning; # keys are unprefixed here by design (e.g. 'host', not 'mysql.host'). return { @@ -48,6 +49,7 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: + """Build a pymysql MySQL DSN from the connection params.""" # URL-encode user / password / database so reserved characters # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. user = urllib.parse.quote_plus(params['user']) @@ -56,10 +58,12 @@ def _build_connection_url(self, params: Dict[str, str]) -> str: return f'mysql+pymysql://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count from config (default 5).""" try: return int(config.get('max_attempts', 5)) except (ValueError, TypeError): return 5 def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description (empty string if unset).""" return config.get('db_description', '') diff --git a/nodes/src/nodes/db_postgres/IGlobal.py b/nodes/src/nodes/db_postgres/IGlobal.py index 2956a50ae..429fa172b 100644 --- a/nodes/src/nodes/db_postgres/IGlobal.py +++ b/nodes/src/nodes/db_postgres/IGlobal.py @@ -37,6 +37,7 @@ class IGlobal(DatabaseGlobalBase): """ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat PostgreSQL connection-params dict.""" # Config.getNodeConfig() strips the node namespace prefix before returning; # keys are unprefixed here by design (e.g. 'host', not 'postgresdb.host'). return { @@ -48,6 +49,7 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: + """Build a psycopg2 PostgreSQL DSN from the connection params.""" # URL-encode user / password / database so reserved characters # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. # Host may include an explicit port (e.g. localhost:5433); SQLAlchemy @@ -58,10 +60,12 @@ def _build_connection_url(self, params: Dict[str, str]) -> str: return f'postgresql+psycopg2://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count from config (default 5).""" try: return int(config.get('max_attempts', 5)) except (ValueError, TypeError): return 5 def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description (empty string if unset).""" return config.get('db_description', '') diff --git a/nodes/test/db_clickhouse/test_clickhouse_dsn.py b/nodes/test/db_clickhouse/test_clickhouse_dsn.py index 2736fa950..c949e4d66 100644 --- a/nodes/test/db_clickhouse/test_clickhouse_dsn.py +++ b/nodes/test/db_clickhouse/test_clickhouse_dsn.py @@ -108,6 +108,14 @@ def test_connection_params_coerces_none_to_defaults(g): } +def test_connection_params_normalizes_whitespace_and_nonstring(g): + """Whitespace-only values fall back to defaults; non-string values are coerced to str.""" + p = g._connection_params({'host': ' ', 'database': ' analytics ', 'table': 42}) + assert p['host'] == 'localhost' # whitespace-only -> default + assert p['database'] == 'analytics' # stripped + assert p['table'] == '42' # non-string coerced, no AttributeError + + @pytest.mark.parametrize( 'value, expected', [