diff --git a/nodes/src/nodes/db_clickhouse/IGlobal.py b/nodes/src/nodes/db_clickhouse/IGlobal.py new file mode 100644 index 000000000..75b3870bf --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/IGlobal.py @@ -0,0 +1,124 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +import urllib.parse +from typing import Any, Dict + +from ai.common.database import DatabaseGlobalBase + + +class IGlobal(DatabaseGlobalBase): + """ClickHouse-specific global state. + + Implements the two abstract methods that carry ClickHouse knowledge: + how to read connection params from the node config, and how to build a + clickhouse-sqlalchemy DSN from those params. Everything else (schema + reflection, type inference, session lifecycle) lives in the base. + + The DSN uses the native TCP interface (``clickhouse+native://``, default + port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no + foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a + best-effort primary key, so the dialect-agnostic base works unchanged. + """ + + @staticmethod + def _normalize_field(value: Any, default: str) -> str: + """Coerce a config value to a stripped string, returning ``default`` when it is None or empty. + + Non-string values are coerced via ``str()`` first, so a stored null or a + non-string (e.g. a number) can never raise ``AttributeError`` on ``.strip()``. + """ + if value is None: + return default + text = str(value).strip() + return text or default + + def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat ClickHouse connection-params dict.""" + # Config.getNodeConfig() strips the node namespace prefix before returning; + # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host'). + # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL + # nodes). It is distinct from the field-level "secure": true attribute on the + # password field — that attribute only marks the value as a masked secret and + # is shared identically across all three database nodes. + tls = config.get('tls', False) + if isinstance(tls, str): + # Config values may arrive as strings ('true'/'false'); 'false' must + # not be truthy, so don't use bool() directly. + tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'} + return { + 'host': self._normalize_field(config.get('host'), 'localhost'), + 'user': self._normalize_field(config.get('user'), 'default'), + 'password': config.get('password') or '', # Do not strip — whitespace is valid in passwords + 'database': self._normalize_field(config.get('database'), 'default'), + 'table': self._normalize_field(config.get('table'), 'table'), + # Normalised to a flag string so the params dict stays Dict[str, str]; + # consumed by _build_connection_url below. + 'tls': 'true' if tls else '', + } + + def _build_connection_url(self, params: Dict[str, str]) -> str: + """Build a clickhouse-sqlalchemy native-TCP DSN, enabling TLS when requested.""" + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) can't break the SQLAlchemy connection string. + user = urllib.parse.quote_plus(params['user']) + password = urllib.parse.quote_plus(params['password']) + database = urllib.parse.quote_plus(params['database']) + + host = params['host'] + if params.get('tls'): + # TLS is required by managed services such as ClickHouse Cloud, whose + # native-protocol TLS port is 9440. Default to it when the user did + # not pin an explicit port, so a bare cloud hostname just works. + # Port detection is bracket-aware: a bracketed IPv6 literal (e.g. + # [::1]) only carries a port when a ':' follows the closing ']'. + if host.startswith('['): + has_port = ']' in host and ':' in host.split(']', 1)[1] + else: + has_port = ':' in host + if not has_port: + host = f'{host}:9440' + # ?secure=true is clickhouse-driver's own wire-level parameter name for + # enabling TLS; it is unrelated to the node's "tls" config field. + return f'clickhouse+native://{user}:{password}@{host}/{database}?secure=true' + + # Plaintext native (e.g. a local server); defaults to port 9000 when the + # host carries no explicit port. SQLAlchemy handles host:port correctly. + return f'clickhouse+native://{user}:{password}@{host}/{database}' + + def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count, clamped to the documented 1..20 range.""" + try: + value = int(config.get('max_attempts', 5)) + except (ValueError, TypeError): + return 5 + # Clamp to the documented 1..20 range (services.json minimum/maximum) so + # a value supplied directly (bypassing UI validation) can't request 0, + # negative, or excessive EXPLAIN-validation retries. + return max(1, min(20, value)) + + def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description, always as a string.""" + # A stored null (or non-string) must not violate the -> str contract. + value = config.get('db_description') + return value if isinstance(value, str) else '' diff --git a/nodes/src/nodes/db_clickhouse/IInstance.py b/nodes/src/nodes/db_clickhouse/IInstance.py new file mode 100644 index 000000000..08d593c48 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/IInstance.py @@ -0,0 +1,24 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# ============================================================================= + +from ai.common.database import DatabaseInstanceBase +from .IGlobal import IGlobal + + +class IInstance(DatabaseInstanceBase): + """ClickHouse-specific instance. + + All tool methods and lane handlers are inherited from DatabaseInstanceBase. + """ + + IGlobal: IGlobal + + def _db_display_name(self) -> str: + """Return the human-readable database name used in tool descriptions.""" + return 'ClickHouse' + + def _db_dialect(self) -> str: + """Return the machine-readable dialect identifier surfaced via QuestionType.DIALECT.""" + return 'clickhouse' diff --git a/nodes/src/nodes/db_clickhouse/README.md b/nodes/src/nodes/db_clickhouse/README.md new file mode 100644 index 000000000..638df4b41 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/README.md @@ -0,0 +1,76 @@ +--- +title: ClickHouse +date: 2026-06-01 +sidebar_position: 1 +--- + + + ClickHouse - RocketRide Documentation + + +## What it does + +ClickHouse node with two roles: pipeline node (natural-language queries via lanes) and tool node (agents call it directly). Connects over the native TCP protocol (default port 9000) via `clickhouse-driver`. This is a **query / read** node — it does not expose a pipeline ingestion (insert) lane (see [Ingestion](#ingestion)). + +## Connections + +| Connection | Required | Description | +| ---------- | -------- | ---------------------------------------------- | +| `llm` | yes | LLM used to generate SQL from natural language | + +## As a pipeline node + +**Lanes:** + +| Lane in | Lane out | Description | +| ----------- | --------- | ----------------------------------------------------- | +| `questions` | `table` | Translate question → SQL → execute, return as table | +| `questions` | `text` | Translate question → SQL → execute, return as text | +| `questions` | `answers` | Translate question → SQL → execute, return as answers | + +## As a tool + +When connected to an agent, exposes three functions under the configured server name (default: `clickhouse`): + +| Function | Description | +| ----------------------- | ------------------------------------------------------------------------ | +| `clickhouse.get_data` | Natural language → SQL → execute, returns rows (default 250, max 25 000) | +| `clickhouse.get_schema` | Returns tables, columns, types, and primary keys | +| `clickhouse.get_sql` | Natural language → SQL only — no execution | + +Only `SELECT` is permitted for queries. + +## Configuration + +| Field | Default | Description | +| ----------------------- | ----------- | ------------------------------------------------------------------------------------ | +| Database Description | — | Plain-language description of the database, used to guide SQL generation | +| Host | `localhost` | ClickHouse server address, optionally `host:port` (native protocol, defaults to 9000) | +| User | `default` | Database username | +| Password | — | Database password (empty for the stock `default` user) | +| Database | `default` | Database name | +| Use TLS | `false` | Connect over TLS. Turn ON for **ClickHouse Cloud** (assumes native TLS port 9440 when the host has no explicit port). ClickHouse-only — not present on the MySQL/PostgreSQL nodes | +| Table | `table` | Target table name | +| Max Validation Attempts | `5` | Retry limit for EXPLAIN-based SQL validation (range 1–20) | +| Allow direct execution | `false` | Permit raw `QuestionType.EXECUTE` SQL without LLM translation or safety checks | + +## SQL validation + +Generated SQL is validated by running `EXPLAIN` against the live database. If validation fails, the error is fed back to the LLM for a corrected query. This repeats up to **Max Validation Attempts** times before the node raises an error. + +## ClickHouse Cloud + +To connect to a ClickHouse Cloud service: + +1. In the Cloud console, open your service → **Connect** and copy the **native** endpoint host (e.g. `abc123.us-east-1.aws.clickhouse.cloud`) and the `default` user password. +2. Configure the node with: **Host** = that hostname (no port needed — TLS port 9440 is assumed), **User** = `default`, **Password** = your service password, **Use TLS** = ON. +3. Make sure your machine's IP is allowed under the service's **IP Access List** (or set it to "Anywhere" for testing). + +## Ingestion + +Unlike the MySQL/PostgreSQL nodes, this node intentionally does **not** expose the ingestion/input `answers` lane (used for pipeline inserts). This removes only that input lane — **not** the `questions → answers` output lane used for querying, which still works. The shared auto-create-table helper builds tables with an auto-increment integer primary key and no table engine — neither of which exists in ClickHouse (tables require an explicit engine such as `MergeTree`) — so the inherited insert/auto-create path cannot work here. Create your tables in ClickHouse directly, and use this node for querying. (A ClickHouse-correct ingestion path can be added later as a separate feature.) + +## Notes + +- ClickHouse is column-oriented and has no foreign keys; the reflected schema therefore exposes columns and (best-effort) primary keys but no FK relationships. +- The node is **read-only by default**: the natural-language path only ever runs `SELECT`. Raw SQL (`QuestionType.EXECUTE`) is gated behind the **Allow direct execution** toggle and is intended only for trusted callers. diff --git a/nodes/src/nodes/db_clickhouse/__init__.py b/nodes/src/nodes/db_clickhouse/__init__.py new file mode 100644 index 000000000..0166206f4 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/__init__.py @@ -0,0 +1,37 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# ============================================================================= + +# ------------------------------------------------------------------------------ +# Main module +# ------------------------------------------------------------------------------ +import os +from depends import depends # type: ignore + +# Load the requirements +requirements = os.path.dirname(os.path.realpath(__file__)) + '/requirements.txt' +depends(requirements) + +from .IGlobal import IGlobal # noqa: E402 +from .IInstance import IInstance # noqa: E402 + +__all__ = ['IGlobal', 'IInstance'] diff --git a/nodes/src/nodes/db_clickhouse/clickhouse.svg b/nodes/src/nodes/db_clickhouse/clickhouse.svg new file mode 100644 index 000000000..5c44bdfc9 --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/clickhouse.svg @@ -0,0 +1,11 @@ + + + ClickHouse + + + + + + + + diff --git a/nodes/src/nodes/db_clickhouse/requirements.txt b/nodes/src/nodes/db_clickhouse/requirements.txt new file mode 100644 index 000000000..abf8d9c4b --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/requirements.txt @@ -0,0 +1,2 @@ +clickhouse-sqlalchemy==0.3.2 +clickhouse-driver==0.2.9 diff --git a/nodes/src/nodes/db_clickhouse/services.json b/nodes/src/nodes/db_clickhouse/services.json new file mode 100644 index 000000000..42c70091c --- /dev/null +++ b/nodes/src/nodes/db_clickhouse/services.json @@ -0,0 +1,192 @@ +{ + // + // Required: + // The displayable name of this node + // + "title": "ClickHouse", + // + // Required: + // The protocol is the endpoint protocol + // + "protocol": "db_clickhouse://", + // + // Required: + // Class type of the node - what it does + // + "classType": ["database", "tool"], + // + // Required: + // Capabilities are flags that change the behavior of the underlying + // engine + // + "capabilities": ["noremote", "invoke"], + // + // Optional: + // Register is either filter, endpoint or ignored if not specified. If the + // type is specified, a factory is registered of that given type + // + "register": "filter", + // + // Optional: + // The node is the actual physical node to instantiate - if + // not specified, the protocol will be used + // + "node": "python", + // + // Optional: + // The path is the executable/script code - it is node dependent + // and is optional for most node + // + "path": "nodes.db_clickhouse", + // + // Required: + // The prefix map when added/removed when converting URLs <=> paths + // + "prefix": "clickhouse", + // + // Optional: + // Description of this driver + // + "description": ["A ClickHouse component that answers natural-language questions by translating ", "them into SQL and executing them against the database, returning rows as a table, ", "text, or structured answers. It also exposes an agent tool surface (get_data, ", "get_schema, get_sql) for question-to-SQL querying. Read-only by default; raw SQL ", "execution is available to trusted callers via the allow_execute option."], + // + // Optional: + // The icon is the icon to display in the UI for this node + // + "icon": "clickhouse.svg", + "documentation": "https://docs.rocketride.org", + // + // Optional: + // Defines the invoke connections that may/must be connected + // which are utilized by this driver + // + "invoke": { + "llm": { + "description": "LLM to use to craft SQL queries from question", + "min": 1 + } + }, + // + // Optional: + // As a pipe component, define what this pipe component takes + // and what it produces + // + "lanes": { + "questions": ["table", "text", "answers"] + }, + // + // Optional: + // Profile section are configuration options used by the driver + // itself + // + "preconfig": { + // Define the values that will be merged into any profile configuration + // specified, unless the profile is 'absolute' + "default": "default", + // Defines profiles used with the "profile": key + "profiles": { + "default": { + "database": "default" + } + } + }, + // + // Optional: + // Local fields definitions - these define fields only for the + // current service. You may specify them here, or directly + // in the shape + // + "fields": { + "clickhouse.host": { + "type": "string", + "title": "ClickHouse host", + "default": "localhost", + "description": "Host name or IP address of the ClickHouse server, optionally including a native-protocol port (e.g. localhost:9440). Defaults to port 9000 when none is given." + }, + "clickhouse.user": { + "type": "string", + "title": "User", + "default": "default", + "description": "User to connect to the ClickHouse server" + }, + "clickhouse.password": { + "type": "string", + "title": "Password", + "description": "Password to connect to the ClickHouse server", + "secure": true, + "ui": { + "ui:widget": "password" + } + }, + "clickhouse.database": { + "type": "string", + "title": "Database name", + "default": "default", + "minLength": 1, + "maxLength": 63, + "description": "Name of database" + }, + "clickhouse.tls": { + "type": "boolean", + "title": "Use TLS", + "default": false, + "description": "Connect over TLS. Required for managed services such as ClickHouse Cloud (native TLS port 9440 is assumed when the host has no explicit port). Leave OFF for a plaintext local server on port 9000. ClickHouse-specific — MySQL/PostgreSQL nodes do not expose this." + }, + "clickhouse.table": { + "type": "string", + "title": "Table name", + "default": "table", + "description": "Name of table" + }, + "clickhouse.db_description": { + "type": "string", + "title": "Database description", + "default": "", + "description": "What is this database used for? Describe its content and purpose — this helps the LLM generate more accurate queries.", + "ui": { + "ui:widget": "textarea" + } + }, + "clickhouse.max_attempts": { + "type": "integer", + "title": "Max validation attempts", + "default": 5, + "minimum": 1, + "maximum": 20, + "description": "Maximum number of times to re-ask the LLM if EXPLAIN rejects the generated SQL" + }, + "clickhouse.allow_execute": { + "type": "boolean", + "title": "Allow direct query execution", + "default": false, + "description": "Permit QuestionType.EXECUTE callers to run raw SQL without LLM translation or safety checks. Leave OFF unless a trusted application explicitly needs to issue SQL directly." + }, + "clickhouse.default": { + "object": "default", + "properties": ["clickhouse.db_description", "clickhouse.host", "clickhouse.user", "clickhouse.password", "clickhouse.database", "clickhouse.tls", "clickhouse.table", "clickhouse.max_attempts", "clickhouse.allow_execute"] + }, + "clickhouse.profile": { + "hidden": true, + "type": "string", + "default": "default", + "enum": [["default", "Default"]], + "conditional": [ + { + "value": "default", + "properties": ["clickhouse.default"] + } + ] + } + }, + // + // Required: + // Defines the fields (shape) of the service. Either source or target + // may be specified, or both, but at least one is required + // + "shape": [ + { + "section": "Pipe", + "title": "ClickHouse", + "properties": ["clickhouse.profile"] + } + ] +} diff --git a/nodes/src/nodes/db_mysql/IGlobal.py b/nodes/src/nodes/db_mysql/IGlobal.py index ddb99a86e..eb99e5d1b 100644 --- a/nodes/src/nodes/db_mysql/IGlobal.py +++ b/nodes/src/nodes/db_mysql/IGlobal.py @@ -37,6 +37,7 @@ class IGlobal(DatabaseGlobalBase): """ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat MySQL connection-params dict.""" # Config.getNodeConfig() strips the node namespace prefix before returning; # keys are unprefixed here by design (e.g. 'host', not 'mysql.host'). return { @@ -48,16 +49,21 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: - # URL-encode the password so special characters (e.g. @, /, #) don't - # break the SQLAlchemy connection string. + """Build a pymysql MySQL DSN from the connection params.""" + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. + user = urllib.parse.quote_plus(params['user']) password = urllib.parse.quote_plus(params['password']) - return f'mysql+pymysql://{params["user"]}:{password}@{params["host"]}/{params["database"]}' + database = urllib.parse.quote_plus(params['database']) + return f'mysql+pymysql://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count from config (default 5).""" try: return int(config.get('max_attempts', 5)) except (ValueError, TypeError): return 5 def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description (empty string if unset).""" return config.get('db_description', '') diff --git a/nodes/src/nodes/db_postgres/IGlobal.py b/nodes/src/nodes/db_postgres/IGlobal.py index 55bef874e..429fa172b 100644 --- a/nodes/src/nodes/db_postgres/IGlobal.py +++ b/nodes/src/nodes/db_postgres/IGlobal.py @@ -37,6 +37,7 @@ class IGlobal(DatabaseGlobalBase): """ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: + """Map the node's stored config to a flat PostgreSQL connection-params dict.""" # Config.getNodeConfig() strips the node namespace prefix before returning; # keys are unprefixed here by design (e.g. 'host', not 'postgresdb.host'). return { @@ -48,18 +49,23 @@ def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]: } def _build_connection_url(self, params: Dict[str, str]) -> str: - # URL-encode the password so special characters (e.g. @, /, #) don't - # break the SQLAlchemy connection string. + """Build a psycopg2 PostgreSQL DSN from the connection params.""" + # URL-encode user / password / database so reserved characters + # (e.g. @, /, #, :) don't break the SQLAlchemy connection string. # Host may include an explicit port (e.g. localhost:5433); SQLAlchemy # handles host:port in the authority section correctly. + user = urllib.parse.quote_plus(params['user']) password = urllib.parse.quote_plus(params['password']) - return f'postgresql+psycopg2://{params["user"]}:{password}@{params["host"]}/{params["database"]}' + database = urllib.parse.quote_plus(params['database']) + return f'postgresql+psycopg2://{user}:{password}@{params["host"]}/{database}' def _max_validation_attempts(self, config: Dict[str, Any]) -> int: + """Return the EXPLAIN-validation retry count from config (default 5).""" try: return int(config.get('max_attempts', 5)) except (ValueError, TypeError): return 5 def _db_description(self, config: Dict[str, Any]) -> str: + """Return the user-provided database description (empty string if unset).""" return config.get('db_description', '') diff --git a/nodes/test/db_clickhouse/__init__.py b/nodes/test/db_clickhouse/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/nodes/test/db_clickhouse/test_clickhouse_dsn.py b/nodes/test/db_clickhouse/test_clickhouse_dsn.py new file mode 100644 index 000000000..c949e4d66 --- /dev/null +++ b/nodes/test/db_clickhouse/test_clickhouse_dsn.py @@ -0,0 +1,239 @@ +# ============================================================================= +# MIT License +# Copyright (c) 2026 Aparavi Software AG +# ============================================================================= + +""" +Unit tests for the ClickHouse node's only dialect-specific logic: +``IGlobal._connection_params`` and ``IGlobal._build_connection_url``. + +Everything else (schema reflection, query execution, insertion) is inherited +unchanged from ``ai.common.database`` and is covered by that package's tests +(``packages/ai/tests/ai/common/database/test_db_base.py``). The genuinely new +ClickHouse code is the native-protocol DSN builder and its TLS branch, so that +is what we pin here. + +The node module imports ``from ai.common.database import DatabaseGlobalBase``, +which would pull SQLAlchemy + rocketlib. We stub a trivial base into +``sys.modules`` and load ``IGlobal.py`` directly by file path so the test runs +without the full engine environment (mirroring how test_contracts mocks +engine libs). +""" + +from __future__ import annotations + +import importlib.util +import sys +import types +from pathlib import Path + +import pytest + +NODE_DIR = Path(__file__).resolve().parents[2] / 'src' / 'nodes' / 'db_clickhouse' + + +@pytest.fixture(scope='module') +def IGlobal(): + """Load db_clickhouse/IGlobal.py with a stubbed ai.common.database base.""" + # Stub the ai.common.database package so the node import resolves without + # SQLAlchemy/rocketlib. DatabaseGlobalBase only needs to be a plain base — + # the two methods under test do not touch any base machinery. + ai = types.ModuleType('ai') + ai_common = types.ModuleType('ai.common') + ai_db = types.ModuleType('ai.common.database') + + class _StubBase: # noqa: D401 - trivial stand-in for DatabaseGlobalBase + """Minimal stand-in so the concrete subclass is instantiable.""" + + ai_db.DatabaseGlobalBase = _StubBase + ai.common = ai_common + ai_common.database = ai_db + saved = {k: sys.modules.get(k) for k in ('ai', 'ai.common', 'ai.common.database')} + sys.modules.update({'ai': ai, 'ai.common': ai_common, 'ai.common.database': ai_db}) + + try: + spec = importlib.util.spec_from_file_location('db_clickhouse_iglobal', NODE_DIR / 'IGlobal.py') + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + yield mod.IGlobal + finally: + for k, v in saved.items(): + if v is None: + sys.modules.pop(k, None) + else: + sys.modules[k] = v + + +@pytest.fixture +def g(IGlobal): + """A bare IGlobal instance (no engine/lifecycle needed for these methods).""" + return IGlobal.__new__(IGlobal) + + +# --------------------------------------------------------------------------- +# _connection_params +# --------------------------------------------------------------------------- + + +def test_connection_params_defaults(g): + """Empty config yields ClickHouse-appropriate defaults; tls off.""" + p = g._connection_params({}) + assert p == { + 'host': 'localhost', + 'user': 'default', + 'password': '', + 'database': 'default', + 'table': 'table', + 'tls': '', + } + + +def test_connection_params_strips_but_keeps_password_whitespace(g): + """Host/user/db/table are stripped; password is preserved verbatim.""" + p = g._connection_params({'host': ' h ', 'user': ' u ', 'database': ' db ', 'table': ' t ', 'password': ' pw '}) + assert (p['host'], p['user'], p['database'], p['table']) == ('h', 'u', 'db', 't') + assert p['password'] == ' pw ' + + +def test_connection_params_coerces_none_to_defaults(g): + """Explicit null values fall back to defaults instead of raising AttributeError.""" + p = g._connection_params({'host': None, 'user': None, 'password': None, 'database': None, 'table': None}) + assert p == { + 'host': 'localhost', + 'user': 'default', + 'password': '', + 'database': 'default', + 'table': 'table', + 'tls': '', + } + + +def test_connection_params_normalizes_whitespace_and_nonstring(g): + """Whitespace-only values fall back to defaults; non-string values are coerced to str.""" + p = g._connection_params({'host': ' ', 'database': ' analytics ', 'table': 42}) + assert p['host'] == 'localhost' # whitespace-only -> default + assert p['database'] == 'analytics' # stripped + assert p['table'] == '42' # non-string coerced, no AttributeError + + +@pytest.mark.parametrize( + 'value, expected', + [ + (True, 'true'), + ('true', 'true'), + ('True', 'true'), + ('1', 'true'), + ('yes', 'true'), + ('on', 'true'), + (False, ''), + ('false', ''), + ('0', ''), + ('', ''), + (None, ''), + ], +) +def test_connection_params_tls_parsing(g, value, expected): + """The tls flag accepts booleans and common truthy/falsey strings; 'false' is not truthy.""" + assert g._connection_params({'tls': value})['tls'] == expected + + +# --------------------------------------------------------------------------- +# _build_connection_url +# --------------------------------------------------------------------------- + + +def test_build_url_plaintext_local(g): + """Without tls: plain native DSN, no port forced, no secure param.""" + url = g._build_connection_url(g._connection_params({'host': 'localhost', 'user': 'u', 'password': 'p'})) + assert url == 'clickhouse+native://u:p@localhost/default' + + +def test_build_url_tls_bare_host_defaults_to_9440(g): + """With tls and no explicit port: assume the ClickHouse Cloud native TLS port 9440 and add ?secure=true.""" + url = g._build_connection_url( + g._connection_params({'host': 'cloud.example.com', 'user': 'default', 'password': 'pw', 'tls': True}) + ) + assert url == 'clickhouse+native://default:pw@cloud.example.com:9440/default?secure=true' + + +def test_build_url_tls_keeps_explicit_port(g): + """An explicit port is respected even when tls is on.""" + url = g._build_connection_url( + g._connection_params({'host': 'cloud.example.com:9000', 'password': 'pw', 'tls': 'true'}) + ) + assert url == 'clickhouse+native://default:pw@cloud.example.com:9000/default?secure=true' + + +def test_build_url_url_encodes_password(g): + """Special characters in the password are URL-encoded so the DSN stays valid.""" + url = g._build_connection_url(g._connection_params({'host': 'h', 'user': 'u', 'password': 'p@s/s#1'})) + assert 'p%40s%2Fs%231' in url + assert url == 'clickhouse+native://u:p%40s%2Fs%231@h/default' + + +def test_build_url_url_encodes_user_and_database(g): + """User and database with reserved characters are URL-encoded, not just the password.""" + url = g._build_connection_url( + g._connection_params({'host': 'h', 'user': 'a@b', 'password': 'p', 'database': 'db/1'}) + ) + assert url == 'clickhouse+native://a%40b:p@h/db%2F1' + + +def test_build_url_tls_ipv6_bare_defaults_to_9440(g): + """A bracketed IPv6 literal with no port gets :9440 appended (brackets preserved).""" + url = g._build_connection_url(g._connection_params({'host': '[::1]', 'password': 'pw', 'tls': True})) + assert url == 'clickhouse+native://default:pw@[::1]:9440/default?secure=true' + + +def test_build_url_tls_ipv6_keeps_explicit_port(g): + """A bracketed IPv6 literal that already has a port is left unchanged.""" + url = g._build_connection_url(g._connection_params({'host': '[::1]:9000', 'password': 'pw', 'tls': True})) + assert url == 'clickhouse+native://default:pw@[::1]:9000/default?secure=true' + + +# --------------------------------------------------------------------------- +# _max_validation_attempts +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 'cfg, expected', + [ + ({}, 5), + ({'max_attempts': 9}, 9), + ({'max_attempts': '7'}, 7), + ({'max_attempts': 'nope'}, 5), + ({'max_attempts': None}, 5), + # Out-of-range values are clamped to the documented 1..20 bounds. + ({'max_attempts': 0}, 1), + ({'max_attempts': -3}, 1), + ({'max_attempts': 1}, 1), + ({'max_attempts': 20}, 20), + ({'max_attempts': 100}, 20), + ({'max_attempts': '25'}, 20), + ], +) +def test_max_validation_attempts(g, cfg, expected): + """max_attempts is parsed as int, clamped to 1..20, with a safe fallback of 5.""" + assert g._max_validation_attempts(cfg) == expected + + +# --------------------------------------------------------------------------- +# _db_description +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + 'cfg, expected', + [ + ({}, ''), + ({'db_description': 'sales events'}, 'sales events'), + ({'db_description': None}, ''), + ({'db_description': 123}, ''), + ], +) +def test_db_description_always_returns_str(g, cfg, expected): + """_db_description honors its -> str contract, coercing null/non-string to ''.""" + result = g._db_description(cfg) + assert isinstance(result, str) + assert result == expected