|
| 1 | +# ============================================================================= |
| 2 | +# MIT License |
| 3 | +# Copyright (c) 2026 Aparavi Software AG |
| 4 | +# |
| 5 | +# Permission is hereby granted, free of charge, to any person obtaining a copy |
| 6 | +# of this software and associated documentation files (the "Software"), to deal |
| 7 | +# in the Software without restriction, including without limitation the rights |
| 8 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 9 | +# copies of the Software, and to permit persons to whom the Software is |
| 10 | +# furnished to do so, subject to the following conditions: |
| 11 | +# |
| 12 | +# The above copyright notice and this permission notice shall be included in |
| 13 | +# all copies or substantial portions of the Software. |
| 14 | +# |
| 15 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | +# SOFTWARE. |
| 22 | +# ============================================================================= |
| 23 | + |
| 24 | +import urllib.parse |
| 25 | +from typing import Any, Dict |
| 26 | + |
| 27 | +from ai.common.database import DatabaseGlobalBase |
| 28 | + |
| 29 | + |
class IGlobal(DatabaseGlobalBase):
    """ClickHouse-specific global state.

    Implements the methods that carry ClickHouse knowledge: how to read
    connection params from the node config, and how to build a
    clickhouse-sqlalchemy DSN from those params. Everything else (schema
    reflection, type inference, session lifecycle) lives in the base.

    The DSN uses the native TCP interface (``clickhouse+native://``, default
    port 9000) via the ``clickhouse-driver`` backend. ClickHouse has no
    foreign keys; ``clickhouse-sqlalchemy`` reflects an empty FK list and a
    best-effort primary key, so the dialect-agnostic base works unchanged.
    """

    @staticmethod
    def _normalize_field(value: Any, default: str) -> str:
        """Coerce a config value to a stripped string, falling back to ``default``.

        Args:
            value: Raw config value; may be ``None`` or a non-string.
            default: Value returned when ``value`` is ``None`` or is empty
                after stripping.

        Returns:
            ``str(value).strip()`` when that is non-empty, otherwise ``default``.
        """
        # Going through str() first means a stored null or a non-string
        # (e.g. a number) can never raise AttributeError on .strip().
        text = '' if value is None else str(value).strip()
        return text or default

    def _connection_params(self, config: Dict[str, Any]) -> Dict[str, str]:
        """Map the node's stored config to a flat ClickHouse connection-params dict.

        Args:
            config: Node configuration with unprefixed keys (``host``, ``user``,
                ``password``, ``database``, ``table``, ``tls``).

        Returns:
            A dict of strings with the keys ``host``, ``user``, ``password``,
            ``database``, ``table`` and ``tls``. ``tls`` is ``'true'`` when TLS
            is requested and ``''`` otherwise.
        """
        # Config.getNodeConfig() strips the node namespace prefix before returning;
        # keys are unprefixed here by design (e.g. 'host', not 'clickhouse.host').
        # 'tls' is a ClickHouse-specific option (not present on the MySQL/PostgreSQL
        # nodes). It is distinct from the field-level "secure": true attribute on the
        # password field — that attribute only marks the value as a masked secret and
        # is shared identically across all three database nodes.
        tls = config.get('tls', False)
        if isinstance(tls, str):
            # Config values may arrive as strings ('true'/'false'); 'false' must
            # not be truthy, so don't use bool() directly.
            tls = tls.strip().lower() in {'1', 'true', 'yes', 'on'}

        # Do not strip — whitespace is valid in passwords. str() keeps the
        # Dict[str, str] contract when the stored value is a non-string (e.g. a
        # purely numeric password saved as a number), which would otherwise
        # raise TypeError when the URL is built.
        password = str(config.get('password') or '')

        return {
            'host': self._normalize_field(config.get('host'), 'localhost'),
            'user': self._normalize_field(config.get('user'), 'default'),
            'password': password,
            'database': self._normalize_field(config.get('database'), 'default'),
            'table': self._normalize_field(config.get('table'), 'table'),
            # Normalised to a flag string so the params dict stays Dict[str, str];
            # consumed by _build_connection_url below.
            'tls': 'true' if tls else '',
        }

    def _build_connection_url(self, params: Dict[str, str]) -> str:
        """Build a clickhouse-sqlalchemy native-TCP DSN, enabling TLS when requested.

        Args:
            params: Connection params as produced by ``_connection_params``.
                Reads ``user``, ``password``, ``database``, ``host`` and the
                optional ``tls`` flag (any non-empty value enables TLS).

        Returns:
            A ``clickhouse+native://`` URL. With TLS enabled the URL carries
            ``?secure=true`` and the host defaults to port 9440 when it has no
            explicit port.
        """
        # Percent-encode user / password / database so reserved characters
        # (e.g. @, /, #, :) can't break the connection string. quote() with
        # safe='' writes a space as %20, which every percent-decoder restores;
        # quote_plus() would write '+', which only form-style decoding
        # (unquote_plus) maps back to a space, so a password containing a
        # space could reach the server as a literal '+'.
        # NOTE(review): SQLAlchemy's URL parser is believed to use plain
        # percent-decoding for credentials — confirm against the pinned version.
        user = urllib.parse.quote(params['user'], safe='')
        password = urllib.parse.quote(params['password'], safe='')
        database = urllib.parse.quote(params['database'], safe='')

        host = params['host']
        # A hostname or IPv4 address with a port contains exactly one ':'.
        # More than one ':' without brackets can only be a bare IPv6 literal
        # (e.g. ::1), which must be bracketed to be valid in a URL authority.
        if not host.startswith('[') and host.count(':') > 1:
            host = f'[{host}]'

        if not params.get('tls'):
            # Plaintext native (e.g. a local server); defaults to port 9000 when
            # the host carries no explicit port.
            return f'clickhouse+native://{user}:{password}@{host}/{database}'

        # TLS is required by managed services such as ClickHouse Cloud, whose
        # native-protocol TLS port is 9440. Default to it when the user did
        # not pin an explicit port, so a bare cloud hostname just works.
        # Port detection is bracket-aware: a bracketed IPv6 literal (e.g.
        # [::1]) only carries a port when a ':' follows the closing ']'.
        if host.startswith('['):
            has_port = ':' in host.partition(']')[2]
        else:
            has_port = ':' in host
        if not has_port:
            host = f'{host}:9440'

        # ?secure=true is clickhouse-driver's own wire-level parameter name for
        # enabling TLS; it is unrelated to the node's "tls" config field.
        return f'clickhouse+native://{user}:{password}@{host}/{database}?secure=true'

    def _max_validation_attempts(self, config: Dict[str, Any]) -> int:
        """Return the EXPLAIN-validation retry count, clamped to the documented 1..20 range.

        Args:
            config: Node configuration; reads the optional ``max_attempts`` key.

        Returns:
            ``max_attempts`` as an int limited to 1..20, or 5 when the key is
            missing or its value cannot be converted to an int.
        """
        try:
            value = int(config.get('max_attempts', 5))
        except (ValueError, TypeError, OverflowError):
            # OverflowError covers int(float('inf')); ValueError/TypeError cover
            # non-numeric strings, NaN and a stored null.
            return 5
        # Clamp to the documented 1..20 range (services.json minimum/maximum) so
        # a value supplied directly (bypassing UI validation) can't request 0,
        # negative, or excessive EXPLAIN-validation retries.
        return max(1, min(20, value))

    def _db_description(self, config: Dict[str, Any]) -> str:
        """Return the user-provided database description, always as a string.

        Args:
            config: Node configuration; reads the optional ``db_description`` key.

        Returns:
            The stored description when it is a ``str``, otherwise ``''``.
        """
        # A stored null (or non-string) must not violate the -> str contract.
        value = config.get('db_description')
        if isinstance(value, str):
            return value
        return ''
0 commit comments