Skip to content
This repository was archived by the owner on May 7, 2026. It is now read-only.
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 28 additions & 2 deletions ibis-server/app/model/metadata/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# Date/Time Types
"date": RustWrenEngineColumnType.DATE,
"datetime": RustWrenEngineColumnType.TIMESTAMP,
"datetime64": RustWrenEngineColumnType.TIMESTAMP,
# String Types
"string": RustWrenEngineColumnType.VARCHAR,
"fixedstring": RustWrenEngineColumnType.CHAR,
Expand Down Expand Up @@ -89,11 +90,12 @@ def get_table_list(self) -> list[Table]:
)

# table exists, and add column to the table
is_nullable = 'nullable(' in row["data_type"].lower()
unique_tables[schema_table].columns.append(
Column(
name=row["column_name"],
type=self._transform_column_type(row["data_type"]),
notNull=False,
notNull=not is_nullable,
description=row["column_comment"],
properties=None,
)
Comment on lines +93 to 101
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Nullability detection is incorrect for Array(Nullable(...)); only top-level Nullable should count.

Sub-string check flags arrays-of-nullable elements as nullable columns. Handle wrapper chain: Nullable(...), LowCardinality(Nullable(...)) → nullable; Array(Nullable(...)) → not nullable at column level.

Apply this minimal change here:

-            is_nullable = 'nullable(' in row["data_type"].lower()
+            is_nullable = self._is_column_nullable(row["data_type"])

Add this helper inside ClickHouseMetadata (see additional snippet below).

Committable suggestion skipped: line range outside the PR's diff.

🤖 Prompt for AI Agents
In ibis-server/app/model/metadata/clickhouse.py around lines 93-101, the current
substring check flags types like Array(Nullable(...)) as nullable; add a helper
method on ClickHouseMetadata (e.g., is_top_level_nullable(data_type: str) ->
bool) that lowercases and trims the type and returns True only when the
outermost wrapper is Nullable(...) or when the outermost wrapper is
LowCardinality(...) whose immediate inner wrapper is Nullable(...); do not treat
Nullable(...) inside Array(...) as top-level nullable. Replace the current
'nullable = "nullable(" in row["data_type"].lower()' logic to call this helper
when computing notNull.

Expand All @@ -119,7 +121,9 @@ def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
The corresponding RustWrenEngineColumnType
"""
# Convert to lowercase for comparison
normalized_type = data_type.lower()
# Extract inner type from wrappers like Nullable(...), Array(...), etc.
inner_type = self._extract_inner_type(data_type)
normalized_type = inner_type.lower()

# Use the module-level mapping table
mapped_type = CLICKHOUSE_TYPE_MAPPING.get(
Expand All @@ -130,3 +134,25 @@ def _transform_column_type(self, data_type: str) -> RustWrenEngineColumnType:
logger.warning(f"Unknown ClickHouse data type: {data_type}")

return mapped_type

def _extract_inner_type(self, data_type: str) -> str:
"""Extract the inner type from ClickHouse type definitions.

This handles types wrapped in Nullable(...), Array(...), etc.

Args:
data_type: The ClickHouse data type string

Returns:
The extracted inner type string
"""

if '(' in data_type and data_type.endswith(')'):
paren_start = data_type.find('(')
type_name = data_type[:paren_start].lower()
inner = data_type[paren_start + 1:-1]

if type_name in ['nullable', 'array', 'lowcardinality']:
return self._extract_inner_type(inner)
else:
return type_name
Comment on lines +138 to +158
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

Bug: _extract_inner_type is defined at module scope and can return None → runtime crash.

  • Defined outside the class but called as self._extract_inner_type(...) → AttributeError.
  • No default return when there are no parentheses → inner_type becomes None and .lower() at Line 126 crashes.

Replace this block with a method inside ClickHouseMetadata and guarantee a string return:

- def _extract_inner_type(self, data_type: str) -> str:
-         """Extract the inner type from ClickHouse type definitions.
- 
-         This handles types wrapped in Nullable(...), Array(...), etc.
- 
-         Args:
-             data_type: The ClickHouse data type string
- 
-         Returns:
-             The extracted inner type string
-         """
- 
-         if '(' in data_type and data_type.endswith(')'):
-             paren_start = data_type.find('(')
-             type_name = data_type[:paren_start].lower()
-             inner = data_type[paren_start + 1:-1]
- 
-             if type_name in ['nullable', 'array', 'lowcardinality']:
-                 return self._extract_inner_type(inner)
-             else:
-                 return type_name
+    def _extract_inner_type(self, data_type: str) -> str:
+        """Extract base type from ClickHouse wrappers like Nullable(...), Array(...), LowCardinality(...)."""
+        s = (data_type or "").strip()
+        while True:
+            if "(" in s and s.endswith(")"):
+                paren_start = s.find("(")
+                type_name = s[:paren_start].strip().lower()
+                inner = s[paren_start + 1 : -1].strip()
+                if type_name in ("nullable", "array", "lowcardinality"):
+                    s = inner
+                    continue
+                # Non-wrapper with params, e.g., Decimal(10,2) or FixedString(16)
+                return type_name
+            # No params/wrappers; return as-is (caller lowercases)
+            return s

Additionally, add this helper inside the same class to correctly compute column nullability:

def _is_column_nullable(self, data_type: str) -> bool:
    s = (data_type or "").strip()
    while True:
        s_l = s.lower()
        if s_l.startswith("nullable(") and s.endswith(")"):
            return True
        if s_l.startswith("lowcardinality(") and s.endswith(")"):
            # unwrap and continue; LowCardinality(Nullable(T)) => nullable
            s = s[s.find("(") + 1 : -1].strip()
            continue
        if s_l.startswith("array(") and s.endswith(")"):
            # Array(Nullable(T)) does not make the column nullable
            return False
        return False
🤖 Prompt for AI Agents
In ibis-server/app/model/metadata/clickhouse.py around lines 138 to 158, move
_extract_inner_type into the ClickHouseMetadata class (so it can be called as
self._extract_inner_type) and make it always return a string: when the input
contains parentheses unwrap permitted wrappers (nullable, array, lowcardinality)
recursively and return the inner type name lowercased; when there are no
parentheses simply return data_type.strip().lower() (never None). Also add the
_is_column_nullable helper as an instance method in the same class exactly as
described in the comment to correctly determine nullability (handle Nullable,
LowCardinality wrapping and Array semantics), and use these methods where inner
type and nullability are computed.

Loading