1313from mindsdb .integrations .libs .response import (
1414 HandlerStatusResponse as StatusResponse ,
1515 HandlerResponse as Response ,
16- RESPONSE_TYPE
16+ RESPONSE_TYPE ,
1717)
1818
1919logger = log .getLogger (__name__ )
@@ -24,15 +24,15 @@ class ClickHouseHandler(MetaDatabaseHandler):
2424 This handler handles connection and execution of the ClickHouse statements.
2525 """
2626
27- name = ' clickhouse'
27+ name = " clickhouse"
2828
2929 def __init__ (self , name , connection_data , ** kwargs ):
3030 super ().__init__ (name )
31- self .dialect = ' clickhouse'
31+ self .dialect = " clickhouse"
3232 self .connection_data = connection_data
3333 self .renderer = SqlalchemyRender (ClickHouseDialect )
3434 self .is_connected = False
35- self .protocol = connection_data .get (' protocol' , ' native' )
35+ self .protocol = connection_data .get (" protocol" , " native" )
3636 self ._has_is_nullable_column = None # Cache for version check
3737
3838 def __del__ (self ):
@@ -52,23 +52,23 @@ def connect(self):
5252 if self .is_connected :
5353 return self .connection
5454
55- protocol = "clickhouse+native" if self .protocol == ' native' else "clickhouse+http"
56- host = quote (self .connection_data [' host' ])
57- port = self .connection_data [' port' ]
58- user = quote (self .connection_data [' user' ])
59- password = quote (self .connection_data [' password' ])
60- database = quote (self .connection_data [' database' ])
61- url = f' { protocol } ://{ user } :{ password } @{ host } :{ port } /{ database } '
55+ protocol = "clickhouse+native" if self .protocol == " native" else "clickhouse+http"
56+ host = quote (self .connection_data [" host" ])
57+ port = self .connection_data [" port" ]
58+ user = quote (self .connection_data [" user" ])
59+ password = quote (self .connection_data [" password" ])
60+ database = quote (self .connection_data [" database" ])
61+ url = f" { protocol } ://{ user } :{ password } @{ host } :{ port } /{ database } "
6262 # This is not redundant. Check https://clickhouse-sqlalchemy.readthedocs.io/en/latest/connection.html#http
63- if self .protocol == ' https' :
63+ if self .protocol == " https" :
6464 url = url + "?protocol=https"
6565 try :
6666 engine = create_engine (url )
6767 connection = engine .raw_connection ()
6868 self .is_connected = True
6969 self .connection = connection
7070 except SQLAlchemyError as e :
71- logger .error (f' Error connecting to ClickHouse { self .connection_data [" database" ]} , { e } !' )
71+ logger .error (f" Error connecting to ClickHouse { self .connection_data [' database' ]} , { e } !" )
7272 self .is_connected = False
7373 raise
7474
@@ -88,12 +88,12 @@ def check_connection(self) -> StatusResponse:
8888 connection = self .connect ()
8989 cur = connection .cursor ()
9090 try :
91- cur .execute (' select 1;' )
91+ cur .execute (" select 1;" )
9292 finally :
9393 cur .close ()
9494 response .success = True
9595 except SQLAlchemyError as e :
96- logger .error (f' Error connecting to ClickHouse { self .connection_data [" database" ]} , { e } !' )
96+ logger .error (f" Error connecting to ClickHouse { self .connection_data [' database' ]} , { e } !" )
9797 response .error_message = str (e )
9898 self .is_connected = False
9999
@@ -119,22 +119,13 @@ def native_query(self, query: str) -> Response:
119119 cur .execute (query )
120120 result = cur .fetchall ()
121121 if result :
122- response = Response (
123- RESPONSE_TYPE .TABLE ,
124- pd .DataFrame (
125- result ,
126- columns = [x [0 ] for x in cur .description ]
127- )
128- )
122+ response = Response (RESPONSE_TYPE .TABLE , pd .DataFrame (result , columns = [x [0 ] for x in cur .description ]))
129123 else :
130124 response = Response (RESPONSE_TYPE .OK )
131125 connection .commit ()
132126 except SQLAlchemyError as e :
133- logger .error (f'Error running query: { query } on { self .connection_data ["database" ]} !' )
134- response = Response (
135- RESPONSE_TYPE .ERROR ,
136- error_message = str (e )
137- )
127+ logger .error (f"Error running query: { query } on { self .connection_data ['database' ]} !" )
128+ response = Response (RESPONSE_TYPE .ERROR , error_message = str (e ))
138129 connection .rollback ()
139130 finally :
140131 cur .close ()
@@ -157,7 +148,7 @@ def get_tables(self) -> Response:
157148 df = result .data_frame
158149
159150 if df is not None :
160- result .data_frame = df .rename (columns = {df .columns [0 ]: ' table_name' })
151+ result .data_frame = df .rename (columns = {df .columns [0 ]: " table_name" })
161152
162153 return result
163154
@@ -173,13 +164,13 @@ def _check_has_is_nullable_column(self) -> bool:
173164 """
174165 Checks if the is_nullable column exists in system.columns table.
175166 This column was added in ClickHouse 23.x.
176-
167+
177168 Returns:
178169 bool: True if is_nullable column exists, False otherwise.
179170 """
180171 if self ._has_is_nullable_column is not None :
181172 return self ._has_is_nullable_column
182-
173+
183174 try :
184175 check_query = """
185176 SELECT name
@@ -189,14 +180,11 @@ def _check_has_is_nullable_column(self) -> bool:
189180 AND name = 'is_nullable'
190181 """
191182 result = self .native_query (check_query )
192- self ._has_is_nullable_column = (
193- result .resp_type == RESPONSE_TYPE .TABLE
194- and not result .data_frame .empty
195- )
183+ self ._has_is_nullable_column = result .resp_type == RESPONSE_TYPE .TABLE and not result .data_frame .empty
196184 except Exception as e :
197185 logger .warning (f"Could not check for is_nullable column: { e } " )
198186 self ._has_is_nullable_column = False
199-
187+
200188 return self ._has_is_nullable_column
201189
202190 def meta_get_tables (self , table_names : Optional [List [str ]] = None ) -> Response :
@@ -210,8 +198,8 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
210198 Returns:
211199 Response: A response object containing the metadata information.
212200 """
213- database = self .connection_data [' database' ]
214-
201+ database = self .connection_data [" database" ]
202+
215203 query = f"""
216204 SELECT
217205 name as table_name,
@@ -244,23 +232,23 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
244232 Returns:
245233 Response: A response object containing the column metadata.
246234 """
247- database = self .connection_data [' database' ]
248-
235+ database = self .connection_data [" database" ]
236+
249237 # Check if is_nullable column is available (ClickHouse 23.x+)
250238 has_is_nullable = self ._check_has_is_nullable_column ()
251-
239+
252240 # Build the SELECT clause based on available columns
253241 select_clause = """
254242 table as table_name,
255243 name as column_name,
256244 type as data_type,
257245 comment as column_description,
258246 default_expression as column_default"""
259-
247+
260248 if has_is_nullable :
261249 select_clause += """,
262250 is_nullable as is_nullable"""
263-
251+
264252 query = f"""
265253 SELECT { select_clause }
266254 FROM system.columns
@@ -295,15 +283,15 @@ def meta_get_column_statistics_for_table(
295283 ) -> Response :
296284 """
297285 Retrieves column statistics for a specific table.
298-
286+
299287 Args:
300288 table_name (str): The name of the table.
301- column_names (Optional[List[str]]): List of column names to retrieve statistics for.
289+ column_names (Optional[List[str]]): List of column names to retrieve statistics for.
302290 If None, statistics for all columns will be returned.
303291 Returns:
304292 Response: A response object containing the column statistics for the table.
305293 """
306- database = self .connection_data [' database' ]
294+ database = self .connection_data [" database" ]
307295
308296 # Get the list of columns for this table
309297 columns_query = f"""
@@ -326,14 +314,16 @@ def meta_get_column_statistics_for_table(
326314 # Build statistics query - collect all stats in one query
327315 select_parts = []
328316 for _ , row in columns_result .data_frame .iterrows ():
329- col = row [' name' ]
317+ col = row [" name" ]
330318 # Use backticks to handle special characters in column names
331- select_parts .extend ([
332- f"countIf(`{ col } ` IS NULL) AS nulls_{ col } " ,
333- f"uniq(`{ col } `) AS distincts_{ col } " ,
334- f"toString(min(`{ col } `)) AS min_{ col } " ,
335- f"toString(max(`{ col } `)) AS max_{ col } " ,
336- ])
319+ select_parts .extend (
320+ [
321+ f"countIf(`{ col } ` IS NULL) AS nulls_{ col } " ,
322+ f"uniq(`{ col } `) AS distincts_{ col } " ,
323+ f"toString(min(`{ col } `)) AS min_{ col } " ,
324+ f"toString(max(`{ col } `)) AS max_{ col } " ,
325+ ]
326+ )
337327
338328 if not select_parts :
339329 return Response (RESPONSE_TYPE .TABLE , pd .DataFrame ())
@@ -342,7 +332,7 @@ def meta_get_column_statistics_for_table(
342332 stats_query = f"""
343333 SELECT
344334 count(*) AS total_rows,
345- { ', ' .join (select_parts )}
335+ { ", " .join (select_parts )}
346336 FROM `{ database } `.`{ table_name } `
347337 """
348338
@@ -353,55 +343,58 @@ def meta_get_column_statistics_for_table(
353343 # Return placeholder stats
354344 placeholder_data = []
355345 for _ , row in columns_result .data_frame .iterrows ():
356- placeholder_data .append ({
357- 'table_name' : table_name ,
358- 'column_name' : row ['name' ],
359- 'null_percentage' : None ,
360- 'distinct_values_count' : None ,
361- 'most_common_values' : None ,
362- 'most_common_frequencies' : None ,
363- 'minimum_value' : None ,
364- 'maximum_value' : None ,
365- })
346+ placeholder_data .append (
347+ {
348+ "table_name" : table_name ,
349+ "column_name" : row ["name" ],
350+ "null_percentage" : None ,
351+ "distinct_values_count" : None ,
352+ "most_common_values" : None ,
353+ "most_common_frequencies" : None ,
354+ "minimum_value" : None ,
355+ "maximum_value" : None ,
356+ }
357+ )
366358 return Response (RESPONSE_TYPE .TABLE , pd .DataFrame (placeholder_data ))
367359
368360 # Parse the stats result
369361 stats_data = stats_result .data_frame .iloc [0 ]
370- total_rows = stats_data .get (' total_rows' , 0 )
362+ total_rows = stats_data .get (" total_rows" , 0 )
371363
372364 # Build the final statistics DataFrame
373365 all_stats = []
374366 for _ , row in columns_result .data_frame .iterrows ():
375- col = row [' name' ]
376- nulls = stats_data .get (f' nulls_{ col } ' , 0 )
377- distincts = stats_data .get (f' distincts_{ col } ' , None )
378- min_val = stats_data .get (f' min_{ col } ' , None )
379- max_val = stats_data .get (f' max_{ col } ' , None )
367+ col = row [" name" ]
368+ nulls = stats_data .get (f" nulls_{ col } " , 0 )
369+ distincts = stats_data .get (f" distincts_{ col } " , None )
370+ min_val = stats_data .get (f" min_{ col } " , None )
371+ max_val = stats_data .get (f" max_{ col } " , None )
380372
381373 # Calculate null percentage
382374 null_pct = None
383375 if total_rows is not None and total_rows > 0 :
384376 null_pct = round ((nulls / total_rows ) * 100 , 2 )
385377
386- all_stats .append ({
387- 'table_name' : table_name ,
388- 'column_name' : col ,
389- 'null_percentage' : null_pct ,
390- 'distinct_values_count' : distincts ,
391- 'most_common_values' : None ,
392- 'most_common_frequencies' : None ,
393- 'minimum_value' : min_val ,
394- 'maximum_value' : max_val ,
395- })
378+ all_stats .append (
379+ {
380+ "table_name" : table_name ,
381+ "column_name" : col ,
382+ "null_percentage" : null_pct ,
383+ "distinct_values_count" : distincts ,
384+ "most_common_values" : None ,
385+ "most_common_frequencies" : None ,
386+ "minimum_value" : min_val ,
387+ "maximum_value" : max_val ,
388+ }
389+ )
396390
397391 return Response (RESPONSE_TYPE .TABLE , pd .DataFrame (all_stats ))
398392
399393 except Exception as e :
400394 logger .error (f"Exception while fetching statistics for table { table_name } : { e } " )
401395 # Return an error response (not empty stats) on failure
402396 return Response (
403- RESPONSE_TYPE .ERROR ,
404- error_message = f"Could not retrieve statistics for table { table_name } : { str (e )} "
397+ RESPONSE_TYPE .ERROR , error_message = f"Could not retrieve statistics for table { table_name } : { str (e )} "
405398 )
406399
407400 def meta_get_primary_keys (self , table_names : Optional [List [str ]] = None ) -> Response :
@@ -414,8 +407,8 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp
414407 Returns:
415408 Response: A response object containing the primary key information.
416409 """
417- database = self .connection_data [' database' ]
418-
410+ database = self .connection_data [" database" ]
411+
419412 query = f"""
420413 SELECT
421414 table as table_name,
@@ -449,13 +442,15 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
449442 """
450443 # ClickHouse does not support foreign key constraints
451444 # Return an empty DataFrame with the expected columns
452- df = pd .DataFrame (columns = [
453- 'parent_table_name' ,
454- 'parent_column_name' ,
455- 'child_table_name' ,
456- 'child_column_name' ,
457- 'constraint_name'
458- ])
445+ df = pd .DataFrame (
446+ columns = [
447+ "parent_table_name" ,
448+ "parent_column_name" ,
449+ "child_table_name" ,
450+ "child_column_name" ,
451+ "constraint_name" ,
452+ ]
453+ )
459454 return Response (RESPONSE_TYPE .TABLE , df )
460455
461456 def meta_get_handler_info (self , ** kwargs ) -> str :
0 commit comments