Skip to content

Commit f6f1f27

Browse files
committed
[fix] Format clickhouse_handler with ruff 0.11.11
1 parent fea1582 commit f6f1f27

1 file changed

Lines changed: 85 additions & 90 deletions

File tree

mindsdb/integrations/handlers/clickhouse_handler/clickhouse_handler.py

Lines changed: 85 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from mindsdb.integrations.libs.response import (
1414
HandlerStatusResponse as StatusResponse,
1515
HandlerResponse as Response,
16-
RESPONSE_TYPE
16+
RESPONSE_TYPE,
1717
)
1818

1919
logger = log.getLogger(__name__)
@@ -24,15 +24,15 @@ class ClickHouseHandler(MetaDatabaseHandler):
2424
This handler handles connection and execution of the ClickHouse statements.
2525
"""
2626

27-
name = 'clickhouse'
27+
name = "clickhouse"
2828

2929
def __init__(self, name, connection_data, **kwargs):
3030
super().__init__(name)
31-
self.dialect = 'clickhouse'
31+
self.dialect = "clickhouse"
3232
self.connection_data = connection_data
3333
self.renderer = SqlalchemyRender(ClickHouseDialect)
3434
self.is_connected = False
35-
self.protocol = connection_data.get('protocol', 'native')
35+
self.protocol = connection_data.get("protocol", "native")
3636
self._has_is_nullable_column = None # Cache for version check
3737

3838
def __del__(self):
@@ -52,23 +52,23 @@ def connect(self):
5252
if self.is_connected:
5353
return self.connection
5454

55-
protocol = "clickhouse+native" if self.protocol == 'native' else "clickhouse+http"
56-
host = quote(self.connection_data['host'])
57-
port = self.connection_data['port']
58-
user = quote(self.connection_data['user'])
59-
password = quote(self.connection_data['password'])
60-
database = quote(self.connection_data['database'])
61-
url = f'{protocol}://{user}:{password}@{host}:{port}/{database}'
55+
protocol = "clickhouse+native" if self.protocol == "native" else "clickhouse+http"
56+
host = quote(self.connection_data["host"])
57+
port = self.connection_data["port"]
58+
user = quote(self.connection_data["user"])
59+
password = quote(self.connection_data["password"])
60+
database = quote(self.connection_data["database"])
61+
url = f"{protocol}://{user}:{password}@{host}:{port}/{database}"
6262
# This is not redundant. Check https://clickhouse-sqlalchemy.readthedocs.io/en/latest/connection.html#http
63-
if self.protocol == 'https':
63+
if self.protocol == "https":
6464
url = url + "?protocol=https"
6565
try:
6666
engine = create_engine(url)
6767
connection = engine.raw_connection()
6868
self.is_connected = True
6969
self.connection = connection
7070
except SQLAlchemyError as e:
71-
logger.error(f'Error connecting to ClickHouse {self.connection_data["database"]}, {e}!')
71+
logger.error(f"Error connecting to ClickHouse {self.connection_data['database']}, {e}!")
7272
self.is_connected = False
7373
raise
7474

@@ -88,12 +88,12 @@ def check_connection(self) -> StatusResponse:
8888
connection = self.connect()
8989
cur = connection.cursor()
9090
try:
91-
cur.execute('select 1;')
91+
cur.execute("select 1;")
9292
finally:
9393
cur.close()
9494
response.success = True
9595
except SQLAlchemyError as e:
96-
logger.error(f'Error connecting to ClickHouse {self.connection_data["database"]}, {e}!')
96+
logger.error(f"Error connecting to ClickHouse {self.connection_data['database']}, {e}!")
9797
response.error_message = str(e)
9898
self.is_connected = False
9999

@@ -119,22 +119,13 @@ def native_query(self, query: str) -> Response:
119119
cur.execute(query)
120120
result = cur.fetchall()
121121
if result:
122-
response = Response(
123-
RESPONSE_TYPE.TABLE,
124-
pd.DataFrame(
125-
result,
126-
columns=[x[0] for x in cur.description]
127-
)
128-
)
122+
response = Response(RESPONSE_TYPE.TABLE, pd.DataFrame(result, columns=[x[0] for x in cur.description]))
129123
else:
130124
response = Response(RESPONSE_TYPE.OK)
131125
connection.commit()
132126
except SQLAlchemyError as e:
133-
logger.error(f'Error running query: {query} on {self.connection_data["database"]}!')
134-
response = Response(
135-
RESPONSE_TYPE.ERROR,
136-
error_message=str(e)
137-
)
127+
logger.error(f"Error running query: {query} on {self.connection_data['database']}!")
128+
response = Response(RESPONSE_TYPE.ERROR, error_message=str(e))
138129
connection.rollback()
139130
finally:
140131
cur.close()
@@ -157,7 +148,7 @@ def get_tables(self) -> Response:
157148
df = result.data_frame
158149

159150
if df is not None:
160-
result.data_frame = df.rename(columns={df.columns[0]: 'table_name'})
151+
result.data_frame = df.rename(columns={df.columns[0]: "table_name"})
161152

162153
return result
163154

@@ -173,13 +164,13 @@ def _check_has_is_nullable_column(self) -> bool:
173164
"""
174165
Checks if the is_nullable column exists in system.columns table.
175166
This column was added in ClickHouse 23.x.
176-
167+
177168
Returns:
178169
bool: True if is_nullable column exists, False otherwise.
179170
"""
180171
if self._has_is_nullable_column is not None:
181172
return self._has_is_nullable_column
182-
173+
183174
try:
184175
check_query = """
185176
SELECT name
@@ -189,14 +180,11 @@ def _check_has_is_nullable_column(self) -> bool:
189180
AND name = 'is_nullable'
190181
"""
191182
result = self.native_query(check_query)
192-
self._has_is_nullable_column = (
193-
result.resp_type == RESPONSE_TYPE.TABLE
194-
and not result.data_frame.empty
195-
)
183+
self._has_is_nullable_column = result.resp_type == RESPONSE_TYPE.TABLE and not result.data_frame.empty
196184
except Exception as e:
197185
logger.warning(f"Could not check for is_nullable column: {e}")
198186
self._has_is_nullable_column = False
199-
187+
200188
return self._has_is_nullable_column
201189

202190
def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
@@ -210,8 +198,8 @@ def meta_get_tables(self, table_names: Optional[List[str]] = None) -> Response:
210198
Returns:
211199
Response: A response object containing the metadata information.
212200
"""
213-
database = self.connection_data['database']
214-
201+
database = self.connection_data["database"]
202+
215203
query = f"""
216204
SELECT
217205
name as table_name,
@@ -244,23 +232,23 @@ def meta_get_columns(self, table_names: Optional[List[str]] = None) -> Response:
244232
Returns:
245233
Response: A response object containing the column metadata.
246234
"""
247-
database = self.connection_data['database']
248-
235+
database = self.connection_data["database"]
236+
249237
# Check if is_nullable column is available (ClickHouse 23.x+)
250238
has_is_nullable = self._check_has_is_nullable_column()
251-
239+
252240
# Build the SELECT clause based on available columns
253241
select_clause = """
254242
table as table_name,
255243
name as column_name,
256244
type as data_type,
257245
comment as column_description,
258246
default_expression as column_default"""
259-
247+
260248
if has_is_nullable:
261249
select_clause += """,
262250
is_nullable as is_nullable"""
263-
251+
264252
query = f"""
265253
SELECT {select_clause}
266254
FROM system.columns
@@ -295,15 +283,15 @@ def meta_get_column_statistics_for_table(
295283
) -> Response:
296284
"""
297285
Retrieves column statistics for a specific table.
298-
286+
299287
Args:
300288
table_name (str): The name of the table.
301-
column_names (Optional[List[str]]): List of column names to retrieve statistics for.
289+
column_names (Optional[List[str]]): List of column names to retrieve statistics for.
302290
If None, statistics for all columns will be returned.
303291
Returns:
304292
Response: A response object containing the column statistics for the table.
305293
"""
306-
database = self.connection_data['database']
294+
database = self.connection_data["database"]
307295

308296
# Get the list of columns for this table
309297
columns_query = f"""
@@ -326,14 +314,16 @@ def meta_get_column_statistics_for_table(
326314
# Build statistics query - collect all stats in one query
327315
select_parts = []
328316
for _, row in columns_result.data_frame.iterrows():
329-
col = row['name']
317+
col = row["name"]
330318
# Use backticks to handle special characters in column names
331-
select_parts.extend([
332-
f"countIf(`{col}` IS NULL) AS nulls_{col}",
333-
f"uniq(`{col}`) AS distincts_{col}",
334-
f"toString(min(`{col}`)) AS min_{col}",
335-
f"toString(max(`{col}`)) AS max_{col}",
336-
])
319+
select_parts.extend(
320+
[
321+
f"countIf(`{col}` IS NULL) AS nulls_{col}",
322+
f"uniq(`{col}`) AS distincts_{col}",
323+
f"toString(min(`{col}`)) AS min_{col}",
324+
f"toString(max(`{col}`)) AS max_{col}",
325+
]
326+
)
337327

338328
if not select_parts:
339329
return Response(RESPONSE_TYPE.TABLE, pd.DataFrame())
@@ -342,7 +332,7 @@ def meta_get_column_statistics_for_table(
342332
stats_query = f"""
343333
SELECT
344334
count(*) AS total_rows,
345-
{', '.join(select_parts)}
335+
{", ".join(select_parts)}
346336
FROM `{database}`.`{table_name}`
347337
"""
348338

@@ -353,55 +343,58 @@ def meta_get_column_statistics_for_table(
353343
# Return placeholder stats
354344
placeholder_data = []
355345
for _, row in columns_result.data_frame.iterrows():
356-
placeholder_data.append({
357-
'table_name': table_name,
358-
'column_name': row['name'],
359-
'null_percentage': None,
360-
'distinct_values_count': None,
361-
'most_common_values': None,
362-
'most_common_frequencies': None,
363-
'minimum_value': None,
364-
'maximum_value': None,
365-
})
346+
placeholder_data.append(
347+
{
348+
"table_name": table_name,
349+
"column_name": row["name"],
350+
"null_percentage": None,
351+
"distinct_values_count": None,
352+
"most_common_values": None,
353+
"most_common_frequencies": None,
354+
"minimum_value": None,
355+
"maximum_value": None,
356+
}
357+
)
366358
return Response(RESPONSE_TYPE.TABLE, pd.DataFrame(placeholder_data))
367359

368360
# Parse the stats result
369361
stats_data = stats_result.data_frame.iloc[0]
370-
total_rows = stats_data.get('total_rows', 0)
362+
total_rows = stats_data.get("total_rows", 0)
371363

372364
# Build the final statistics DataFrame
373365
all_stats = []
374366
for _, row in columns_result.data_frame.iterrows():
375-
col = row['name']
376-
nulls = stats_data.get(f'nulls_{col}', 0)
377-
distincts = stats_data.get(f'distincts_{col}', None)
378-
min_val = stats_data.get(f'min_{col}', None)
379-
max_val = stats_data.get(f'max_{col}', None)
367+
col = row["name"]
368+
nulls = stats_data.get(f"nulls_{col}", 0)
369+
distincts = stats_data.get(f"distincts_{col}", None)
370+
min_val = stats_data.get(f"min_{col}", None)
371+
max_val = stats_data.get(f"max_{col}", None)
380372

381373
# Calculate null percentage
382374
null_pct = None
383375
if total_rows is not None and total_rows > 0:
384376
null_pct = round((nulls / total_rows) * 100, 2)
385377

386-
all_stats.append({
387-
'table_name': table_name,
388-
'column_name': col,
389-
'null_percentage': null_pct,
390-
'distinct_values_count': distincts,
391-
'most_common_values': None,
392-
'most_common_frequencies': None,
393-
'minimum_value': min_val,
394-
'maximum_value': max_val,
395-
})
378+
all_stats.append(
379+
{
380+
"table_name": table_name,
381+
"column_name": col,
382+
"null_percentage": null_pct,
383+
"distinct_values_count": distincts,
384+
"most_common_values": None,
385+
"most_common_frequencies": None,
386+
"minimum_value": min_val,
387+
"maximum_value": max_val,
388+
}
389+
)
396390

397391
return Response(RESPONSE_TYPE.TABLE, pd.DataFrame(all_stats))
398392

399393
except Exception as e:
400394
logger.error(f"Exception while fetching statistics for table {table_name}: {e}")
401395
# Return an error response (not empty stats) on failure
402396
return Response(
403-
RESPONSE_TYPE.ERROR,
404-
error_message=f"Could not retrieve statistics for table {table_name}: {str(e)}"
397+
RESPONSE_TYPE.ERROR, error_message=f"Could not retrieve statistics for table {table_name}: {str(e)}"
405398
)
406399

407400
def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Response:
@@ -414,8 +407,8 @@ def meta_get_primary_keys(self, table_names: Optional[List[str]] = None) -> Resp
414407
Returns:
415408
Response: A response object containing the primary key information.
416409
"""
417-
database = self.connection_data['database']
418-
410+
database = self.connection_data["database"]
411+
419412
query = f"""
420413
SELECT
421414
table as table_name,
@@ -449,13 +442,15 @@ def meta_get_foreign_keys(self, table_names: Optional[List[str]] = None) -> Resp
449442
"""
450443
# ClickHouse does not support foreign key constraints
451444
# Return an empty DataFrame with the expected columns
452-
df = pd.DataFrame(columns=[
453-
'parent_table_name',
454-
'parent_column_name',
455-
'child_table_name',
456-
'child_column_name',
457-
'constraint_name'
458-
])
445+
df = pd.DataFrame(
446+
columns=[
447+
"parent_table_name",
448+
"parent_column_name",
449+
"child_table_name",
450+
"child_column_name",
451+
"constraint_name",
452+
]
453+
)
459454
return Response(RESPONSE_TYPE.TABLE, df)
460455

461456
def meta_get_handler_info(self, **kwargs) -> str:

0 commit comments

Comments
 (0)