Skip to content

Commit b74dbe7

Browse files
committed
Fixing time grain issues
1 parent 8c954d7 commit b74dbe7

14 files changed

Lines changed: 879 additions & 93 deletions

File tree

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#
18+
19+
name: Python SQL Utils CI
20+
21+
on:
22+
push:
23+
paths:
24+
- 'exec/java-exec/src/main/resources/python/**'
25+
pull_request:
26+
paths:
27+
- 'exec/java-exec/src/main/resources/python/**'
28+
29+
defaults:
30+
run:
31+
working-directory: exec/java-exec/src/main/resources/python
32+
33+
jobs:
34+
test:
35+
name: Test
36+
runs-on: ubuntu-latest
37+
strategy:
38+
matrix:
39+
python-version: ['3.11']
40+
steps:
41+
- name: Checkout
42+
uses: actions/checkout@v4
43+
44+
- name: Setup Python ${{ matrix.python-version }}
45+
uses: actions/setup-python@v5
46+
with:
47+
python-version: ${{ matrix.python-version }}
48+
cache: 'pip'
49+
cache-dependency-path: exec/java-exec/src/main/resources/python/requirements-test.txt
50+
51+
- name: Install dependencies
52+
run: pip install -r requirements-test.txt
53+
54+
- name: Run tests
55+
run: pytest -v test_sql_utils.py

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,3 +34,7 @@ tools/venv/
3434
venv/
3535
.vscode/*
3636
/exec/java-exec/src/main/resources/python/.venv/
37+
__pycache__/
38+
*.pyc
39+
.pytest_cache/
40+
.coverage

exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/SqlTranspiler.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ public final class SqlTranspiler {
5151
private Value jsonParseFunc;
5252
private Value convertDataTypeFunc;
5353
private Value formatSqlFunc;
54+
private Value changeTimeGrainFunc;
5455
private boolean available;
5556

5657
/**
@@ -81,6 +82,7 @@ private SqlTranspiler() {
8182
jsonParseFunc = pythonContext.getBindings("python").getMember("json_parse");
8283
convertDataTypeFunc = pythonContext.getBindings("python").getMember("convert_data_type_raw");
8384
formatSqlFunc = pythonContext.getBindings("python").getMember("format_sql");
85+
changeTimeGrainFunc = pythonContext.getBindings("python").getMember("change_time_grain_raw");
8486
available = true;
8587
logger.info("SqlTranspiler initialized successfully with GraalPy + sqlglot");
8688
} catch (Exception e) {
@@ -168,6 +170,34 @@ public synchronized String formatSql(String sql) {
168170
}
169171
}
170172

173+
/**
174+
* Change the time grain of a temporal column in a SQL query using sqlglot AST manipulation.
175+
*
176+
* @param sql the SQL query
177+
* @param columnName the temporal column to transform
178+
* @param timeGrain the time grain (e.g. "MONTH", "YEAR")
179+
* @param columnsJson optional JSON array string of column names (for star queries)
180+
* @return the transformed SQL, or the original SQL if transformation fails
181+
*/
182+
public synchronized String changeTimeGrain(String sql, String columnName,
183+
String timeGrain, String columnsJson) {
184+
if (!available) {
185+
return sql;
186+
}
187+
try {
188+
String result;
189+
if (columnsJson != null && !columnsJson.isEmpty()) {
190+
result = changeTimeGrainFunc.execute(sql, columnName, timeGrain, columnsJson).asString();
191+
} else {
192+
result = changeTimeGrainFunc.execute(sql, columnName, timeGrain).asString();
193+
}
194+
return result;
195+
} catch (Exception e) {
196+
logger.warn("Time grain change failed: {}", e.getMessage(), e);
197+
return sql;
198+
}
199+
}
200+
171201
/**
172202
* Returns whether the transpiler is available (GraalPy + sqlglot initialized successfully).
173203
*/

exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/TranspileResources.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,67 @@ public Response convertDataType(ConvertDataTypeRequest request) {
192192
return Response.ok(new TranspileResponse(result, true, formattedOriginal)).build();
193193
}
194194

195+
// ==================== Change Time Grain ====================
196+
197+
public static class TimeGrainRequest {
198+
@JsonProperty
199+
public String sql;
200+
201+
@JsonProperty
202+
public String columnName;
203+
204+
@JsonProperty
205+
public String timeGrain;
206+
207+
@JsonProperty
208+
public List<String> columns;
209+
210+
public TimeGrainRequest() {
211+
}
212+
213+
@JsonCreator
214+
public TimeGrainRequest(
215+
@JsonProperty("sql") String sql,
216+
@JsonProperty("columnName") String columnName,
217+
@JsonProperty("timeGrain") String timeGrain,
218+
@JsonProperty("columns") List<String> columns) {
219+
this.sql = sql;
220+
this.columnName = columnName;
221+
this.timeGrain = timeGrain;
222+
this.columns = columns;
223+
}
224+
}
225+
226+
@POST
227+
@Path("/time-grain")
228+
@Consumes(MediaType.APPLICATION_JSON)
229+
@Produces(MediaType.APPLICATION_JSON)
230+
@Operation(summary = "Change time grain",
231+
description = "Wraps a temporal column with DATE_TRUNC using sqlglot AST manipulation")
232+
public Response changeTimeGrain(TimeGrainRequest request) {
233+
if (request.sql == null || request.sql.trim().isEmpty()) {
234+
return Response.ok(new TranspileResponse("", true)).build();
235+
}
236+
if (request.columnName == null || request.timeGrain == null) {
237+
return Response.status(Response.Status.BAD_REQUEST)
238+
.entity(new TranspileResponse(request.sql, false)).build();
239+
}
240+
241+
SqlTranspiler transpiler = SqlTranspiler.getInstance();
242+
String columnsJson = null;
243+
if (request.columns != null && !request.columns.isEmpty()) {
244+
try {
245+
columnsJson = new ObjectMapper().writeValueAsString(request.columns);
246+
} catch (Exception e) {
247+
columnsJson = null;
248+
}
249+
}
250+
251+
String result = transpiler.changeTimeGrain(
252+
request.sql, request.columnName, request.timeGrain, columnsJson);
253+
return Response.ok(new TranspileResponse(result, true)).build();
254+
}
255+
195256
// ==================== Format SQL ====================
196257

197258
public static class FormatRequest {
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
pytest>=7.0
18+
sqlglot==25.33.0

exec/java-exec/src/main/resources/python/sql_utils.py

Lines changed: 95 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def convert_data_type(sql, column_name, data_type, column_list=None):
382382
if isinstance(column.parent, exp.Alias):
383383
# Column already has an alias — preserve it
384384
existing_alias = column.parent.alias
385-
cast_expr = parse_one(f"CAST({_get_column_name(column)} AS {data_type}) AS `{existing_alias}`")
385+
cast_expr = parse_one(f"CAST({_get_column_name(column)} AS {data_type}) AS `{existing_alias}`", read="drill")
386386
column.parent.replace(cast_expr)
387387
elif isinstance(column.parent, exp.Func):
388388
# Column is inside a function — wrap the outermost function
@@ -392,14 +392,14 @@ def convert_data_type(sql, column_name, data_type, column_list=None):
392392

393393
if isinstance(parent.parent, exp.Alias):
394394
existing_alias = parent.parent.alias
395-
cast_expr = parse_one(f"CAST({parent} AS {data_type}) AS `{existing_alias}`")
395+
cast_expr = parse_one(f"CAST({parent} AS {data_type}) AS `{existing_alias}`", read="drill")
396396
parent.parent.replace(cast_expr)
397397
else:
398-
cast_expr = parse_one(f"CAST({parent} AS {data_type}) AS {alias}")
398+
cast_expr = parse_one(f"CAST({parent} AS {data_type}) AS {alias}", read="drill")
399399
parent.replace(cast_expr)
400400
else:
401401
# Simple column with no alias — add original name as alias
402-
cast_expr = parse_one(f"CAST({_get_column_name(column)} AS {data_type}) AS {alias}")
402+
cast_expr = parse_one(f"CAST({_get_column_name(column)} AS {data_type}) AS {alias}", read="drill")
403403
column.replace(cast_expr)
404404
return parsed_query.sql(dialect="drill", pretty=True)
405405

@@ -418,3 +418,94 @@ def convert_data_type_raw(sql, column_name, data_type, columns_json=None):
418418
"""
419419
columns = json.loads(columns_json) if columns_json else None
420420
return convert_data_type(sql, column_name, data_type, columns)
421+
422+
def change_time_grain(sql: str, column_name: str, time_grain: str, column_list: list[str]):
423+
"""
424+
Modifies the SQL query to change the time grain of a specified column by replacing it with a function call.
425+
The function handles different cases in the query structure, including simple columns, aliased columns,
426+
columns within functions, and subqueries. If the column has no alias, the original column name is used as
427+
the alias for the transformed column.
428+
429+
Parameters:
430+
sql (str): The original SQL query string to be modified.
431+
column_name (str): The name of the column whose time grain needs to be changed.
432+
time_grain (str): The time grain transformation to be applied to the column.
433+
column_list (list[str]): A list of all column names in the table, used when the query contains a wildcard (*).
434+
435+
Returns:
436+
dict: A dictionary containing the modified SQL query string. The key is:
437+
- "sql": The new SQL query string with the updated time grain transformation.
438+
439+
Raises:
440+
None
441+
"""
442+
443+
function_name = "DATE_TRUNC"
444+
time_grain = time_grain.upper()
445+
parsed_query = parse_one(sql, read="drill")
446+
447+
# Should never be a star query
448+
if is_star_query(parsed_query):
449+
# replace_star_with_columns expects a dict; convert list to dict if needed
450+
if isinstance(column_list, list):
451+
column_list = {col: "VARCHAR" for col in column_list}
452+
parsed_query = replace_star_with_columns(parsed_query, column_list)
453+
454+
column_nodes = parsed_query.find_all(exp.Column)
455+
for column in column_nodes:
456+
if column.alias_or_name == column_name:
457+
# There are several cases
458+
# 1. The column is a simple column with no alias.
459+
# 2. The column is a simple column with an alias.
460+
# 3. The column is a function.
461+
# 4. The column is already a DATE_TRUNC function
462+
# 5. The column is a subquery.
463+
if isinstance(column, exp.Column):
464+
if isinstance(column.parent, exp.Alias):
465+
# Case 2: Column already has an alias. In this case, we reuse the alias.
466+
updated_node = parse_one(function_name + f"({time_grain}, {_get_column_name(column)})")
467+
column.replace(updated_node)
468+
469+
# Case 4: Existing DATE_TRUNC
470+
elif isinstance(column.parent, exp.DateTrunc):
471+
parent = column.parent
472+
parent.set("unit", exp.Literal.string(time_grain))
473+
474+
elif isinstance(column.parent, exp.Func):
475+
# Case 3: The column is in a function.
476+
# Recurse out of the current node to find the outermost parent node that is a function
477+
parent = column.parent
478+
while isinstance(parent.parent, exp.Func):
479+
parent = parent.parent
480+
481+
if isinstance(parent.parent, exp.Alias):
482+
updated_node = parse_one(f"{function_name}({time_grain}, {parent})")
483+
else:
484+
updated_node = parse_one(f"{function_name}({time_grain}, {parent}) AS {column_name}")
485+
parent.replace(updated_node)
486+
487+
else:
488+
# Case 1: Column has no alias. In this case, we add the original column name as an alias
489+
updated_node = parse_one(
490+
f"{function_name}({time_grain}, {_get_column_name(column)}) AS {column.alias_or_name}")
491+
column.replace(updated_node)
492+
return {
493+
"sql": parsed_query.sql(dialect="drill", pretty=True, normalize_functions="lower")
494+
}
495+
496+
497+
def change_time_grain_raw(sql, column_name, time_grain, columns_json=None):
498+
"""Entry point for Java/GraalPy. Accepts a JSON string for the column list.
499+
500+
Args:
501+
sql: The SQL query string.
502+
column_name: The temporal column to transform.
503+
time_grain: The time grain (e.g. 'MONTH', 'YEAR').
504+
columns_json: Optional JSON array string of column names (for star queries).
505+
506+
Returns:
507+
The transformed SQL string.
508+
"""
509+
columns = json.loads(columns_json) if columns_json else []
510+
result = change_time_grain(sql, column_name, time_grain, columns)
511+
return result["sql"]

0 commit comments

Comments
 (0)