@@ -382,7 +382,7 @@ def convert_data_type(sql, column_name, data_type, column_list=None):
382382 if isinstance (column .parent , exp .Alias ):
383383 # Column already has an alias — preserve it
384384 existing_alias = column .parent .alias
385- cast_expr = parse_one (f"CAST({ _get_column_name (column )} AS { data_type } ) AS `{ existing_alias } `" )
385+ cast_expr = parse_one (f"CAST({ _get_column_name (column )} AS { data_type } ) AS `{ existing_alias } `" , read = "drill" )
386386 column .parent .replace (cast_expr )
387387 elif isinstance (column .parent , exp .Func ):
388388 # Column is inside a function — wrap the outermost function
@@ -392,14 +392,14 @@ def convert_data_type(sql, column_name, data_type, column_list=None):
392392
393393 if isinstance (parent .parent , exp .Alias ):
394394 existing_alias = parent .parent .alias
395- cast_expr = parse_one (f"CAST({ parent } AS { data_type } ) AS `{ existing_alias } `" )
395+ cast_expr = parse_one (f"CAST({ parent } AS { data_type } ) AS `{ existing_alias } `" , read = "drill" )
396396 parent .parent .replace (cast_expr )
397397 else :
398- cast_expr = parse_one (f"CAST({ parent } AS { data_type } ) AS { alias } " )
398+ cast_expr = parse_one (f"CAST({ parent } AS { data_type } ) AS { alias } " , read = "drill" )
399399 parent .replace (cast_expr )
400400 else :
401401 # Simple column with no alias — add original name as alias
402- cast_expr = parse_one (f"CAST({ _get_column_name (column )} AS { data_type } ) AS { alias } " )
402+ cast_expr = parse_one (f"CAST({ _get_column_name (column )} AS { data_type } ) AS { alias } " , read = "drill" )
403403 column .replace (cast_expr )
404404 return parsed_query .sql (dialect = "drill" , pretty = True )
405405
@@ -418,3 +418,94 @@ def convert_data_type_raw(sql, column_name, data_type, columns_json=None):
418418 """
419419 columns = json .loads (columns_json ) if columns_json else None
420420 return convert_data_type (sql , column_name , data_type , columns )
421+
def change_time_grain(sql: str, column_name: str, time_grain: str, column_list: list[str]):
    """
    Rewrite a Drill SQL query so that a column is truncated to a new time grain.

    Every column reference whose name equals ``column_name`` is wrapped in a
    ``DATE_TRUNC`` call, or, when it already sits directly inside a
    ``DATE_TRUNC``, that call's unit is replaced. The handled shapes are:

    * aliased column: the column is wrapped and the existing alias is kept;
    * column directly inside ``DATE_TRUNC``: only the unit is updated;
    * column inside other function(s): the outermost enclosing function is
      wrapped; ``column_name`` is added as the alias unless that function is
      already aliased;
    * plain column: the column is wrapped and aliased to its own name.

    If the query is a star query (as reported by ``is_star_query``), the star
    is first expanded via ``replace_star_with_columns``.

    Parameters:
        sql (str): The original SQL query string, parsed with the Drill dialect.
        column_name (str): Name of the column whose time grain is changed.
        time_grain (str): The unit passed to ``DATE_TRUNC`` (for example
            ``"month"``); it is upper-cased before use.
        column_list (list[str]): Column names used to expand a star query. A
            list is converted to a ``{name: "VARCHAR"}`` mapping before being
            handed to ``replace_star_with_columns``; any other value is passed
            through unchanged.

    Returns:
        dict: A dictionary with a single key, ``"sql"``, holding the rewritten
        query rendered for the Drill dialect (pretty-printed, function names
        lower-cased).

    Raises:
        Nothing is raised explicitly; errors from the underlying parser or the
        helper functions propagate to the caller.
    """
    function_name = "DATE_TRUNC"
    dialect = "drill"
    time_grain = time_grain.upper()
    parsed_query = parse_one(sql, read=dialect)

    if is_star_query(parsed_query):
        # replace_star_with_columns expects a dict; convert list to dict if needed
        if isinstance(column_list, list):
            column_list = {col: "VARCHAR" for col in column_list}
        parsed_query = replace_star_with_columns(parsed_query, column_list)

    # Snapshot the matches up front: the loop body replaces nodes in the tree,
    # and the traversal should not depend on a tree that is being rewritten.
    for column in list(parsed_query.find_all(exp.Column)):
        if column.alias_or_name != column_name:
            continue

        # Replacement fragments are parsed with the same dialect as the query
        # itself so that Drill-specific syntax round-trips unchanged.
        # NOTE(review): _get_column_name is assumed to return Drill-compatible
        # identifier text -- confirm against its definition.
        if isinstance(column.parent, exp.Alias):
            # Column already has an alias: wrap the column only, so the
            # existing alias is reused.
            updated_node = parse_one(
                f"{function_name}({time_grain}, {_get_column_name(column)})",
                read=dialect,
            )
            column.replace(updated_node)

        elif isinstance(column.parent, exp.DateTrunc):
            # Column is already truncated: just swap the unit.
            column.parent.set("unit", exp.Literal.string(time_grain))

        elif isinstance(column.parent, exp.Func):
            # Column is inside a function: climb to the outermost enclosing
            # function and wrap that whole expression.
            parent = column.parent
            while isinstance(parent.parent, exp.Func):
                parent = parent.parent

            # Render with the Drill generator so the text matches the dialect
            # it is re-parsed with below.
            inner_sql = parent.sql(dialect=dialect)
            if isinstance(parent.parent, exp.Alias):
                updated_node = parse_one(
                    f"{function_name}({time_grain}, {inner_sql})",
                    read=dialect,
                )
            else:
                updated_node = parse_one(
                    f"{function_name}({time_grain}, {inner_sql}) AS {column_name}",
                    read=dialect,
                )
            parent.replace(updated_node)

        else:
            # Plain column with no alias: add the original column name as the
            # alias so the output column keeps its name.
            updated_node = parse_one(
                f"{function_name}({time_grain}, {_get_column_name(column)}) AS {column.alias_or_name}",
                read=dialect,
            )
            column.replace(updated_node)

    return {
        "sql": parsed_query.sql(dialect=dialect, pretty=True, normalize_functions="lower")
    }
495+
496+
def change_time_grain_raw(sql, column_name, time_grain, columns_json=None):
    """String-only wrapper around ``change_time_grain`` for Java/GraalPy callers.

    The column list arrives as a JSON string instead of a Python list, and the
    result is unwrapped to the bare SQL string.

    Args:
        sql: The SQL query string.
        column_name: The temporal column to transform.
        time_grain: The time grain (e.g. 'MONTH', 'YEAR').
        columns_json: Optional JSON array string of column names (for star
            queries). A missing or empty value is treated as an empty list.

    Returns:
        The transformed SQL string.
    """
    if columns_json:
        decoded_columns = json.loads(columns_json)
    else:
        decoded_columns = []
    return change_time_grain(sql, column_name, time_grain, decoded_columns)["sql"]
0 commit comments