|
| 1 | +// Licensed to the Apache Software Foundation (ASF) under one |
| 2 | +// or more contributor license agreements. See the NOTICE file |
| 3 | +// distributed with this work for additional information |
| 4 | +// regarding copyright ownership. The ASF licenses this file |
| 5 | +// to you under the Apache License, Version 2.0 (the |
| 6 | +// "License"); you may not use this file except in compliance |
| 7 | +// with the License. You may obtain a copy of the License at |
| 8 | +// |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | +// |
| 11 | +// Unless required by applicable law or agreed to in writing, |
| 12 | +// software distributed under the License is distributed on an |
| 13 | +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 14 | +// KIND, either express or implied. See the License for the |
| 15 | +// specific language governing permissions and limitations |
| 16 | +// under the License. |
| 17 | + |
| 18 | +use std::any::TypeId; |
| 19 | + |
| 20 | +use sqlparser::ast::{Expr, Map, MapEntry}; |
| 21 | +use sqlparser::dialect::{Dialect, GenericDialect, Precedence}; |
| 22 | +use sqlparser::keywords::Keyword; |
| 23 | +use sqlparser::parser::{Parser, ParserError}; |
| 24 | +use sqlparser::tokenizer::Token; |
| 25 | + |
/// DataFusion's default SQL dialect.
///
/// The dialect is deliberately permissive: it switches on the SQL features
/// that DataFusion's parser and planner accept across several upstream
/// dialects. It resembles sqlparser's `GenericDialect`, but lives in
/// DataFusion so that its behavior is not tied to upstream defaults.
///
/// The type is a stateless unit struct, so it is cheap to copy and every
/// value compares equal to every other.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct DataFusionDialect;

/// Ready-made [`DataFusionDialect`] value for callers that need an instance
/// without constructing one themselves.
pub const DATAFUSION_DIALECT: DataFusionDialect = DataFusionDialect;
| 36 | + |
| 37 | +impl Dialect for DataFusionDialect { |
| 38 | + fn dialect(&self) -> TypeId { |
| 39 | + TypeId::of::<GenericDialect>() |
| 40 | + } |
| 41 | + |
| 42 | + fn is_delimited_identifier_start(&self, ch: char) -> bool { |
| 43 | + ch == '"' || ch == '`' |
| 44 | + } |
| 45 | + |
| 46 | + fn is_identifier_start(&self, ch: char) -> bool { |
| 47 | + ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@' |
| 48 | + } |
| 49 | + |
| 50 | + fn is_identifier_part(&self, ch: char) -> bool { |
| 51 | + ch.is_alphabetic() |
| 52 | + || ch.is_ascii_digit() |
| 53 | + || ch == '@' |
| 54 | + || ch == '$' |
| 55 | + || ch == '#' |
| 56 | + || ch == '_' |
| 57 | + } |
| 58 | + |
| 59 | + fn supports_unicode_string_literal(&self) -> bool { |
| 60 | + true |
| 61 | + } |
| 62 | + |
| 63 | + fn supports_group_by_expr(&self) -> bool { |
| 64 | + true |
| 65 | + } |
| 66 | + |
| 67 | + fn supports_group_by_with_modifier(&self) -> bool { |
| 68 | + true |
| 69 | + } |
| 70 | + |
| 71 | + fn supports_left_associative_joins_without_parens(&self) -> bool { |
| 72 | + true |
| 73 | + } |
| 74 | + |
| 75 | + fn supports_connect_by(&self) -> bool { |
| 76 | + true |
| 77 | + } |
| 78 | + |
| 79 | + fn supports_match_recognize(&self) -> bool { |
| 80 | + true |
| 81 | + } |
| 82 | + |
| 83 | + fn supports_pipe_operator(&self) -> bool { |
| 84 | + true |
| 85 | + } |
| 86 | + |
| 87 | + fn supports_start_transaction_modifier(&self) -> bool { |
| 88 | + true |
| 89 | + } |
| 90 | + |
| 91 | + fn supports_window_function_null_treatment_arg(&self) -> bool { |
| 92 | + true |
| 93 | + } |
| 94 | + |
| 95 | + fn supports_dictionary_syntax(&self) -> bool { |
| 96 | + true |
| 97 | + } |
| 98 | + |
| 99 | + fn supports_window_clause_named_window_reference(&self) -> bool { |
| 100 | + true |
| 101 | + } |
| 102 | + |
| 103 | + fn supports_parenthesized_set_variables(&self) -> bool { |
| 104 | + true |
| 105 | + } |
| 106 | + |
| 107 | + fn supports_select_wildcard_except(&self) -> bool { |
| 108 | + true |
| 109 | + } |
| 110 | + |
| 111 | + fn support_map_literal_syntax(&self) -> bool { |
| 112 | + true |
| 113 | + } |
| 114 | + |
| 115 | + fn allow_extract_custom(&self) -> bool { |
| 116 | + true |
| 117 | + } |
| 118 | + |
| 119 | + fn allow_extract_single_quotes(&self) -> bool { |
| 120 | + true |
| 121 | + } |
| 122 | + |
| 123 | + fn supports_extract_comma_syntax(&self) -> bool { |
| 124 | + true |
| 125 | + } |
| 126 | + |
| 127 | + fn supports_create_view_comment_syntax(&self) -> bool { |
| 128 | + true |
| 129 | + } |
| 130 | + |
| 131 | + fn supports_parens_around_table_factor(&self) -> bool { |
| 132 | + true |
| 133 | + } |
| 134 | + |
| 135 | + fn supports_values_as_table_factor(&self) -> bool { |
| 136 | + true |
| 137 | + } |
| 138 | + |
| 139 | + fn supports_create_index_with_clause(&self) -> bool { |
| 140 | + true |
| 141 | + } |
| 142 | + |
| 143 | + fn supports_explain_with_utility_options(&self) -> bool { |
| 144 | + true |
| 145 | + } |
| 146 | + |
| 147 | + fn supports_limit_comma(&self) -> bool { |
| 148 | + true |
| 149 | + } |
| 150 | + |
| 151 | + fn supports_from_first_select(&self) -> bool { |
| 152 | + true |
| 153 | + } |
| 154 | + |
| 155 | + fn supports_projection_trailing_commas(&self) -> bool { |
| 156 | + true |
| 157 | + } |
| 158 | + |
| 159 | + fn supports_asc_desc_in_column_definition(&self) -> bool { |
| 160 | + true |
| 161 | + } |
| 162 | + |
| 163 | + fn supports_try_convert(&self) -> bool { |
| 164 | + true |
| 165 | + } |
| 166 | + |
| 167 | + fn supports_bitwise_shift_operators(&self) -> bool { |
| 168 | + true |
| 169 | + } |
| 170 | + |
| 171 | + fn supports_comment_on(&self) -> bool { |
| 172 | + true |
| 173 | + } |
| 174 | + |
| 175 | + fn supports_load_extension(&self) -> bool { |
| 176 | + true |
| 177 | + } |
| 178 | + |
| 179 | + fn supports_named_fn_args_with_assignment_operator(&self) -> bool { |
| 180 | + true |
| 181 | + } |
| 182 | + |
| 183 | + fn supports_struct_literal(&self) -> bool { |
| 184 | + true |
| 185 | + } |
| 186 | + |
| 187 | + fn supports_empty_projections(&self) -> bool { |
| 188 | + true |
| 189 | + } |
| 190 | + |
| 191 | + fn supports_nested_comments(&self) -> bool { |
| 192 | + true |
| 193 | + } |
| 194 | + |
| 195 | + fn supports_user_host_grantee(&self) -> bool { |
| 196 | + true |
| 197 | + } |
| 198 | + |
| 199 | + fn supports_string_escape_constant(&self) -> bool { |
| 200 | + true |
| 201 | + } |
| 202 | + |
| 203 | + fn supports_array_typedef_with_brackets(&self) -> bool { |
| 204 | + true |
| 205 | + } |
| 206 | + |
| 207 | + fn supports_match_against(&self) -> bool { |
| 208 | + true |
| 209 | + } |
| 210 | + |
| 211 | + fn supports_set_names(&self) -> bool { |
| 212 | + true |
| 213 | + } |
| 214 | + |
| 215 | + fn supports_comma_separated_set_assignments(&self) -> bool { |
| 216 | + true |
| 217 | + } |
| 218 | + |
| 219 | + fn supports_filter_during_aggregation(&self) -> bool { |
| 220 | + true |
| 221 | + } |
| 222 | + |
| 223 | + fn supports_select_wildcard_exclude(&self) -> bool { |
| 224 | + true |
| 225 | + } |
| 226 | + |
| 227 | + fn supports_data_type_signed_suffix(&self) -> bool { |
| 228 | + true |
| 229 | + } |
| 230 | + |
| 231 | + fn supports_interval_options(&self) -> bool { |
| 232 | + true |
| 233 | + } |
| 234 | + |
| 235 | + fn supports_quote_delimited_string(&self) -> bool { |
| 236 | + true |
| 237 | + } |
| 238 | + |
| 239 | + fn supports_lambda_functions(&self) -> bool { |
| 240 | + true |
| 241 | + } |
| 242 | + |
| 243 | + fn supports_select_wildcard_replace(&self) -> bool { |
| 244 | + true |
| 245 | + } |
| 246 | + |
| 247 | + fn supports_select_wildcard_ilike(&self) -> bool { |
| 248 | + true |
| 249 | + } |
| 250 | + |
| 251 | + fn supports_select_wildcard_rename(&self) -> bool { |
| 252 | + true |
| 253 | + } |
| 254 | + |
| 255 | + fn supports_optimize_table(&self) -> bool { |
| 256 | + true |
| 257 | + } |
| 258 | + |
| 259 | + fn supports_install(&self) -> bool { |
| 260 | + true |
| 261 | + } |
| 262 | + |
| 263 | + fn supports_detach(&self) -> bool { |
| 264 | + true |
| 265 | + } |
| 266 | + |
| 267 | + fn supports_prewhere(&self) -> bool { |
| 268 | + true |
| 269 | + } |
| 270 | + |
| 271 | + fn supports_with_fill(&self) -> bool { |
| 272 | + true |
| 273 | + } |
| 274 | + |
| 275 | + fn supports_limit_by(&self) -> bool { |
| 276 | + true |
| 277 | + } |
| 278 | + |
| 279 | + fn supports_interpolate(&self) -> bool { |
| 280 | + true |
| 281 | + } |
| 282 | + |
| 283 | + fn supports_settings(&self) -> bool { |
| 284 | + true |
| 285 | + } |
| 286 | + |
| 287 | + fn supports_select_format(&self) -> bool { |
| 288 | + true |
| 289 | + } |
| 290 | + |
| 291 | + fn supports_comment_optimizer_hint(&self) -> bool { |
| 292 | + true |
| 293 | + } |
| 294 | + |
| 295 | + fn parse_prefix(&self, parser: &mut Parser) -> Option<Result<Expr, ParserError>> { |
| 296 | + let token = parser.peek_token_ref(); |
| 297 | + let is_map_literal = matches!( |
| 298 | + token.token, |
| 299 | + Token::Word(ref word) if word.keyword == Keyword::MAP |
| 300 | + ) && matches!(parser.peek_tokens::<2>()[1], Token::LBrace); |
| 301 | + |
| 302 | + if !is_map_literal { |
| 303 | + return None; |
| 304 | + } |
| 305 | + |
| 306 | + Some(parse_map_literal(parser)) |
| 307 | + } |
| 308 | +} |
| 309 | + |
| 310 | +fn parse_map_literal(parser: &mut Parser) -> Result<Expr, ParserError> { |
| 311 | + let _ = parser.parse_keyword(Keyword::MAP); |
| 312 | + parser.expect_token(&Token::LBrace)?; |
| 313 | + let entries = parser.parse_comma_separated0(parse_map_entry, Token::RBrace)?; |
| 314 | + parser.expect_token(&Token::RBrace)?; |
| 315 | + Ok(Expr::Map(Map { entries })) |
| 316 | +} |
| 317 | + |
| 318 | +fn parse_map_entry(parser: &mut Parser) -> Result<MapEntry, ParserError> { |
| 319 | + let key = parser.parse_subexpr(DATAFUSION_DIALECT.prec_value(Precedence::Colon))?; |
| 320 | + parser.expect_token(&Token::Colon)?; |
| 321 | + let value = parser.parse_expr()?; |
| 322 | + Ok(MapEntry { |
| 323 | + key: Box::new(key), |
| 324 | + value: Box::new(value), |
| 325 | + }) |
| 326 | +} |
| 327 | + |
| 328 | +/// Returns the DataFusion dialect for `generic` (and `datafusion`) and otherwise |
| 329 | +/// falls back to sqlparser's built-in dialect lookup. |
| 330 | +pub fn datafusion_dialect_from_str( |
| 331 | + dialect_name: impl AsRef<str>, |
| 332 | +) -> Option<Box<dyn Dialect>> { |
| 333 | + let dialect_name = dialect_name.as_ref(); |
| 334 | + match dialect_name.to_lowercase().as_str() { |
| 335 | + "generic" | "datafusion" => Some(Box::new(DATAFUSION_DIALECT)), |
| 336 | + _ => sqlparser::dialect::dialect_from_str(dialect_name), |
| 337 | + } |
| 338 | +} |
0 commit comments