Skip to content

Commit 93fa56e

Browse files
committed
Add DataFusion specific dialect
1 parent 8bda485 commit 93fa56e

18 files changed

Lines changed: 469 additions & 115 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

datafusion-cli/src/highlighter.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@ use std::{
2323
};
2424

2525
use datafusion::sql::sqlparser::{
26-
dialect::{Dialect, GenericDialect, dialect_from_str},
26+
dialect::Dialect,
2727
keywords::Keyword,
2828
tokenizer::{Token, Tokenizer},
2929
};
30+
use datafusion_common::{DATAFUSION_DIALECT, datafusion_dialect_from_str};
3031
use datafusion_common::config;
3132
use rustyline::highlight::{CmdKind, Highlighter};
3233

@@ -38,7 +39,8 @@ pub struct SyntaxHighlighter {
3839

3940
impl SyntaxHighlighter {
4041
pub fn new(dialect: &config::Dialect) -> Self {
41-
let dialect = dialect_from_str(dialect).unwrap_or(Box::new(GenericDialect {}));
42+
let dialect =
43+
datafusion_dialect_from_str(dialect).unwrap_or(Box::new(DATAFUSION_DIALECT));
4244
Self { dialect }
4345
}
4446
}

datafusion/common/src/lib.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ pub mod test_util;
6161
pub mod tree_node;
6262
pub mod types;
6363
pub mod utils;
64+
pub mod sql_dialect;
6465
/// Reexport arrow crate
6566
pub use arrow;
6667
pub use column::Column;
@@ -90,6 +91,9 @@ pub use nested_struct::cast_column;
9091
pub use null_equality::NullEquality;
9192
pub use param_value::ParamValues;
9293
pub use scalar::{ScalarType, ScalarValue};
94+
pub use sql_dialect::{
95+
DATAFUSION_DIALECT, DataFusionDialect, datafusion_dialect_from_str,
96+
};
9397
pub use schema_reference::SchemaReference;
9498
pub use spans::{Location, Span, Spans};
9599
pub use stats::{ColumnStatistics, Statistics};
Lines changed: 338 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,338 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::any::TypeId;
19+
20+
use sqlparser::ast::{Expr, Map, MapEntry};
21+
use sqlparser::dialect::{Dialect, GenericDialect, Precedence};
22+
use sqlparser::keywords::Keyword;
23+
use sqlparser::parser::{Parser, ParserError};
24+
use sqlparser::tokenizer::Token;
25+
26+
/// DataFusion's default SQL dialect.
27+
///
28+
/// This dialect is intentionally permissive and enables SQL features that
29+
/// DataFusion's parser and planner support across multiple upstream dialects.
30+
/// It is similar to sqlparser's `GenericDialect`, but is defined in DataFusion
31+
/// to decouple behavior from upstream defaults.
32+
#[derive(Debug, Default)]
33+
pub struct DataFusionDialect;
34+
35+
pub const DATAFUSION_DIALECT: DataFusionDialect = DataFusionDialect;
36+
37+
impl Dialect for DataFusionDialect {
38+
fn dialect(&self) -> TypeId {
39+
TypeId::of::<GenericDialect>()
40+
}
41+
42+
fn is_delimited_identifier_start(&self, ch: char) -> bool {
43+
ch == '"' || ch == '`'
44+
}
45+
46+
fn is_identifier_start(&self, ch: char) -> bool {
47+
ch.is_alphabetic() || ch == '_' || ch == '#' || ch == '@'
48+
}
49+
50+
fn is_identifier_part(&self, ch: char) -> bool {
51+
ch.is_alphabetic()
52+
|| ch.is_ascii_digit()
53+
|| ch == '@'
54+
|| ch == '$'
55+
|| ch == '#'
56+
|| ch == '_'
57+
}
58+
59+
fn supports_unicode_string_literal(&self) -> bool {
60+
true
61+
}
62+
63+
fn supports_group_by_expr(&self) -> bool {
64+
true
65+
}
66+
67+
fn supports_group_by_with_modifier(&self) -> bool {
68+
true
69+
}
70+
71+
fn supports_left_associative_joins_without_parens(&self) -> bool {
72+
true
73+
}
74+
75+
fn supports_connect_by(&self) -> bool {
76+
true
77+
}
78+
79+
fn supports_match_recognize(&self) -> bool {
80+
true
81+
}
82+
83+
fn supports_pipe_operator(&self) -> bool {
84+
true
85+
}
86+
87+
fn supports_start_transaction_modifier(&self) -> bool {
88+
true
89+
}
90+
91+
fn supports_window_function_null_treatment_arg(&self) -> bool {
92+
true
93+
}
94+
95+
fn supports_dictionary_syntax(&self) -> bool {
96+
true
97+
}
98+
99+
fn supports_window_clause_named_window_reference(&self) -> bool {
100+
true
101+
}
102+
103+
fn supports_parenthesized_set_variables(&self) -> bool {
104+
true
105+
}
106+
107+
fn supports_select_wildcard_except(&self) -> bool {
108+
true
109+
}
110+
111+
fn support_map_literal_syntax(&self) -> bool {
112+
true
113+
}
114+
115+
fn allow_extract_custom(&self) -> bool {
116+
true
117+
}
118+
119+
fn allow_extract_single_quotes(&self) -> bool {
120+
true
121+
}
122+
123+
fn supports_extract_comma_syntax(&self) -> bool {
124+
true
125+
}
126+
127+
fn supports_create_view_comment_syntax(&self) -> bool {
128+
true
129+
}
130+
131+
fn supports_parens_around_table_factor(&self) -> bool {
132+
true
133+
}
134+
135+
fn supports_values_as_table_factor(&self) -> bool {
136+
true
137+
}
138+
139+
fn supports_create_index_with_clause(&self) -> bool {
140+
true
141+
}
142+
143+
fn supports_explain_with_utility_options(&self) -> bool {
144+
true
145+
}
146+
147+
fn supports_limit_comma(&self) -> bool {
148+
true
149+
}
150+
151+
fn supports_from_first_select(&self) -> bool {
152+
true
153+
}
154+
155+
fn supports_projection_trailing_commas(&self) -> bool {
156+
true
157+
}
158+
159+
fn supports_asc_desc_in_column_definition(&self) -> bool {
160+
true
161+
}
162+
163+
fn supports_try_convert(&self) -> bool {
164+
true
165+
}
166+
167+
fn supports_bitwise_shift_operators(&self) -> bool {
168+
true
169+
}
170+
171+
fn supports_comment_on(&self) -> bool {
172+
true
173+
}
174+
175+
fn supports_load_extension(&self) -> bool {
176+
true
177+
}
178+
179+
fn supports_named_fn_args_with_assignment_operator(&self) -> bool {
180+
true
181+
}
182+
183+
fn supports_struct_literal(&self) -> bool {
184+
true
185+
}
186+
187+
fn supports_empty_projections(&self) -> bool {
188+
true
189+
}
190+
191+
fn supports_nested_comments(&self) -> bool {
192+
true
193+
}
194+
195+
fn supports_user_host_grantee(&self) -> bool {
196+
true
197+
}
198+
199+
fn supports_string_escape_constant(&self) -> bool {
200+
true
201+
}
202+
203+
fn supports_array_typedef_with_brackets(&self) -> bool {
204+
true
205+
}
206+
207+
fn supports_match_against(&self) -> bool {
208+
true
209+
}
210+
211+
fn supports_set_names(&self) -> bool {
212+
true
213+
}
214+
215+
fn supports_comma_separated_set_assignments(&self) -> bool {
216+
true
217+
}
218+
219+
fn supports_filter_during_aggregation(&self) -> bool {
220+
true
221+
}
222+
223+
fn supports_select_wildcard_exclude(&self) -> bool {
224+
true
225+
}
226+
227+
fn supports_data_type_signed_suffix(&self) -> bool {
228+
true
229+
}
230+
231+
fn supports_interval_options(&self) -> bool {
232+
true
233+
}
234+
235+
fn supports_quote_delimited_string(&self) -> bool {
236+
true
237+
}
238+
239+
fn supports_lambda_functions(&self) -> bool {
240+
true
241+
}
242+
243+
fn supports_select_wildcard_replace(&self) -> bool {
244+
true
245+
}
246+
247+
fn supports_select_wildcard_ilike(&self) -> bool {
248+
true
249+
}
250+
251+
fn supports_select_wildcard_rename(&self) -> bool {
252+
true
253+
}
254+
255+
fn supports_optimize_table(&self) -> bool {
256+
true
257+
}
258+
259+
fn supports_install(&self) -> bool {
260+
true
261+
}
262+
263+
fn supports_detach(&self) -> bool {
264+
true
265+
}
266+
267+
fn supports_prewhere(&self) -> bool {
268+
true
269+
}
270+
271+
fn supports_with_fill(&self) -> bool {
272+
true
273+
}
274+
275+
fn supports_limit_by(&self) -> bool {
276+
true
277+
}
278+
279+
fn supports_interpolate(&self) -> bool {
280+
true
281+
}
282+
283+
fn supports_settings(&self) -> bool {
284+
true
285+
}
286+
287+
fn supports_select_format(&self) -> bool {
288+
true
289+
}
290+
291+
fn supports_comment_optimizer_hint(&self) -> bool {
292+
true
293+
}
294+
295+
fn parse_prefix(&self, parser: &mut Parser) -> Option<Result<Expr, ParserError>> {
296+
let token = parser.peek_token_ref();
297+
let is_map_literal = matches!(
298+
token.token,
299+
Token::Word(ref word) if word.keyword == Keyword::MAP
300+
) && matches!(parser.peek_tokens::<2>()[1], Token::LBrace);
301+
302+
if !is_map_literal {
303+
return None;
304+
}
305+
306+
Some(parse_map_literal(parser))
307+
}
308+
}
309+
310+
fn parse_map_literal(parser: &mut Parser) -> Result<Expr, ParserError> {
311+
let _ = parser.parse_keyword(Keyword::MAP);
312+
parser.expect_token(&Token::LBrace)?;
313+
let entries = parser.parse_comma_separated0(parse_map_entry, Token::RBrace)?;
314+
parser.expect_token(&Token::RBrace)?;
315+
Ok(Expr::Map(Map { entries }))
316+
}
317+
318+
fn parse_map_entry(parser: &mut Parser) -> Result<MapEntry, ParserError> {
319+
let key = parser.parse_subexpr(DATAFUSION_DIALECT.prec_value(Precedence::Colon))?;
320+
parser.expect_token(&Token::Colon)?;
321+
let value = parser.parse_expr()?;
322+
Ok(MapEntry {
323+
key: Box::new(key),
324+
value: Box::new(value),
325+
})
326+
}
327+
328+
/// Returns the DataFusion dialect for `generic` (and `datafusion`) and otherwise
329+
/// falls back to sqlparser's built-in dialect lookup.
330+
pub fn datafusion_dialect_from_str(
331+
dialect_name: impl AsRef<str>,
332+
) -> Option<Box<dyn Dialect>> {
333+
let dialect_name = dialect_name.as_ref();
334+
match dialect_name.to_lowercase().as_str() {
335+
"generic" | "datafusion" => Some(Box::new(DATAFUSION_DIALECT)),
336+
_ => sqlparser::dialect::dialect_from_str(dialect_name),
337+
}
338+
}

0 commit comments

Comments
 (0)