Skip to content

Commit 7183a7b

Browse files
authored
Allowing unescaped keywords as identifiers (#82)
* do not escape some keywords * test * bump version
1 parent 3f0562a commit 7183a7b

3 files changed

Lines changed: 36 additions & 24 deletions

File tree

mindsdb_sql_parser/__about__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
__title__ = 'mindsdb_sql_parser'
22
__package_name__ = 'mindsdb_sql_parser'
3-
__version__ = '0.13.6'
3+
__version__ = '0.13.7'
44
__description__ = "Mindsdb SQL parser"
55
__email__ = "jorge@mindsdb.com"
66
__author__ = 'MindsDB Inc'

mindsdb_sql_parser/ast/select/identifier.py

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -21,29 +21,20 @@ def path_str_to_parts(path_str: str):
2121
return parts, is_quoted
2222

2323

24-
RESERVED_KEYWORDS = {
25-
'PERSIST', 'IF', 'EXISTS', 'NULLS', 'FIRST', 'LAST',
26-
'ORDER', 'BY', 'GROUP', 'PARTITION'
24+
# Hardcoded set of keywords that must be escaped with backticks when used as identifiers; other keywords are left unescaped.
25+
# For example, in a query like this: select {keyword} from tbl
26+
# To update this set, see v0.13.6 of this module: its get_reserved_words() shows how to collect all keywords from MindsDBLexer.tokens.
27+
keywords_to_escape = {
28+
"VALUES", "DESCRIBE", "THEN", "WRITE", "WITH", "INSERT", "DROP", "CROSS",
29+
"SET", "ASC", "IS", "IN", "NOT", "INTO", "WINDOW", "ALTER", "WHERE",
30+
"DISTINCT", "USE", "INNER", "COLLATE", "FOR", "USING", "FULL", "LIKE",
31+
"JOIN", "SELECT", "OVER", "CASE", "LIMIT", "END", "UNION", "DELETE",
32+
"HAVING", "OUTER", "FROM", "AS", "CHARACTER", "INTERSECT", "CONVERT",
33+
"WHEN", "OR", "AND", "UPDATE", "BETWEEN", "DESC", "EXPLAIN", "SHOW",
34+
"EXCEPT", "LEFT", "ELSE", "READ", "RIGHT"
2735
}
2836

2937

30-
_reserved_keywords: set[str] = None
31-
32-
33-
def get_reserved_words() -> set[str]:
34-
global _reserved_keywords
35-
36-
if _reserved_keywords is None:
37-
from mindsdb_sql_parser.lexer import MindsDBLexer
38-
39-
_reserved_keywords = RESERVED_KEYWORDS
40-
for word in MindsDBLexer.tokens:
41-
if '_' not in word:
42-
# exclude combinations
43-
_reserved_keywords.add(word)
44-
return _reserved_keywords
45-
46-
4738
class Identifier(ASTNode):
4839
def __init__(
4940
self, path_str=None, parts=None, is_outer=False, with_rollup=False,
@@ -77,15 +68,14 @@ def append(self, other: "Identifier") -> None:
7768
self.is_quoted += other.is_quoted
7869

7970
def iter_parts_str(self):
80-
reserved_words = get_reserved_words()
8171
for part, is_quoted in zip(self.parts, self.is_quoted):
8272
if isinstance(part, Star):
8373
part = str(part)
8474
else:
8575
if (
8676
is_quoted
8777
or not no_wrap_identifier_regex.fullmatch(part)
88-
or part.upper() in reserved_words
78+
or part.upper() in keywords_to_escape
8979
):
9080
part = f'`{part}`'
9181
yield part

tests/test_base_sql/test_select_structure.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -738,7 +738,29 @@ def test_partial_backticks(self):
738738
sql = "SELECT `integration`.`some table`.column"
739739
ast = parse_sql(sql)
740740

741-
expected_ast = Select(targets=[Identifier(parts=['integration', 'some table', 'column']),],)
741+
expected_ast = Select(
742+
targets=[
743+
Identifier(
744+
parts=['integration', 'some table', 'column'],
745+
is_quoted=[True, True, False]
746+
),
747+
],
748+
)
749+
750+
assert ast.to_tree() == expected_ast.to_tree()
751+
assert str(ast) == str(expected_ast)
752+
753+
def test_keyword_escaping(self):
754+
sql = "select ID, `ID`, `VALUES`"
755+
ast = parse_sql(sql)
756+
757+
expected_ast = Select(
758+
targets=[
759+
Identifier(parts=['ID'], is_quoted=[False]),
760+
Identifier(parts=['ID'], is_quoted=[True]),
761+
Identifier(parts=['VALUES'], is_quoted=[True]),
762+
],
763+
)
742764

743765
assert ast.to_tree() == expected_ast.to_tree()
744766
assert str(ast) == str(expected_ast)

0 commit comments

Comments
 (0)