diff --git a/mindsdb_sql_parser/lexer.py b/mindsdb_sql_parser/lexer.py
index 7a76fe0..b63d065 100644
--- a/mindsdb_sql_parser/lexer.py
+++ b/mindsdb_sql_parser/lexer.py
@@ -346,11 +346,13 @@ def INTEGER(self, t):
 
     @_(r"'(?:\\.|[^'])*(?:''(?:\\.|[^'])*)*'")
     def QUOTE_STRING(self, t):
+        t.raw_value = t.value
         t.value = t.value.replace('\\"', '"').replace("\\'", "'").replace("''", "'")
         return t
 
     @_(r'"(?:\\.|[^"])*"')
     def DQUOTE_STRING(self, t):
+        t.raw_value = t.value
         t.value = t.value.replace('\\"', '"').replace("\\'", "'")
         return t
 
diff --git a/mindsdb_sql_parser/parser.py b/mindsdb_sql_parser/parser.py
index 5980296..d6a9fc5 100644
--- a/mindsdb_sql_parser/parser.py
+++ b/mindsdb_sql_parser/parser.py
@@ -1308,7 +1308,7 @@ def from_table_aliased(self, p):
     def from_table(self, p):
         query = NativeQuery(
             integration=p.identifier,
-            query=tokens_to_string(p.raw_query)
+            query=tokens_to_string(p.raw_query, use_raw_values=True)
         )
         return query
 
diff --git a/mindsdb_sql_parser/utils.py b/mindsdb_sql_parser/utils.py
index 8420249..0d94313 100644
--- a/mindsdb_sql_parser/utils.py
+++ b/mindsdb_sql_parser/utils.py
@@ -59,7 +59,9 @@ def to_single_line(text):
     return text
 
 
-def tokens_to_string(tokens):
+def tokens_to_string(tokens, use_raw_values: bool = False):
     # converts list of token (after lexer) to original string
+    # use_raw_values: when True, emit a token's raw_value (its text before the
+    # lexer unescaped it) if it has one, instead of the processed value
 
     line_num = tokens[0].lineno
@@ -81,10 +83,15 @@ def tokens_to_string(tokens):
         # filling space between tokens
         line += ' '*(token.index - shift - len(line))
 
+        if use_raw_values and hasattr(token, 'raw_value'):
+            value = token.raw_value or token.value
+        else:
+            value = token.value
+
         # add token
-        line += token.value
+        line += value
 
-        last_pos = token.index + len(token.value)
+        last_pos = token.index + len(value)
 
     # last line
     content += line
diff --git a/sly/lex.py b/sly/lex.py
index 91a53c1..48d6ae7 100644
--- a/sly/lex.py
+++ b/sly/lex.py
@@ -74,9 +74,12 @@ class Token(object):
     '''
     Representation of a single token.
     '''
-    __slots__ = ('type', 'value', 'lineno', 'index', 'end')
+    __slots__ = ('type', 'value', 'lineno', 'index', 'end', 'raw_value')
     def __repr__(self):
-        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end})'
+        # raw_value is assigned only by some lexer rules (e.g. QUOTE_STRING);
+        # on other tokens the slot is unset and plain access raises AttributeError.
+        raw_value = getattr(self, 'raw_value', None)
+        return f'Token(type={self.type!r}, value={self.value!r}, lineno={self.lineno}, index={self.index}, end={self.end}, raw_value={raw_value!r})'
 
 class TokenStr(str):
     @staticmethod
diff --git a/tests/test_mindsdb/test_selects.py b/tests/test_mindsdb/test_selects.py
index 2c6ee71..51dc86d 100644
--- a/tests/test_mindsdb/test_selects.py
+++ b/tests/test_mindsdb/test_selects.py
@@ -45,7 +45,7 @@ def test_select_status_column(self):
     def test_native_query(self):
         sql = """
            SELECT status
-           FROM int1 (select q from p from r)
+           FROM int1 (select q from p from r where x = 'test''test')
            group by 1
            limit 1
         """
@@ -54,7 +54,7 @@ def test_native_query(self):
             targets=[Identifier('status')],
             from_table=NativeQuery(
                 integration=Identifier('int1'),
-                query='select q from p from r'
+                query="select q from p from r where x = 'test''test'"
             ),
             limit=Constant(1),
             group_by=[Constant(1)]