@@ -1387,7 +1387,7 @@ impl<'a> Parser<'a> {
13871387 debug!("parsing expr");
13881388 let mut expr = self.parse_prefix()?;
13891389
1390- expr = self.parse_compound_expr(expr, vec![] )?;
1390+ expr = self.parse_compound_expr(expr, None )?;
13911391
13921392 debug!("prefix: {expr:?}");
13931393 loop {
@@ -1705,34 +1705,42 @@ impl<'a> Parser<'a> {
17051705 // name is not followed by a string literal, but in fact in PostgreSQL it is a valid
17061706 // expression that should parse as the column name "date".
17071707 let loc = self.peek_token_ref().span.start;
1708- let opt_expr = self.maybe_parse(|parser| {
1709- match parser.parse_data_type()? {
1710- DataType::Interval { .. } => parser.parse_interval(),
1711- // PostgreSQL allows almost any identifier to be used as custom data type name,
1712- // and we support that in `parse_data_type()`. But unlike Postgres we don't
1713- // have a list of globally reserved keywords (since they vary across dialects),
1714- // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
1715- // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
1716- // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
1717- // `type 'string'` syntax for the custom data types at all.
1718- DataType::Custom(..) => parser_err!("dummy", loc),
1719- // MySQL supports using the `BINARY` keyword as a cast to binary type.
1720- DataType::Binary(..) if self.dialect.supports_binary_kw_as_cast() => {
1721- Ok(Expr::Cast {
1722- kind: CastKind::Cast,
1723- expr: Box::new(parser.parse_expr()?),
1724- data_type: DataType::Binary(None),
1725- array: false,
1726- format: None,
1727- })
1708+ // Short-circuit: only attempt typed-string parsing if the next token
1709+ // is a known data type keyword. Since DataType::Custom is rejected
1710+ // below anyway, there is no point speculatively parsing (and then
1711+ // dropping) a full DataType for every non-type-keyword token.
1712+ let opt_expr = if self.peek_known_data_type_keyword() {
1713+ self.maybe_parse(|parser| {
1714+ match parser.parse_data_type()? {
1715+ DataType::Interval { .. } => parser.parse_interval(),
1716+ // PostgreSQL allows almost any identifier to be used as custom data type name,
1717+ // and we support that in `parse_data_type()`. But unlike Postgres we don't
1718+ // have a list of globally reserved keywords (since they vary across dialects),
1719+ // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
1720+ // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
1721+ // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
1722+ // `type 'string'` syntax for the custom data types at all.
1723+ DataType::Custom(..) => parser_err!("dummy", loc),
1724+ // MySQL supports using the `BINARY` keyword as a cast to binary type.
1725+ DataType::Binary(..) if self.dialect.supports_binary_kw_as_cast() => {
1726+ Ok(Expr::Cast {
1727+ kind: CastKind::Cast,
1728+ expr: Box::new(parser.parse_expr()?),
1729+ data_type: DataType::Binary(None),
1730+ array: false,
1731+ format: None,
1732+ })
1733+ }
1734+ data_type => Ok(Expr::TypedString(TypedString {
1735+ data_type,
1736+ value: parser.parse_value()?,
1737+ uses_odbc_syntax: false,
1738+ })),
17281739 }
1729- data_type => Ok(Expr::TypedString(TypedString {
1730- data_type,
1731- value: parser.parse_value()?,
1732- uses_odbc_syntax: false,
1733- })),
1734- }
1735- })?;
1740+ })?
1741+ } else {
1742+ None
1743+ };
17361744
17371745 if let Some(expr) = opt_expr {
17381746 return Ok(expr);
@@ -1956,7 +1964,7 @@ impl<'a> Parser<'a> {
19561964 pub fn parse_compound_expr(
19571965 &mut self,
19581966 root: Expr,
1959- mut chain: Vec<AccessExpr>,
1967+ mut chain: Option< Vec<AccessExpr> >,
19601968 ) -> Result<Expr, ParserError> {
19611969 let mut ending_wildcard: Option<TokenWithSpan> = None;
19621970 loop {
@@ -5980,7 +5988,11 @@ impl<'a> Parser<'a> {
59805988 }
59815989 }
59825990
5983- if let Some(next_data_type) = self.maybe_parse(parse_data_type_no_default)? {
5991+ if let Some(next_data_type) = if matches!(self.peek_token_ref().token, Token::Word(_)) {
5992+ self.maybe_parse(parse_data_type_no_default)?
5993+ } else {
5994+ None
5995+ } {
59845996 let token = self.token_at(data_type_idx);
59855997
59865998 // We ensure that the token is a `Word` token, and not other special tokens.
@@ -8931,8 +8943,12 @@ impl<'a> Parser<'a> {
89318943 let data_type = if self.is_column_type_sqlite_unspecified() {
89328944 DataType::Unspecified
89338945 } else if optional_data_type {
8934- self.maybe_parse(|parser| parser.parse_data_type())?
8935- .unwrap_or(DataType::Unspecified)
8946+ if matches!(self.peek_token_ref().token, Token::Word(_)) {
8947+ self.maybe_parse(|parser| parser.parse_data_type())?
8948+ .unwrap_or(DataType::Unspecified)
8949+ } else {
8950+ DataType::Unspecified
8951+ }
89368952 } else {
89378953 self.parse_data_type()?
89388954 };
@@ -11734,6 +11750,116 @@ impl<'a> Parser<'a> {
1173411750 Ok(values)
1173511751 }
1173611752
11753+ /// Returns true if the next token is a keyword that can start a known
11754+ /// (non-custom) data type. This is useful for short-circuiting speculative
11755+ /// `parse_data_type` calls: if the next token is not a data type keyword,
11756+ /// we can skip the attempt entirely and avoid allocating a `DataType` value
11757+ /// that would be immediately dropped on failure.
11758+ ///
11759+ /// Note: this does NOT cover custom data types (arbitrary identifiers).
11760+ /// It only checks for built-in SQL type keywords.
11761+ fn peek_known_data_type_keyword(&self) -> bool {
11762+ match &self.peek_token_ref().token {
11763+ Token::Word(w) => matches!(
11764+ w.keyword,
11765+ Keyword::BOOLEAN
11766+ | Keyword::BOOL
11767+ | Keyword::FLOAT
11768+ | Keyword::REAL
11769+ | Keyword::FLOAT4
11770+ | Keyword::FLOAT32
11771+ | Keyword::FLOAT64
11772+ | Keyword::FLOAT8
11773+ | Keyword::DOUBLE
11774+ | Keyword::TINYINT
11775+ | Keyword::INT2
11776+ | Keyword::SMALLINT
11777+ | Keyword::MEDIUMINT
11778+ | Keyword::INT
11779+ | Keyword::INT4
11780+ | Keyword::INT8
11781+ | Keyword::INT16
11782+ | Keyword::INT32
11783+ | Keyword::INT64
11784+ | Keyword::INT128
11785+ | Keyword::INT256
11786+ | Keyword::INTEGER
11787+ | Keyword::BIGINT
11788+ | Keyword::HUGEINT
11789+ | Keyword::UBIGINT
11790+ | Keyword::UHUGEINT
11791+ | Keyword::USMALLINT
11792+ | Keyword::UTINYINT
11793+ | Keyword::UINT8
11794+ | Keyword::UINT16
11795+ | Keyword::UINT32
11796+ | Keyword::UINT64
11797+ | Keyword::UINT128
11798+ | Keyword::UINT256
11799+ | Keyword::VARCHAR
11800+ | Keyword::NVARCHAR
11801+ | Keyword::CHARACTER
11802+ | Keyword::CHAR
11803+ | Keyword::CLOB
11804+ | Keyword::BINARY
11805+ | Keyword::VARBINARY
11806+ | Keyword::BLOB
11807+ | Keyword::TINYBLOB
11808+ | Keyword::MEDIUMBLOB
11809+ | Keyword::LONGBLOB
11810+ | Keyword::BYTES
11811+ | Keyword::BIT
11812+ | Keyword::VARBIT
11813+ | Keyword::UUID
11814+ | Keyword::DATE
11815+ | Keyword::DATE32
11816+ | Keyword::DATETIME
11817+ | Keyword::DATETIME64
11818+ | Keyword::TIMESTAMP
11819+ | Keyword::TIMESTAMPTZ
11820+ | Keyword::TIMESTAMP_NTZ
11821+ | Keyword::TIME
11822+ | Keyword::TIMETZ
11823+ | Keyword::INTERVAL
11824+ | Keyword::JSON
11825+ | Keyword::JSONB
11826+ | Keyword::REGCLASS
11827+ | Keyword::STRING
11828+ | Keyword::FIXEDSTRING
11829+ | Keyword::TEXT
11830+ | Keyword::TINYTEXT
11831+ | Keyword::MEDIUMTEXT
11832+ | Keyword::LONGTEXT
11833+ | Keyword::BYTEA
11834+ | Keyword::NUMERIC
11835+ | Keyword::DECIMAL
11836+ | Keyword::DEC
11837+ | Keyword::BIGNUMERIC
11838+ | Keyword::BIGDECIMAL
11839+ | Keyword::ENUM
11840+ | Keyword::ENUM8
11841+ | Keyword::ENUM16
11842+ | Keyword::SET
11843+ | Keyword::ARRAY
11844+ | Keyword::STRUCT
11845+ | Keyword::UNION
11846+ | Keyword::NULLABLE
11847+ | Keyword::LOWCARDINALITY
11848+ | Keyword::MAP
11849+ | Keyword::NESTED
11850+ | Keyword::TUPLE
11851+ | Keyword::TRIGGER
11852+ | Keyword::ANY
11853+ | Keyword::TABLE
11854+ | Keyword::SIGNED
11855+ | Keyword::UNSIGNED
11856+ | Keyword::TSVECTOR
11857+ | Keyword::TSQUERY
11858+ ),
11859+ _ => false,
11860+ }
11861+ }
11862+
1173711863 /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
1173811864 pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
1173911865 let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -12983,7 +13109,11 @@ impl<'a> Parser<'a> {
1298313109 if self.consume_token(&Token::LParen) {
1298413110 let cols = self.parse_comma_separated(|p| {
1298513111 let name = p.parse_identifier()?;
12986- let data_type = p.maybe_parse(|p| p.parse_data_type())?;
13112+ let data_type = if matches!(p.peek_token_ref().token, Token::Word(_)) {
13113+ p.maybe_parse(|p| p.parse_data_type())?
13114+ } else {
13115+ None
13116+ };
1298713117 Ok(TableAliasColumnDef { name, data_type })
1298813118 })?;
1298913119 self.expect_token(&Token::RParen)?;
0 commit comments