Skip to content

Commit 30dabf8

Browse files
author
Alexander Beedie
committed
Reduce cost of parse_data_type calls
1 parent e81eb14 commit 30dabf8

1 file changed

Lines changed: 163 additions & 33 deletions

File tree

src/parser/mod.rs

Lines changed: 163 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1387,7 +1387,7 @@ impl<'a> Parser<'a> {
13871387
debug!("parsing expr");
13881388
let mut expr = self.parse_prefix()?;
13891389

1390-
expr = self.parse_compound_expr(expr, vec![])?;
1390+
expr = self.parse_compound_expr(expr, None)?;
13911391

13921392
debug!("prefix: {expr:?}");
13931393
loop {
@@ -1705,34 +1705,42 @@ impl<'a> Parser<'a> {
17051705
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
17061706
// expression that should parse as the column name "date".
17071707
let loc = self.peek_token_ref().span.start;
1708-
let opt_expr = self.maybe_parse(|parser| {
1709-
match parser.parse_data_type()? {
1710-
DataType::Interval { .. } => parser.parse_interval(),
1711-
// PostgreSQL allows almost any identifier to be used as custom data type name,
1712-
// and we support that in `parse_data_type()`. But unlike Postgres we don't
1713-
// have a list of globally reserved keywords (since they vary across dialects),
1714-
// so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
1715-
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
1716-
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
1717-
// `type 'string'` syntax for the custom data types at all.
1718-
DataType::Custom(..) => parser_err!("dummy", loc),
1719-
// MySQL supports using the `BINARY` keyword as a cast to binary type.
1720-
DataType::Binary(..) if self.dialect.supports_binary_kw_as_cast() => {
1721-
Ok(Expr::Cast {
1722-
kind: CastKind::Cast,
1723-
expr: Box::new(parser.parse_expr()?),
1724-
data_type: DataType::Binary(None),
1725-
array: false,
1726-
format: None,
1727-
})
1708+
// Short-circuit: only attempt typed-string parsing if the next token
1709+
// is a known data type keyword. Since DataType::Custom is rejected
1710+
// below anyway, there is no point speculatively parsing (and then
1711+
// dropping) a full DataType for every non-type-keyword token.
1712+
let opt_expr = if self.peek_known_data_type_keyword() {
1713+
self.maybe_parse(|parser| {
1714+
match parser.parse_data_type()? {
1715+
DataType::Interval { .. } => parser.parse_interval(),
1716+
// PostgreSQL allows almost any identifier to be used as custom data type name,
1717+
// and we support that in `parse_data_type()`. But unlike Postgres we don't
1718+
// have a list of globally reserved keywords (since they vary across dialects),
1719+
// so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type
1720+
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
1721+
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
1722+
// `type 'string'` syntax for the custom data types at all.
1723+
DataType::Custom(..) => parser_err!("dummy", loc),
1724+
// MySQL supports using the `BINARY` keyword as a cast to binary type.
1725+
DataType::Binary(..) if self.dialect.supports_binary_kw_as_cast() => {
1726+
Ok(Expr::Cast {
1727+
kind: CastKind::Cast,
1728+
expr: Box::new(parser.parse_expr()?),
1729+
data_type: DataType::Binary(None),
1730+
array: false,
1731+
format: None,
1732+
})
1733+
}
1734+
data_type => Ok(Expr::TypedString(TypedString {
1735+
data_type,
1736+
value: parser.parse_value()?,
1737+
uses_odbc_syntax: false,
1738+
})),
17281739
}
1729-
data_type => Ok(Expr::TypedString(TypedString {
1730-
data_type,
1731-
value: parser.parse_value()?,
1732-
uses_odbc_syntax: false,
1733-
})),
1734-
}
1735-
})?;
1740+
})?
1741+
} else {
1742+
None
1743+
};
17361744

17371745
if let Some(expr) = opt_expr {
17381746
return Ok(expr);
@@ -1956,7 +1964,7 @@ impl<'a> Parser<'a> {
19561964
pub fn parse_compound_expr(
19571965
&mut self,
19581966
root: Expr,
1959-
mut chain: Vec<AccessExpr>,
1967+
mut chain: Option<Vec<AccessExpr>>,
19601968
) -> Result<Expr, ParserError> {
19611969
let mut ending_wildcard: Option<TokenWithSpan> = None;
19621970
loop {
@@ -5980,7 +5988,11 @@ impl<'a> Parser<'a> {
59805988
}
59815989
}
59825990

5983-
if let Some(next_data_type) = self.maybe_parse(parse_data_type_no_default)? {
5991+
if let Some(next_data_type) = if matches!(self.peek_token_ref().token, Token::Word(_)) {
5992+
self.maybe_parse(parse_data_type_no_default)?
5993+
} else {
5994+
None
5995+
} {
59845996
let token = self.token_at(data_type_idx);
59855997

59865998
// We ensure that the token is a `Word` token, and not other special tokens.
@@ -8931,8 +8943,12 @@ impl<'a> Parser<'a> {
89318943
let data_type = if self.is_column_type_sqlite_unspecified() {
89328944
DataType::Unspecified
89338945
} else if optional_data_type {
8934-
self.maybe_parse(|parser| parser.parse_data_type())?
8935-
.unwrap_or(DataType::Unspecified)
8946+
if matches!(self.peek_token_ref().token, Token::Word(_)) {
8947+
self.maybe_parse(|parser| parser.parse_data_type())?
8948+
.unwrap_or(DataType::Unspecified)
8949+
} else {
8950+
DataType::Unspecified
8951+
}
89368952
} else {
89378953
self.parse_data_type()?
89388954
};
@@ -11734,6 +11750,116 @@ impl<'a> Parser<'a> {
1173411750
Ok(values)
1173511751
}
1173611752

11753+
/// Returns true if the next token is a keyword that can start a known
11754+
/// (non-custom) data type. This is useful for short-circuiting speculative
11755+
/// `parse_data_type` calls: if the next token is not a data type keyword,
11756+
/// we can skip the attempt entirely and avoid allocating a `DataType` value
11757+
/// that would be immediately dropped on failure.
11758+
///
11759+
/// Note: this does NOT cover custom data types (arbitrary identifiers).
11760+
/// It only checks for built-in SQL type keywords.
11761+
fn peek_known_data_type_keyword(&self) -> bool {
11762+
match &self.peek_token_ref().token {
11763+
Token::Word(w) => matches!(
11764+
w.keyword,
11765+
Keyword::BOOLEAN
11766+
| Keyword::BOOL
11767+
| Keyword::FLOAT
11768+
| Keyword::REAL
11769+
| Keyword::FLOAT4
11770+
| Keyword::FLOAT32
11771+
| Keyword::FLOAT64
11772+
| Keyword::FLOAT8
11773+
| Keyword::DOUBLE
11774+
| Keyword::TINYINT
11775+
| Keyword::INT2
11776+
| Keyword::SMALLINT
11777+
| Keyword::MEDIUMINT
11778+
| Keyword::INT
11779+
| Keyword::INT4
11780+
| Keyword::INT8
11781+
| Keyword::INT16
11782+
| Keyword::INT32
11783+
| Keyword::INT64
11784+
| Keyword::INT128
11785+
| Keyword::INT256
11786+
| Keyword::INTEGER
11787+
| Keyword::BIGINT
11788+
| Keyword::HUGEINT
11789+
| Keyword::UBIGINT
11790+
| Keyword::UHUGEINT
11791+
| Keyword::USMALLINT
11792+
| Keyword::UTINYINT
11793+
| Keyword::UINT8
11794+
| Keyword::UINT16
11795+
| Keyword::UINT32
11796+
| Keyword::UINT64
11797+
| Keyword::UINT128
11798+
| Keyword::UINT256
11799+
| Keyword::VARCHAR
11800+
| Keyword::NVARCHAR
11801+
| Keyword::CHARACTER
11802+
| Keyword::CHAR
11803+
| Keyword::CLOB
11804+
| Keyword::BINARY
11805+
| Keyword::VARBINARY
11806+
| Keyword::BLOB
11807+
| Keyword::TINYBLOB
11808+
| Keyword::MEDIUMBLOB
11809+
| Keyword::LONGBLOB
11810+
| Keyword::BYTES
11811+
| Keyword::BIT
11812+
| Keyword::VARBIT
11813+
| Keyword::UUID
11814+
| Keyword::DATE
11815+
| Keyword::DATE32
11816+
| Keyword::DATETIME
11817+
| Keyword::DATETIME64
11818+
| Keyword::TIMESTAMP
11819+
| Keyword::TIMESTAMPTZ
11820+
| Keyword::TIMESTAMP_NTZ
11821+
| Keyword::TIME
11822+
| Keyword::TIMETZ
11823+
| Keyword::INTERVAL
11824+
| Keyword::JSON
11825+
| Keyword::JSONB
11826+
| Keyword::REGCLASS
11827+
| Keyword::STRING
11828+
| Keyword::FIXEDSTRING
11829+
| Keyword::TEXT
11830+
| Keyword::TINYTEXT
11831+
| Keyword::MEDIUMTEXT
11832+
| Keyword::LONGTEXT
11833+
| Keyword::BYTEA
11834+
| Keyword::NUMERIC
11835+
| Keyword::DECIMAL
11836+
| Keyword::DEC
11837+
| Keyword::BIGNUMERIC
11838+
| Keyword::BIGDECIMAL
11839+
| Keyword::ENUM
11840+
| Keyword::ENUM8
11841+
| Keyword::ENUM16
11842+
| Keyword::SET
11843+
| Keyword::ARRAY
11844+
| Keyword::STRUCT
11845+
| Keyword::UNION
11846+
| Keyword::NULLABLE
11847+
| Keyword::LOWCARDINALITY
11848+
| Keyword::MAP
11849+
| Keyword::NESTED
11850+
| Keyword::TUPLE
11851+
| Keyword::TRIGGER
11852+
| Keyword::ANY
11853+
| Keyword::TABLE
11854+
| Keyword::SIGNED
11855+
| Keyword::UNSIGNED
11856+
| Keyword::TSVECTOR
11857+
| Keyword::TSQUERY
11858+
),
11859+
_ => false,
11860+
}
11861+
}
11862+
1173711863
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
1173811864
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
1173911865
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -12983,7 +13109,11 @@ impl<'a> Parser<'a> {
1298313109
if self.consume_token(&Token::LParen) {
1298413110
let cols = self.parse_comma_separated(|p| {
1298513111
let name = p.parse_identifier()?;
12986-
let data_type = p.maybe_parse(|p| p.parse_data_type())?;
13112+
let data_type = if matches!(p.peek_token_ref().token, Token::Word(_)) {
13113+
p.maybe_parse(|p| p.parse_data_type())?
13114+
} else {
13115+
None
13116+
};
1298713117
Ok(TableAliasColumnDef { name, data_type })
1298813118
})?;
1298913119
self.expect_token(&Token::RParen)?;

0 commit comments

Comments
 (0)