Skip to content

Commit a019bf9

Browse files
Add XMLPARSE expression
1 parent 0bfafea commit a019bf9

5 files changed

Lines changed: 157 additions & 0 deletions

File tree

src/ast/mod.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,43 @@ impl fmt::Display for CaseWhen {
844844
}
845845
}
846846

847+
/// Parsing mode for `XMLPARSE`.
///
/// Records which keyword, `CONTENT` or `DOCUMENT`, precedes the operand in
/// `XMLPARSE(CONTENT|DOCUMENT expr)`.
#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum XmlParseMode {
    /// `CONTENT`
    Content,
    /// `DOCUMENT`
    Document,
}

impl fmt::Display for XmlParseMode {
    /// Writes the mode as its uppercase SQL keyword (`CONTENT` or `DOCUMENT`).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The output is a fixed keyword, so write it directly instead of
        // going through the `write!` formatting machinery.
        f.write_str(match self {
            XmlParseMode::Content => "CONTENT",
            XmlParseMode::Document => "DOCUMENT",
        })
    }
}
866+
867+
/// `XMLPARSE(CONTENT|DOCUMENT expr)`.
868+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
869+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
870+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
871+
pub struct XmlParseExpr {
872+
/// Parsing mode.
873+
pub mode: XmlParseMode,
874+
/// Expression to parse as XML.
875+
pub expr: Box<Expr>,
876+
}
877+
878+
impl fmt::Display for XmlParseExpr {
879+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
880+
write!(f, "XMLPARSE({} {})", self.mode, self.expr)
881+
}
882+
}
883+
847884
/// An SQL expression of any type.
848885
///
849886
/// # Semantics / Type Checking
@@ -1233,6 +1270,8 @@ pub enum Expr {
12331270
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
12341271
/// as well as constants of other types (a non-standard PostgreSQL extension).
12351272
TypedString(TypedString),
1273+
/// XML parse expression: `XMLPARSE(CONTENT|DOCUMENT expr)`.
1274+
XmlParse(XmlParseExpr),
12361275
/// Scalar function call e.g. `LEFT(foo, 5)`
12371276
Function(Function),
12381277
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
@@ -2015,6 +2054,7 @@ impl fmt::Display for Expr {
20152054
Expr::Value(v) => write!(f, "{v}"),
20162055
Expr::Prefixed { prefix, value } => write!(f, "{prefix} {value}"),
20172056
Expr::TypedString(ts) => ts.fmt(f),
2057+
Expr::XmlParse(xml_parse) => xml_parse.fmt(f),
20182058
Expr::Function(fun) => fun.fmt(f),
20192059
Expr::Case {
20202060
case_token: _,

src/ast/spans.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,7 @@ impl Spanned for Expr {
15551555
Expr::Nested(expr) => expr.span(),
15561556
Expr::Value(value) => value.span(),
15571557
Expr::TypedString(TypedString { value, .. }) => value.span(),
1558+
Expr::XmlParse(xml_parse) => xml_parse.expr.span(),
15581559
Expr::Function(function) => function.span(),
15591560
Expr::GroupingSets(vec) => {
15601561
union_spans(vec.iter().flat_map(|i| i.iter().map(|k| k.span())))

src/parser/mod.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2527,8 +2527,43 @@ impl<'a> Parser<'a> {
25272527
})
25282528
}
25292529

2530+
/// Consume the next token if it is an unquoted word matching `expected`
2531+
/// (case-insensitive), returning whether it was consumed.
2532+
fn parse_unquoted_word_value(&mut self, expected: &str) -> bool {
2533+
if let Token::Word(word) = &self.peek_token_ref().token {
2534+
if word.quote_style.is_none() && word.value.eq_ignore_ascii_case(expected) {
2535+
self.next_token();
2536+
return true;
2537+
}
2538+
}
2539+
false
2540+
}
2541+
2542+
fn parse_xml_parse_mode(&mut self) -> Result<XmlParseMode, ParserError> {
2543+
if self.parse_unquoted_word_value("content") {
2544+
Ok(XmlParseMode::Content)
2545+
} else if self.parse_unquoted_word_value("document") {
2546+
Ok(XmlParseMode::Document)
2547+
} else {
2548+
self.expected_ref("CONTENT or DOCUMENT", self.peek_token_ref())
2549+
}
2550+
}
2551+
2552+
fn parse_xmlparse_expr(&mut self) -> Result<Expr, ParserError> {
2553+
self.expect_token(&Token::LParen)?;
2554+
let mode = self.parse_xml_parse_mode()?;
2555+
let expr = Box::new(self.parse_expr()?);
2556+
self.expect_token(&Token::RParen)?;
2557+
Ok(Expr::XmlParse(XmlParseExpr { mode, expr }))
2558+
}
2559+
25302560
/// Parse a function call expression named by `name` and return it as an `Expr`.
25312561
pub fn parse_function(&mut self, name: ObjectName) -> Result<Expr, ParserError> {
2562+
if self.dialect.supports_xml_expressions()
2563+
&& Self::is_simple_unquoted_object_name(&name, "xmlparse")
2564+
{
2565+
return self.parse_xmlparse_expr();
2566+
}
25322567
self.parse_function_call(name).map(Expr::Function)
25332568
}
25342569

tests/sqlparser_common.rs

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19014,6 +19014,46 @@ fn parse_aliased_function_args() {
1901419014
.is_err());
1901519015
}
1901619016

19017+
#[test]
fn parse_xmlparse() {
    let xml_dialects = all_dialects_where(|d| d.supports_xml_expressions());

    // Each case: input SQL, its canonical rendering, and the expected mode.
    let cases = [
        (
            "SELECT xmlparse(content '<a/>')",
            "SELECT XMLPARSE(CONTENT '<a/>')",
            XmlParseMode::Content,
        ),
        (
            "SELECT xmlparse(document '<a/>')",
            "SELECT XMLPARSE(DOCUMENT '<a/>')",
            XmlParseMode::Document,
        ),
    ];
    for (sql, canonical, expected_mode) in cases {
        let select = xml_dialects.verified_only_select_with_canonical(sql, canonical);
        match &select.projection[0] {
            UnnamedExpr(Expr::XmlParse(XmlParseExpr { mode, .. })) => {
                assert_eq!(*mode, expected_mode);
            }
            item => panic!("expected XmlParse expression, got {item:?}"),
        }
    }

    // A missing CONTENT / DOCUMENT mode must be rejected.
    let missing_mode = xml_dialects.parse_sql_statements("SELECT xmlparse('<a/>')");
    assert!(missing_mode.is_err());

    // Dialects without XML support keep treating `xmlparse` as an ordinary
    // function, so the special `CONTENT <expr>` form does not parse there.
    let non_xml_dialects = all_dialects_except(|d| d.supports_xml_expressions());
    non_xml_dialects.verified_only_select("SELECT xmlparse(1)");
    let special_syntax = non_xml_dialects.parse_sql_statements("SELECT xmlparse(content '<a/>')");
    assert!(special_syntax.is_err());
}
19056+
1901719057
/// Regression test for the 2^N parse-time blowup in `parse_compound_expr` on
1901819058
/// inputs like `IF a0.a1...aN.#`. The parse is run on a worker thread and the
1901919059
/// main thread asserts that it reports back within a generous timeout. Post-fix

tests/sqlparser_postgres.rs

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3951,6 +3951,47 @@ fn parse_xmlforest_aliased_arguments() {
39513951
);
39523952
}
39533953

3954+
#[test]
fn parse_xmlparse() {
    // Round-trip statements for both CONTENT and DOCUMENT modes. The XML
    // payloads are a mix of valid, invalid, and edge-case strings; only the
    // SQL syntax is checked here, never the XML itself.
    let round_trip: &[&str] = &[
        "SELECT XMLPARSE(CONTENT '')",
        "SELECT XMLPARSE(CONTENT ' ')",
        "SELECT XMLPARSE(CONTENT 'abc')",
        "SELECT XMLPARSE(CONTENT '<abc>x</abc>')",
        "SELECT XMLPARSE(CONTENT '<invalidentity>&</invalidentity>')",
        "SELECT XMLPARSE(CONTENT '<undefinedentity>&idontexist;</undefinedentity>')",
        "SELECT XMLPARSE(CONTENT '<twoerrors>&idontexist;</unbalanced>')",
        "SELECT XMLPARSE(CONTENT '<nosuchprefix:tag/>')",
        "SELECT XMLPARSE(DOCUMENT ' ')",
        "SELECT XMLPARSE(DOCUMENT 'abc')",
        "SELECT XMLPARSE(DOCUMENT '<abc>x</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<invalidentity>&</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<undefinedentity>&idontexist;</abc>')",
        "SELECT XMLPARSE(DOCUMENT '<twoerrors>&idontexist;</unbalanced>')",
        "SELECT XMLPARSE(DOCUMENT '<nosuchprefix:tag/>')",
    ];
    for sql in round_trip.iter().copied() {
        pg().verified_stmt(sql);
    }

    // Lowercase input is accepted and rendered back with uppercase keywords.
    let expected = Expr::XmlParse(XmlParseExpr {
        mode: XmlParseMode::Content,
        expr: Box::new(Expr::Value(
            Value::SingleQuotedString("<a/>".to_string()).into(),
        )),
    });
    let select = pg().verified_only_select_with_canonical(
        "SELECT xmlparse(content '<a/>')",
        "SELECT XMLPARSE(CONTENT '<a/>')",
    );
    assert_eq!(expr_from_projection(&select.projection[0]), &expected);
}
3994+
39543995
#[test]
39553996
fn parse_xml_typed_string() {
39563997
// xml '...' should parse as a TypedString on PostgreSQL and Generic

0 commit comments

Comments
 (0)