Skip to content

Commit 654a010

Browse files
authored
Avoid using SqlXml for serialization (#38506)
1 parent 3cb71a6 commit 654a010

3 files changed

Lines changed: 79 additions & 75 deletions

File tree

src/EFCore.SqlServer/Storage/Internal/SqlServerStringTypeMapping.cs

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,7 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33

44
using System.Data;
5-
using System.Data.SqlTypes;
65
using System.Text;
7-
using System.Xml;
86
using Microsoft.Data.SqlClient;
97
using Microsoft.EntityFrameworkCore.Storage.Json;
108

@@ -21,15 +19,6 @@ public class SqlServerStringTypeMapping : StringTypeMapping
2119
private const int UnicodeMax = 4000;
2220
private const int AnsiMax = 8000;
2321

24-
// DTD processing is explicitly prohibited and no external resolver is used so that a malicious
25-
// payload is rejected rather than processed.
26-
private static readonly XmlReaderSettings XmlFragmentSettings = new()
27-
{
28-
ConformanceLevel = ConformanceLevel.Fragment,
29-
DtdProcessing = DtdProcessing.Prohibit,
30-
XmlResolver = null
31-
};
32-
3322
private static readonly CaseInsensitiveValueComparer CaseInsensitiveValueComparer = new();
3423

3524
private readonly bool _isUtf16;
@@ -169,15 +158,17 @@ protected override void ConfigureParameter(DbParameter parameter)
169158

170159
if (_sqlDbType == SqlDbType.Xml)
171160
{
172-
// SqlClient only sends a parameter as 'xml' (rather than 'nvarchar(max)') when its value is a SqlXml.
173-
// Sending it as 'xml' lets SQL Server honor any encoding declared in the XML prolog (e.g. utf-8), which
174-
// otherwise fails with "unable to switch the encoding". A fragment-conformant reader is used so that the
175-
// content forms that the 'xml' store type accepts - empty strings, text, and multiple top-level nodes -
176-
// continue to round-trip.
161+
// xml is a max-length type; pin the size to unbounded so SqlClient does not infer it from the
162+
// value length (which would otherwise vary per value in logs and the query cache).
163+
parameter.Size = -1;
164+
165+
// SqlClient sends a string parameter as 'nvarchar(max)' even when its SqlDbType is 'xml', which makes
166+
// SQL Server reject an XML prolog that declares a non-UTF-16 encoding (e.g. utf-8) with "unable to switch
167+
// the encoding". Only the XML declaration conflicts, and it can only appear at the very start, so it is
168+
// sliced off and the rest of the value is sent verbatim.
177169
if (value is string xml)
178170
{
179-
using var reader = XmlReader.Create(new StringReader(xml), XmlFragmentSettings);
180-
parameter.Value = new SqlXml(reader);
171+
parameter.Value = RemoveXmlProlog(xml);
181172
}
182173

183174
return;
@@ -220,6 +211,30 @@ protected override void ConfigureParameter(DbParameter parameter)
220211
}
221212
}
222213

214+
private static string RemoveXmlProlog(string xml)
215+
{
216+
var start = 0;
217+
while (start < xml.Length && char.IsWhiteSpace(xml[start]))
218+
{
219+
start++;
220+
}
221+
222+
// An XML declaration starts with "<?xml" followed by mandatory whitespace. If present, remove everything
223+
// up to and including the closing "?>" and return the remainder verbatim.
224+
if (start + 5 < xml.Length
225+
&& string.CompareOrdinal(xml, start, "<?xml", 0, 5) == 0
226+
&& char.IsWhiteSpace(xml[start + 5]))
227+
{
228+
var end = xml.IndexOf("?>", start + 6, StringComparison.Ordinal);
229+
if (end >= 0)
230+
{
231+
return xml[(end + 2)..];
232+
}
233+
}
234+
235+
return xml;
236+
}
237+
223238
/// <summary>
224239
/// This is an internal API that supports the Entity Framework Core infrastructure and not subject to
225240
/// the same compatibility standards as public APIs. It may be changed or removed without notice in

test/EFCore.SqlServer.FunctionalTests/BuiltInDataTypesSqlServerTest.cs

Lines changed: 27 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -3820,20 +3820,31 @@ FROM INFORMATION_SCHEMA.COLUMNS
38203820
// The grinning-face emoji is outside the BMP (a UTF-16 surrogate pair, four UTF-8 bytes) and the euro sign
38213821
// is a single UTF-16 code unit but three UTF-8 bytes; both are represented differently in UTF-16 than in
38223822
// UTF-8 and are lost when an xml value is sent to the server as a non-Unicode string, which makes them good
3823-
// probes for the SqlXml/SqlDbType.Xml parameter path.
3823+
// probes for the SqlDbType.Xml parameter path.
38243824
private const string XmlEmoji = "\U0001F600";
38253825
private const string XmlEuro = "\u20AC";
38263826

38273827
[Theory]
3828-
[InlineData("<root>" + XmlEmoji + XmlEuro + "</root>", "<root>" + XmlEmoji + XmlEuro + "</root>")]
3829-
// An explicit non-UTF-16 prolog is accepted because the value is sent as 'xml', not 'nvarchar(max)'.
3830-
[InlineData("<?xml version=\"1.0\" encoding=\"utf-8\"?><root>" + XmlEmoji + "</root>", "<root>" + XmlEmoji + "</root>")]
3831-
[InlineData("<?xml version=\"1.0\" encoding=\"utf-16\"?><root>" + XmlEuro + "</root>", "<root>" + XmlEuro + "</root>")]
3828+
[InlineData(
3829+
"<root>" + XmlEmoji + XmlEuro + "</root>",
3830+
"<root>" + XmlEmoji + XmlEuro + "</root>",
3831+
"<root>" + XmlEmoji + XmlEuro + "</root>")]
3832+
// Only the XML declaration is removed; a following stylesheet PI and the rest of the value are sent verbatim.
3833+
[InlineData(
3834+
"<?xml version=\"1.0\" encoding=\"utf-8\" standalone='yes' ?> <?xml-stylesheet href=\"style.xsl\" type=\"text/xml\"?> <root>" + XmlEmoji + "</root>",
3835+
" <?xml-stylesheet href=\"style.xsl\" type=\"text/xml\"?> <root>" + XmlEmoji + "</root>",
3836+
"<?xml-stylesheet href=\"style.xsl\" type=\"text/xml\"?><root>" + XmlEmoji + "</root>")]
3837+
// The leading whitespace and the declaration are removed when the value is sent.
3838+
[InlineData(
3839+
" <?xml version=\"1.1\" encoding=\"utf-16\"?> <root>" + XmlEuro + "</root>",
3840+
" <root>" + XmlEuro + "</root>",
3841+
"<root>" + XmlEuro + "</root>")]
38323842
// Content forms that the 'xml' store type accepts beyond a single well-formed document.
3833-
[InlineData("", "")]
3834-
[InlineData("text fragment", "text fragment")]
3835-
[InlineData("<a/><b/>", "<a /><b />")]
3836-
public async Task Xml_value_round_trips(string value, string expected)
3843+
[InlineData("", "", "")]
3844+
[InlineData("text fragment", "text fragment", "text fragment")]
3845+
// The content is sent verbatim, but the server expands self-closing tags when the xml column is read back.
3846+
[InlineData("<a/><b/>", "<a/><b/>", "<a /><b />")]
3847+
public async Task Xml_value_round_trips(string value, string expected, string roundTripped)
38373848
{
38383849
await using var context = CreateContext();
38393850

@@ -3845,7 +3856,7 @@ public async Task Xml_value_round_trips(string value, string expected)
38453856
context.ChangeTracker.Clear();
38463857

38473858
// xml columns cannot be compared directly in a WHERE clause, so the row is fetched by its key. Coalescing
3848-
// the column with the original value sends that value as an 'xml' parameter, exercising the SqlXml
3859+
// the column with the original value sends that value as an 'xml' parameter, exercising the prolog-removal
38493860
// parameter path in a query in addition to the insert above.
38503861
var query = context.Set<XmlTestDocument>()
38513862
.Where(d => d.Id == id)
@@ -3860,14 +3871,15 @@ SELECT COALESCE([x].[Content], @value)
38603871
FROM [XmlTestDocument] AS [x]
38613872
WHERE [x].[Id] = @id
38623873
""",
3863-
query.ToQueryString());
3874+
query.ToQueryString(),
3875+
ignoreLineEndingDifferences: true);
38643876

3865-
var roundTripped = await query.SingleAsync();
3866-
Assert.Equal(expected, roundTripped);
3877+
var actual = await query.SingleAsync();
3878+
Assert.Equal(roundTripped, actual);
38673879

38683880
AssertSql(
38693881
$"""
3870-
@p0='{expected}' (DbType = Xml)
3882+
@p0='{expected}' (Size = -1) (DbType = Xml)
38713883
38723884
SET IMPLICIT_TRANSACTIONS OFF;
38733885
SET NOCOUNT ON;
@@ -3877,7 +3889,7 @@ OUTPUT INSERTED.[Id]
38773889
""",
38783890
//
38793891
$"""
3880-
@value='{expected}' (DbType = Xml)
3892+
@value='{expected}' (Size = -1) (DbType = Xml)
38813893
@id='{id}'
38823894
38833895
SELECT TOP(2) COALESCE([x].[Content], @value)
@@ -3886,40 +3898,6 @@ FROM [XmlTestDocument] AS [x]
38863898
""");
38873899
}
38883900

3889-
[Fact]
3890-
public async Task Xml_value_with_dtd_payload_is_rejected()
3891-
{
3892-
await using var context = CreateContext();
3893-
3894-
// A "billion laughs" entity-expansion payload: the reader must reject the DTD rather than expand it.
3895-
const string maliciousXml =
3896-
"<?xml version=\"1.0\"?>"
3897-
+ "<!DOCTYPE lolz [<!ENTITY lol \"lol\">"
3898-
+ "<!ENTITY lol2 \"&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;&lol;\">"
3899-
+ "<!ENTITY lol3 \"&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;&lol2;\">]>"
3900-
+ "<lolz>&lol3;</lolz>";
3901-
3902-
context.Add(new XmlTestDocument { Content = maliciousXml });
3903-
3904-
var exception = await Assert.ThrowsAnyAsync<Exception>(() => context.SaveChangesAsync());
3905-
Assert.True(
3906-
HasXmlException(exception),
3907-
$"Expected an {nameof(XmlException)} in the exception chain but found: {exception}");
3908-
3909-
static bool HasXmlException(Exception exception)
3910-
{
3911-
for (var current = exception; current is not null; current = current.InnerException)
3912-
{
3913-
if (current is XmlException)
3914-
{
3915-
return true;
3916-
}
3917-
}
3918-
3919-
return false;
3920-
}
3921-
}
3922-
39233901
private class XmlTestDocument
39243902
{
39253903
public int Id { get; set; }

test/EFCore.SqlServer.Tests/Storage/SqlServerTypeMappingTest.cs

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
using Microsoft.EntityFrameworkCore.Design.Internal;
88
using Microsoft.EntityFrameworkCore.SqlServer.Infrastructure.Internal;
99
using Microsoft.EntityFrameworkCore.SqlServer.Storage.Internal;
10+
using Microsoft.EntityFrameworkCore.Storage.Internal;
1011

1112
// ReSharper disable InconsistentNaming
1213
namespace Microsoft.EntityFrameworkCore.Storage;
@@ -379,12 +380,18 @@ public virtual void Char_Utf8()
379380
}
380381

381382
[Theory]
382-
[InlineData("<r>a</r>")]
383-
[InlineData("<?xml version=\"1.0\" encoding=\"utf-8\"?><r>a</r>")]
384-
[InlineData("")]
385-
[InlineData("text fragment")]
386-
[InlineData("<a/><b/>")]
387-
public virtual void Xml_parameter_is_sent_as_SqlXml(string value)
383+
[InlineData("<r>a</r>", "<r>a</r>")]
384+
// The XML declaration is removed so the value can be sent as a string without an encoding conflict.
385+
[InlineData("<?xml version=\"1.0\" encoding=\"utf-8\"?><r>a</r>", "<r>a</r>")]
386+
// The declaration's closing '>' is found even when a space precedes it, and any leading whitespace is dropped too.
387+
[InlineData(" <?xml version=\"1.0\" encoding=\"utf-8\" ?> <r>a</r>", " <r>a</r>")]
388+
// Only the declaration is removed; a following stylesheet PI and the rest are kept verbatim.
389+
[InlineData("<?xml version=\"1.1\" encoding=\"utf-8\"?><?xml-stylesheet href=\"s.xsl\"?><r>a</r>", "<?xml-stylesheet href=\"s.xsl\"?><r>a</r>")]
390+
[InlineData("", "")]
391+
[InlineData("text fragment", "text fragment")]
392+
// Content after the prolog is sent verbatim, so the original formatting is preserved.
393+
[InlineData("<a/><b/>", "<a/><b/>")]
394+
public virtual void Xml_parameter_is_sent_as_string_with_prolog_removed(string value, string expected)
388395
{
389396
var mapping = GetMapping("xml");
390397
Assert.Equal("xml", mapping.StoreType);
@@ -393,8 +400,12 @@ public virtual void Xml_parameter_is_sent_as_SqlXml(string value)
393400
var parameter = (SqlParameter)mapping.CreateParameter(command, "foo", value);
394401

395402
Assert.Equal(SqlDbType.Xml, parameter.SqlDbType);
396-
var sqlXml = Assert.IsType<System.Data.SqlTypes.SqlXml>(parameter.Value);
397-
Assert.False(sqlXml.IsNull);
403+
404+
// The value stays a string so it can still be rendered by the diagnostics logger.
405+
Assert.Equal(expected, parameter.Value);
406+
Assert.Equal(
407+
$"foo='{expected}' (Nullable = false) (Size = -1) (DbType = Xml)",
408+
parameter.FormatParameter(logParameterValues: true));
398409
}
399410

400411
[Fact]

0 commit comments

Comments
 (0)