Skip to content

Commit 58bfd94

Browse files
[6.1] Test | Fix Transient Fault handling and other flaky unit tests (#4080) (#4269)
1 parent 0616345 commit 58bfd94

12 files changed

Lines changed: 181 additions & 97 deletions

File tree

eng/pipelines/common/templates/steps/configure-sql-server-macos-step.yml

Lines changed: 66 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,8 @@
44
# See the LICENSE file in the project root for more information. #
55
#################################################################################
66

7-
# This step installs the latest SQL Server 2022 onto the macOS host and
8-
# configures it for use.
7+
# This step installs the latest SQL Server 2022 onto the macOS host as a docker
8+
# container and configures it for use.
99

1010
parameters:
1111
- name: password
@@ -21,39 +21,55 @@ steps:
2121
- bash: |
2222
# The "user" pipeline variable conflicts with homebrew, causing errors during install. Set it back to the pipeline user.
2323
USER=`whoami`
24+
2425
SQLCMD_ERRORS=$(Agent.TempDirectory)/sqlcmd_err.log
25-
echo $SQLCMD_ERRORS
26+
echo "Errors will be written to: $SQLCMD_ERRORS"
27+
28+
# Configure the prompt to show the current timestamp so we can see how long each command takes.
29+
export PS4='+ [$(date "+%Y-%m-%d %H:%M:%S")] '
30+
set -x
2631
32+
# Install Docker CLI (not Desktop -- Colima provides the daemon) and SQLCMD tools.
2733
brew install colima
28-
brew install --cask docker
34+
brew install docker
2935
brew tap microsoft/mssql-release https://github.com/Microsoft/homebrew-mssql-release
3036
brew update
3137
HOMEBREW_ACCEPT_EULA=Y brew install mssql-tools18
32-
colima start --arch x86_64
38+
39+
# Start Colima with Virtualization.framework for x86_64 binary translation
40+
# on Apple Silicon. Rosetta/binfmt emulation is enabled by default in
41+
# Colima >= 0.8 when using --vm-type vz, which is dramatically faster than
42+
# --arch x86_64 (full QEMU VM emulation).
43+
# Requires macOS >= 13 (Ventura).
44+
colima start --vm-type vz --cpu 4 --memory 4
3345
docker --version
34-
docker pull mcr.microsoft.com/mssql/server:2022-latest
35-
46+
docker pull --platform linux/amd64 mcr.microsoft.com/mssql/server:2022-latest
47+
48+
# Disable xtrace before any command that handles the SA password so it is
49+
# not echoed (expanded) into pipeline logs.
50+
{ set +x; } 2>/dev/null
51+
3652
# Password for the SA user (required)
37-
MSSQL_SA_PW=${{ parameters.password }}
53+
MSSQL_SA_PW="${{ parameters.password }}"
54+
55+
docker run --platform linux/amd64 -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PW" -p 1433:1433 -p 1434:1434 --name sql1 --hostname sql1 -d mcr.microsoft.com/mssql/server:2022-latest
3856
39-
docker run -e "ACCEPT_EULA=Y" -e "MSSQL_SA_PASSWORD=$MSSQL_SA_PW" -p 1433:1433 -p 1434:1434 --name sql1 --hostname sql1 -d mcr.microsoft.com/mssql/server:2022-latest
40-
41-
sleep 5
57+
sleep 10
4258
4359
docker ps -a
4460
4561
# Connect to the SQL Server container and get its version.
4662
#
47-
# It can take a while for the docker container to start listening and be
48-
# ready for connections, so we will wait for up to 2 minutes, checking every
49-
# 3 seconds.
50-
51-
# Wait 3 seconds between attempts.
52-
delay=3
53-
54-
# Try up to 40 times (2 minutes) to connect.
55-
maxAttempts=40
56-
63+
# With Rosetta 2 emulation, SQL Server starts much faster than under full
64+
# QEMU emulation, but it can still take a minute or two. We allow up to
65+
# 6 minutes (72 attempts x 5 seconds) as a generous upper bound.
66+
67+
# Wait 5 seconds between attempts.
68+
delay=5
69+
70+
# Try up to 72 times (~6 minutes) to connect.
71+
maxAttempts=72
72+
5773
# Attempt counter.
5874
attempt=1
5975
@@ -63,18 +79,29 @@ steps:
6379
while [ $attempt -le $maxAttempts ]
6480
do
6581
echo "Waiting for SQL Server to start (attempt #$attempt of $maxAttempts)..."
66-
67-
sqlcmd -S 127.0.0.1 -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" >> $SQLCMD_ERRORS 2>&1
68-
82+
83+
# -C trusts the self-signed certificate inside the container.
84+
sqlcmd -S 127.0.0.1 -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" >> $SQLCMD_ERRORS 2>&1
85+
6986
# If the command was successful, then the SQL Server is ready.
7087
if [ $? -eq 0 ]; then
7188
ready=1
7289
break
7390
fi
74-
91+
92+
# Verify the container is still running; no point retrying if it crashed.
93+
if ! docker ps --filter "name=^/sql1$" --filter "status=running" --format '{{.Names}}' | grep -Fxq 'sql1'; then
94+
echo "ERROR: sql1 container is no longer running."
95+
docker ps -a --filter "name=^/sql1$"
96+
echo "--- Container logs ---"
97+
docker logs sql1 2>&1 | tail -50
98+
rm -f $SQLCMD_ERRORS
99+
exit 1
100+
fi
101+
75102
# Increment the attempt counter.
76103
((attempt++))
77-
104+
78105
# Wait before trying again.
79106
sleep $delay
80107
done
@@ -83,8 +110,13 @@ steps:
83110
if [ $ready -eq 0 ]
84111
then
85112
# No, so report the error(s) and exit.
86-
echo Cannot connect to SQL Server; installation aborted; errors were:
113+
echo "Cannot connect to SQL Server after $maxAttempts attempts; installation aborted."
114+
echo "--- sqlcmd errors ---"
87115
cat $SQLCMD_ERRORS
116+
echo "--- Container status ---"
117+
docker ps -a --filter "name=^/sql1$"
118+
echo "--- Container logs (last 80 lines) ---"
119+
docker logs sql1 2>&1 | tail -80
88120
rm -f $SQLCMD_ERRORS
89121
exit 1
90122
fi
@@ -93,22 +125,22 @@ steps:
93125
94126
echo "Use sqlcmd to show which IP addresses are being listened on..."
95127
echo 0.0.0.0
96-
sqlcmd -S 0.0.0.0 -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" -l 2
128+
sqlcmd -S 0.0.0.0 -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" -l 2
97129
echo 127.0.0.1
98-
sqlcmd -S 127.0.0.1 -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" -l 2
130+
sqlcmd -S 127.0.0.1 -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" -l 2
99131
echo ::1
100-
sqlcmd -S ::1 -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" -l 2
132+
sqlcmd -S ::1 -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" -l 2
101133
echo localhost
102-
sqlcmd -S localhost -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" -l 2
134+
sqlcmd -S localhost -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" -l 2
103135
echo "(sqlcmd default / not specified)"
104-
sqlcmd -No -U sa -P $MSSQL_SA_PW -Q "SELECT @@VERSION" -l 2
136+
sqlcmd -No -C -U sa -P "$MSSQL_SA_PW" -Q "SELECT @@VERSION" -l 2
105137
106138
echo "Configuring Dedicated Administer Connections to allow remote connections..."
107-
sqlcmd -S 127.0.0.1 -No -U sa -P $MSSQL_SA_PW -Q "sp_configure 'remote admin connections', 1; RECONFIGURE;"
139+
sqlcmd -S 127.0.0.1 -No -C -U sa -P "$MSSQL_SA_PW" -Q "sp_configure 'remote admin connections', 1; RECONFIGURE;"
108140
if [ $? = 1 ]
109141
then
110142
echo "Error configuring DAC for remote access."
111-
exit $errstatus
143+
exit 1
112144
else
113145
echo "Configuration complete."
114146
fi

src/Microsoft.Data.SqlClient/tests/UnitTests/Microsoft/Data/SqlTypes/SqlTypeWorkaroundsTests.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -63,7 +63,7 @@ public void SqlBinaryCtor_NullInput()
6363
};
6464

6565
[Theory]
66-
[MemberData(nameof(SqlDecimalExtractData_NonNullInput_Data))]
66+
[MemberData(nameof(SqlDecimalExtractData_NonNullInput_Data), DisableDiscoveryEnumeration = true)]
6767
public void SqlDecimalExtractData_NonNullInput(SqlDecimal input)
6868
{
6969
// Act
@@ -156,7 +156,7 @@ public void SqlGuidCtor_ValidInput(byte[] input)
156156
};
157157

158158
[Theory]
159-
[MemberData(nameof(SqlMoneyCtor_Data))]
159+
[MemberData(nameof(SqlMoneyCtor_Data), DisableDiscoveryEnumeration = true)]
160160
public void SqlMoneyCtor(long input, SqlMoney expected)
161161
{
162162
// Act
@@ -177,7 +177,7 @@ public void SqlMoneyCtor(long input, SqlMoney expected)
177177
};
178178

179179
[Theory]
180-
[MemberData(nameof(SqlMoneyToSqlInternalRepresentation_NonNullInput_Data))]
180+
[MemberData(nameof(SqlMoneyToSqlInternalRepresentation_NonNullInput_Data), DisableDiscoveryEnumeration = true)]
181181
public void SqlMoneyToSqlInternalRepresentation_NonNullInput(SqlMoney input, long expected)
182182
{
183183
// Act

src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111

1212
namespace Microsoft.Data.SqlClient.UnitTests.SimulatedServerTests
1313
{
14-
[Trait("Category", "flaky")]
1514
[Collection("SimulatedServerTests")]
1615
public class ConnectionFailoverTests
1716
{
@@ -71,7 +70,7 @@ public void TransientFault_NoFailover_DoesNotClearPool(uint errorCode)
7170
Assert.Equal($"localhost,{initialServer.EndPoint.Port}", secondConnection.DataSource);
7271

7372
// 1 for the initial connection, 2 for the second connection
74-
Assert.Equal(3, initialServer.PreLoginCount);
73+
Assert.Equal(3, initialServer.Login7Count);
7574
// A failover should not be triggered, so prelogin count to the failover server should be 0
7675
Assert.Equal(0, failoverServer.PreLoginCount);
7776
}
@@ -219,6 +218,7 @@ public void NetworkDelay_ShouldConnectToPrimary()
219218
InitialCatalog = "master",// Required for failover partner to work
220219
ConnectTimeout = 5,
221220
Encrypt = false,
221+
Pooling = false, // Disable pooling to ensure a fresh connection attempt is made
222222
MultiSubnetFailover = false,
223223
#if NETFRAMEWORK
224224
TransparentNetworkIPResolution = false,
@@ -275,6 +275,7 @@ public void NetworkError_WithUserProvidedPartner_RetryDisabled_ShouldConnectToFa
275275
ConnectRetryCount = 0, // Disable retry
276276
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", // User provided failover partner
277277
Encrypt = false,
278+
Pooling = false, // Disable pooling to ensure a fresh connection attempt is made on failover
278279
};
279280
using SqlConnection connection = new(builder.ConnectionString);
280281
try
@@ -326,6 +327,9 @@ public void NetworkError_WithUserProvidedPartner_RetryEnabled_ShouldConnectToFai
326327
ConnectRetryInterval = 1,
327328
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}", // User provided failover partner
328329
Encrypt = false,
330+
#if NETFRAMEWORK
331+
TransparentNetworkIPResolution = false,
332+
#endif
329333
};
330334
using SqlConnection connection = new(builder.ConnectionString);
331335
// Act
@@ -337,7 +341,11 @@ public void NetworkError_WithUserProvidedPartner_RetryEnabled_ShouldConnectToFai
337341
Assert.Equal(ConnectionState.Open, connection.State);
338342
Assert.Equal($"localhost,{failoverServer.EndPoint.Port}", connection.DataSource);
339343
Assert.Equal(1, server.PreLoginCount);
340-
Assert.Equal(1, failoverServer.PreLoginCount);
344+
// Login7 is sent to the primary but the client gives up during the
345+
// server-side delay; the counter is still incremented when the
346+
// Login7 message is received.
347+
Assert.Equal(1, server.Login7Count);
348+
Assert.Equal(1, failoverServer.Login7Count);
341349
}
342350

343351
[Theory]
@@ -370,7 +378,8 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode)
370378
InitialCatalog = "master",
371379
ConnectTimeout = 30,
372380
ConnectRetryInterval = 1,
373-
Encrypt = false
381+
Encrypt = false,
382+
Pooling = false, // Disable pooling to ensure a fresh connection attempt is made
374383
};
375384
using SqlConnection connection = new(builder.ConnectionString);
376385

@@ -382,7 +391,7 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode)
382391
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
383392

384393
// Failures should prompt the client to return to the original server, resulting in a login count of 2
385-
Assert.Equal(2, server.PreLoginCount);
394+
Assert.Equal(2, server.Login7Count);
386395
}
387396

388397
[Theory]
@@ -468,7 +477,7 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e
468477
FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}", // User provided failover partner
469478
};
470479
using SqlConnection connection = new(builder.ConnectionString);
471-
480+
472481
// Act
473482
connection.Open();
474483

@@ -477,7 +486,7 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e
477486
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
478487

479488
// Failures should prompt the client to return to the original server, resulting in a login count of 2
480-
Assert.Equal(2, server.PreLoginCount);
489+
Assert.Equal(2, server.Login7Count);
481490
}
482491

483492
[Theory]
@@ -580,6 +589,10 @@ public void TransientFault_IgnoreServerProvidedFailoverPartner_ShouldConnectToUs
580589
// Dispose of the server to trigger a failover
581590
server.Dispose();
582591

592+
// Clear the pool to ensure the next connection attempt doesn't reuse
593+
// the pooled connection to the now-disposed primary server.
594+
SqlConnection.ClearAllPools();
595+
583596
// Opening a new connection will use the failover partner stored in the pool group.
584597
// This will fail if the server provided failover partner was stored to the pool group.
585598
using SqlConnection failoverConnection = new(builder.ConnectionString);
@@ -593,9 +606,9 @@ public void TransientFault_IgnoreServerProvidedFailoverPartner_ShouldConnectToUs
593606
Assert.Equal(ConnectionState.Open, failoverConnection.State);
594607
Assert.Equal($"localhost,{failoverServer.EndPoint.Port}", failoverConnection.DataSource);
595608
// 1 for the initial connection
596-
Assert.Equal(1, server.PreLoginCount);
609+
Assert.Equal(1, server.Login7Count);
597610
// 1 for the failover connection
598-
Assert.Equal(1, failoverServer.PreLoginCount);
611+
Assert.Equal(1, failoverServer.Login7Count);
599612
}
600613
}
601614
}

src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionRoutingTests.cs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010

1111
namespace Microsoft.Data.SqlClient.UnitTests.SimulatedServerTests
1212
{
13-
[Trait("Category", "flaky")]
1413
[Collection("SimulatedServerTests")]
1514
public class ConnectionRoutingTests
1615
{
@@ -58,8 +57,8 @@ public void TransientFaultAtRoutedLocation_ShouldReturnToGateway(uint errorCode)
5857
Assert.Equal($"localhost,{router.EndPoint.Port}", connection.DataSource);
5958

6059
// Failures should prompt the client to return to the original server, resulting in a login count of 2
61-
Assert.Equal(2, router.PreLoginCount);
62-
Assert.Equal(2, server.PreLoginCount);
60+
Assert.Equal(2, router.Login7Count);
61+
Assert.Equal(2, server.Login7Count);
6362
}
6463

6564
[Theory]

src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionRoutingTestsAzure.cs

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010

1111
namespace Microsoft.Data.SqlClient.UnitTests.SimulatedServerTests
1212
{
13-
[Trait("Category", "flaky")]
1413
[Collection("SimulatedServerTests")]
1514
public class ConnectionRoutingTestsAzure : IDisposable
1615
{
@@ -76,8 +75,8 @@ public void TransientFaultAtRoutedLocation_ShouldReturnToGateway(uint errorCode)
7675
Assert.Equal($"localhost,{router.EndPoint.Port}", connection.DataSource);
7776

7877
// Failures should prompt the client to return to the original server, resulting in a login count of 2
79-
Assert.Equal(2, router.PreLoginCount);
80-
Assert.Equal(2, server.PreLoginCount);
78+
Assert.Equal(2, router.Login7Count);
79+
Assert.Equal(2, server.Login7Count);
8180
}
8281

8382
[Theory]

0 commit comments

Comments
 (0)