Skip to content

Commit 24d5a77

Browse files
committed
Fix failover parser-state handling and add targeted regression tests
1 parent 4a36399 commit 24d5a77

4 files changed

Lines changed: 295 additions & 3 deletions

File tree

src/Microsoft.Data.SqlClient/src/Microsoft/Data/SqlClient/Connection/SqlConnectionInternal.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3635,7 +3635,15 @@ private void LoginWithFailover(
36353635
continue;
36363636
}
36373637

3638-
if (IsDoNotRetryConnectError(sqlex) || timeout.IsExpired)
3638+
// If state != closed, indicates that the parser encountered an error while
3639+
// processing the login response (e.g. an explicit error token). Transient
3640+
// network errors that impact connectivity will result in parser state being
3641+
// closed. Only network-level errors should trigger failover alternation;
3642+
// login-phase errors (like transient errors) should be thrown so they can
3643+
// be handled by the outer ConnectRetryCount loop.
3644+
if (_parser?.State is not TdsParserState.Closed ||
3645+
IsDoNotRetryConnectError(sqlex) ||
3646+
timeout.IsExpired)
36393647
{
36403648
// No more time to try again.
36413649
// Caller will call LoginFailure()

src/Microsoft.Data.SqlClient/tests/UnitTests/SimulatedServerTests/ConnectionFailoverTests.cs

Lines changed: 278 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
using System;
66
using System.Data;
7+
using System.Threading.Tasks;
78
using Microsoft.Data.SqlClient.Connection;
89
using Microsoft.Data.SqlClient.Tests.Common;
910
using Microsoft.SqlServer.TDS.Servers;
@@ -172,7 +173,7 @@ public void NetworkTimeout_ShouldFail()
172173
InitialCatalog = "master",// Required for failover partner to work
173174
ConnectTimeout = 1,
174175
ConnectRetryInterval = 1,
175-
ConnectRetryCount = 0, // Disable retry
176+
ConnectRetryCount = 0, // Disable retry
176177
Encrypt = false,
177178
MultiSubnetFailover = false,
178179
#if NETFRAMEWORK
@@ -336,6 +337,10 @@ public void NetworkError_WithUserProvidedPartner_RetryEnabled_ShouldConnectToFai
336337
Assert.Equal(1, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount);
337338
}
338339

340+
/// <summary>
341+
/// Verifies login-phase transient SQL errors are retried on the primary endpoint and
342+
/// do not trigger failover-partner alternation.
343+
/// </summary>
339344
[Theory]
340345
[InlineData(40613)]
341346
[InlineData(42108)]
@@ -372,6 +377,8 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode)
372377
using SqlConnection connection = new(builder.ConnectionString);
373378

374379
// Act
380+
// First login receives the transient token; outer connect retry opens a fresh parser
381+
// and retries against the same primary endpoint.
375382
connection.Open();
376383

377384
// Assert
@@ -380,6 +387,8 @@ public void TransientFault_ShouldConnectToPrimary(uint errorCode)
380387

381388
// Failures should prompt the client to return to the original server, resulting in a login count of 2
382389
Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount);
390+
// The fix: login-phase errors must NOT trigger failover alternation
391+
Assert.Equal(0, failoverServer.PreLoginCount);
383392
}
384393

385394
[Theory]
@@ -430,6 +439,10 @@ public void TransientFault_RetryDisabled_ShouldFail(uint errorCode)
430439
Assert.Fail();
431440
}
432441

442+
/// <summary>
443+
/// Verifies user-provided failover partner does not change behavior for login-phase
444+
/// transient SQL errors; retries stay on primary.
445+
/// </summary>
433446
[Theory]
434447
[InlineData(40613)]
435448
[InlineData(42108)]
@@ -467,6 +480,8 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e
467480
using SqlConnection connection = new(builder.ConnectionString);
468481

469482
// Act
483+
// Even with a configured partner, this path should use outer connect retry
484+
// against primary rather than alternation inside LoginWithFailover.
470485
connection.Open();
471486

472487
// Assert
@@ -475,6 +490,8 @@ public void TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary(uint e
475490

476491
// Failures should prompt the client to return to the original server, resulting in a login count of 2
477492
Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount);
493+
// The fix: login-phase errors must NOT trigger failover alternation
494+
Assert.Equal(0, failoverServer.PreLoginCount);
478495
}
479496

480497
[Theory]
@@ -591,5 +608,265 @@ public void TransientFault_IgnoreServerProvidedFailoverPartner_ShouldConnectToUs
591608
// 1 for the failover connection
592609
Assert.Equal(1, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount);
593610
}
611+
612+
/// <summary>
613+
/// Async parity for primary-only retry behavior on login-phase transient SQL errors.
614+
/// </summary>
615+
[Theory]
616+
[InlineData(40613)]
617+
[InlineData(42108)]
618+
[InlineData(42109)]
619+
public async Task TransientFault_Async_ShouldConnectToPrimary_NotFailover(uint errorCode)
620+
{
621+
// Async parity for TransientFault_ShouldConnectToPrimary.
622+
// A transient login-token error must be retried against the primary;
623+
// the failover partner must never be contacted.
624+
625+
using TdsServer failoverServer = new(
626+
new TdsServerArguments
627+
{
628+
FailoverPartner = "localhost,1234",
629+
});
630+
failoverServer.Start();
631+
632+
using TransientTdsErrorTdsServer server = new(
633+
new TransientTdsErrorTdsServerArguments()
634+
{
635+
IsEnabledTransientError = true,
636+
Number = errorCode,
637+
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}",
638+
});
639+
server.Start();
640+
641+
SqlConnectionStringBuilder builder = new()
642+
{
643+
DataSource = $"localhost,{server.EndPoint.Port}",
644+
InitialCatalog = "master",
645+
ConnectTimeout = 30,
646+
ConnectRetryInterval = 1,
647+
Encrypt = false,
648+
Pooling = false,
649+
};
650+
using SqlConnection connection = new(builder.ConnectionString);
651+
652+
// Asserts async open follows the same retry and failover-selection rules as sync.
653+
await connection.OpenAsync();
654+
655+
Assert.Equal(ConnectionState.Open, connection.State);
656+
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
657+
Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount);
658+
// The fix: login-phase errors must NOT trigger failover alternation
659+
Assert.Equal(0, failoverServer.PreLoginCount);
660+
}
661+
662+
/// <summary>
663+
/// Async parity with user-provided partner: login-phase transient SQL errors should
664+
/// still retry on primary without failover alternation.
665+
/// </summary>
666+
[Theory]
667+
[InlineData(40613)]
668+
[InlineData(42108)]
669+
[InlineData(42109)]
670+
public async Task TransientFault_WithUserProvidedPartner_Async_ShouldConnectToPrimary_NotFailover(uint errorCode)
671+
{
672+
// Async parity for TransientFault_WithUserProvidedPartner_ShouldConnectToPrimary.
673+
// Even with a user-provided failover partner, a login-token error must not
674+
// cause alternation to the failover server.
675+
676+
using TdsServer failoverServer = new(
677+
new TdsServerArguments
678+
{
679+
FailoverPartner = "localhost:1234",
680+
});
681+
failoverServer.Start();
682+
683+
using TransientTdsErrorTdsServer server = new(
684+
new TransientTdsErrorTdsServerArguments()
685+
{
686+
IsEnabledTransientError = true,
687+
Number = errorCode,
688+
FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}",
689+
});
690+
server.Start();
691+
692+
SqlConnectionStringBuilder builder = new()
693+
{
694+
DataSource = $"localhost,{server.EndPoint.Port}",
695+
InitialCatalog = "master",
696+
ConnectTimeout = 30,
697+
ConnectRetryInterval = 1,
698+
Encrypt = false,
699+
FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}",
700+
};
701+
using SqlConnection connection = new(builder.ConnectionString);
702+
703+
// Asserts async open with explicit partner still avoids failover alternation.
704+
await connection.OpenAsync();
705+
706+
Assert.Equal(ConnectionState.Open, connection.State);
707+
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
708+
Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount);
709+
// The fix: login-phase errors must NOT trigger failover alternation
710+
Assert.Equal(0, failoverServer.PreLoginCount);
711+
}
712+
713+
/// <summary>
714+
/// Verifies pooled connections are not cleared and failover is not attempted when a
715+
/// login-phase transient SQL error occurs with a user-provided failover partner.
716+
/// </summary>
717+
[Theory]
718+
[InlineData(40613)]
719+
[InlineData(42108)]
720+
[InlineData(42109)]
721+
public void TransientFault_WithUserProvidedPartner_Pooling_ShouldNotClearPool_NotFailover(uint errorCode)
722+
{
723+
// With pooling enabled and a user-provided failover partner, a transient
724+
// login-token error must not clear the pool and must not contact the failover server.
725+
726+
using TdsServer failoverServer = new(
727+
new TdsServerArguments
728+
{
729+
FailoverPartner = "localhost,1234",
730+
});
731+
failoverServer.Start();
732+
733+
// Start with errors disabled so the pool warms up successfully.
734+
using TransientTdsErrorTdsServer server = new(
735+
new TransientTdsErrorTdsServerArguments()
736+
{
737+
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}",
738+
});
739+
server.Start();
740+
741+
SqlConnectionStringBuilder builder = new()
742+
{
743+
DataSource = $"localhost,{server.EndPoint.Port}",
744+
InitialCatalog = "master",
745+
ConnectTimeout = 30,
746+
ConnectRetryInterval = 1,
747+
Encrypt = SqlConnectionEncryptOption.Optional,
748+
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}",
749+
Pooling = true,
750+
};
751+
752+
// Warm up the pool.
753+
using SqlConnection warmup = new(builder.ConnectionString);
754+
warmup.Open();
755+
warmup.Close();
756+
757+
// Enable the transient error for the next login attempt.
758+
server.SetErrorBehavior(true, errorCode);
759+
760+
// ConnectRetryCount > 0 (default 1) so the client retries and succeeds.
761+
using SqlConnection connection = new(builder.ConnectionString);
762+
connection.Open();
763+
764+
Assert.Equal(ConnectionState.Open, connection.State);
765+
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
766+
// Failover server must never have been contacted.
767+
Assert.Equal(0, failoverServer.PreLoginCount);
768+
}
769+
770+
/// <summary>
771+
/// Verifies ConnectRetryCount=0 propagates login-phase transient SQL errors immediately
772+
/// and never attempts failover alternation.
773+
/// </summary>
774+
[Theory]
775+
[InlineData(40613)]
776+
[InlineData(42108)]
777+
[InlineData(42109)]
778+
public void TransientFault_RetryDisabled_WithUserProvidedPartner_ShouldFail_NotFailover(uint errorCode)
779+
{
780+
// When ConnectRetryCount = 0 and the server returns a login-phase token error,
781+
// the exception must propagate immediately and the failover partner must not be
782+
// contacted (parser state is not Closed, so the new guard must kick in).
783+
784+
using TdsServer failoverServer = new(
785+
new TdsServerArguments
786+
{
787+
FailoverPartner = "localhost:1234",
788+
});
789+
failoverServer.Start();
790+
791+
using TransientTdsErrorTdsServer server = new(
792+
new TransientTdsErrorTdsServerArguments()
793+
{
794+
IsEnabledTransientError = true,
795+
Number = errorCode,
796+
FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}",
797+
});
798+
server.Start();
799+
800+
SqlConnectionStringBuilder builder = new()
801+
{
802+
DataSource = $"localhost,{server.EndPoint.Port}",
803+
InitialCatalog = "master",
804+
ConnectTimeout = 30,
805+
ConnectRetryInterval = 1,
806+
ConnectRetryCount = 0,
807+
Encrypt = false,
808+
FailoverPartner = $"localhost:{failoverServer.EndPoint.Port}",
809+
};
810+
using SqlConnection connection = new(builder.ConnectionString);
811+
812+
// No outer connect retry is allowed, so the first transient error should surface.
813+
SqlException ex = Assert.Throws<SqlException>(() => connection.Open());
814+
815+
Assert.Equal((int)errorCode, ex.Number);
816+
Assert.Equal(ConnectionState.Closed, connection.State);
817+
// The parser was not closed (login-phase error), so the failover alternation branch
818+
// must not have been entered.
819+
Assert.Equal(0, failoverServer.PreLoginCount);
820+
}
821+
822+
/// <summary>
823+
/// Isolates the parser-state guard by using a non-fatal login error token: without the
824+
/// guard, LoginWithFailover alternates to partner; with the guard, retry stays on primary.
825+
/// </summary>
826+
[Fact]
827+
public void NonFatalTransientLoginError_WithUserProvidedPartner_ShouldRetryPrimary_NotFailover()
828+
{
829+
// This test isolates the parser-state guard added to LoginWithFailover.
830+
// We emit a transient login error with non-fatal severity so the connection
831+
// is not automatically doomed/broken by existing breakConnection logic.
832+
833+
using TdsServer failoverServer = new(
834+
new TdsServerArguments
835+
{
836+
FailoverPartner = "localhost,1234",
837+
});
838+
failoverServer.Start();
839+
840+
using TransientTdsErrorTdsServer server = new(
841+
new TransientTdsErrorTdsServerArguments()
842+
{
843+
IsEnabledTransientError = true,
844+
Number = 40613,
845+
// Use non-fatal severity so break/doom logic does not short-circuit the path.
846+
ErrorClass = 16,
847+
RepeatCount = 1,
848+
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}",
849+
});
850+
server.Start();
851+
852+
SqlConnectionStringBuilder builder = new()
853+
{
854+
DataSource = $"localhost,{server.EndPoint.Port}",
855+
InitialCatalog = "master",
856+
ConnectTimeout = 30,
857+
ConnectRetryInterval = 1,
858+
Encrypt = false,
859+
Pooling = false,
860+
FailoverPartner = $"localhost,{failoverServer.EndPoint.Port}",
861+
};
862+
863+
using SqlConnection connection = new(builder.ConnectionString);
864+
connection.Open();
865+
866+
Assert.Equal(ConnectionState.Open, connection.State);
867+
Assert.Equal($"localhost,{server.EndPoint.Port}", connection.DataSource);
868+
Assert.Equal(2, server.PreLoginCount - server.AbandonedPreLoginCount);
869+
Assert.Equal(0, failoverServer.PreLoginCount - failoverServer.AbandonedPreLoginCount);
870+
}
594871
}
595872
}

src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ private TDSMessageCollection GenerateErrorMessage(TDSMessage request)
8989
TDSUtilities.Log(Arguments.Log, "Request", request);
9090

9191
// Prepare ERROR token with the denial details
92-
TDSErrorToken errorToken = new TDSErrorToken(errorNumber, 1, 20, errorMessage);
92+
TDSErrorToken errorToken = new TDSErrorToken(errorNumber, 1, Arguments.ErrorClass, errorMessage);
9393

9494
// Log response
9595
TDSUtilities.Log(Arguments.Log, "Response", errorToken);

src/Microsoft.Data.SqlClient/tests/tools/TDS/TDS.Servers/TransientTdsErrorTdsServerArguments.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,12 @@ public class TransientTdsErrorTdsServerArguments : TdsServerArguments
2525
/// The number of times the transient error should be raised.
2626
/// </summary>
2727
public int RepeatCount { get; set; } = 1;
28+
29+
/// <summary>
30+
/// Error class (severity) to emit in ERROR token.
31+
/// Fatal starts at 20 (TdsEnums.FATAL_ERROR_CLASS), so use values below 20
32+
/// to avoid automatic break/doom behavior in the client.
33+
/// </summary>
34+
public byte ErrorClass { get; set; } = 20;
2835
}
2936
}

0 commit comments

Comments
 (0)