From 9887ece059852362a83420b27b86d8176a909d9c Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Wed, 4 Mar 2026 14:07:13 +0000 Subject: [PATCH 1/9] Fix for #2804 --- docs/instance-discovery-rules.md | 324 ++++++++++++++++++ .../Discovery/InstanceDiscoveryManager.cs | 12 +- .../PublicApiTests/InstanceDiscoveryTests.cs | 56 +++ 3 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 docs/instance-discovery-rules.md diff --git a/docs/instance-discovery-rules.md b/docs/instance-discovery-rules.md new file mode 100644 index 0000000000..92f956d55a --- /dev/null +++ b/docs/instance-discovery-rules.md @@ -0,0 +1,324 @@ +# Instance Discovery Rules for MSAL + +## Purpose + +Instance discovery resolves an authority host (e.g., `login.microsoftonline.com`) to its **metadata**: preferred network host, preferred cache host, and a list of aliases. This metadata enables SSO across aliased environments and ensures tokens are sent to the correct endpoint. + +This document describes the rules implemented in MSAL.NET to aid reimplementation in other MSAL libraries. + +--- + +## 1. Core Data Model + +Instance discovery produces a single entry per authority environment: + +``` +InstanceDiscoveryMetadataEntry: + preferred_network: string # host to use for token requests + preferred_cache: string # host to use as cache key + aliases: string[] # all equivalent hosts (used for SSO, cache lookups) +``` + +A successful network response returns an array of these entries: + +```json +{ + "tenant_discovery_endpoint": "https://login.microsoftonline.com/{tenant}/.well-known/openid-configuration", + "metadata": [ + { + "preferred_network": "login.microsoftonline.com", + "preferred_cache": "login.windows.net", + "aliases": ["login.microsoftonline.com", "login.windows.net", "login.microsoft.com", "sts.windows.net"] + } + ] +} +``` + +--- + +## 2. Applicability + +| Authority Type | Instance Discovery Supported | +|---|---| +| AAD (Entra ID) | ✅ Yes | +| B2C | ❌ No — use self-entry | +| ADFS | ❌ No — use self-entry | +| CIAM / dSTS | ❌ No — use self-entry | + +A **self-entry** means: `preferred_network = preferred_cache = aliases = [configured_authority_host]`. + +**Rule:** If the authority type does not support instance discovery, skip all providers and immediately return a self-entry. + +--- + +## 3. Known Environments (Hardcoded) + +MSAL maintains a static, hardcoded list of known cloud environments and their metadata. This avoids network calls for common clouds. + +| Cloud | Aliases | Preferred Network | Preferred Cache | +|---|---|---|---| +| **Public** | `login.microsoftonline.com`, `login.windows.net`, `login.microsoft.com`, `sts.windows.net` | `login.microsoftonline.com` | `login.windows.net` | +| **China** | `login.partner.microsoftonline.cn`, `login.chinacloudapi.cn` | `login.partner.microsoftonline.cn` | `login.partner.microsoftonline.cn` | +| **US Gov** | `login.microsoftonline.us`, `login.usgovcloudapi.net` | `login.microsoftonline.us` | `login.microsoftonline.us` | +| **Germany (legacy)** | `login.microsoftonline.de` | `login.microsoftonline.de` | `login.microsoftonline.de` | +| **US (login-us)** | `login-us.microsoftonline.com` | `login-us.microsoftonline.com` | `login-us.microsoftonline.com` | +| **PPE** | `login.windows-ppe.net`, `sts.windows-ppe.net`, `login.microsoft-ppe.com` | `login.windows-ppe.net` | `login.windows-ppe.net` | +| **Bleu (FR)** | `login.sovcloud-identity.fr` | `login.sovcloud-identity.fr` | `login.sovcloud-identity.fr` | +| **Delos (DE)** | `login.sovcloud-identity.de` | `login.sovcloud-identity.de` | `login.sovcloud-identity.de` | +| **GovSG** | `login.sovcloud-identity.sg` | `login.sovcloud-identity.sg` | `login.sovcloud-identity.sg` | + +**Known environment check for the known metadata provider**: The known metadata provider is only usable when **all** environments already present in the token cache are themselves known. If any cached environment is unknown, the known provider must be skipped (because the network may return richer alias data). + +--- + +## 4. Discovery Endpoint Selection + +When a network call is needed, MSAL must choose which host to call: + +| Authority Host | Discovery Endpoint Host | +|---|---| +| Known environment (e.g., `login.microsoftonline.com`) | Same host: `https://{authority_host}/common/discovery/instance` | +| Unknown environment (e.g., `login.microsoft.new`) | Fallback to default trusted host: `https://login.microsoftonline.com/common/discovery/instance` | + +The query parameters are: +``` +GET https://{discovery_host}/common/discovery/instance + ?api-version=1.1 + &authorization_endpoint=https://{authority_host}/{tenant}/oauth2/v2.0/authorize +``` + +--- + +## 5. Provider Resolution Order + +### 5.1 Full Flow (GetMetadataEntryAsync — used during token acquisition) + +Providers are consulted in strict priority order. The first non-null result wins. + +``` +1. [If instance discovery is disabled (WithInstanceDiscovery(false))] + → Check region discovery provider (regions are not affected by the disable flag) + → If still null, return self-entry. STOP. + +2. Region discovery provider + +3. Network call (FetchNetworkMetadataOrFallback) + → On success: cache all entries by alias in the network cache + → On "invalid_instance" error: see §6 + → On any other error (404, 502, network failure, etc.): see §7 + +4. [If still null] Log warning, create self-entry, cache it in network cache +``` + +### 5.2 Cache-Preferring Flow (GetMetadataEntryTryAvoidNetworkAsync — used during token acquisition with cache check) + +This flow tries to avoid network calls when possible: + +``` +1. Region discovery provider + +2. [If instance discovery is disabled] → return self-entry + +3. Network cache (static, populated from prior network calls) + +4. Known metadata provider (hardcoded, but only if all cached environments are known) + +5. Full flow (§5.1) + +6. [If still null] Return self-entry +``` + +### 5.3 Offline Flow (GetMetadataEntryAvoidNetwork — used for GetAccounts/AcquireTokenSilent) + +No network calls are ever made: + +``` +1. [If instance discovery is enabled]: + a. Network cache + b. Known metadata provider (with null existing environments → always usable) + +2. [If still null] Return self-entry +``` + +--- + +## 6. Error Handling: `invalid_instance` + +When the discovery endpoint returns an `invalid_instance` error (the AAD server-side error `AADSTS50049`), this means the authority genuinely does not exist. + +| `ValidateAuthority` setting | Behavior | +|---|---| +| `true` (default) | **Throw** `MsalServiceException` with error code `invalid_instance`. Do NOT cache. | +| `false` | Return a self-entry. Continue without validation. | + +--- + +## 7. Error Handling: All Other Errors (404, 502, network failures, etc.) + +When instance discovery fails with any error other than `invalid_instance`: + +1. **Try the known metadata provider** for the authority host (with empty `existingEnvironmentsInCache` to force lookup). +2. If the known provider has no entry, **create a self-entry**. +3. **Cache the result** in the network cache so that subsequent calls do NOT retry the network call. +4. Return the entry. + +**Critical rule**: The fallback entry MUST be cached. Without caching, every token request retries the failing network call, causing performance degradation and unnecessary traffic. (See [issue #5804](https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5804).) + +--- + +## 8. Caching Rules + +### 8.1 Network Cache + +- **Scope**: Static / process-wide (shared across all app instances in the same process). +- **Key**: Authority host string (case-sensitive). +- **Population**: + - On successful network response: cache each entry keyed by each of its aliases. + - On network failure (non-`invalid_instance`): cache the fallback entry keyed by the authority host. +- **Eviction**: None. Entries persist for the process lifetime. +- **Thread safety**: Must be thread-safe (concurrent dictionary or equivalent). + +### 8.2 Known Metadata Cache + +- **Scope**: Static / compiled into the library. +- **Immutable**: Never modified at runtime. +- **Guard**: Only usable when all environments in the token cache are themselves known environments. + +--- + +## 9. Interaction with Regions + +- Regional discovery (e.g., `centralus.login.microsoft.com`) runs independently of instance discovery. +- Even when instance discovery is disabled (`WithInstanceDiscovery(false)`), region discovery still runs. +- Regional metadata takes precedence when available (checked before the network call). + +--- + +## 10. Interaction with Authority Validation + +Authority validation is a separate step that runs **after** instance discovery: + +1. Instance discovery resolves metadata (preferred network, aliases). +2. If `ValidateAuthority` is true AND instance discovery is enabled: + - Validate the authority against the OIDC endpoint. + - Cache successful validations by environment. + +--- + +## 11. Validation Test Matrix + +The following test scenarios should be implemented in any MSAL that supports instance discovery. Tests use HTTP mocking (no real network calls). + +### T1: Known Cloud — Discovery Happens on Same Host + +**Setup**: Authority = `https://login.microsoftonline.com/tenant` (or any known cloud host). +**Expected**: +- Instance discovery GET request goes to `https://login.microsoftonline.com/common/discovery/instance`. +- Token request goes to `https://login.microsoftonline.com/tenant/oauth2/v2.0/token`. +- Token is acquired successfully. + +**Test hosts to cover**: `login.microsoftonline.com`, `login.microsoftonline.us`, `login.microsoftonline.de`, `login.partner.microsoftonline.cn`, `login.sovcloud-identity.fr`, `login.sovcloud-identity.de`, `login.sovcloud-identity.sg`. + +### T2: Instance Discovery Disabled — No Network Discovery Call + +**Setup**: Authority = any (known or unknown), `WithInstanceDiscovery(false)`. +**Expected**: +- No GET request to the discovery endpoint. +- Token request goes directly to the configured authority. +- Token is acquired successfully. + +### T3: Unknown Cloud — Discovery Falls Back to Default Trusted Host + +**Setup**: Authority = `https://unknown.host.example/tenant` (not in known list). +**Expected**: +- Instance discovery GET request goes to `https://login.microsoftonline.com/common/discovery/instance` (the default trusted host). +- If discovery succeeds: metadata is cached, token request uses the resolved preferred network. +- If discovery fails (e.g., 404): fallback entry is created and cached, token request goes to the original authority. + +### T4: Discovery Failure (404/502) — Fallback Is Cached, No Retry + +**Setup**: Authority = unknown host. Discovery endpoint returns 404 or 502. +**Expected**: +- First `AcquireTokenForClient`: discovery fails, fallback entry created and cached, token acquired from IdP. +- Second `AcquireTokenForClient` (same scope): token served from cache, no network calls. +- Third `AcquireTokenForClient` (different scope): token acquired from IdP, NO discovery call (fallback is cached). + +**HTTP mocks (in order)**: +1. GET discovery → 404 (or 502) +2. POST token → 200 (success) +3. POST token → 200 (success) — for the different-scope call + +If the SDK makes an unexpected discovery call, the mock framework should fail. + +### T5: Discovery Failure (Network Error / HttpRequestException) — Fallback, No Retry + +**Setup**: Authority = unknown host. Discovery endpoint throws a network-level exception. +**Expected**: Same as T4 — fallback is cached, subsequent calls don't retry. + +### T6: Discovery Failure with `invalid_instance` — Throw (ValidateAuthority=true) + +**Setup**: Authority = unknown host. Discovery endpoint returns `invalid_instance` error. `ValidateAuthority` = true (default). +**Expected**: +- `MsalServiceException` is thrown with error code `invalid_instance`. +- The known metadata provider is NOT consulted as a fallback. + +### T7: Discovery Failure with `invalid_instance` — Proceed (ValidateAuthority=false) + +**Setup**: Authority = unknown host. Discovery endpoint returns `invalid_instance`. `ValidateAuthority` = false. +**Expected**: +- No exception thrown. +- A self-entry is returned (authority used as-is). +- Token is acquired successfully. + +### T8: Known Metadata — Used When All Cache Environments Are Known + +**Setup**: Authority = `login.microsoftonline.com`. Token cache contains entries only for known environments. +**Expected**: +- Known metadata provider returns the hardcoded entry. +- No network discovery call. + +### T9: Known Metadata Bypassed — Unknown Environment in Cache + +**Setup**: Authority = `login.microsoftonline.com`. Token cache contains entries for both known and unknown environments. +**Expected**: +- Known metadata provider is bypassed (returns null). +- Network discovery call is made. + +### T10: B2C / ADFS — Instance Discovery Skipped + +**Setup**: Authority = B2C or ADFS authority. +**Expected**: +- No instance discovery call. +- Self-entry is returned. + +### T11: Airgapped Cloud with Regions — Discovery Disabled + +**Setup**: Authority = unknown host. `WithInstanceDiscovery(false)`. `WithAzureRegion("centralus")`. +**Expected**: +- No instance discovery call. +- Region discovery still runs. +- Token request goes to the regionalized endpoint. + +### T12: Airgapped Cloud with Regions — Discovery Enabled but Fails + +**Setup**: Authority = unknown host. `WithAzureRegion("centralus")`. Instance discovery call throws (e.g., `HttpRequestException`). +**Expected**: +- Instance discovery failure is swallowed. +- Token request still succeeds using the regionalized endpoint. +- No retry of instance discovery on subsequent calls. + +--- + +## 12. Implementation Checklist + +- [ ] Hardcode the known cloud metadata table (§3). +- [ ] Implement the three resolution flows (§5.1, §5.2, §5.3). +- [ ] Implement discovery endpoint host selection (§4). +- [ ] Handle `invalid_instance` separately from other errors (§6 vs §7). +- [ ] Cache fallback entries on non-`invalid_instance` failures (§7 — critical). +- [ ] Use a process-wide static cache for network results (§8.1). +- [ ] Guard known metadata usage by checking all cached environments are known (§8.2). +- [ ] Support `WithInstanceDiscovery(false)` to disable network discovery. +- [ ] Ensure region discovery is independent of instance discovery toggle (§9). +- [ ] Self-entry: `preferred_network = preferred_cache = aliases = [authority_host]` (§2). +- [ ] Implement all tests T1–T12 in §11. diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs index 64ce65928e..e8f4d0079a 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs @@ -205,12 +205,16 @@ private async Task FetchNetworkMetadataOrFallbac catch (Exception e) { requestContext.Logger.Warning( - $"[Instance Discovery] Instance Discovery failed. MSAL will continue without network instance metadata. \n\r" + + $"[Instance Discovery] Instance Discovery failed. MSAL will continue without instance metadata. \n\r" + $" Exception: {e} "); - - return + + var fallbackEntry = _knownMetadataProvider.GetMetadata(authorityUri.Host, Enumerable.Empty(), requestContext.Logger) - ?? CreateEntryForSingleAuthority(authorityUri); + ?? CreateEntryForSingleAuthority(authorityUri); + + _networkCacheMetadataProvider.AddMetadata(authorityUri.Host, fallbackEntry); + + return fallbackEntry; } } diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index 1123837ec6..c7ebb45018 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -156,5 +156,61 @@ public async Task HttpErrorsInDiscoveryShouldBeIgnored_AirgappedCould_TestAsync( // Assert happens when httpManager disposes and checks for unconsumed handlers } } + + [DataTestMethod] + [DataRow(HttpStatusCode.NotFound, DisplayName = "404")] + [DataRow(HttpStatusCode.BadGateway, DisplayName = "502")] + [WorkItem(5804)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5804 + public async Task InstanceDiscoveryFailure_IsCached_NotRetriedOnSubsequentCalls_Async(HttpStatusCode errorStatusCode) + { + using (var httpManager = new MockHttpManager(disableInternalRetries: true)) + { + // Arrange - use an authority unknown to MSAL so instance discovery goes to the network + var app = ConfidentialClientApplicationBuilder.Create(TestConstants.ClientId) + .WithAuthority(TestConstants.AuthorityNotKnownTenanted) + .WithClientSecret(TestConstants.ClientSecret) + .WithHttpManager(httpManager) + .BuildConcrete(); + + // First call: instance discovery returns an HTTP error, then token endpoint succeeds + httpManager.AddMockHandler(new MockHttpMessageHandler() + { + ExpectedMethod = HttpMethod.Get, + ResponseMessage = new HttpResponseMessage(errorStatusCode) + { + Content = new StringContent("error") + } + }); + httpManager.AddMockHandlerSuccessfulClientCredentialTokenResponseMessage(); + + var result1 = await app.AcquireTokenForClient(TestConstants.s_scope.ToArray()) + .ExecuteAsync(CancellationToken.None) + .ConfigureAwait(false); + + Assert.AreEqual(TokenSource.IdentityProvider, result1.AuthenticationResultMetadata.TokenSource); + + // Second call with the same scope - should come from cache, no network calls at all. + // No mock handlers added - if any network call happens, the test will fail. + var result2 = await app.AcquireTokenForClient(TestConstants.s_scope.ToArray()) + .ExecuteAsync(CancellationToken.None) + .ConfigureAwait(false); + + Assert.AreEqual(TokenSource.Cache, result2.AuthenticationResultMetadata.TokenSource); + + // Third call with a different scope to force a new token request from the STS. + // Only mock the token endpoint - NO instance discovery mock! + // If instance discovery were retried, the test would fail because + // MockHttpManager would receive an unexpected HTTP call. + httpManager.AddMockHandlerSuccessfulClientCredentialTokenResponseMessage(); + + var result3 = await app.AcquireTokenForClient(TestConstants.s_scopeForAnotherResource.ToArray()) + .ExecuteAsync(CancellationToken.None) + .ConfigureAwait(false); + + Assert.AreEqual(TokenSource.IdentityProvider, result3.AuthenticationResultMetadata.TokenSource); + + // MockHttpManager.Dispose() asserts all handlers were consumed and no extra calls were made + } + } } } From 0ec1142da26cb193bca1c437267fd9f1aecd5fd4 Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Thu, 5 Mar 2026 14:45:52 +0000 Subject: [PATCH 2/9] Fix for #5809 --- .../Discovery/NetworkMetadataProvider.cs | 34 ++++++++++++--- .../Internal/RequestContext.cs | 4 +- .../PublicApiTests/InstanceDiscoveryTests.cs | 42 +++++++++++++++++++ 3 files changed, 73 insertions(+), 7 deletions(-) diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs index c03134cc3f..d72883efb9 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs @@ -4,6 +4,7 @@ using System; using System.Linq; using System.Globalization; +using System.Threading; using System.Threading.Tasks; using Microsoft.Identity.Client.Http; using Microsoft.Identity.Client.OAuth2; @@ -21,15 +22,24 @@ internal class NetworkMetadataProvider : INetworkMetadataProvider private readonly IHttpManager _httpManager; private readonly INetworkCacheMetadataProvider _networkCacheMetadataProvider; private readonly Uri _userProvidedInstanceDiscoveryUri; // can be null + private readonly int _instanceDiscoveryTimeoutMs; + + /// + /// Default timeout for instance discovery network calls. + /// Prevents waiting for the full HttpClient timeout (100s) when the discovery endpoint is unreachable. + /// + internal const int DefaultInstanceDiscoveryTimeoutMs = 10_000; public NetworkMetadataProvider( IHttpManager httpManager, INetworkCacheMetadataProvider networkCacheMetadataProvider, - Uri userProvidedInstanceDiscoveryUri = null) + Uri userProvidedInstanceDiscoveryUri = null, + int instanceDiscoveryTimeoutMs = DefaultInstanceDiscoveryTimeoutMs) { _httpManager = httpManager ?? throw new ArgumentNullException(nameof(httpManager)); _networkCacheMetadataProvider = networkCacheMetadataProvider ?? throw new ArgumentNullException(nameof(networkCacheMetadataProvider)); _userProvidedInstanceDiscoveryUri = userProvidedInstanceDiscoveryUri; // can be null + _instanceDiscoveryTimeoutMs = instanceDiscoveryTimeoutMs; } public async Task GetMetadataAsync(Uri authority, RequestContext requestContext) @@ -83,11 +93,25 @@ private async Task SendInstanceDiscoveryRequestAsync( Uri instanceDiscoveryEndpoint = ComputeHttpEndpoint(authority, requestContext); - InstanceDiscoveryResponse discoveryResponse = await client - .DiscoverAadInstanceAsync(instanceDiscoveryEndpoint, requestContext) - .ConfigureAwait(false); + using var timeoutCts = new CancellationTokenSource(_instanceDiscoveryTimeoutMs); + using var linkedCts = CancellationTokenSource.CreateLinkedTokenSource( + requestContext.UserCancellationToken, timeoutCts.Token); - return discoveryResponse; + CancellationToken originalToken = requestContext.UserCancellationToken; + requestContext.UserCancellationToken = linkedCts.Token; + + try + { + InstanceDiscoveryResponse discoveryResponse = await client + .DiscoverAadInstanceAsync(instanceDiscoveryEndpoint, requestContext) + .ConfigureAwait(false); + + return discoveryResponse; + } + finally + { + requestContext.UserCancellationToken = originalToken; + } } private Uri ComputeHttpEndpoint(Uri authority, RequestContext requestContext) diff --git a/src/client/Microsoft.Identity.Client/Internal/RequestContext.cs b/src/client/Microsoft.Identity.Client/Internal/RequestContext.cs index b06cb2b568..0d43bee172 100644 --- a/src/client/Microsoft.Identity.Client/Internal/RequestContext.cs +++ b/src/client/Microsoft.Identity.Client/Internal/RequestContext.cs @@ -27,12 +27,12 @@ internal class RequestContext /// public ApiEvent ApiEvent { get; set; } - public CancellationToken UserCancellationToken { get; } + public CancellationToken UserCancellationToken { get; set; } public X509Certificate2 MtlsCertificate { get; } public bool IsAttestationRequested { get; set; } - + public bool IsMtlsRequested { get; set; } public RequestContext(IServiceBundle serviceBundle, Guid correlationId, X509Certificate2 mtlsCertificate, CancellationToken cancellationToken = default) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index c7ebb45018..2f83816339 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -212,5 +212,47 @@ public async Task InstanceDiscoveryFailure_IsCached_NotRetriedOnSubsequentCalls_ // MockHttpManager.Dispose() asserts all handlers were consumed and no extra calls were made } } + + [TestMethod] + [WorkItem(5805)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5805 + public async Task InstanceDiscoveryTimeout_FallsBackAndCachesResult_Async() + { + using (var httpManager = new MockHttpManager(disableInternalRetries: true)) + { + // Arrange - use an authority unknown to MSAL so instance discovery goes to the network + var app = ConfidentialClientApplicationBuilder.Create(TestConstants.ClientId) + .WithAuthority(TestConstants.AuthorityNotKnownTenanted) + .WithClientSecret(TestConstants.ClientSecret) + .WithHttpManager(httpManager) + .BuildConcrete(); + + // First call: instance discovery times out (TaskCanceledException is what HttpClient + // throws on timeout), then token endpoint succeeds + httpManager.AddMockHandler(new MockHttpMessageHandler() + { + ExpectedMethod = HttpMethod.Get, + ExceptionToThrow = new TaskCanceledException("simulated timeout") + }); + httpManager.AddMockHandlerSuccessfulClientCredentialTokenResponseMessage(); + + var result1 = await app.AcquireTokenForClient(TestConstants.s_scope.ToArray()) + .ExecuteAsync(CancellationToken.None) + .ConfigureAwait(false); + + Assert.AreEqual(TokenSource.IdentityProvider, result1.AuthenticationResultMetadata.TokenSource); + + // Second call with a different scope to force a new token request from the STS. + // Only mock the token endpoint — NO instance discovery mock. + // If instance discovery were retried, the test would fail because + // MockHttpManager would receive an unexpected HTTP call. + httpManager.AddMockHandlerSuccessfulClientCredentialTokenResponseMessage(); + + var result2 = await app.AcquireTokenForClient(TestConstants.s_scopeForAnotherResource.ToArray()) + .ExecuteAsync(CancellationToken.None) + .ConfigureAwait(false); + + Assert.AreEqual(TokenSource.IdentityProvider, result2.AuthenticationResultMetadata.TokenSource); + } + } } } From 019825a43f01171c3715576cb2a410749718c05f Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Tue, 31 Mar 2026 15:04:27 +0100 Subject: [PATCH 3/9] fix --- .../PublicApiTests/InstanceDiscoveryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index 2f83816339..2ed6ed51c3 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -157,7 +157,7 @@ public async Task HttpErrorsInDiscoveryShouldBeIgnored_AirgappedCould_TestAsync( } } - [DataTestMethod] + [TestMethod] [DataRow(HttpStatusCode.NotFound, DisplayName = "404")] [DataRow(HttpStatusCode.BadGateway, DisplayName = "502")] [WorkItem(5804)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5804 From 1e17761620f8a13f8983195456266a74fb564123 Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Tue, 31 Mar 2026 17:04:10 +0100 Subject: [PATCH 4/9] Fix --- .../Instance/Discovery/InstanceDiscoveryManager.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs index e8f4d0079a..2503e0df89 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs @@ -202,7 +202,7 @@ private async Task FetchNetworkMetadataOrFallbac requestContext.Logger.Error($"[Instance Discovery] Instance discovery failed - invalid instance! "); throw; } - catch (Exception e) + catch (Exception e) when (!requestContext.UserCancellationToken.IsCancellationRequested) { requestContext.Logger.Warning( $"[Instance Discovery] Instance Discovery failed. MSAL will continue without instance metadata. \n\r" + From ef4d599569441bcc2ea835c3f37a4139cc70bf25 Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Wed, 4 Mar 2026 14:07:13 +0000 Subject: [PATCH 5/9] Fix for #2804 --- .../PublicApiTests/InstanceDiscoveryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index 2ed6ed51c3..2f83816339 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -157,7 +157,7 @@ public async Task HttpErrorsInDiscoveryShouldBeIgnored_AirgappedCould_TestAsync( } } - [TestMethod] + [DataTestMethod] [DataRow(HttpStatusCode.NotFound, DisplayName = "404")] [DataRow(HttpStatusCode.BadGateway, DisplayName = "502")] [WorkItem(5804)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5804 From 8ae55566cd715bf0988aded311c83b519a1d71d3 Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Tue, 31 Mar 2026 15:04:27 +0100 Subject: [PATCH 6/9] fix --- .../PublicApiTests/InstanceDiscoveryTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index 2f83816339..2ed6ed51c3 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -157,7 +157,7 @@ public async Task HttpErrorsInDiscoveryShouldBeIgnored_AirgappedCould_TestAsync( } } - [DataTestMethod] + [TestMethod] [DataRow(HttpStatusCode.NotFound, DisplayName = "404")] [DataRow(HttpStatusCode.BadGateway, DisplayName = "502")] [WorkItem(5804)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5804 From 0a69d1d1f25778822ed5f64e5b8d5a72b2baf264 Mon Sep 17 00:00:00 2001 From: avdunn Date: Wed, 1 Apr 2026 10:01:05 -0700 Subject: [PATCH 7/9] Improve alias caching --- .../INetworkCacheMetadataProvider.cs | 7 +++ .../Discovery/InstanceDiscoveryManager.cs | 2 +- .../Discovery/NetworkCacheMetadataProvider.cs | 17 +++++++ .../Discovery/NetworkMetadataProvider.cs | 5 +- .../InstanceDiscoveryManagerTests.cs | 46 +++++++++++++++++++ 5 files changed, 72 insertions(+), 5 deletions(-) diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/INetworkCacheMetadataProvider.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/INetworkCacheMetadataProvider.cs index f30c5af975..16534dd400 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/INetworkCacheMetadataProvider.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/INetworkCacheMetadataProvider.cs @@ -8,6 +8,13 @@ namespace Microsoft.Identity.Client.Instance.Discovery internal interface INetworkCacheMetadataProvider { void AddMetadata(string environment, InstanceDiscoveryMetadataEntry entry); + + /// + /// Caches under each host in .Aliases. + /// If the entry has no aliases, falls back to . + /// + void AddMetadataWithAliases(InstanceDiscoveryMetadataEntry entry, string fallbackEnvironment); + InstanceDiscoveryMetadataEntry GetMetadata(string environment, ILoggerAdapter logger); } } diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs index 2503e0df89..513ae3211c 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/InstanceDiscoveryManager.cs @@ -212,7 +212,7 @@ private async Task FetchNetworkMetadataOrFallbac _knownMetadataProvider.GetMetadata(authorityUri.Host, Enumerable.Empty(), requestContext.Logger) ?? CreateEntryForSingleAuthority(authorityUri); - _networkCacheMetadataProvider.AddMetadata(authorityUri.Host, fallbackEntry); + _networkCacheMetadataProvider.AddMetadataWithAliases(fallbackEntry, authorityUri.Host); return fallbackEntry; } diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkCacheMetadataProvider.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkCacheMetadataProvider.cs index 0f82b13554..1916591d4d 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkCacheMetadataProvider.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkCacheMetadataProvider.cs @@ -26,6 +26,23 @@ public void AddMetadata(string environment, InstanceDiscoveryMetadataEntry entry s_cache.AddOrUpdate(environment, entry, (_, _) => entry); } + public void AddMetadataWithAliases(InstanceDiscoveryMetadataEntry entry, string fallbackEnvironment) + { + string[] aliases = entry?.Aliases; + if (aliases != null && aliases.Length > 0) + { + foreach (string alias in aliases) + { + if (!string.IsNullOrWhiteSpace(alias)) + s_cache.AddOrUpdate(alias, entry, (_, _) => entry); + } + } + else if (!string.IsNullOrWhiteSpace(fallbackEnvironment)) + { + s_cache.AddOrUpdate(fallbackEnvironment, entry, (_, _) => entry); + } + } + internal static void ResetStaticCacheForTest() { s_cache.Clear(); diff --git a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs index d72883efb9..d3477d2c00 100644 --- a/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs +++ b/src/client/Microsoft.Identity.Client/Instance/Discovery/NetworkMetadataProvider.cs @@ -67,10 +67,7 @@ private void CacheInstanceDiscoveryMetadata(InstanceDiscoveryResponse instanceDi { foreach (InstanceDiscoveryMetadataEntry entry in instanceDiscoveryResponse.Metadata ?? Enumerable.Empty()) { - foreach (string aliasedEnvironment in entry.Aliases ?? Enumerable.Empty()) - { - _networkCacheMetadataProvider.AddMetadata(aliasedEnvironment, entry); - } + _networkCacheMetadataProvider.AddMetadataWithAliases(entry, fallbackEnvironment: null); } } diff --git a/tests/Microsoft.Identity.Test.Unit/CoreTests/InstanceTests/InstanceDiscoveryManagerTests.cs b/tests/Microsoft.Identity.Test.Unit/CoreTests/InstanceTests/InstanceDiscoveryManagerTests.cs index 68ff2e638f..73a03d4bec 100644 --- a/tests/Microsoft.Identity.Test.Unit/CoreTests/InstanceTests/InstanceDiscoveryManagerTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/CoreTests/InstanceTests/InstanceDiscoveryManagerTests.cs @@ -251,6 +251,52 @@ public async Task NetworkProviderFailures_AreIgnored_Async() _knownMetadataProvider.Received(1).GetMetadata("some_env.com", Enumerable.Empty(), Arg.Any()); } + [TestMethod] + public async Task NetworkProviderFailures_FallbackEntry_AllAliasesCached_Async() + { + // Arrange — use a real cache so we can inspect what was written + var realNetworkCache = new NetworkCacheMetadataProvider(); + + var multiAliasEntry = new InstanceDiscoveryMetadataEntry() + { + Aliases = new[] { "some_env.com", "alias1.some_env.com", "alias2.some_env.com" }, + PreferredCache = "some_env.com", + PreferredNetwork = "some_env.com" + }; + + _knownMetadataProvider + .GetMetadata("some_env.com", Enumerable.Empty(), Arg.Any()) + .Returns(multiAliasEntry); + + _networkMetadataProvider + .When(x => x.GetMetadataAsync(Arg.Any(), _testRequestContext)) + .Do(_ => throw new MsalServiceException("endpoint_busy", "simulated failure")); + + _discoveryManager = new InstanceDiscoveryManager( + _harness.HttpManager, + null, + null, + _knownMetadataProvider, + realNetworkCache, + _networkMetadataProvider); + + // Act + var result = await _discoveryManager.GetMetadataEntryAsync( + AuthorityInfo.FromAuthorityUri("https://some_env.com/tid", true), + _testRequestContext) + .ConfigureAwait(false); + + Assert.AreSame(multiAliasEntry, result); + + // Assert — fallback entry must be cached under every alias so subsequent requests + // using any alias won't retry the failing network call + ILoggerAdapter logger = _testRequestContext.Logger; + Assert.IsNotNull(realNetworkCache.GetMetadata("some_env.com", logger), "Primary host should be cached"); + Assert.IsNotNull(realNetworkCache.GetMetadata("alias1.some_env.com", logger), "First alias should be cached"); + Assert.IsNotNull(realNetworkCache.GetMetadata("alias2.some_env.com", logger), "Second alias should be cached"); + Assert.IsNull(realNetworkCache.GetMetadata("unrelated_env.com", logger), "Unrelated host should not be in cache"); + } + [TestMethod] public async Task NetworkProviderFailures_WithNoKnownMetadata_ContinuesWithAuthority_Async() { From 31935643bdc76bde668e8889a6a469918b22706e Mon Sep 17 00:00:00 2001 From: avdunn Date: Mon, 13 Apr 2026 15:29:51 -0700 Subject: [PATCH 8/9] Add test for cancellation behavior --- .../PublicApiTests/InstanceDiscoveryTests.cs | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index 2ed6ed51c3..adfb536f00 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -254,5 +254,45 @@ public async Task InstanceDiscoveryTimeout_FallsBackAndCachesResult_Async() Assert.AreEqual(TokenSource.IdentityProvider, result2.AuthenticationResultMetadata.TokenSource); } } + + [TestMethod] + [WorkItem(5805)] // https://github.com/AzureAD/microsoft-authentication-library-for-dotnet/issues/5805 + public async Task InstanceDiscoveryCallerCancellation_BubblesUp_DoesNotFallBack_Async() + { + using (var httpManager = new MockHttpManager(disableInternalRetries: true)) + { + // Arrange - use an authority unknown to MSAL so instance discovery goes to the network + var app = ConfidentialClientApplicationBuilder.Create(TestConstants.ClientId) + .WithAuthority(TestConstants.AuthorityNotKnownTenanted) + .WithClientSecret(TestConstants.ClientSecret) + .WithHttpManager(httpManager) + .BuildConcrete(); + + // Caller-controlled cancellation token + using var callerCts = new CancellationTokenSource(); + + // The mock GET handler for instance discovery: cancel the caller's token + // during the request so that the linked CTS fires and the cancellation + // propagates through the real linked-token path (not via ExceptionToThrow). + httpManager.AddMockHandler(new MockHttpMessageHandler() + { + ExpectedMethod = HttpMethod.Get, + ResponseMessage = new HttpResponseMessage(HttpStatusCode.OK) + { + Content = new StringContent("{}") + }, + AdditionalRequestValidation = _ => callerCts.Cancel() + }); + + // No token endpoint mock — the request should never get that far. + // If it does, MockHttpManager will fail because the queue is empty. + + // Act & Assert — caller cancellation must bubble up, not be swallowed + // by the fallback catch in FetchNetworkMetadataOrFallbackAsync. + await AssertException.TaskThrowsAsync( + () => app.AcquireTokenForClient(TestConstants.s_scope.ToArray()) + .ExecuteAsync(callerCts.Token)).ConfigureAwait(false); + } + } } } From 9ff75f17eab5cda140639c8ff270f33c388196b3 Mon Sep 17 00:00:00 2001 From: Bogdan Gavril Date: Fri, 22 May 2026 01:07:07 +0100 Subject: [PATCH 9/9] Address PR feedback: add ExpectedUrl to instance discovery test mocks Set ExpectedUrl on all instance discovery mock handlers to ensure the tests validate that the correct endpoint is called before verifying fallback/caching behavior. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../PublicApiTests/InstanceDiscoveryTests.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs index adfb536f00..d62404ebb9 100644 --- a/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs +++ b/tests/Microsoft.Identity.Test.Unit/PublicApiTests/InstanceDiscoveryTests.cs @@ -176,6 +176,7 @@ public async Task InstanceDiscoveryFailure_IsCached_NotRetriedOnSubsequentCalls_ httpManager.AddMockHandler(new MockHttpMessageHandler() { ExpectedMethod = HttpMethod.Get, + ExpectedUrl = "https://login.microsoftonline.com/common/discovery/instance", ResponseMessage = new HttpResponseMessage(errorStatusCode) { Content = new StringContent("error") @@ -231,6 +232,7 @@ public async Task InstanceDiscoveryTimeout_FallsBackAndCachesResult_Async() httpManager.AddMockHandler(new MockHttpMessageHandler() { ExpectedMethod = HttpMethod.Get, + ExpectedUrl = "https://login.microsoftonline.com/common/discovery/instance", ExceptionToThrow = new TaskCanceledException("simulated timeout") }); httpManager.AddMockHandlerSuccessfulClientCredentialTokenResponseMessage(); @@ -277,6 +279,7 @@ public async Task InstanceDiscoveryCallerCancellation_BubblesUp_DoesNotFallBack_ httpManager.AddMockHandler(new MockHttpMessageHandler() { ExpectedMethod = HttpMethod.Get, + ExpectedUrl = "https://login.microsoftonline.com/common/discovery/instance", ResponseMessage = new HttpResponseMessage(HttpStatusCode.OK) { Content = new StringContent("{}")