Add cache support for FMI keys: isolate cached access tokens by an extended cache key (ext_cache_key)

4gust · 4gust · commit 791161dd2148 · 2026-02-23T10:58:27.000Z
diff --git a/msal/application.py b/msal/application.py
@@ -15,7 +15,7 @@
 from .mex import send_request as mex_send_request
 from .wstrust_request import send_request as wst_send_request
 from .wstrust_response import *
-from .token_cache import TokenCache, _get_username, _GRANT_TYPE_BROKER
+from .token_cache import TokenCache, _get_username, _GRANT_TYPE_BROKER, _compute_ext_cache_key
 import msal.telemetry
 from .region import _detect_region
 from .throttled_http_client import ThrottledHttpClient
@@ -1571,6 +1571,9 @@ def _acquire_token_silent_from_cache_and_possibly_refresh_it(
             key_id = kwargs.get("data", {}).get("key_id")
             if key_id:  # Some token types (SSH-certs, POP) are bound to a key
                 query["key_id"] = key_id
+            ext_cache_key = _compute_ext_cache_key(kwargs.get("data", {}))
+            if ext_cache_key:  # FMI tokens need cache isolation by path
+                query["ext_cache_key"] = ext_cache_key
             now = time.time()
             refresh_reason = msal.telemetry.AT_ABSENT
             for entry in self.token_cache.search(  # A generator allows us to
diff --git a/msal/authority.py b/msal/authority.py
@@ -92,11 +92,9 @@ def __init__(
         self._http_client = http_client
         self._oidc_authority_url = oidc_authority_url
         if oidc_authority_url:
-            logger.debug("Initializing with OIDC authority: %s", oidc_authority_url)
             tenant_discovery_endpoint = self._initialize_oidc_authority(
                 oidc_authority_url)
         else:
-            logger.debug("Initializing with Entra authority: %s", authority_url)
             tenant_discovery_endpoint = self._initialize_entra_authority(
                 authority_url, validate_authority, instance_discovery)
         try:
@@ -117,8 +115,6 @@ def __init__(
                 .format(authority_url)
                 ) + " Also please double check your tenant name or GUID is correct."
             raise ValueError(error_message)
-        logger.debug(
-            'openid_config("%s") = %s', tenant_discovery_endpoint, openid_config)
         self._issuer = openid_config.get('issuer')
         self.authorization_endpoint = openid_config['authorization_endpoint']
         self.token_endpoint = openid_config['token_endpoint']
diff --git a/msal/token_cache.py b/msal/token_cache.py
@@ -1,4 +1,6 @@
-﻿import json
+﻿import base64
+import hashlib
+import json
 import threading
 import time
 import logging
@@ -12,6 +14,63 @@
 logger = logging.getLogger(__name__)
 _GRANT_TYPE_BROKER = "broker"
 
+# Fields in the request data dict that are NOT included in the extended cache
+# key hash. Every other field with a truthy value IS included, because it is an
+# extra body parameter sent on the wire and must differentiate cached tokens.
+#
+# Excluded fields and reasons:
+#   - "key_id"      : Already handled as a separate cache lookup field
+#   - "token_type"  : Used for SSH-cert/POP detection; AT entry stores it separately
+#   - "req_cnf"     : Ephemeral proof-of-possession nonce, changes per request
+#   - "claims"      : Handled separately; its presence forces a token refresh
+#   - "scope"       : Already represented as "target" in the AT cache key;
+#                     also added to data only at wire-time, not at cache-lookup time
+#   - "username"    : Standard ROPC grant parameter, not an extra body parameter
+#   - "password"    : Standard ROPC grant parameter, not an extra body parameter
+#
+# Included fields (examples — any truthy-valued field NOT in this set is included):
+#   - "fmi_path"    : Federated Managed Identity credential path
+#   - any other extra body parameter; it isolates cache entries automatically
+_EXT_CACHE_KEY_EXCLUDED_FIELDS = frozenset({
+    "key_id",
+    "token_type",
+    "req_cnf",
+    "claims",
+    "scope",
+    "username",
+    "password",
+})
+
+
+def _compute_ext_cache_key(data):
+    """Compute an extended cache key hash from extra body parameters in *data*.
+
+    Every field of *data* with a truthy value is hashed (values via ``str()``),
+    EXCEPT those listed in ``_EXT_CACHE_KEY_EXCLUDED_FIELDS``, so that tokens
+    acquired with different values (e.g., different FMI paths) are cached separately.
+
+    Returns an empty string when *data* is empty or has no such fields.
+
+    The algorithm matches the Go MSAL implementation (CacheExtKeyGenerator):
+    sorted key+value pairs are concatenated and SHA256 hashed; the digest is
+    base64url encoded, stripped of "=" padding, and lowercased.
+    """
+    if not data:
+        return ""
+    cache_components = {
+        k: str(v) for k, v in data.items()
+        if k not in _EXT_CACHE_KEY_EXCLUDED_FIELDS and v
+    }
+    if not cache_components:
+        return ""
+    # Sort keys so the hash is independent of dict order (matches Go implementation)
+    key_str = "".join(
+        k + cache_components[k] for k in sorted(cache_components.keys())
+    )
+    hash_bytes = hashlib.sha256(key_str.encode("utf-8")).digest()
+    return base64.urlsafe_b64encode(hash_bytes).rstrip(b"=").decode("ascii").lower()
+
+
 def is_subdict_of(small, big):
     return dict(big, **small) == big
 
@@ -59,6 +118,7 @@ def __init__(self):
             self.CredentialType.ACCESS_TOKEN:
                 lambda home_account_id=None, environment=None, client_id=None,
                         realm=None, target=None,
+                        ext_cache_key=None,
                         # Note: New field(s) can be added here
                         #key_id=None,
                         **ignored_payload_from_a_real_token:
@@ -70,7 +130,8 @@ def __init__(self):
                         realm or "",
                         target or "",
                         #key_id or "",  # So ATs of different key_id can coexist
-                        ]).lower(),
+                        ] + ([ext_cache_key] if ext_cache_key else [])
+                        ).lower(),
             self.CredentialType.ID_TOKEN:
                 lambda home_account_id=None, environment=None, client_id=None,
                         realm=None, **ignored_payload_from_a_real_token:
@@ -98,6 +159,7 @@ def __init__(self):
     def _get_access_token(
         self,
         home_account_id, environment, client_id, realm, target,  # Together they form a compound key
+        ext_cache_key=None,
         default=None,
     ):  # O(1)
         return self._get(
@@ -108,6 +170,7 @@ def _get_access_token(
                 client_id=client_id,
                 realm=realm,
                 target=" ".join(target),
+                ext_cache_key=ext_cache_key,
                 ),
             default=default)
 
@@ -153,7 +216,8 @@ def search(self, credential_type, target=None, query=None, *, now=None):  # O(n)
         ):  # Special case for O(1) AT lookup
             preferred_result = self._get_access_token(
                 query["home_account_id"], query["environment"],
-                query["client_id"], query["realm"], target)
+                query["client_id"], query["realm"], target,
+                ext_cache_key=query.get("ext_cache_key"))
             if preferred_result and self._is_matching(
                 preferred_result, query,
                 # Needs no target_set here because it is satisfied by dict key
@@ -179,6 +243,13 @@ def search(self, credential_type, target=None, query=None, *, now=None):  # O(n)
                 if (entry != preferred_result  # Avoid yielding the same entry twice
                     and self._is_matching(entry, query, target_set=target_set)
                 ):
+                    # Cache isolation for extended cache keys (e.g., FMI path).
+                    # Entries with ext_cache_key must not match queries without one.
+                    if (credential_type == self.CredentialType.ACCESS_TOKEN
+                        and "ext_cache_key" in entry
+                        and "ext_cache_key" not in (query or {})
+                    ):
+                        continue
                     yield entry
             for at in expired_access_tokens:
                 self.remove_at(at)
@@ -278,6 +349,12 @@ def __add(self, event, now=None):
                     # So that we won't accidentally store a user's password etc.
                     "key_id",  # It happens in SSH-cert or POP scenario
                 }})
+                # Compute and store extended cache key for cache isolation
+                # (e.g., different FMI paths should have separate cache entries)
+                ext_cache_key = _compute_ext_cache_key(data)
+                
+                if ext_cache_key:
+                    at["ext_cache_key"] = ext_cache_key
                 if "refresh_in" in response:
                     refresh_in = response["refresh_in"]  # It is an integer
                     at["refresh_on"] = str(now + refresh_in)  # Schema wants a string
diff --git a/tests/test_application.py b/tests/test_application.py
@@ -805,6 +805,63 @@ def mock_post(url, headers=None, data=None, *args, **kwargs):
         self.assertEqual(result2[app._TOKEN_SOURCE], app._TOKEN_SOURCE_CACHE,
             "Second call should return token from cache")
 
+    def test_different_fmi_paths_are_cached_separately(self):
+        """Tokens acquired with different fmi_path values must NOT share cache entries."""
+        app = ConfidentialClientApplication(
+            "client_id", client_credential="secret",
+            authority="https://login.microsoftonline.com/my_tenant")
+
+        def mock_post_factory(token_value):
+            def mock_post(url, headers=None, data=None, *args, **kwargs):
+                return MinimalResponse(
+                    status_code=200, text=json.dumps({
+                        "access_token": token_value,
+                        "expires_in": 3600,
+                    }))
+            return mock_post
+
+        # Acquire token with path A; the mocked IdP response is expected back
+        result_a = app.acquire_token_for_client_with_fmi_path(
+            ["scope"], "PathA/credential", post=mock_post_factory("AT_for_path_A"))
+        self.assertEqual("AT_for_path_A", result_a["access_token"])
+
+        # Acquire token with path B (should NOT get path A's cached token)
+        result_b = app.acquire_token_for_client_with_fmi_path(
+            ["scope"], "PathB/credential", post=mock_post_factory("AT_for_path_B"))
+        self.assertEqual("AT_for_path_B", result_b["access_token"])
+        self.assertEqual(result_b[app._TOKEN_SOURCE], app._TOKEN_SOURCE_IDP,
+            "Different FMI path should NOT return a cached token from another path")
+
+        # Verify path A still returns its own cached token (the new mock must not be used)
+        result_a2 = app.acquire_token_for_client_with_fmi_path(
+            ["scope"], "PathA/credential", post=mock_post_factory("should_not_be_used"))
+        self.assertEqual("AT_for_path_A", result_a2["access_token"])
+        self.assertEqual(result_a2[app._TOKEN_SOURCE], app._TOKEN_SOURCE_CACHE,
+            "Same FMI path should return cached token")
+
+    def test_fmi_token_does_not_interfere_with_non_fmi_token(self):
+        """FMI-cached tokens must not be returned for non-FMI acquire_token_for_client."""
+        app = ConfidentialClientApplication(
+            "client_id", client_credential="secret",
+            authority="https://login.microsoftonline.com/my_tenant")
+
+        # First, cache a token via FMI path (same scope as the regular call below)
+        app.acquire_token_for_client_with_fmi_path(
+            ["scope"], "some/fmi/path",
+            post=lambda url, **kwargs: MinimalResponse(
+                status_code=200, text=json.dumps({
+                    "access_token": "FMI_AT", "expires_in": 3600})))
+
+        # Now call regular acquire_token_for_client (no FMI path) — should NOT get FMI token
+        result = app.acquire_token_for_client(
+            ["scope"],
+            post=lambda url, **kwargs: MinimalResponse(
+                status_code=200, text=json.dumps({
+                    "access_token": "regular_AT", "expires_in": 3600})))
+        self.assertEqual("regular_AT", result["access_token"])
+        self.assertEqual(result[app._TOKEN_SOURCE], app._TOKEN_SOURCE_IDP,
+            "Non-FMI call should not return FMI-cached token")
+
 
 @patch(_OIDC_DISCOVERY, new=_OIDC_DISCOVERY_MOCK)
 class TestRemoveTokensForClient(unittest.TestCase):
diff --git a/tests/test_ccs.py b/tests/test_ccs.py
@@ -61,11 +61,14 @@ def test_acquire_token_silent(self):
                 "CSS routing info should be derived from home_account_id")
 
     def test_acquire_token_by_username_password(self):
+        import warnings
         app = msal.ClientApplication("client_id")
         username = "johndoe@contoso.com"
         with patch.object(app.http_client, "post", return_value=MinimalResponse(
                 status_code=400, text='{"error": "mock"}')) as mocked_method:
-            app.acquire_token_by_username_password(username, "password", ["scope"])
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", DeprecationWarning)
+                app.acquire_token_by_username_password(username, "password", ["scope"])
             self.assertEqual(
                 "upn:" + username,
                 mocked_method.call_args[1].get("headers", {}).get('X-AnchorMailbox'),
diff --git a/tests/test_fmi_e2e.py b/tests/test_fmi_e2e.py
@@ -149,5 +149,135 @@ def test_acquire_with_assertion_callback_and_fmi_path(self):
             "cache might not be working correctly")
 
 
+class TestFMICacheIsolation(LabBasedTestCase):
+    """Test that tokens acquired with different FMI paths are cached separately.
+
+    This verifies the cache key extensibility: two calls with different fmi_path
+    values should NOT return each other's cached tokens.
+    """
+
+    def test_different_fmi_paths_are_cached_separately(self):
+        app = msal.ConfidentialClientApplication(
+            _FMI_CLIENT_ID,
+            client_credential=get_client_certificate(),
+            authority=_AUTHORITY_URL,
+            http_client=MinimalHttpClient(),
+        )
+        scopes = [_FMI_SCOPE]
+
+        # Acquire token with path A
+        result_a = app.acquire_token_for_client_with_fmi_path(
+            scopes, "PathA/credential")
+        self.assertIn("access_token", result_a,
+            "Path A acquisition failed: {}: {}".format(
+                result_a.get("error"), result_a.get("error_description")))
+
+        # Acquire token with path B — should NOT get path A's cached token
+        result_b = app.acquire_token_for_client_with_fmi_path(
+            scopes, "PathB/credential")
+        self.assertIn("access_token", result_b,
+            "Path B acquisition failed: {}: {}".format(
+                result_b.get("error"), result_b.get("error_description")))
+        self.assertNotEqual(
+            result_b.get("token_source"), "cache",
+            "Different FMI path should NOT return cached token from another path")
+
+        # Verify path A still returns its own cached token (same access_token as before)
+        result_a2 = app.acquire_token_for_client_with_fmi_path(
+            scopes, "PathA/credential")
+        self.assertIn("access_token", result_a2)
+        self.assertEqual(
+            result_a2.get("token_source"), "cache",
+            "Same FMI path should return cached token")
+        self.assertEqual(result_a["access_token"], result_a2["access_token"])
+
+    def test_fmi_token_does_not_interfere_with_non_fmi_token(self):
+        app = msal.ConfidentialClientApplication(
+            _FMI_CLIENT_ID,
+            client_credential=get_client_certificate(),
+            authority=_AUTHORITY_URL,
+            http_client=MinimalHttpClient(),
+        )
+        scopes = [_FMI_SCOPE]
+
+        # Cache a token via FMI path (_FMI_PATH)
+        fmi_result = app.acquire_token_for_client_with_fmi_path(scopes, _FMI_PATH)
+        self.assertIn("access_token", fmi_result)
+
+        # Regular acquire_token_for_client (same scopes, no FMI path) should NOT get the FMI token
+        regular_result = app.acquire_token_for_client(scopes)
+        self.assertIn("access_token", regular_result,
+            "Regular call failed: {}: {}".format(
+                regular_result.get("error"), regular_result.get("error_description")))
+        self.assertNotEqual(
+            regular_result.get("token_source"), "cache",
+            "Non-FMI call should not return FMI-cached token")
+
+
+class TestFMICacheInspection(LabBasedTestCase):
+    """Acquire tokens with two different FMI paths and inspect the underlying
+    cache to verify the entries are correctly isolated."""
+
+    def test_two_fmi_paths_produce_separate_cache_entries(self):
+        app = msal.ConfidentialClientApplication(
+            _FMI_CLIENT_ID,
+            client_credential=get_client_certificate(),
+            authority=_AUTHORITY_URL,
+            http_client=MinimalHttpClient(),
+        )
+        scopes = [_FMI_SCOPE]
+        path_a = "PathAlpha/Credential"
+        path_b = "PathBeta/Credential"
+
+        # 1. Acquire token with path A
+        result_a = app.acquire_token_for_client_with_fmi_path(scopes, path_a)
+        self.assertIn("access_token", result_a,
+            "Path A acquisition failed: {}: {}".format(
+                result_a.get("error"), result_a.get("error_description")))
+        token_a = result_a["access_token"]
+
+        # 2. Acquire token with path B
+        result_b = app.acquire_token_for_client_with_fmi_path(scopes, path_b)
+        self.assertIn("access_token", result_b,
+            "Path B acquisition failed: {}: {}".format(
+                result_b.get("error"), result_b.get("error_description")))
+        token_b = result_b["access_token"]
+
+        # Tokens should be different (same scopes, but a different FMI path each)
+        self.assertNotEqual(token_a, token_b,
+            "Tokens for different FMI paths should differ")
+
+        # 3. Inspect cache: there should be exactly 2 AccessToken entries
+        cache = app.token_cache._cache
+        at_entries = cache.get("AccessToken", {})
+        # Filter to our client_id + scope prefix (text before the first "/") to avoid noise
+        our_entries = {
+            k: v for k, v in at_entries.items()
+            if v.get("client_id") == _FMI_CLIENT_ID
+            and _FMI_SCOPE.split("/")[0] in v.get("target", "")
+        }
+        self.assertEqual(2, len(our_entries),
+            "Cache should contain exactly 2 AT entries for our client, "
+            "got {}: {}".format(len(our_entries), list(our_entries.keys())))
+
+        # 4. Each entry must have a non-empty ext_cache_key, and they must differ
+        ext_keys = [v.get("ext_cache_key") for v in our_entries.values()]
+        for ek in ext_keys:
+            self.assertTrue(ek, "Each FMI cache entry must have a non-empty ext_cache_key")
+        self.assertNotEqual(ext_keys[0], ext_keys[1],
+            "ext_cache_key values for different FMI paths must differ")
+
+        # 5. Verify each path still returns its own cached token
+        cached_a = app.acquire_token_for_client_with_fmi_path(scopes, path_a)
+        self.assertEqual("cache", cached_a.get("token_source"))
+        self.assertEqual(token_a, cached_a["access_token"],
+            "Path A should return its own cached token")
+
+        cached_b = app.acquire_token_for_client_with_fmi_path(scopes, path_b)
+        self.assertEqual("cache", cached_b.get("token_source"))
+        self.assertEqual(token_b, cached_b["access_token"],
+            "Path B should return its own cached token")
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_token_cache.py b/tests/test_token_cache.py