Skip to content

Commit 1f13907

Browse files
author
Kevin Westphal
committed
feat: cache auth check api calls
`check_client_api_key` hits `/v1/metadata` and `/v1/api_tokens/self` on every invocation. When a caller runs bursty scans — notably the VSCode extension, which rescans on every save — this is two API round-trips per scan even though the key state rarely changes between them. This commit introduces a cache for auth calls.
1 parent c9b52ca commit 1f13907

6 files changed

Lines changed: 365 additions & 25 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Changed
2+
3+
- Successful API key checks are now cached on disk for 5 minutes.

ggshield/core/auth_check_cache.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import hashlib
2+
import logging
3+
import time
4+
from dataclasses import dataclass
5+
from pathlib import Path
6+
from typing import Optional
7+
8+
import yaml
9+
from pygitguardian.models import TokenScope
10+
11+
from ggshield.core.config.utils import load_yaml_dict, save_yaml_dict
12+
from ggshield.core.dirs import get_cache_dir
13+
from ggshield.core.errors import UnexpectedError
14+
15+
16+
logger = logging.getLogger(__name__)
17+
18+
# How long a successful auth check (metadata + token scopes) stays valid.
19+
# Short enough that revoked tokens and scope changes propagate quickly;
20+
# long enough that a burst of scans (e.g. IDE on-save) shares one check.
21+
TTL_SECONDS = 300
22+
23+
24+
def _cache_file() -> Path:
25+
# Resolved lazily so GG_CACHE_DIR overrides (tests, sandboxed envs) are honored.
26+
return get_cache_dir() / "auth_check.yaml"
27+
28+
29+
@dataclass
30+
class CachedAuthCheck:
31+
# The API key has been verified against /v1/metadata and is usable.
32+
metadata_verified: bool
33+
# If not None, these are the scopes fetched from /v1/api_tokens/self.
34+
# None means we haven't fetched scopes yet (e.g. from an auth-login flow
35+
# where no specific scopes were required).
36+
scopes: Optional[set[TokenScope]]
37+
38+
39+
def _key_hash(instance_url: str, api_key: str) -> str:
40+
return hashlib.sha256(f"{instance_url}\0{api_key}".encode("utf-8")).hexdigest()[:16]
41+
42+
43+
def load(instance_url: str, api_key: str) -> Optional[CachedAuthCheck]:
44+
"""Return the cached auth check for this (instance, key) pair, or None on miss."""
45+
try:
46+
data = load_yaml_dict(_cache_file())
47+
except (OSError, ValueError, yaml.YAMLError) as e:
48+
logger.warning("Could not load auth check cache: %s", repr(e))
49+
return None
50+
51+
if not data:
52+
return None
53+
if data.get("key_hash") != _key_hash(instance_url, api_key):
54+
return None
55+
if data.get("expires_at", 0) < time.time():
56+
return None
57+
58+
raw_scopes = data.get("scopes")
59+
scopes: Optional[set[TokenScope]]
60+
if raw_scopes is None:
61+
scopes = None
62+
else:
63+
scopes = set()
64+
for scope_str in raw_scopes:
65+
try:
66+
scopes.add(TokenScope(scope_str))
67+
except ValueError:
68+
logger.debug("Ignoring unknown cached scope: '%s'", scope_str)
69+
70+
return CachedAuthCheck(metadata_verified=True, scopes=scopes)
71+
72+
73+
def store(instance_url: str, api_key: str, scopes: Optional[set[TokenScope]]) -> None:
74+
"""Record a successful auth check.
75+
76+
Pass scopes=None if token scopes were not fetched (only metadata was checked).
77+
"""
78+
try:
79+
save_yaml_dict(
80+
{
81+
"key_hash": _key_hash(instance_url, api_key),
82+
"scopes": (None if scopes is None else sorted(s.value for s in scopes)),
83+
"expires_at": int(time.time()) + TTL_SECONDS,
84+
},
85+
_cache_file(),
86+
restricted=True,
87+
)
88+
except (OSError, UnexpectedError) as e:
89+
logger.warning("Could not save auth check cache: %s", repr(e))
90+
91+
92+
def invalidate() -> None:
93+
"""Drop the cached auth check, typically after a 401 from any API call."""
94+
try:
95+
_cache_file().unlink(missing_ok=True)
96+
except OSError as e:
97+
logger.warning("Could not invalidate auth check cache: %s", repr(e))

ggshield/core/client.py

Lines changed: 45 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from requests import Session
1010
from requests.adapters import HTTPAdapter
1111

12-
from . import ui
12+
from . import auth_check_cache, ui
1313
from .config import Config
1414
from .constants import DEFAULT_INSTANCE_URL
1515
from .errors import (
@@ -117,32 +117,50 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
117117
(either it is invalid or unset). Raises UnexpectedError if the API is down.
118118
119119
If required_scopes is not empty, also checks that the API key has the required scopes.
120+
121+
Successful checks are cached on disk for a short TTL so bursty callers (e.g. the
122+
GitGuardian VSCode extension) do not re-hit /v1/metadata and
123+
/v1/api_tokens/self on every invocation.
120124
"""
121-
try:
122-
response = client.read_metadata()
123-
except requests.exceptions.ConnectionError as e:
124-
raise UnexpectedError(
125-
"Failed to connect to GitGuardian server. Check your"
126-
f" instance URL settings.\nDetails: {e}."
127-
)
125+
cached = auth_check_cache.load(client.base_uri, client.api_key)
126+
if cached is not None and (
127+
not required_scopes
128+
or (cached.scopes is not None and required_scopes <= cached.scopes)
129+
):
130+
return
131+
132+
# The cache either missed or does not prove the scopes we need. We still get to
133+
# skip /v1/metadata when the cache told us the key was recently verified.
134+
if cached is None or not cached.metadata_verified:
135+
try:
136+
response = client.read_metadata()
137+
except requests.exceptions.ConnectionError as e:
138+
raise UnexpectedError(
139+
"Failed to connect to GitGuardian server. Check your"
140+
f" instance URL settings.\nDetails: {e}."
141+
)
128142

129-
if response is None:
130-
# None means success
131-
pass
132-
elif response.status_code == 401:
133-
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
134-
elif response.status_code == 404:
135-
raise UnexpectedError(
136-
"The server returned a 404 error. Check your instance URL settings.",
137-
)
138-
elif response.status_code is not None and 500 <= response.status_code < 600:
139-
raise ServiceUnavailableError(
140-
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
141-
)
142-
else:
143-
raise UnexpectedError(
144-
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
145-
)
143+
if response is None:
144+
# None means success
145+
pass
146+
elif response.status_code == 401:
147+
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
148+
elif response.status_code == 404:
149+
raise UnexpectedError(
150+
"The server returned a 404 error. Check your instance URL settings.",
151+
)
152+
elif response.status_code is not None and 500 <= response.status_code < 600:
153+
raise ServiceUnavailableError(
154+
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
155+
)
156+
else:
157+
raise UnexpectedError(
158+
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
159+
)
160+
161+
api_scopes: Optional[set[TokenScope]] = (
162+
cached.scopes if cached is not None else None
163+
)
146164

147165
# Check token scopes if required_scopes is not empty
148166
if required_scopes:
@@ -164,3 +182,5 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
164182
missing_scopes = required_scopes - api_scopes
165183
if missing_scopes:
166184
raise MissingScopesError(list(missing_scopes))
185+
186+
auth_check_cache.store(client.base_uri, client.api_key, api_scopes)

ggshield/core/errors.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,11 @@ def handle_api_error(detail: Detail) -> None:
213213
logger.error("status_code=%s", detail.status_code)
214214
logger.debug("detail=%s", detail.detail)
215215
if detail.status_code == 401:
216+
# The cached metadata/scopes check must not keep masking a revoked
217+
# or rotated API key within its TTL.
218+
from ggshield.core import auth_check_cache
219+
220+
auth_check_cache.invalidate()
216221
raise click.UsageError(detail.detail)
217222
if detail.status_code is None:
218223
raise UnexpectedError(f"Scanning failed: {detail.detail}")

0 commit comments

Comments
 (0)