Skip to content

Commit cf4e4a1

Browse files
Kevin Westphal6d7a
authored andcommitted
feat: cache auth check api calls
`check_client_api_key` hits `/v1/metadata` and `/v1/api_tokens/self` on every invocation. When a caller runs bursty scans — notably the VSCode extension, which rescans on every save — this is two API round-trips per scan even though the key state rarely changes between them. This commit introduces a cache for auth calls.
1 parent 1c0e523 commit cf4e4a1

8 files changed

Lines changed: 807 additions & 42 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Changed
2+
3+
- Successful API key checks are now cached on disk for 5 minutes.

ggshield/cmd/auth/logout.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import ggshield.verticals.hmsl.utils as hmsl_utils
88
from ggshield.cmd.utils.common_options import add_common_options
99
from ggshield.cmd.utils.context_obj import ContextObj
10+
from ggshield.core import auth_check_cache
1011
from ggshield.core.client import create_client
1112
from ggshield.core.config import Config
1213
from ggshield.core.config.token_store import get_token_store
@@ -74,6 +75,7 @@ def logout(config: Config, instance_url: str, revoke: bool) -> None:
7475
revoke_token(config, instance_url)
7576
hmsl_utils.remove_token_from_disk()
7677
delete_account_config(config, instance_url)
78+
auth_check_cache.invalidate()
7779

7880
click.echo(
7981
f"Successfully logged out for instance {instance_url}\n\n"

ggshield/core/auth_check_cache.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import hashlib
2+
import logging
3+
import os
4+
import tempfile
5+
import time
6+
from dataclasses import dataclass
7+
from pathlib import Path
8+
from typing import Optional
9+
10+
import yaml
11+
from pygitguardian.models import RemediationMessages, SecretScanPreferences, TokenScope
12+
13+
from ggshield.core.dirs import get_cache_dir
14+
15+
16+
logger = logging.getLogger(__name__)
17+
18+
# How long a successful auth check (metadata + token scopes) stays valid.
19+
# Short enough that revoked tokens and scope changes propagate quickly;
20+
# long enough that a burst of scans (e.g. IDE on-save) shares one check.
21+
TTL_SECONDS = 300
22+
23+
24+
def _cache_file() -> Path:
25+
# Resolved lazily so GG_CACHE_DIR overrides (tests, sandboxed envs) are honored.
26+
return get_cache_dir() / "auth_check.yaml"
27+
28+
29+
@dataclass
30+
class CachedAuthCheck:
31+
# If not None, these are the scopes fetched from /v1/api_tokens/self.
32+
# None means we haven't fetched scopes yet (e.g. from an auth-login flow
33+
# where no specific scopes were required).
34+
scopes: Optional[set[TokenScope]]
35+
secrets_engine_version: Optional[str]
36+
maximum_payload_size: Optional[int]
37+
secret_scan_preferences: Optional[SecretScanPreferences]
38+
remediation_messages: Optional[RemediationMessages]
39+
40+
41+
def _key_hash(instance_url: str, api_key: str) -> str:
42+
return hashlib.sha256(f"{instance_url}\0{api_key}".encode("utf-8")).hexdigest()
43+
44+
45+
def load(instance_url: str, api_key: str) -> Optional[CachedAuthCheck]:
46+
"""Return the cached auth check for this (instance, key) pair, or None on miss."""
47+
path = _cache_file()
48+
try:
49+
with path.open("r") as f:
50+
data = yaml.safe_load(f)
51+
except FileNotFoundError:
52+
return None
53+
except (OSError, yaml.YAMLError) as e:
54+
logger.warning("Could not load auth check cache: %s", repr(e))
55+
return None
56+
57+
if not isinstance(data, dict):
58+
return None
59+
if data.get("key_hash") != _key_hash(instance_url, api_key):
60+
return None
61+
if data.get("expires_at", 0) < time.time():
62+
return None
63+
64+
raw_scopes = data.get("scopes")
65+
scopes: Optional[set[TokenScope]]
66+
if raw_scopes is None:
67+
scopes = None
68+
else:
69+
scopes = set()
70+
for scope_str in raw_scopes:
71+
try:
72+
scopes.add(TokenScope(scope_str))
73+
except ValueError:
74+
logger.debug("Ignoring unknown cached scope: '%s'", scope_str)
75+
76+
raw_version = data.get("secrets_engine_version")
77+
secrets_engine_version = raw_version if isinstance(raw_version, str) else None
78+
79+
raw_max_payload = data.get("maximum_payload_size")
80+
maximum_payload_size = raw_max_payload if isinstance(raw_max_payload, int) else None
81+
82+
raw_ssp = data.get("secret_scan_preferences")
83+
secret_scan_preferences: Optional[SecretScanPreferences] = None
84+
if isinstance(raw_ssp, dict):
85+
try:
86+
secret_scan_preferences = SecretScanPreferences(**raw_ssp)
87+
except TypeError as e:
88+
logger.debug("Ignoring malformed cached secret_scan_preferences: %s", e)
89+
90+
raw_rm = data.get("remediation_messages")
91+
remediation_messages: Optional[RemediationMessages] = None
92+
if isinstance(raw_rm, dict):
93+
try:
94+
remediation_messages = RemediationMessages(**raw_rm)
95+
except TypeError as e:
96+
logger.debug("Ignoring malformed cached remediation_messages: %s", e)
97+
98+
return CachedAuthCheck(
99+
scopes=scopes,
100+
secrets_engine_version=secrets_engine_version,
101+
maximum_payload_size=maximum_payload_size,
102+
secret_scan_preferences=secret_scan_preferences,
103+
remediation_messages=remediation_messages,
104+
)
105+
106+
107+
def store(
108+
instance_url: str,
109+
api_key: str,
110+
scopes: Optional[set[TokenScope]],
111+
secrets_engine_version: Optional[str],
112+
maximum_payload_size: Optional[int],
113+
secret_scan_preferences: Optional[SecretScanPreferences],
114+
remediation_messages: Optional[RemediationMessages],
115+
) -> None:
116+
"""Record a successful auth check.
117+
118+
Pass scopes=None if token scopes were not fetched (only metadata was checked).
119+
"""
120+
payload = {
121+
"key_hash": _key_hash(instance_url, api_key),
122+
"scopes": (None if scopes is None else sorted(s.value for s in scopes)),
123+
"secrets_engine_version": secrets_engine_version,
124+
"maximum_payload_size": maximum_payload_size,
125+
"secret_scan_preferences": (
126+
None
127+
if secret_scan_preferences is None
128+
else {
129+
"maximum_document_size": secret_scan_preferences.maximum_document_size,
130+
"maximum_documents_per_scan": secret_scan_preferences.maximum_documents_per_scan,
131+
}
132+
),
133+
"remediation_messages": (
134+
None
135+
if remediation_messages is None
136+
else {
137+
"pre_commit": remediation_messages.pre_commit,
138+
"pre_push": remediation_messages.pre_push,
139+
"pre_receive": remediation_messages.pre_receive,
140+
}
141+
),
142+
"expires_at": int(time.time()) + TTL_SECONDS,
143+
}
144+
145+
path = _cache_file()
146+
try:
147+
path.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
148+
# Re-apply on a pre-existing dir, since mkdir's mode is ignored when the
149+
# dir already exists. Keeps the auth-check file out of reach of other
150+
# local users on shared POSIX hosts.
151+
try:
152+
os.chmod(path.parent, 0o700)
153+
except OSError as e:
154+
logger.debug("Could not tighten cache dir permissions: %s", repr(e))
155+
# Atomic write: a concurrent ggshield process would otherwise be able to
156+
# observe a truncated YAML file. tempfile in the same directory so
157+
# os.replace stays on one filesystem.
158+
fd, tmp_path = tempfile.mkstemp(
159+
prefix=".auth_check.", suffix=".tmp", dir=path.parent
160+
)
161+
try:
162+
os.chmod(tmp_path, 0o600)
163+
with os.fdopen(fd, "w") as f:
164+
yaml.dump(payload, f, indent=2, default_flow_style=False)
165+
os.replace(tmp_path, path)
166+
except Exception:
167+
try:
168+
os.unlink(tmp_path)
169+
except OSError:
170+
pass
171+
raise
172+
except OSError as e:
173+
logger.warning("Could not save auth check cache: %s", repr(e))
174+
175+
176+
def invalidate() -> None:
177+
"""Drop the cached auth check, typically after a 401 from any API call."""
178+
try:
179+
_cache_file().unlink(missing_ok=True)
180+
except OSError as e:
181+
logger.warning("Could not invalidate auth check cache: %s", repr(e))

ggshield/core/client.py

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from requests import Session
1111
from requests.adapters import HTTPAdapter
1212

13-
from . import ui
13+
from . import auth_check_cache, ui
1414
from .config import Config
1515
from .constants import DEFAULT_INSTANCE_URL
1616
from .errors import (
@@ -173,32 +173,60 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
173173
(either it is invalid or unset). Raises UnexpectedError if the API is down.
174174
175175
If required_scopes is not empty, also checks that the API key has the required scopes.
176+
177+
Successful checks are cached on disk for a short TTL so bursty callers (e.g. the
178+
GitGuardian VSCode extension) do not re-hit /v1/metadata and
179+
/v1/api_tokens/self on every invocation.
176180
"""
177-
try:
178-
response = client.read_metadata()
179-
except requests.exceptions.ConnectionError as e:
180-
raise ServiceUnavailableError(
181-
message="Failed to connect to GitGuardian server. Check your"
182-
f" instance URL settings.\nDetails: {e}.",
183-
)
181+
cached = auth_check_cache.load(client.base_uri, client.api_key)
182+
if cached is not None:
183+
# Restore the full set of side effects read_metadata() would normally
184+
# apply to the client.
185+
if cached.secrets_engine_version is not None:
186+
client.secrets_engine_version = cached.secrets_engine_version
187+
if cached.maximum_payload_size is not None:
188+
client.maximum_payload_size = cached.maximum_payload_size
189+
if cached.secret_scan_preferences is not None:
190+
client.secret_scan_preferences = cached.secret_scan_preferences
191+
if cached.remediation_messages is not None:
192+
client.remediation_messages = cached.remediation_messages
184193

185-
if response is None:
186-
# None means success
187-
pass
188-
elif response.status_code == 401:
189-
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
190-
elif response.status_code == 404:
191-
raise UnexpectedError(
192-
"The server returned a 404 error. Check your instance URL settings.",
193-
)
194-
elif response.status_code is not None and 500 <= response.status_code < 600:
195-
raise ServiceUnavailableError(
196-
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
197-
)
198-
else:
199-
raise UnexpectedError(
200-
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
201-
)
194+
if cached is not None and (
195+
not required_scopes
196+
or (cached.scopes is not None and required_scopes <= cached.scopes)
197+
):
198+
return
199+
200+
if cached is None:
201+
try:
202+
response = client.read_metadata()
203+
except requests.exceptions.ConnectionError as e:
204+
raise ServiceUnavailableError(
205+
message="Failed to connect to GitGuardian server. Check your"
206+
f" instance URL settings.\nDetails: {e}.",
207+
)
208+
209+
if response is None:
210+
# None means success
211+
pass
212+
elif response.status_code == 401:
213+
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
214+
elif response.status_code == 404:
215+
raise UnexpectedError(
216+
"The server returned a 404 error. Check your instance URL settings.",
217+
)
218+
elif response.status_code is not None and 500 <= response.status_code < 600:
219+
raise ServiceUnavailableError(
220+
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
221+
)
222+
else:
223+
raise UnexpectedError(
224+
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
225+
)
226+
227+
api_scopes: Optional[set[TokenScope]] = (
228+
cached.scopes if cached is not None else None
229+
)
202230

203231
# Check token scopes if required_scopes is not empty
204232
if required_scopes:
@@ -207,6 +235,10 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
207235
if not isinstance(response, (Detail, APITokensResponse)):
208236
raise UnexpectedError("Unexpected api_tokens response")
209237
elif isinstance(response, Detail):
238+
if response.status_code == 401:
239+
# Drop the cache for re-verification
240+
auth_check_cache.invalidate()
241+
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
210242
raise UnexpectedError(response.detail)
211243

212244
# Build set of API scopes, ignoring unknown ones for forward compatibility
@@ -220,3 +252,13 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
220252
missing_scopes = required_scopes - api_scopes
221253
if missing_scopes:
222254
raise MissingScopesError(list(missing_scopes))
255+
256+
auth_check_cache.store(
257+
client.base_uri,
258+
client.api_key,
259+
api_scopes,
260+
client.secrets_engine_version,
261+
client.maximum_payload_size,
262+
client.secret_scan_preferences,
263+
client.remediation_messages,
264+
)

ggshield/core/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from marshmallow import ValidationError
1414
from pygitguardian.models import Detail, TokenScope
1515

16+
from ggshield.core import auth_check_cache
1617
from ggshield.core.text_utils import pluralize
1718
from ggshield.utils.git_shell import InvalidGitRefError
1819

@@ -213,6 +214,9 @@ def handle_api_error(detail: Detail) -> None:
213214
logger.error("status_code=%s", detail.status_code)
214215
logger.debug("detail=%s", detail.detail)
215216
if detail.status_code == 401:
217+
# The cached metadata/scopes check must not keep masking a revoked
218+
# or rotated API key within its TTL.
219+
auth_check_cache.invalidate()
216220
raise click.UsageError(detail.detail)
217221
if detail.status_code is None:
218222
raise UnexpectedError(f"Scanning failed: {detail.detail}")

tests/unit/cmd/auth/test_logout.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from requests.exceptions import ConnectionError
66

77
from ggshield.__main__ import cli
8+
from ggshield.core import auth_check_cache
89
from ggshield.core.config import Config
910
from ggshield.core.config.token_store import KeyringTokenStore, reset_token_store
1011
from ggshield.core.constants import DEFAULT_INSTANCE_URL
@@ -192,6 +193,34 @@ def test_logout_deletes_from_keyring(self, monkeypatch, cli_fs_runner):
192193
mock_store.delete_token.assert_called_once_with(DEFAULT_INSTANCE_URL)
193194
reset_token_store()
194195

196+
def test_logout_invalidates_auth_check_cache(self, monkeypatch, cli_fs_runner):
197+
"""
198+
GIVEN a saved instance and a populated auth-check cache
199+
WHEN running the logout command
200+
THEN the auth-check cache file is removed so a stale "still valid" hit
201+
cannot survive across the logout
202+
"""
203+
post_mock = Mock(return_value=Mock(status_code=204, ok=True))
204+
monkeypatch.setattr("ggshield.core.client.GGClient.post", post_mock)
205+
206+
add_instance_config()
207+
208+
auth_check_cache.store(
209+
"https://api.gitguardian.com",
210+
"test-api-key",
211+
None,
212+
None,
213+
None,
214+
None,
215+
None,
216+
)
217+
assert auth_check_cache._cache_file().exists()
218+
219+
exit_code, output = self.run_cmd(cli_fs_runner)
220+
221+
assert exit_code == ExitCode.SUCCESS, output
222+
assert not auth_check_cache._cache_file().exists()
223+
195224
@staticmethod
196225
def run_cmd(
197226
cli_fs_runner,

0 commit comments

Comments
 (0)