Skip to content

Commit 18e9aa1

Browse files
author
Kevin Westphal
committed
feat: cache auth check api calls
`check_client_api_key` hits `/v1/metadata` and `/v1/api_tokens/self` on every invocation. When a caller runs bursty scans — notably the VSCode extension, which rescans on every save — this is two API round-trips per scan even though the key state rarely changes between them. This commit introduces a cache for auth calls.
1 parent 6a2ba9c commit 18e9aa1

8 files changed

Lines changed: 807 additions & 42 deletions

File tree

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
### Changed
2+
3+
- Successful API key checks are now cached on disk for 5 minutes.

ggshield/cmd/auth/logout.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import ggshield.verticals.hmsl.utils as hmsl_utils
88
from ggshield.cmd.utils.common_options import add_common_options
99
from ggshield.cmd.utils.context_obj import ContextObj
10+
from ggshield.core import auth_check_cache
1011
from ggshield.core.client import create_client
1112
from ggshield.core.config import Config
1213
from ggshield.core.config.token_store import get_token_store
@@ -74,6 +75,7 @@ def logout(config: Config, instance_url: str, revoke: bool) -> None:
7475
revoke_token(config, instance_url)
7576
hmsl_utils.remove_token_from_disk()
7677
delete_account_config(config, instance_url)
78+
auth_check_cache.invalidate()
7779

7880
click.echo(
7981
f"Successfully logged out for instance {instance_url}\n\n"

ggshield/core/auth_check_cache.py

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
import hashlib
2+
import logging
3+
import os
4+
import tempfile
5+
import time
6+
from dataclasses import dataclass
7+
from pathlib import Path
8+
from typing import Optional
9+
10+
import yaml
11+
from pygitguardian.models import RemediationMessages, SecretScanPreferences, TokenScope
12+
13+
from ggshield.core.dirs import get_cache_dir
14+
15+
16+
logger = logging.getLogger(__name__)
17+
18+
# How long a successful auth check (metadata + token scopes) stays valid.
19+
# Short enough that revoked tokens and scope changes propagate quickly;
20+
# long enough that a burst of scans (e.g. IDE on-save) shares one check.
21+
TTL_SECONDS = 300
22+
23+
24+
def _cache_file() -> Path:
25+
# Resolved lazily so GG_CACHE_DIR overrides (tests, sandboxed envs) are honored.
26+
return get_cache_dir() / "auth_check.yaml"
27+
28+
29+
@dataclass
30+
class CachedAuthCheck:
31+
# If not None, these are the scopes fetched from /v1/api_tokens/self.
32+
# None means we haven't fetched scopes yet (e.g. from an auth-login flow
33+
# where no specific scopes were required).
34+
scopes: Optional[set[TokenScope]]
35+
secrets_engine_version: Optional[str]
36+
maximum_payload_size: Optional[int]
37+
secret_scan_preferences: Optional[SecretScanPreferences]
38+
remediation_messages: Optional[RemediationMessages]
39+
40+
41+
def _key_hash(instance_url: str, api_key: str) -> str:
42+
return hashlib.sha256(f"{instance_url}\0{api_key}".encode("utf-8")).hexdigest()
43+
44+
45+
def load(instance_url: str, api_key: str) -> Optional[CachedAuthCheck]:
46+
"""Return the cached auth check for this (instance, key) pair, or None on miss."""
47+
path = _cache_file()
48+
try:
49+
with path.open("r") as f:
50+
data = yaml.safe_load(f)
51+
except FileNotFoundError:
52+
return None
53+
except (OSError, yaml.YAMLError) as e:
54+
logger.warning("Could not load auth check cache: %s", repr(e))
55+
return None
56+
57+
if not isinstance(data, dict):
58+
return None
59+
if data.get("key_hash") != _key_hash(instance_url, api_key):
60+
return None
61+
if data.get("expires_at", 0) < time.time():
62+
return None
63+
64+
raw_scopes = data.get("scopes")
65+
scopes: Optional[set[TokenScope]]
66+
if raw_scopes is None:
67+
scopes = None
68+
else:
69+
scopes = set()
70+
for scope_str in raw_scopes:
71+
try:
72+
scopes.add(TokenScope(scope_str))
73+
except ValueError:
74+
logger.debug("Ignoring unknown cached scope: '%s'", scope_str)
75+
76+
raw_version = data.get("secrets_engine_version")
77+
secrets_engine_version = raw_version if isinstance(raw_version, str) else None
78+
79+
raw_max_payload = data.get("maximum_payload_size")
80+
maximum_payload_size = raw_max_payload if isinstance(raw_max_payload, int) else None
81+
82+
raw_ssp = data.get("secret_scan_preferences")
83+
secret_scan_preferences: Optional[SecretScanPreferences] = None
84+
if isinstance(raw_ssp, dict):
85+
try:
86+
secret_scan_preferences = SecretScanPreferences(**raw_ssp)
87+
except TypeError as e:
88+
logger.debug("Ignoring malformed cached secret_scan_preferences: %s", e)
89+
90+
raw_rm = data.get("remediation_messages")
91+
remediation_messages: Optional[RemediationMessages] = None
92+
if isinstance(raw_rm, dict):
93+
try:
94+
remediation_messages = RemediationMessages(**raw_rm)
95+
except TypeError as e:
96+
logger.debug("Ignoring malformed cached remediation_messages: %s", e)
97+
98+
return CachedAuthCheck(
99+
scopes=scopes,
100+
secrets_engine_version=secrets_engine_version,
101+
maximum_payload_size=maximum_payload_size,
102+
secret_scan_preferences=secret_scan_preferences,
103+
remediation_messages=remediation_messages,
104+
)
105+
106+
107+
def store(
108+
instance_url: str,
109+
api_key: str,
110+
scopes: Optional[set[TokenScope]],
111+
secrets_engine_version: Optional[str],
112+
maximum_payload_size: Optional[int],
113+
secret_scan_preferences: Optional[SecretScanPreferences],
114+
remediation_messages: Optional[RemediationMessages],
115+
) -> None:
116+
"""Record a successful auth check.
117+
118+
Pass scopes=None if token scopes were not fetched (only metadata was checked).
119+
"""
120+
payload = {
121+
"key_hash": _key_hash(instance_url, api_key),
122+
"scopes": (None if scopes is None else sorted(s.value for s in scopes)),
123+
"secrets_engine_version": secrets_engine_version,
124+
"maximum_payload_size": maximum_payload_size,
125+
"secret_scan_preferences": (
126+
None
127+
if secret_scan_preferences is None
128+
else {
129+
"maximum_document_size": secret_scan_preferences.maximum_document_size,
130+
"maximum_documents_per_scan": secret_scan_preferences.maximum_documents_per_scan,
131+
}
132+
),
133+
"remediation_messages": (
134+
None
135+
if remediation_messages is None
136+
else {
137+
"pre_commit": remediation_messages.pre_commit,
138+
"pre_push": remediation_messages.pre_push,
139+
"pre_receive": remediation_messages.pre_receive,
140+
}
141+
),
142+
"expires_at": int(time.time()) + TTL_SECONDS,
143+
}
144+
145+
path = _cache_file()
146+
try:
147+
path.parent.mkdir(parents=True, exist_ok=True, mode=0o700)
148+
# Re-apply on a pre-existing dir, since mkdir's mode is ignored when the
149+
# dir already exists. Keeps the auth-check file out of reach of other
150+
# local users on shared POSIX hosts.
151+
try:
152+
os.chmod(path.parent, 0o700)
153+
except OSError as e:
154+
logger.debug("Could not tighten cache dir permissions: %s", repr(e))
155+
# Atomic write: a concurrent ggshield process would otherwise be able to
156+
# observe a truncated YAML file. tempfile in the same directory so
157+
# os.replace stays on one filesystem.
158+
fd, tmp_path = tempfile.mkstemp(
159+
prefix=".auth_check.", suffix=".tmp", dir=path.parent
160+
)
161+
try:
162+
os.chmod(tmp_path, 0o600)
163+
with os.fdopen(fd, "w") as f:
164+
yaml.dump(payload, f, indent=2, default_flow_style=False)
165+
os.replace(tmp_path, path)
166+
except Exception:
167+
try:
168+
os.unlink(tmp_path)
169+
except OSError:
170+
pass
171+
raise
172+
except OSError as e:
173+
logger.warning("Could not save auth check cache: %s", repr(e))
174+
175+
176+
def invalidate() -> None:
177+
"""Drop the cached auth check, typically after a 401 from any API call."""
178+
try:
179+
_cache_file().unlink(missing_ok=True)
180+
except OSError as e:
181+
logger.warning("Could not invalidate auth check cache: %s", repr(e))

ggshield/core/client.py

Lines changed: 67 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from requests import Session
1010
from requests.adapters import HTTPAdapter
1111

12-
from . import ui
12+
from . import auth_check_cache, ui
1313
from .config import Config
1414
from .constants import DEFAULT_INSTANCE_URL
1515
from .errors import (
@@ -130,32 +130,60 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
130130
(either it is invalid or unset). Raises UnexpectedError if the API is down.
131131
132132
If required_scopes is not empty, also checks that the API key has the required scopes.
133+
134+
Successful checks are cached on disk for a short TTL so bursty callers (e.g. the
135+
GitGuardian VSCode extension) do not re-hit /v1/metadata and
136+
/v1/api_tokens/self on every invocation.
133137
"""
134-
try:
135-
response = client.read_metadata()
136-
except requests.exceptions.ConnectionError as e:
137-
raise ServiceUnavailableError(
138-
message="Failed to connect to GitGuardian server. Check your"
139-
f" instance URL settings.\nDetails: {e}.",
140-
)
138+
cached = auth_check_cache.load(client.base_uri, client.api_key)
139+
if cached is not None:
140+
# Restore the full set of side effects read_metadata() would normally
141+
# apply to the client.
142+
if cached.secrets_engine_version is not None:
143+
client.secrets_engine_version = cached.secrets_engine_version
144+
if cached.maximum_payload_size is not None:
145+
client.maximum_payload_size = cached.maximum_payload_size
146+
if cached.secret_scan_preferences is not None:
147+
client.secret_scan_preferences = cached.secret_scan_preferences
148+
if cached.remediation_messages is not None:
149+
client.remediation_messages = cached.remediation_messages
150+
151+
if cached is not None and (
152+
not required_scopes
153+
or (cached.scopes is not None and required_scopes <= cached.scopes)
154+
):
155+
return
156+
157+
if cached is None:
158+
try:
159+
response = client.read_metadata()
160+
except requests.exceptions.ConnectionError as e:
161+
raise ServiceUnavailableError(
162+
message="Failed to connect to GitGuardian server. Check your"
163+
f" instance URL settings.\nDetails: {e}.",
164+
)
141165

142-
if response is None:
143-
# None means success
144-
pass
145-
elif response.status_code == 401:
146-
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
147-
elif response.status_code == 404:
148-
raise UnexpectedError(
149-
"The server returned a 404 error. Check your instance URL settings.",
150-
)
151-
elif response.status_code is not None and 500 <= response.status_code < 600:
152-
raise ServiceUnavailableError(
153-
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
154-
)
155-
else:
156-
raise UnexpectedError(
157-
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
158-
)
166+
if response is None:
167+
# None means success
168+
pass
169+
elif response.status_code == 401:
170+
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
171+
elif response.status_code == 404:
172+
raise UnexpectedError(
173+
"The server returned a 404 error. Check your instance URL settings.",
174+
)
175+
elif response.status_code is not None and 500 <= response.status_code < 600:
176+
raise ServiceUnavailableError(
177+
message=f"GitGuardian server is not responding.\nDetails: {response.detail}",
178+
)
179+
else:
180+
raise UnexpectedError(
181+
f"GitGuardian server is not responding as expected.\nDetails: {response.detail}"
182+
)
183+
184+
api_scopes: Optional[set[TokenScope]] = (
185+
cached.scopes if cached is not None else None
186+
)
159187

160188
# Check token scopes if required_scopes is not empty
161189
if required_scopes:
@@ -164,6 +192,10 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
164192
if not isinstance(response, (Detail, APITokensResponse)):
165193
raise UnexpectedError("Unexpected api_tokens response")
166194
elif isinstance(response, Detail):
195+
if response.status_code == 401:
196+
# Drop the cache for re-verification
197+
auth_check_cache.invalidate()
198+
raise APIKeyCheckError(client.base_uri, "Invalid GitGuardian API key.")
167199
raise UnexpectedError(response.detail)
168200

169201
# Build set of API scopes, ignoring unknown ones for forward compatibility
@@ -177,3 +209,13 @@ def check_client_api_key(client: GGClient, required_scopes: set[TokenScope]) ->
177209
missing_scopes = required_scopes - api_scopes
178210
if missing_scopes:
179211
raise MissingScopesError(list(missing_scopes))
212+
213+
auth_check_cache.store(
214+
client.base_uri,
215+
client.api_key,
216+
api_scopes,
217+
client.secrets_engine_version,
218+
client.maximum_payload_size,
219+
client.secret_scan_preferences,
220+
client.remediation_messages,
221+
)

ggshield/core/errors.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from marshmallow import ValidationError
1414
from pygitguardian.models import Detail, TokenScope
1515

16+
from ggshield.core import auth_check_cache
1617
from ggshield.core.text_utils import pluralize
1718
from ggshield.utils.git_shell import InvalidGitRefError
1819

@@ -213,6 +214,9 @@ def handle_api_error(detail: Detail) -> None:
213214
logger.error("status_code=%s", detail.status_code)
214215
logger.debug("detail=%s", detail.detail)
215216
if detail.status_code == 401:
217+
# The cached metadata/scopes check must not keep masking a revoked
218+
# or rotated API key within its TTL.
219+
auth_check_cache.invalidate()
216220
raise click.UsageError(detail.detail)
217221
if detail.status_code is None:
218222
raise UnexpectedError(f"Scanning failed: {detail.detail}")

tests/unit/cmd/auth/test_logout.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from requests.exceptions import ConnectionError
66

77
from ggshield.__main__ import cli
8+
from ggshield.core import auth_check_cache
89
from ggshield.core.config import Config
910
from ggshield.core.config.token_store import KeyringTokenStore, reset_token_store
1011
from ggshield.core.constants import DEFAULT_INSTANCE_URL
@@ -192,6 +193,34 @@ def test_logout_deletes_from_keyring(self, monkeypatch, cli_fs_runner):
192193
mock_store.delete_token.assert_called_once_with(DEFAULT_INSTANCE_URL)
193194
reset_token_store()
194195

196+
def test_logout_invalidates_auth_check_cache(self, monkeypatch, cli_fs_runner):
197+
"""
198+
GIVEN a saved instance and a populated auth-check cache
199+
WHEN running the logout command
200+
THEN the auth-check cache file is removed so a stale "still valid" hit
201+
cannot survive across the logout
202+
"""
203+
post_mock = Mock(return_value=Mock(status_code=204, ok=True))
204+
monkeypatch.setattr("ggshield.core.client.GGClient.post", post_mock)
205+
206+
add_instance_config()
207+
208+
auth_check_cache.store(
209+
"https://api.gitguardian.com",
210+
"test-api-key",
211+
None,
212+
None,
213+
None,
214+
None,
215+
None,
216+
)
217+
assert auth_check_cache._cache_file().exists()
218+
219+
exit_code, output = self.run_cmd(cli_fs_runner)
220+
221+
assert exit_code == ExitCode.SUCCESS, output
222+
assert not auth_check_cache._cache_file().exists()
223+
195224
@staticmethod
196225
def run_cmd(
197226
cli_fs_runner,

0 commit comments

Comments
 (0)