Skip to content

Commit 6dc89ca

Browse files
authored
Client logging and metrics (#60)
* wip: improve logging and metrics * refactor client error logging, change not_saved metric to no_attestation, add rate_limited metric * fix lint findings * add record digest to no artifact debug message, add container name to log messages * improve documentation wording * address comments * add resp_msg to 404 logs
1 parent 3cfd319 commit 6dc89ca

File tree

6 files changed

+427
-16
lines changed

6 files changed

+427
-16
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,11 @@ The metrics exposed beyond the default Prometheus metrics are:
184184
outgoing HTTP POST to upload the deployment record.
185185
* `deptracker_post_record_ok`: the number of successful deployment
186186
record uploads.
187+
* `deptracker_post_record_rate_limited`: the number of post attempts
188+
that were rate limited.
189+
* `deptracker_post_record_no_attestation`: the number of attempts
190+
that resulted in no matching attestation for the container digest
191+
(404 "no artifacts found" responses).
187192
* `deptracker_post_record_soft_fail`: the number of recoverable failed
188193
attempts to upload the deployment record.
189194
* `deptracker_post_record_hard_fail`: the number of failures to

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ require (
3232
github.com/google/uuid v1.6.0 // indirect
3333
github.com/josharian/intern v1.0.0 // indirect
3434
github.com/json-iterator/go v1.1.12 // indirect
35+
github.com/kylelemons/godebug v1.1.0 // indirect
3536
github.com/mailru/easyjson v0.7.7 // indirect
3637
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
3738
github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect

internal/controller/controller.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,12 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
471471
)
472472

473473
if err := c.apiClient.PostOne(ctx, record); err != nil {
474+
// No artifact attestation was found: treat this as a non-error and return
475+
var noArtifactErr *deploymentrecord.NoArtifactError
476+
if errors.As(err, &noArtifactErr) {
477+
return nil
478+
}
479+
474480
// Make sure to not retry on client error messages
475481
var clientErr *deploymentrecord.ClientError
476482
if errors.As(err, &clientErr) {

pkg/deploymentrecord/client.go

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,19 @@ func (c *ClientError) Unwrap() error {
160160
return c.err
161161
}
162162

163+
// NoArtifactError is returned by PostOne when the API responds with 404, which is taken to mean that no artifact attestation was found for the record's digest.
164+
type NoArtifactError struct {
165+
err error
166+
}
167+
168+
func (n *NoArtifactError) Error() string {
169+
return fmt.Sprintf("no artifact found: %s", n.err.Error())
170+
}
171+
172+
func (n *NoArtifactError) Unwrap() error {
173+
return n.err
174+
}
175+
163176
// PostOne posts a single deployment record to the GitHub deployment
164177
// records API.
165178
func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
@@ -249,34 +262,64 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
249262
}
250263

251264
// Read the response body for error logging, then drain and close it so the connection can be reused
252-
body, _ := io.ReadAll(resp.Body)
265+
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
266+
_, _ = io.Copy(io.Discard, resp.Body)
253267
_ = resp.Body.Close()
254268

255-
lastErr = fmt.Errorf("unexpected status code: %d", resp.StatusCode)
256-
257-
// Don't retry on client errors (4xx) except for 429
258-
// (rate limit)
259-
if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 {
269+
switch {
270+
case resp.StatusCode == 404:
271+
// No artifact found
272+
dtmetrics.PostDeploymentRecordNoAttestation.Inc()
273+
slog.Debug("no artifact attestation found, no record created",
274+
"attempt", attempt,
275+
"status_code", resp.StatusCode,
276+
"container_name", record.Name,
277+
"resp_msg", string(respBody),
278+
"digest", record.Digest,
279+
)
280+
return &NoArtifactError{err: fmt.Errorf("no attestation found for %s", record.Digest)}
281+
case resp.StatusCode >= 400 && resp.StatusCode < 500:
282+
if resp.Header.Get("retry-after") != "" || resp.Header.Get("x-ratelimit-remaining") == "0" {
283+
// Rate limited — retry with backoff
284+
// Could be 403 or 429
285+
dtmetrics.PostDeploymentRecordRateLimited.Inc()
286+
slog.Warn("rate limited, retrying",
287+
"attempt", attempt,
288+
"status_code", resp.StatusCode,
289+
"retry_after", resp.Header.Get("Retry-After"),
290+
"container_name", record.Name,
291+
"resp_msg", string(respBody),
292+
)
293+
lastErr = fmt.Errorf("rate limited, attempt %d", attempt)
294+
continue
295+
}
296+
// Don't retry client errors that are not rate-limit responses
260297
dtmetrics.PostDeploymentRecordClientError.Inc()
261298
slog.Warn("client error, aborting",
262299
"attempt", attempt,
263-
"error", lastErr,
264300
"status_code", resp.StatusCode,
265-
"msg", string(body),
301+
"container_name", record.Name,
302+
"resp_msg", string(respBody),
303+
)
304+
return &ClientError{err: fmt.Errorf("unexpected client err with status code %d", resp.StatusCode)}
305+
default:
306+
// Retry with backoff
307+
dtmetrics.PostDeploymentRecordSoftFail.Inc()
308+
slog.Debug("retriable error",
309+
"attempt", attempt,
310+
"status_code", resp.StatusCode,
311+
"container_name", record.Name,
312+
"resp_msg", string(respBody),
266313
)
267-
return &ClientError{err: lastErr}
314+
lastErr = fmt.Errorf("server error, attempt %d", attempt)
268315
}
269-
dtmetrics.PostDeploymentRecordSoftFail.Inc()
270-
slog.Debug("retriable server error",
271-
"attempt", attempt,
272-
"status_code", resp.StatusCode,
273-
"msg", string(body),
274-
)
275316
}
276317

277318
dtmetrics.PostDeploymentRecordHardFail.Inc()
278319
slog.Error("all retries exhausted",
279320
"count", c.retries,
280-
"error", lastErr)
321+
"error", lastErr,
322+
"container_name", record.Name,
323+
)
281324
return fmt.Errorf("all retries exhausted: %w", lastErr)
282325
}

0 commit comments

Comments
 (0)