Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# 1.25.5-alpine3.23
FROM golang@sha256:26111811bc967321e7b6f852e914d14bede324cd1accb7f81811929a6a57fea9 AS builder
FROM golang@sha256:d9b2e14101f27ec8d09674cd01186798d227bb0daec90e032aeb1cd22ac0f029 AS builder

WORKDIR /app
COPY go.mod go.sum ./
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ The metrics exposed beyond the default Prometheus metrics are:
* `deptracker_post_record_hard_fail`: the number of failures to
persist a record via the HTTP API (either an irrecoverable error or
all retries are exhausted).
* `deptracker_post_record_client_error`: the number of client errors;
these are never retried or reprocessed.
Comment thread
kommendorkapten marked this conversation as resolved.
Outdated

## License

Expand Down
14 changes: 14 additions & 0 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,20 @@ func (c *Controller) recordContainer(ctx context.Context, pod *corev1.Pod, conta
)

if err := c.apiClient.PostOne(ctx, record); err != nil {
// Make sure to not retry on client error messages
var clientErr *deploymentrecord.ClientError
if errors.As(err, &clientErr) {
slog.Warn("Failed to post record",
"event_type", eventType,
"name", record.Name,
"deployment_name", record.DeploymentName,
"status", record.Status,
"digest", record.Digest,
"error", err,
)
return nil
}

slog.Error("Failed to post record",
"event_type", eventType,
"name", record.Name,
Expand Down
19 changes: 16 additions & 3 deletions pkg/deploymentrecord/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,19 @@ func WithAPIToken(token string) ClientOption {
}
}

// ClientError wraps an error caused by a client-side failure (PostOne
// returns it for HTTP 4xx responses other than 429). Such an error is
// permanent, so the request must not be retried. Callers can detect it
// with errors.As and reach the underlying error through Unwrap.
type ClientError struct {
	err error
}

// Error returns the wrapped error's message prefixed with
// "client_error: ". It never panics: a nil receiver or a ClientError
// without a wrapped error both yield "client_error: <nil>".
func (c *ClientError) Error() string {
	if c == nil {
		return "client_error: <nil>"
	}
	// %v calls Error() on a non-nil error and prints "<nil>" otherwise,
	// so the zero value is safe to format.
	return fmt.Sprintf("client_error: %v", c.err)
}

// Unwrap returns the wrapped error so that errors.Is and errors.As can
// inspect the underlying cause. It returns nil for a nil receiver.
func (c *ClientError) Unwrap() error {
	if c == nil {
		return nil
	}
	return c.err
}

// PostOne posts a single deployment record to the GitHub deployment
// records API.
func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
Expand Down Expand Up @@ -129,11 +142,11 @@ func (c *Client) PostOne(ctx context.Context, record *DeploymentRecord) error {
// Don't retry on client errors (4xx) except for 429
// (rate limit)
if resp.StatusCode >= 400 && resp.StatusCode < 500 && resp.StatusCode != 429 {
metrics.PostDeploymentRecordHardFail.Inc()
slog.Error("irrecoverable error, aborting",
metrics.PostDeploymentRecordClientError.Inc()
slog.Warn("client error, aborting",
"attempt", attempt,
"error", lastErr)
Comment thread
kommendorkapten marked this conversation as resolved.
return lastErr
return &ClientError{err: lastErr}
}
metrics.PostDeploymentRecordSoftFail.Inc()
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/metrics/prom.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,12 @@ var (
Help: "The total number of hard post failures",
},
)

//nolint: revive
PostDeploymentRecordClientError = promauto.NewCounter(
prometheus.CounterOpts{
Name: "deptracker_post_record_client_error",
Help: "The total number of client failures",
Comment thread
kommendorkapten marked this conversation as resolved.
Outdated
},
)
)