Skip to content

Commit 57b314e

Browse files
committed
Add metric for expiring certs
Add certificate validity check. Fail on certificate failures and reset candlepin label for Prometheus gauge. HMS-10103: add metric and alert for expiring certs in patch and update content-sources SOP.
1 parent 4019b21 commit 57b314e

7 files changed

Lines changed: 255 additions & 1 deletion

File tree

base/certutil/cert_expiry.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
package certutil
2+
3+
import (
4+
"crypto/tls"
5+
"crypto/x509"
6+
"errors"
7+
"fmt"
8+
"time"
9+
)
10+
11+
// ErrNoParseableCertificate is returned when the chain contains no entries at all. When entries exist but none of them parses, the wrapped parse error is returned instead.
12+
var ErrNoParseableCertificate = errors.New("certutil: no parseable certificate in chain")
13+
14+
// DaysTillExpiration returns whole days until the earliest NotAfter in the TLS certificate chain.
15+
// If every raw certificate fails to parse, it returns an error so callers do not treat a broken
16+
// configuration as “0 days left”.
17+
func DaysTillExpiration(certs *tls.Certificate) (int, error) {
18+
expires := time.Time{}.UTC()
19+
found := false
20+
if certs == nil {
21+
return 0, nil
22+
}
23+
var lastParseErr error
24+
for _, tlsCert := range certs.Certificate {
25+
parsed, err := x509.ParseCertificate(tlsCert)
26+
if err != nil {
27+
lastParseErr = err
28+
continue
29+
}
30+
if !found || parsed.NotAfter.Before(expires) {
31+
expires = parsed.NotAfter
32+
found = true
33+
}
34+
}
35+
if !found {
36+
if lastParseErr != nil {
37+
return 0, fmt.Errorf("certutil: parse certificate: %w", lastParseErr)
38+
}
39+
return 0, ErrNoParseableCertificate
40+
}
41+
diff := time.Until(expires)
42+
return int(diff.Hours() / 24), nil
43+
}

base/certutil/cert_expiry_test.go

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
package certutil
2+
3+
import (
4+
"crypto/rand"
5+
"crypto/rsa"
6+
"crypto/tls"
7+
"crypto/x509"
8+
"crypto/x509/pkix"
9+
"math/big"
10+
"testing"
11+
"time"
12+
13+
"github.com/stretchr/testify/assert"
14+
)
15+
16+
func TestDaysTillExpiration(t *testing.T) {
17+
key, err := rsa.GenerateKey(rand.Reader, 2048)
18+
assert.NoError(t, err)
19+
20+
notBefore := time.Now().UTC().Add(-time.Hour)
21+
notAfter := notBefore.Add(100 * 24 * time.Hour)
22+
23+
tmpl := x509.Certificate{
24+
SerialNumber: big.NewInt(1),
25+
Subject: pkix.Name{CommonName: "test"},
26+
NotBefore: notBefore,
27+
NotAfter: notAfter,
28+
}
29+
30+
der, err := x509.CreateCertificate(rand.Reader, &tmpl, &tmpl, &key.PublicKey, key)
31+
assert.NoError(t, err)
32+
33+
cert := tls.Certificate{
34+
Certificate: [][]byte{der},
35+
PrivateKey: key,
36+
}
37+
38+
days, err := DaysTillExpiration(&cert)
39+
assert.NoError(t, err)
40+
assert.GreaterOrEqual(t, days, 99)
41+
assert.LessOrEqual(t, days, 100)
42+
}
43+
44+
func TestDaysTillExpirationNil(t *testing.T) {
45+
days, err := DaysTillExpiration(nil)
46+
assert.NoError(t, err)
47+
assert.Equal(t, 0, days)
48+
}
49+
50+
func TestDaysTillExpirationUnparsableChain(t *testing.T) {
51+
cert := tls.Certificate{
52+
Certificate: [][]byte{[]byte("not valid der")},
53+
}
54+
_, err := DaysTillExpiration(&cert)
55+
assert.Error(t, err)
56+
assert.NotErrorIs(t, err, ErrNoParseableCertificate)
57+
}
58+
59+
func TestDaysTillExpirationEmptyChain(t *testing.T) {
60+
cert := tls.Certificate{Certificate: [][]byte{}}
61+
_, err := DaysTillExpiration(&cert)
62+
assert.ErrorIs(t, err, ErrNoParseableCertificate)
63+
}

base/metrics/certificate_expiry.go

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package metrics
2+
3+
import (
4+
"app/base/certutil"
5+
"app/base/utils"
6+
"crypto/tls"
7+
8+
"github.com/prometheus/client_golang/prometheus"
9+
)
10+
11+
const candlepinCertLabel = "candlepin"
12+
13+
// CertificateExpiryDays mirrors content-sources-backend certificate_expiry_days (GaugeVec by label).
14+
// It is registered only where metrics are pushed (e.g. vmaas_sync), not on every pod's default registry.
15+
var CertificateExpiryDays = prometheus.NewGaugeVec(prometheus.GaugeOpts{
16+
Help: "Number of days until the certificate expires by the certificate label",
17+
Namespace: "patchman_engine",
18+
Subsystem: "core",
19+
Name: "certificate_expiry_days",
20+
}, []string{"certificate_label"})
21+
22+
// UpdateCandlepinCertificateExpiry refreshes the candlepin series from CoreCfg (single shot; no background loop).
23+
func UpdateCandlepinCertificateExpiry() {
24+
applyCandlepinCertExpiry(CertificateExpiryDays, utils.CoreCfg.CandlepinCert, utils.CoreCfg.CandlepinKey)
25+
}
26+
27+
// applyCandlepinCertExpiry refreshes or removes the candlepin expiry series. On parse/calculation
28+
// errors it deletes the label so Prometheus does not keep a stale last-good value.
29+
func applyCandlepinCertExpiry(gauge *prometheus.GaugeVec, certPEM, keyPEM string) {
30+
if certPEM == "" || keyPEM == "" {
31+
gauge.DeleteLabelValues(candlepinCertLabel)
32+
return
33+
}
34+
cert, err := tls.X509KeyPair([]byte(certPEM), []byte(keyPEM))
35+
if err != nil {
36+
utils.LogError("err", err, "certificate_expiry: candlepin X509KeyPair")
37+
gauge.DeleteLabelValues(candlepinCertLabel)
38+
return
39+
}
40+
days, err := certutil.DaysTillExpiration(&cert)
41+
if err != nil {
42+
utils.LogError("err", err, "certificate_expiry: candlepin DaysTillExpiration")
43+
gauge.DeleteLabelValues(candlepinCertLabel)
44+
return
45+
}
46+
gauge.WithLabelValues(candlepinCertLabel).Set(float64(days))
47+
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
package metrics
2+
3+
import (
4+
"bytes"
5+
"crypto/rand"
6+
"crypto/rsa"
7+
"crypto/x509"
8+
"crypto/x509/pkix"
9+
"encoding/pem"
10+
"math/big"
11+
"testing"
12+
"time"
13+
14+
"github.com/prometheus/client_golang/prometheus"
15+
"github.com/prometheus/client_golang/prometheus/testutil"
16+
"github.com/stretchr/testify/assert"
17+
"github.com/stretchr/testify/require"
18+
)
19+
20+
func testCandlepinPEMs(t *testing.T) (certPEM, keyPEM string) {
21+
t.Helper()
22+
key, err := rsa.GenerateKey(rand.Reader, 2048)
23+
require.NoError(t, err)
24+
25+
notBefore := time.Now().UTC().Add(-time.Hour)
26+
notAfter := notBefore.Add(100 * 24 * time.Hour)
27+
tmpl := x509.Certificate{
28+
SerialNumber: big.NewInt(1),
29+
Subject: pkix.Name{CommonName: "test-candlepin"},
30+
NotBefore: notBefore,
31+
NotAfter: notAfter,
32+
}
33+
der, err := x509.CreateCertificate(rand.Reader, &tmpl, &tmpl, &key.PublicKey, key)
34+
require.NoError(t, err)
35+
36+
var certBuf, keyBuf bytes.Buffer
37+
require.NoError(t, pem.Encode(&certBuf, &pem.Block{Type: "CERTIFICATE", Bytes: der}))
38+
require.NoError(t, pem.Encode(&keyBuf, &pem.Block{Type: "RSA PRIVATE KEY", Bytes: x509.MarshalPKCS1PrivateKey(key)}))
39+
return certBuf.String(), keyBuf.String()
40+
}
41+
42+
func TestApplyCandlepinCertExpiry_setsGauge(t *testing.T) {
43+
gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
44+
Namespace: "patchman_engine",
45+
Subsystem: "core",
46+
Name: "certificate_expiry_days_test_helper",
47+
Help: "test",
48+
}, []string{"certificate_label"})
49+
50+
certPEM, keyPEM := testCandlepinPEMs(t)
51+
applyCandlepinCertExpiry(gv, certPEM, keyPEM)
52+
53+
v := testutil.ToFloat64(gv.WithLabelValues(candlepinCertLabel))
54+
assert.GreaterOrEqual(t, v, 99.0)
55+
assert.LessOrEqual(t, v, 100.0)
56+
}
57+
58+
func TestApplyCandlepinCertExpiry_badPEMDeletesSeries(t *testing.T) {
59+
gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
60+
Namespace: "patchman_engine",
61+
Subsystem: "core",
62+
Name: "certificate_expiry_days_test_helper_bad",
63+
Help: "test",
64+
}, []string{"certificate_label"})
65+
66+
certPEM, keyPEM := testCandlepinPEMs(t)
67+
reg := prometheus.NewPedanticRegistry()
68+
reg.MustRegister(gv)
69+
70+
applyCandlepinCertExpiry(gv, certPEM, keyPEM)
71+
require.NotZero(t, testutil.ToFloat64(gv.WithLabelValues(candlepinCertLabel)))
72+
73+
applyCandlepinCertExpiry(gv, "not-valid-pem", "not-valid-pem")
74+
n, err := testutil.GatherAndCount(reg)
75+
require.NoError(t, err)
76+
assert.Zero(t, n)
77+
}
78+
79+
func TestApplyCandlepinCertExpiry_missingConfigDeletesSeries(t *testing.T) {
80+
gv := prometheus.NewGaugeVec(prometheus.GaugeOpts{
81+
Namespace: "patchman_engine",
82+
Subsystem: "core",
83+
Name: "certificate_expiry_days_test_helper_clear",
84+
Help: "test",
85+
}, []string{"certificate_label"})
86+
87+
reg := prometheus.NewPedanticRegistry()
88+
reg.MustRegister(gv)
89+
90+
certPEM, keyPEM := testCandlepinPEMs(t)
91+
applyCandlepinCertExpiry(gv, certPEM, keyPEM)
92+
applyCandlepinCertExpiry(gv, "", "")
93+
n, err := testutil.GatherAndCount(reg)
94+
require.NoError(t, err)
95+
assert.Zero(t, n)
96+
}

deploy/clowdapp.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,6 +343,8 @@ objects:
343343
- {name: SSL_CERT_DIR, value: '${SSL_CERT_DIR}'}
344344
- {name: GOMEMLIMIT, value: '${GOMEMLIMIT_VMAAS_SYNC}'}
345345
- {name: POD_CONFIG, value: '${JOBS_CONFIG}'}
346+
- {name: CANDLEPIN_CERT, valueFrom: {secretKeyRef: {name: candlepin, key: cert}}}
347+
- {name: CANDLEPIN_KEY, valueFrom: {secretKeyRef: {name: candlepin, key: key}}}
346348
resources:
347349
limits: {cpu: '${CPU_LIMIT_VMAAS_SYNC}', memory: '${MEM_LIMIT_VMAAS_SYNC}'}
348350
requests: {cpu: '${CPU_REQUEST_VMAAS_SYNC}', memory: '${MEM_REQUEST_VMAAS_SYNC}'}

go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ require (
8787
github.com/json-iterator/go v1.1.12 // indirect
8888
github.com/klauspost/compress v1.18.5 // indirect
8989
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
90+
github.com/kylelemons/godebug v1.1.0 // indirect
9091
github.com/leodido/go-urn v1.4.0 // indirect
9192
github.com/lestrrat-go/option v1.0.1 // indirect
9293
github.com/mailru/easyjson v0.9.2 // indirect

tasks/vmaas_sync/metrics.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package vmaas_sync
22

33
import (
44
"app/base/database"
5+
"app/base/metrics"
56
"app/base/models"
67
"app/base/utils"
78
"app/tasks"
@@ -104,7 +105,7 @@ func Metrics() *push.Pusher {
104105
registry.MustRegister(vmaasCallCnt, storeAdvisoriesCnt, storePackagesCnt,
105106
systemsCnt, advisoriesCnt, systemAdvisoriesStats, syncDuration, messageSendDuration, packageCnt, packageNameCnt,
106107
databaseSizeBytesGaugeVec, databaseProcessesGaugeVec, systemsCntByType, tagsCntByType,
107-
advisoriesCountMismatch)
108+
advisoriesCountMismatch, metrics.CertificateExpiryDays)
108109

109110
// update advanced metrics
110111
update()
@@ -119,6 +120,7 @@ func update() {
119120
updateSystemAdvisoriesStats()
120121
updateDBMetrics()
121122
updateSystemInventoryData()
123+
metrics.UpdateCandlepinCertificateExpiry()
122124
}
123125

124126
func updateSystemMetrics() {

0 commit comments

Comments
 (0)