Skip to content

Commit a866848

Browse files
committed
backends/azure,backends/s3: metrics improvements
Added all metrics that the s3 backend has to the azure backend, and improved how they are registered in both.
1 parent 04eb9d1 commit a866848

6 files changed

Lines changed: 151 additions & 90 deletions

File tree

backends/azure/azure.go

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -307,13 +307,12 @@ func New(ctx context.Context, opt Options) (*Backend, error) {
307307

308308
if opt.CreateContainer {
309309
// Create bucket if it does not exist
310-
metricCalls.WithLabelValues("create-container").Inc()
311-
metricLastCallTimestamp.WithLabelValues("create-container").SetToCurrentTime()
312-
310+
start := time.Now()
313311
_, err := client.CreateContainer(ctx, opt.Container, &azblob.CreateContainerOptions{})
314312
if err != nil && !bloberror.HasCode(err, bloberror.ContainerAlreadyExists) {
315313
return nil, err
316314
}
315+
recordCallMetrics("create-container", start)
317316
}
318317

319318
b := &Backend{
@@ -399,6 +398,8 @@ func convertAzureError(err error) error {
399398
}
400399

401400
func (b *Backend) doList(ctx context.Context, prefix string) (simpleblob.BlobList, error) {
401+
defer recordCallMetrics("list", time.Now())
402+
402403
var blobs simpleblob.BlobList
403404

404405
// Runes to strip from blob names for GlobalPrefix
@@ -414,6 +415,7 @@ func (b *Backend) doList(ctx context.Context, prefix string) (simpleblob.BlobLis
414415
for blobPager.More() {
415416
resp, err := blobPager.NextPage(ctx)
416417
if err != nil {
418+
recordErrorMetrics("list", err)
417419
return nil, err
418420
}
419421

@@ -438,8 +440,6 @@ func (b *Backend) doList(ctx context.Context, prefix string) (simpleblob.BlobLis
438440
}
439441
}
440442

441-
metricLastCallTimestamp.WithLabelValues("list").SetToCurrentTime()
442-
443443
return blobs, nil
444444
}
445445

@@ -463,13 +463,14 @@ func (b *Backend) Load(ctx context.Context, name string) ([]byte, error) {
463463
}
464464

465465
func (b *Backend) doLoadReader(ctx context.Context, name string) (io.ReadCloser, error) {
466-
metricCalls.WithLabelValues("load").Inc()
467-
metricLastCallTimestamp.WithLabelValues("load").SetToCurrentTime()
466+
defer recordCallMetrics("load", time.Now())
468467

469468
// Download the blob's contents and ensure that the download worked properly
470469
blobDownloadResponse, err := b.client.DownloadStream(ctx, b.opt.Container, name, nil)
471470
if err = convertAzureError(err); err != nil {
472-
metricCallErrors.WithLabelValues("load").Inc()
471+
if !errors.Is(err, os.ErrNotExist) {
472+
recordErrorMetrics("load", err)
473+
}
473474
return nil, err
474475
}
475476

@@ -497,14 +498,12 @@ func (b *Backend) Store(ctx context.Context, name string, data []byte) error {
497498

498499
// doStore is a convenience wrapper around doStoreReader.
499500
func (b *Backend) doStore(ctx context.Context, name string, data []byte) (azblob.UploadStreamResponse, error) {
500-
return b.doStoreReader(ctx, name, bytes.NewReader(data), int64(len(data)))
501+
return b.doStoreReader(ctx, name, bytes.NewReader(data))
501502
}
502503

503504
// doStoreReader stores data with key name in Azure blob, using r as a source for data.
504-
// The value of size may be -1, in case the size is not known.
505-
func (b *Backend) doStoreReader(ctx context.Context, name string, r io.Reader, size int64) (azblob.UploadStreamResponse, error) {
506-
metricCalls.WithLabelValues("store").Inc()
507-
metricLastCallTimestamp.WithLabelValues("store").SetToCurrentTime()
505+
func (b *Backend) doStoreReader(ctx context.Context, name string, r io.Reader) (azblob.UploadStreamResponse, error) {
506+
defer recordCallMetrics("store", time.Now())
508507

509508
uploadStreamOptions := &azblob.UploadStreamOptions{
510509
Concurrency: b.opt.Concurrency,
@@ -513,7 +512,7 @@ func (b *Backend) doStoreReader(ctx context.Context, name string, r io.Reader, s
513512
// Perform UploadStream
514513
resp, err := b.client.UploadStream(ctx, b.opt.Container, name, r, uploadStreamOptions)
515514
if err != nil {
516-
metricCallErrors.WithLabelValues("store").Inc()
515+
recordErrorMetrics("store", err)
517516
return azblob.UploadStreamResponse{}, err
518517
}
519518

@@ -533,8 +532,7 @@ func (b *Backend) Delete(ctx context.Context, name string) error {
533532
}
534533

535534
func (b *Backend) doDelete(ctx context.Context, name string) error {
536-
metricCalls.WithLabelValues("delete").Inc()
537-
metricLastCallTimestamp.WithLabelValues("delete").SetToCurrentTime()
535+
defer recordCallMetrics("delete", time.Now())
538536

539537
_, err := b.client.DeleteBlob(ctx, b.opt.Container, name, nil)
540538

@@ -543,7 +541,7 @@ func (b *Backend) doDelete(ctx context.Context, name string) error {
543541
if errors.Is(err, os.ErrNotExist) {
544542
return nil
545543
}
546-
metricCallErrors.WithLabelValues("delete").Inc()
544+
recordErrorMetrics("delete", err)
547545
return err
548546
}
549547

backends/azure/azure_test.go

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ import (
66
"testing"
77
"time"
88

9+
"github.com/prometheus/client_golang/prometheus/testutil"
10+
"github.com/prometheus/common/expfmt"
911
"github.com/stretchr/testify/assert"
1012
"github.com/stretchr/testify/require"
1113
"github.com/testcontainers/testcontainers-go"
@@ -30,13 +32,6 @@ func getBackend(ctx context.Context, t *testing.T) (b *Backend) {
3032
t.Fatalf("failed to start container: %v", err)
3133
}
3234

33-
state, err := azuriteContainer.State(ctx)
34-
if err != nil {
35-
t.Fatalf("failed to get container state: %v", err)
36-
}
37-
38-
t.Log(state.Running)
39-
4035
serviceURL, err := azuriteContainer.BlobServiceURL(ctx)
4136
require.NoError(t, err)
4237

@@ -139,3 +134,32 @@ func TestBackend_globalPrefixAndMarker(t *testing.T) {
139134
tester.DoBackendTests(t, b)
140135
assert.NotEmpty(t, b.lastMarker)
141136
}
137+
138+
func TestMetrics(t *testing.T) {
139+
metricName := "storage_azure_call_error_by_type_total"
140+
metricCallErrorsType.Reset() // Ensure no metrics are left from other tests
141+
b := getBackend(t.Context(), t)
142+
143+
// Operations on a healthy backend do not collect any metrics.
144+
assert.Equal(t, 0, testutil.CollectAndCount(metricCallErrorsType, metricName))
145+
var (
146+
_ = b.Store(t.Context(), "my-key", []byte{})
147+
_, _ = b.Load(t.Context(), "my-key")
148+
_ = b.Delete(t.Context(), "my-key")
149+
_, _ = b.Load(t.Context(), "no-such-key") // ErrNotExist is not collected
150+
_ = b.Delete(t.Context(), "no-such-key")
151+
_, _ = b.List(t.Context(), "")
152+
)
153+
assert.Equal(t, 0, testutil.CollectAndCount(metricCallErrorsType, metricName))
154+
155+
// A failed operation generates metrics.
156+
savedContainer := b.opt.Container
157+
b.opt.Container = "non-existent-container"
158+
assert.Error(t, b.Store(t.Context(), "file-in-non-existent-container", []byte{})) // Fails due to missing bucket
159+
b.opt.Container = savedContainer
160+
assert.Equal(t, 1, testutil.CollectAndCount(metricCallErrorsType, metricName))
161+
p, err := testutil.CollectAndFormat(metricCallErrorsType, expfmt.TypeProtoCompact, metricName)
162+
require.NoError(t, err)
163+
assert.Regexp(t, `label:{name:"method"\s+value:"store"}`, string(p))
164+
assert.Regexp(t, `label:{name:"error"\s+value:"ContainerNotFound"}`, string(p))
165+
}

backends/azure/metrics.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
package azure
22

33
import (
4+
"context"
5+
"errors"
6+
"net"
7+
"net/url"
8+
"time"
9+
10+
"github.com/Azure/azure-sdk-for-go/sdk/azcore"
411
"github.com/prometheus/client_golang/prometheus"
512
)
613

@@ -26,10 +33,55 @@ var (
2633
},
2734
[]string{"method"},
2835
)
36+
metricCallErrorsType = prometheus.NewCounterVec(
37+
prometheus.CounterOpts{
38+
Name: "storage_azure_call_error_by_type_total",
39+
Help: "Azure API call errors by method and error type",
40+
},
41+
[]string{"method", "error"},
42+
)
43+
metricCallHistogram = prometheus.NewHistogramVec(
44+
prometheus.HistogramOpts{
45+
Name: "storage_azure_call_duration_seconds",
46+
Help: "Azure API call duration by method",
47+
Buckets: []float64{0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10, 30, 60},
48+
},
49+
[]string{"method"},
50+
)
2951
)
3052

53+
func recordCallMetrics(method string, start time.Time) {
54+
metricCalls.WithLabelValues(method).Inc()
55+
metricLastCallTimestamp.WithLabelValues(method).SetToCurrentTime()
56+
metricCallHistogram.WithLabelValues(method).Observe(time.Since(start).Seconds())
57+
}
58+
59+
func recordErrorMetrics(method string, err error) {
60+
if err == nil {
61+
return
62+
}
63+
errorLabel := "Unknown"
64+
var dnsErr *net.DNSError
65+
var urlErr *url.Error
66+
var azErr *azcore.ResponseError
67+
switch {
68+
case errors.Is(err, context.DeadlineExceeded):
69+
errorLabel = "Timeout"
70+
case errors.As(err, &dnsErr):
71+
errorLabel = "DNSError"
72+
case errors.As(err, &urlErr):
73+
errorLabel = "URLError"
74+
case errors.As(err, &azErr):
75+
errorLabel = azErr.ErrorCode
76+
}
77+
metricCallErrors.WithLabelValues(method).Inc()
78+
metricCallErrorsType.WithLabelValues(method, errorLabel).Inc()
79+
}
80+
3181
func init() {
3282
prometheus.MustRegister(metricLastCallTimestamp)
3383
prometheus.MustRegister(metricCalls)
3484
prometheus.MustRegister(metricCallErrors)
85+
prometheus.MustRegister(metricCallErrorsType)
86+
prometheus.MustRegister(metricCallHistogram)
3587
}

backends/azure/stream.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func (b *Backend) NewWriter(ctx context.Context, name string) (io.WriteCloser, e
4040
// if the writing end of the pipe is closed.
4141
// It is okay to write to w.info from this goroutine
4242
// because it will only be used after w.donePipe is closed.
43-
w.info, err = w.backend.doStoreReader(w.ctx, w.name, pr, -1)
43+
w.info, err = w.backend.doStoreReader(w.ctx, w.name, pr)
4444
_ = pr.CloseWithError(err) // Always returns nil.
4545
close(w.donePipe)
4646
}()

backends/s3/metrics.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
package s3
22

33
import (
4+
"context"
5+
"errors"
6+
"net"
7+
"net/url"
8+
"os"
9+
"time"
10+
11+
"github.com/minio/minio-go/v7"
412
"github.com/prometheus/client_golang/prometheus"
513
)
614

@@ -43,6 +51,39 @@ var (
4351
)
4452
)
4553

54+
func recordCallMetrics(method string, start time.Time) {
55+
metricCalls.WithLabelValues(method).Inc()
56+
metricLastCallTimestamp.WithLabelValues(method).SetToCurrentTime()
57+
metricCallHistogram.WithLabelValues(method).Observe(time.Since(start).Seconds())
58+
}
59+
60+
func recordErrorMetrics(method string, err error) {
61+
if err == nil {
62+
return
63+
}
64+
errorLabel := "Unknown"
65+
var netError *net.OpError
66+
var dnsErr *net.DNSError
67+
var urlErr *url.Error
68+
errRes := minio.ToErrorResponse(err)
69+
switch {
70+
case errors.Is(err, os.ErrNotExist):
71+
errorLabel = "NotFound"
72+
case errors.Is(err, context.DeadlineExceeded),
73+
errors.Is(err, ErrClientTimeout),
74+
(errors.As(err, &netError) && netError.Timeout()):
75+
errorLabel = "Timeout"
76+
case errors.As(err, &dnsErr):
77+
errorLabel = "DNSError"
78+
case errors.As(err, &urlErr):
79+
errorLabel = "URLError"
80+
case errRes.Code != "":
81+
errorLabel = errRes.Code
82+
}
83+
metricCallErrors.WithLabelValues(method).Inc()
84+
metricCallErrorsType.WithLabelValues(method, errorLabel).Inc()
85+
}
86+
4687
func init() {
4788
prometheus.MustRegister(metricLastCallTimestamp)
4889
prometheus.MustRegister(metricCalls)

0 commit comments

Comments
 (0)