Skip to content

Commit f69c583

Browse files
authored
Add wal record metrics (#7420)
Signed-off-by: SungJin1212 <tjdwls1201@gmail.com>
1 parent 7282a81 commit f69c583

File tree

3 files changed

+81
-0
lines changed

3 files changed

+81
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## master / unreleased
44
* [FEATURE] Distributor: Add experimental `-distributor.enable-start-timestamp` flag for Prometheus Remote Write 2.0. When enabled, `StartTimestamp (ST)` is ingested. #7371
55
* [FEATURE] Memberlist: Add `-memberlist.cluster-label` and `-memberlist.cluster-label-verification-disabled` to prevent accidental cross-cluster gossip joins and support rolling label rollout. #7385
6+
* [ENHANCEMENT] Ingester: Add WAL record metrics to help evaluate the effectiveness of the WAL compression type (e.g. snappy, zstd): `cortex_ingester_tsdb_wal_record_part_writes_total`, `cortex_ingester_tsdb_wal_record_parts_bytes_written_total`, and `cortex_ingester_tsdb_wal_record_bytes_saved_total`. #7420
67
* [ENHANCEMENT] Distributor: Introduce dynamic `Symbols` slice capacity pooling. #7398 #7401
78
* [ENHANCEMENT] Metrics Helper: Add native histogram support for aggregating and merging, including dual-format histogram handling that exposes both native and classic bucket formats. #7359
89
* [ENHANCEMENT] Cache: Add per-tenant TTL configuration for query results cache to control cache expiration on a per-tenant basis with separate TTLs for regular and out-of-order data. #7357

pkg/ingester/metrics.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,11 @@ type tsdbMetrics struct {
453453
checkpointCreationFail *prometheus.Desc
454454
checkpointCreationTotal *prometheus.Desc
455455

456+
// WAL record metrics (record parts written and bytes saved by compression)
457+
tsdbWALRecordPartWritesTotal *prometheus.Desc
458+
tsdbWALRecordPartBytesTotal *prometheus.Desc
459+
tsdbWALRecordBytesSaved *prometheus.Desc
460+
456461
// These two metrics replace metrics in ingesterMetrics, as we count them differently
457462
memSeriesCreatedTotal *prometheus.Desc
458463
memSeriesRemovedTotal *prometheus.Desc
@@ -532,6 +537,7 @@ func newTSDBMetrics(r prometheus.Registerer) *tsdbMetrics {
532537
"cortex_ingester_tsdb_wal_writes_failed_total",
533538
"Total number of TSDB WAL writes that failed.",
534539
nil, nil),
540+
535541
tsdbHeadTruncateFail: prometheus.NewDesc(
536542
"cortex_ingester_tsdb_head_truncations_failed_total",
537543
"Total number of TSDB head truncations that failed.",
@@ -620,6 +626,18 @@ func newTSDBMetrics(r prometheus.Registerer) *tsdbMetrics {
620626
"cortex_ingester_tsdb_checkpoint_creations_total",
621627
"Total number of TSDB checkpoint creations attempted.",
622628
nil, nil),
629+
tsdbWALRecordPartWritesTotal: prometheus.NewDesc(
630+
"cortex_ingester_tsdb_wal_record_part_writes_total",
631+
"Total number of WAL record parts written before flushing.",
632+
nil, nil),
633+
tsdbWALRecordPartBytesTotal: prometheus.NewDesc(
634+
"cortex_ingester_tsdb_wal_record_parts_bytes_written_total",
635+
"Total number of WAL record part bytes written before flushing, including CRC and compression headers.",
636+
nil, nil),
637+
tsdbWALRecordBytesSaved: prometheus.NewDesc(
638+
"cortex_ingester_tsdb_wal_record_bytes_saved_total",
639+
"Total number of bytes saved by the optional WAL record compression.",
640+
[]string{"compression"}, nil),
623641
tsdbSamplesAppended: prometheus.NewDesc(
624642
"cortex_ingester_tsdb_head_samples_appended_total",
625643
"Total number of appended samples.",
@@ -728,6 +746,10 @@ func (sm *tsdbMetrics) Describe(out chan<- *prometheus.Desc) {
728746
out <- sm.checkpointCreationFail
729747
out <- sm.checkpointCreationTotal
730748

749+
out <- sm.tsdbWALRecordPartWritesTotal
750+
out <- sm.tsdbWALRecordPartBytesTotal
751+
out <- sm.tsdbWALRecordBytesSaved
752+
731753
out <- sm.tsdbExemplarsTotal
732754
out <- sm.tsdbExemplarsInStorage
733755
out <- sm.tsdbExemplarSeriesInStorage
@@ -788,6 +810,9 @@ func (sm *tsdbMetrics) Collect(out chan<- prometheus.Metric) {
788810
data.SendSumOfCounters(out, sm.checkpointDeleteTotal, "prometheus_tsdb_checkpoint_deletions_total")
789811
data.SendSumOfCounters(out, sm.checkpointCreationFail, "prometheus_tsdb_checkpoint_creations_failed_total")
790812
data.SendSumOfCounters(out, sm.checkpointCreationTotal, "prometheus_tsdb_checkpoint_creations_total")
813+
data.SendSumOfCounters(out, sm.tsdbWALRecordPartWritesTotal, "prometheus_tsdb_wal_record_part_writes_total")
814+
data.SendSumOfCounters(out, sm.tsdbWALRecordPartBytesTotal, "prometheus_tsdb_wal_record_parts_bytes_written_total")
815+
data.SendSumOfCountersWithLabels(out, sm.tsdbWALRecordBytesSaved, "prometheus_tsdb_wal_record_bytes_saved_total", "compression")
791816
data.SendSumOfCounters(out, sm.tsdbExemplarsTotal, "prometheus_tsdb_exemplar_exemplars_appended_total")
792817
data.SendSumOfGauges(out, sm.tsdbExemplarsInStorage, "prometheus_tsdb_exemplar_exemplars_in_storage")
793818
data.SendSumOfGaugesPerUser(out, sm.tsdbExemplarSeriesInStorage, "prometheus_tsdb_exemplar_series_with_exemplars_in_storage")

pkg/ingester/metrics_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,6 +445,23 @@ func TestTSDBMetrics(t *testing.T) {
445445
# TYPE cortex_ingester_tsdb_checkpoint_creations_total counter
446446
cortex_ingester_tsdb_checkpoint_creations_total 1883489
447447
448+
# HELP cortex_ingester_tsdb_wal_record_part_writes_total Total number of WAL record parts written before flushing.
449+
# TYPE cortex_ingester_tsdb_wal_record_part_writes_total counter
450+
# 32*(12345 + 85787 + 999)
451+
cortex_ingester_tsdb_wal_record_part_writes_total 3172192
452+
453+
# HELP cortex_ingester_tsdb_wal_record_parts_bytes_written_total Total number of WAL record part bytes written before flushing, including CRC and compression headers.
454+
# TYPE cortex_ingester_tsdb_wal_record_parts_bytes_written_total counter
455+
# 33*(12345 + 85787 + 999)
456+
cortex_ingester_tsdb_wal_record_parts_bytes_written_total 3271323
457+
458+
# HELP cortex_ingester_tsdb_wal_record_bytes_saved_total Total number of bytes saved by the optional WAL record compression.
459+
# TYPE cortex_ingester_tsdb_wal_record_bytes_saved_total counter
460+
# 34*(12345 + 85787 + 999)
461+
cortex_ingester_tsdb_wal_record_bytes_saved_total{compression="snappy"} 3370454
462+
# 35*(12345 + 85787 + 999)
463+
cortex_ingester_tsdb_wal_record_bytes_saved_total{compression="zstd"} 3469585
464+
448465
# HELP cortex_ingester_memory_series_created_total The total number of series that were created per user.
449466
# TYPE cortex_ingester_memory_series_created_total counter
450467
# 5 * (12345, 85787 and 999 respectively)
@@ -744,6 +761,23 @@ func TestTSDBMetricsWithRemoval(t *testing.T) {
744761
# TYPE cortex_ingester_tsdb_checkpoint_creations_total counter
745762
cortex_ingester_tsdb_checkpoint_creations_total 1883489
746763
764+
# HELP cortex_ingester_tsdb_wal_record_part_writes_total Total number of WAL record parts written before flushing.
765+
# TYPE cortex_ingester_tsdb_wal_record_part_writes_total counter
766+
# 32*(12345 + 85787 + 999) - counter retained after user3 removal
767+
cortex_ingester_tsdb_wal_record_part_writes_total 3172192
768+
769+
# HELP cortex_ingester_tsdb_wal_record_parts_bytes_written_total Total number of WAL record part bytes written before flushing, including CRC and compression headers.
770+
# TYPE cortex_ingester_tsdb_wal_record_parts_bytes_written_total counter
771+
# 33*(12345 + 85787 + 999) - counter retained after user3 removal
772+
cortex_ingester_tsdb_wal_record_parts_bytes_written_total 3271323
773+
774+
# HELP cortex_ingester_tsdb_wal_record_bytes_saved_total Total number of bytes saved by the optional WAL record compression.
775+
# TYPE cortex_ingester_tsdb_wal_record_bytes_saved_total counter
776+
# 34*(12345 + 85787 + 999) - counter retained after user3 removal
777+
cortex_ingester_tsdb_wal_record_bytes_saved_total{compression="snappy"} 3370454
778+
# 35*(12345 + 85787 + 999) - counter retained after user3 removal
779+
cortex_ingester_tsdb_wal_record_bytes_saved_total{compression="zstd"} 3469585
780+
747781
# HELP cortex_ingester_memory_series_created_total The total number of series that were created per user.
748782
# TYPE cortex_ingester_memory_series_created_total counter
749783
# 5 * (12345, 85787 and 999 respectively)
@@ -1208,5 +1242,26 @@ func populateTSDBMetrics(base float64) *prometheus.Registry {
12081242
})
12091243
headStaleSeries.Set(31 * base)
12101244

1245+
recordPartWrites := promauto.With(r).NewCounter(prometheus.CounterOpts{
1246+
Name: "prometheus_tsdb_wal_record_part_writes_total",
1247+
Help: "Total number of record parts written before flushing.",
1248+
})
1249+
recordPartWrites.Add(32 * base)
1250+
1251+
recordPartBytes := promauto.With(r).NewCounter(prometheus.CounterOpts{
1252+
Name: "prometheus_tsdb_wal_record_parts_bytes_written_total",
1253+
Help: "Total number of record part bytes written before flushing, including" +
1254+
" CRC and compression headers.",
1255+
})
1256+
recordPartBytes.Add(33 * base)
1257+
1258+
recordBytesSaved := promauto.With(r).NewCounterVec(prometheus.CounterOpts{
1259+
Name: "prometheus_tsdb_wal_record_bytes_saved_total",
1260+
Help: "Total number of bytes saved by the optional record compression." +
1261+
" Use this metric to learn about the effectiveness compression.",
1262+
}, []string{"compression"})
1263+
recordBytesSaved.WithLabelValues("snappy").Add(34 * base)
1264+
recordBytesSaved.WithLabelValues("zstd").Add(35 * base)
1265+
12111266
return r
12121267
}

0 commit comments

Comments
 (0)