Skip to content

Commit c8b3722

Browse files
committed
Introduce MetricsStore.
1 parent c1ba6a8 commit c8b3722

File tree

19 files changed

+1638
-49
lines changed

19 files changed

+1638
-49
lines changed

deployment-examples/metrics/grafana/dashboards/nativelink-stores.json

Lines changed: 1173 additions & 0 deletions
Large diffs are not rendered by default.

deployment-examples/metrics/grafana/provisioning/dashboards/dashboards.yaml

Lines changed: 0 additions & 17 deletions
This file was deleted.

deployment-examples/metrics/grafana/provisioning/datasources/datasources.yaml

Lines changed: 0 additions & 18 deletions
This file was deleted.

deployment-examples/metrics/prometheus-recording-rules.yml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,3 +284,92 @@ groups:
284284
sum(rate(nativelink_execution_completed_count[30d]))
285285
))
286286
) / (1 - 0.99)
287+
288+
- name: nativelink_stores
289+
interval: 30s
290+
rules:
291+
# Store cache hit rate by store type and name
292+
- record: nativelink:store_cache_hit_rate
293+
expr: |
294+
sum by (store_type, store_name) (
295+
rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result="hit"}[5m])
296+
) /
297+
sum by (store_type, store_name) (
298+
rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result=~"hit|miss"}[5m])
299+
)
300+
301+
# Store read latency percentiles
302+
- record: nativelink:store_read_latency_p50
303+
expr: |
304+
histogram_quantile(0.5,
305+
sum by (le, store_type, store_name) (
306+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
307+
)
308+
)
309+
310+
- record: nativelink:store_read_latency_p90
311+
expr: |
312+
histogram_quantile(0.9,
313+
sum by (le, store_type, store_name) (
314+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
315+
)
316+
)
317+
318+
- record: nativelink:store_read_latency_p99
319+
expr: |
320+
histogram_quantile(0.99,
321+
sum by (le, store_type, store_name) (
322+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
323+
)
324+
)
325+
326+
# Store write latency percentiles
327+
- record: nativelink:store_write_latency_p50
328+
expr: |
329+
histogram_quantile(0.5,
330+
sum by (le, store_type, store_name) (
331+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
332+
)
333+
)
334+
335+
- record: nativelink:store_write_latency_p90
336+
expr: |
337+
histogram_quantile(0.9,
338+
sum by (le, store_type, store_name) (
339+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
340+
)
341+
)
342+
343+
- record: nativelink:store_write_latency_p99
344+
expr: |
345+
histogram_quantile(0.99,
346+
sum by (le, store_type, store_name) (
347+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
348+
)
349+
)
350+
351+
# Store operation rates
352+
- record: nativelink:store_read_rate
353+
expr: |
354+
sum by (store_type, store_name) (
355+
rate(nativelink_store_operations{cache_operation_name="read"}[5m])
356+
)
357+
358+
- record: nativelink:store_write_rate
359+
expr: |
360+
sum by (store_type, store_name) (
361+
rate(nativelink_store_operations{cache_operation_name="write"}[5m])
362+
)
363+
364+
# Store error rate by store type, store name, and operation
365+
- record: nativelink:store_error_rate
366+
expr: |
367+
sum by (store_type, store_name, cache_operation_name) (
368+
rate(nativelink_store_operations{cache_operation_result="error"}[5m])
369+
)
370+
371+
# Overall store hit rate (aggregated across all stores)
372+
- record: nativelink:store_overall_hit_rate
373+
expr: |
374+
sum(rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result="hit"}[5m])) /
375+
sum(rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result=~"hit|miss"}[5m]))

nativelink-service/tests/ac_server_test.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5656
store_manager.add_store(
5757
"main_cas",
5858
store_factory(
59+
"main_cas",
5960
&StoreSpec::Memory(MemorySpec::default()),
6061
&store_manager,
6162
None,
@@ -65,6 +66,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
6566
store_manager.add_store(
6667
"main_ac",
6768
store_factory(
69+
"main_ac",
6870
&StoreSpec::Memory(MemorySpec::default()),
6971
&store_manager,
7072
None,

nativelink-service/tests/bep_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5555
store_manager.add_store(
5656
BEP_STORE_NAME,
5757
store_factory(
58+
BEP_STORE_NAME,
5859
&StoreSpec::Memory(MemorySpec::default()),
5960
&store_manager,
6061
None,

nativelink-service/tests/bytestream_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
6060
store_manager.add_store(
6161
"main_cas",
6262
store_factory(
63+
"main_cas",
6364
&StoreSpec::Memory(MemorySpec::default()),
6465
&store_manager,
6566
None,

nativelink-service/tests/cas_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5050
store_manager.add_store(
5151
"main_cas",
5252
store_factory(
53+
"main_cas",
5354
&StoreSpec::Memory(MemorySpec::default()),
5455
&store_manager,
5556
None,

nativelink-service/tests/execution_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
3535
store_manager.add_store(
3636
"main_cas",
3737
store_factory(
38+
"main_cas",
3839
&StoreSpec::Memory(MemorySpec::default()),
3940
&store_manager,
4041
None,

nativelink-service/tests/fetch_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
3636
store_manager.add_store(
3737
"test_fetch_store",
3838
store_factory(
39+
"test_fetch_store",
3940
&StoreSpec::Memory(MemorySpec::default()),
4041
&store_manager,
4142
None,

0 commit comments

Comments
 (0)