diff --git a/rs/execution_environment/src/execution/response/tests.rs b/rs/execution_environment/src/execution/response/tests.rs index cd2f673b6d60..159b0f3d03bb 100644 --- a/rs/execution_environment/src/execution/response/tests.rs +++ b/rs/execution_environment/src/execution/response/tests.rs @@ -98,8 +98,18 @@ fn execute_response_refunds_cycles() { // Canister A calls canister B. let cycles_sent = Cycles::new(1_000_000); + let b_callback = wasm() + .accept_cycles(cycles_sent / 2_u64) + .message_payload() + .append_and_reply() + .build(); let wasm_payload = wasm() - .call_with_cycles(b_id, "update", call_args(), cycles_sent) + .call_with_cycles( + b_id, + "update", + call_args().other_side(b_callback.clone()), + cycles_sent, + ) .build(); // Enqueue ingress message to canister A and execute it. @@ -107,16 +117,11 @@ fn execute_response_refunds_cycles() { assert_matches!(test.ingress_state(&msg_id), IngressState::Received); test.execute_message(a_id); - // Create response from canister B to canister A. - let response = ResponseBuilder::new() - .originator(a_id) - .respondent(b_id) - .originator_reply_callback(CallbackId::from(1)) - .refund(cycles_sent / 2_u64) - .build(); - let response_payload_size = response.payload_size_bytes(); + // Execute message on B. + test.induct_messages(); + test.execute_message(b_id); - // Execute response. + // Execute response on A. 
let balance_before = test.canister_state(a_id).system_state.balance(); let consumed_cycles_before = *test .canister_state(a_id) @@ -125,8 +130,16 @@ fn execute_response_refunds_cycles() { .consumed_cycles_by_use_cases() .get(&CyclesUseCase::RequestAndResponseTransmission) .unwrap(); + let consumed_cycles_before_counter = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::RequestAndResponseTransmission) + .unwrap(); let instructions_before = test.canister_executed_instructions(a_id); - test.execute_response(a_id, response); + test.induct_messages(); + test.execute_message(a_id); let instructions_after = test.canister_executed_instructions(a_id); let instructions_executed = instructions_after - instructions_before; let balance_after = test.canister_state(a_id).system_state.balance(); @@ -137,6 +150,13 @@ fn execute_response_refunds_cycles() { .consumed_cycles_by_use_cases() .get(&CyclesUseCase::RequestAndResponseTransmission) .unwrap(); + let consumed_cycles_after_counter = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::RequestAndResponseTransmission) + .unwrap(); // The balance is equivalent to the amount of cycles before executing`execute_response` // plus the unaccepted cycles (no more the cycles sent via request), @@ -146,7 +166,7 @@ fn execute_response_refunds_cycles() { let prepayment_for_response_transmission = mgr.prepayment_for_response_transmission(test.subnet_size(), cost_schedule); let actual_response_transmission_fee = mgr.xnet_call_bytes_transmitted_fee( - response_payload_size, + NumBytes::from(b_callback.len() as u64), test.subnet_size(), cost_schedule, ); @@ -177,6 +197,13 @@ fn execute_response_refunds_cycles() { consumed_cycles_after, consumed_cycles_before - response_transmission_refund.nominal(), ); + assert_eq!( + consumed_cycles_after_counter, + 
consumed_cycles_before_counter + + (test.call_fee("update", &b_callback) + actual_response_transmission_fee) + .nominal(), + ); + assert_eq!(consumed_cycles_after, consumed_cycles_after_counter); } } diff --git a/rs/execution_environment/src/execution_environment.rs b/rs/execution_environment/src/execution_environment.rs index aea7ef627a1b..9a4da5f99f1f 100644 --- a/rs/execution_environment/src/execution_environment.rs +++ b/rs/execution_environment/src/execution_environment.rs @@ -2145,8 +2145,11 @@ impl ExecutionEnvironment { state.metadata.subnet_call_context_manager.push_context( SubnetCallContext::CanisterHttpRequest(canister_http_request_context), ); - if let Some(canister_stats) = state.canister_state_make_mut(&request.sender) { - canister_stats + if let Some(canister_state) = state.canister_state_make_mut(&request.sender) { + canister_state + .system_state + .observe_consumed_cycles_for_https_outcall(nominal_http_request_fee); + canister_state .system_state .canister_metrics_mut() .load_metrics_mut() diff --git a/rs/execution_environment/src/execution_environment/tests.rs b/rs/execution_environment/src/execution_environment/tests.rs index 6e9986a6bc2f..8f441de76323 100644 --- a/rs/execution_environment/src/execution_environment/tests.rs +++ b/rs/execution_environment/src/execution_environment/tests.rs @@ -3003,86 +3003,114 @@ fn test_allocating_memory_reduces_subnet_available_memory() { #[test] fn execute_canister_http_request() { - let own_subnet = subnet_test_id(1); - let caller_canister = canister_test_id(10); - let mut test = ExecutionTestBuilder::new() - .with_own_subnet_id(own_subnet) - .with_caller(own_subnet, caller_canister) - .build(); - test.state_mut().metadata.own_subnet_features.http_requests = true; + for cost_schedule in [ + CanisterCyclesCostSchedule::Normal, + CanisterCyclesCostSchedule::Free, + ] { + let own_subnet = subnet_test_id(1); + let mut test = ExecutionTestBuilder::new() + .with_own_subnet_id(own_subnet) + 
.with_cost_schedule(cost_schedule) + .with_manual_execution() + .build(); - // Create payload of the request. - let url = "https://".to_string(); - let response_size_limit = 1000_u64; - let transform_method_name = "transform".to_string(); - let transform_context = vec![0, 1, 2]; - let args = CanisterHttpRequestArgs { - url: url.clone(), - max_response_bytes: Some(response_size_limit), - headers: BoundedHttpHeaders::new(vec![]), - body: None, - method: HttpMethod::GET, - transform: Some(TransformContext { - function: TransformFunc(candid::Func { - principal: caller_canister.get().0, - method: transform_method_name.clone(), + let caller_canister = test.universal_canister().unwrap(); + // Create payload of the request. + let url = "https://".to_string(); + let response_size_limit = 1000_u64; + let transform_method_name = "transform".to_string(); + let transform_context = vec![0, 1, 2]; + let args = CanisterHttpRequestArgs { + url: url.clone(), + max_response_bytes: Some(response_size_limit), + headers: BoundedHttpHeaders::new(vec![]), + body: None, + method: HttpMethod::GET, + transform: Some(TransformContext { + function: TransformFunc(candid::Func { + principal: caller_canister.get().0, + method: transform_method_name.clone(), + }), + context: transform_context.clone(), }), - context: transform_context.clone(), - }), - is_replicated: None, - pricing_version: None, - }; - - // Create request to HTTP_REQUEST method. - let payment = Cycles::new(1_000_000_000); - let payload = args.encode(); - test.inject_call_to_ic00(Method::HttpRequest, payload, payment); - test.execute_all(); - // Check that the SubnetCallContextManager contains the request. 
- let canister_http_request_contexts = &test - .state() - .metadata - .subnet_call_context_manager - .canister_http_request_contexts; - assert_eq!(canister_http_request_contexts.len(), 1); - - let http_request_context = canister_http_request_contexts - .get(&CallbackId::from(0)) - .unwrap(); - assert_eq!(http_request_context.url, url); - assert_eq!( - http_request_context.transform, - Some(Transform { - method_name: transform_method_name, - context: transform_context, - }) - ); - assert_eq!(http_request_context.http_method, CanisterHttpMethod::GET); - assert_eq!(http_request_context.request.sender, caller_canister); - let fee = test.http_request_fee( - http_request_context.variable_parts_size(), - Some(NumBytes::from(response_size_limit)), - ); - assert_eq!(http_request_context.request.payment, payment - fee.real()); + is_replicated: None, + pricing_version: None, + }; - assert_eq!( - fee.nominal(), - test.state() - .metadata - .subnet_metrics - .get_consumed_cycles_http_outcalls() - ); + // Create request to HTTP_REQUEST method. + let payment = Cycles::new(1_000_000_000); + let payload = args.encode(); + let call_to_management_canister = wasm() + .call_with_cycles( + IC_00, + Method::HttpRequest, + call_args().other_side(payload.clone()), + payment, + ) + .build(); - assert_eq!( - fee.nominal(), - *test + let (message_id, _) = + test.ingress_raw(caller_canister, "update", call_to_management_canister); + test.execute_all(); + // Check that the SubnetCallContextManager contains the request + // and the ingress message is in processing state. 
+ let canister_http_request_contexts = &test .state() .metadata - .subnet_metrics - .get_consumed_cycles_by_use_case() - .get(&CyclesUseCase::HTTPOutcalls) - .unwrap() - ); + .subnet_call_context_manager + .canister_http_request_contexts; + assert_eq!(canister_http_request_contexts.len(), 1); + assert_eq!(test.ingress_state(&message_id), IngressState::Processing); + + let http_request_context = canister_http_request_contexts + .get(&CallbackId::from(0)) + .unwrap(); + assert_eq!(http_request_context.url, url); + assert_eq!( + http_request_context.transform, + Some(Transform { + method_name: transform_method_name, + context: transform_context, + }) + ); + assert_eq!(http_request_context.http_method, CanisterHttpMethod::GET); + assert_eq!(http_request_context.request.sender, caller_canister); + let fee = test.http_request_fee( + http_request_context.variable_parts_size(), + Some(NumBytes::from(response_size_limit)), + ); + assert_eq!(http_request_context.request.payment, payment - fee.real()); + + assert_eq!( + fee.nominal(), + test.state() + .metadata + .subnet_metrics + .get_consumed_cycles_http_outcalls() + ); + + assert_eq!( + fee.nominal(), + *test + .state() + .metadata + .subnet_metrics + .get_consumed_cycles_by_use_case() + .get(&CyclesUseCase::HTTPOutcalls) + .unwrap() + ); + + assert_eq!( + fee.nominal(), + *test + .canister_state(caller_canister) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::HTTPOutcalls) + .unwrap() + ); + } } #[test] @@ -3879,6 +3907,14 @@ fn replicated_query_can_burn_cycles() { .get(&CyclesUseCase::BurnedCycles) .unwrap(); assert_eq!(burned_cycles, NominalCycles::new(cycles_to_burn.get())); + let burned_cycles_as_counters = *test + .canister_state(canister_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::BurnedCycles) + .unwrap(); + assert_eq!(burned_cycles_as_counters, burned_cycles); } #[test] @@ -3919,6 +3955,14 
@@ fn replicated_query_does_not_burn_cycles_on_trap() { .get(&CyclesUseCase::BurnedCycles) .is_none() ); + assert!( + test.canister_state(canister_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::BurnedCycles) + .is_none() + ); } #[test] @@ -3951,9 +3995,31 @@ fn test_consumed_cycles_by_use_case_with_refund() { ) .build(); + let instruction_consumption_initial_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::Instructions) + .unwrap(); + let execution_cost_initial = test.canister_execution_cost(a_id); + let (message_id, _) = test.ingress_raw(a_id, "update", a_payload); // Canister A sends the message to canister B. test.execute_message(a_id); + let instruction_consumption_after_message_execution_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::Instructions) + .unwrap(); + let execution_cost_after_message = test.canister_execution_cost(a_id); + assert_eq!( + instruction_consumption_after_message_execution_counters, + instruction_consumption_initial_counters + + (execution_cost_after_message - execution_cost_initial).nominal(), + ); test.induct_messages(); test.execute_message(b_id); @@ -3977,7 +4043,7 @@ fn test_consumed_cycles_by_use_case_with_refund() { } // Get consumption for 'RequestAndResponseTransmission' and 'Instructions' - // before receiving a response on canister A. + // before receiving a response on canister A for gauges. 
let transmission_consumption_before_response = *test .canister_state(a_id) .system_state @@ -3996,6 +4062,32 @@ fn test_consumed_cycles_by_use_case_with_refund() { assert_gt!(transmission_consumption_before_response.get(), 0); assert_gt!(instruction_consumption_before_response.get(), 0); + // Get consumption for 'RequestAndResponseTransmission' and 'Instructions' + // before receiving a response on canister A for counters. + let transmission_consumption_before_response_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::RequestAndResponseTransmission) + .unwrap(); + let instruction_consumption_before_response_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::Instructions) + .unwrap(); + + assert_eq!( + transmission_consumption_before_response_counters, + NominalCycles::zero() + ); + assert_eq!( + instruction_consumption_after_message_execution_counters, + instruction_consumption_before_response_counters, + ); + // Canister A executed the response. test.induct_messages(); test.execute_message(a_id); @@ -4013,12 +4105,15 @@ fn test_consumed_cycles_by_use_case_with_refund() { let transmission_cost = test.call_fee("update", &b_callback) + test.reply_fee(&b_callback); - let execution_cost = test.canister_execution_cost(a_id); + let execution_cost_after_response = test.canister_execution_cost(a_id); // Check that canister A's balance is updated correctly. 
assert_eq!( test.canister_state(a_id).system_state.balance(), - initial_cycles - execution_cost.real() - transmission_cost.real() - transferred_cycles + initial_cycles + - execution_cost_after_response.real() + - transmission_cost.real() + - transferred_cycles ); assert_eq!( @@ -4045,6 +4140,21 @@ fn test_consumed_cycles_by_use_case_with_refund() { .get(&CyclesUseCase::Instructions) .unwrap(); + let transmission_consumption_after_response_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::RequestAndResponseTransmission) + .unwrap(); + let instruction_consumption_after_response_counters = *test + .canister_state(a_id) + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .get(&CyclesUseCase::Instructions) + .unwrap(); + // Check that consumed cycles are correct for both use cases. assert_eq!( transmission_consumption_after_response, @@ -4053,7 +4163,7 @@ fn test_consumed_cycles_by_use_case_with_refund() { assert_eq!( instruction_consumption_after_response, - execution_cost.nominal(), + execution_cost_after_response.nominal(), ); // Consumed cycles after the response should be smaller than before @@ -4067,6 +4177,28 @@ fn test_consumed_cycles_by_use_case_with_refund() { instruction_consumption_before_response ); + // Check that consumed cycles are correct for both use cases for counter metrics. + assert_eq!( + transmission_consumption_after_response_counters, + transmission_consumption_before_response_counters + transmission_cost.nominal(), + ); + + assert_eq!( + instruction_consumption_after_response_counters, + instruction_consumption_before_response_counters + + (execution_cost_after_response - execution_cost_after_message).nominal(), + ); + + // Both the gauge and counter metrics should show the same consumption. 
+ assert_eq!( + transmission_consumption_after_response, + transmission_consumption_after_response_counters, + ); + assert_eq!( + instruction_consumption_after_response, + instruction_consumption_after_response_counters, + ); + // Check that canister B's balance is updated correctly. assert_eq!( test.canister_state(b_id).system_state.balance(), diff --git a/rs/execution_environment/src/scheduler/tests/metrics.rs b/rs/execution_environment/src/scheduler/tests/metrics.rs index 6b1dc77e1438..3c9bd457d4e5 100644 --- a/rs/execution_environment/src/scheduler/tests/metrics.rs +++ b/rs/execution_environment/src/scheduler/tests/metrics.rs @@ -22,8 +22,8 @@ use ic_registry_subnet_type::SubnetType; use ic_replicated_state::metadata_state::testing::NetworkTopologyTesting; use ic_replicated_state::testing::SystemStateTesting; use ic_test_utilities_metrics::{ - HistogramStats, fetch_gauge, fetch_gauge_vec, fetch_histogram_stats, fetch_histogram_vec_stats, - fetch_int_gauge, fetch_int_gauge_vec, metric_vec, + HistogramStats, fetch_counter_vec, fetch_gauge, fetch_gauge_vec, fetch_histogram_stats, + fetch_histogram_vec_stats, fetch_int_gauge, fetch_int_gauge_vec, metric_vec, }; use ic_test_utilities_state::{get_running_canister, get_stopped_canister, get_stopping_canister}; use ic_types::NumBytes; @@ -1030,6 +1030,14 @@ fn consumed_cycles_http_outcalls_are_added_to_consumed_cycles_total() { ), metric_vec(&[(&[("use_case", "HTTPOutcalls")], fee.nominal().get() as f64),]), ); + + assert_eq!( + fetch_counter_vec( + test.metrics_registry(), + "replicated_state_consumed_cycles_from_replica_start_as_counters", + ), + metric_vec(&[(&[("use_case", "HTTPOutcalls")], fee.nominal().get() as f64),]), + ); } } @@ -1197,6 +1205,26 @@ fn consumed_cycles_for_resource_allocations_are_updated_from_valid_canisters() { ), ]), ); + assert_eq!( + fetch_counter_vec( + test.metrics_registry(), + "replicated_state_consumed_cycles_from_replica_start_as_counters", + ), + metric_vec(&[ + ( + &[("use_case", 
"Memory")], + test.memory_cost(memory_allocation, duration) + .nominal() + .get() as f64 + ), + ( + &[("use_case", "ComputeAllocation")], + test.compute_allocation_cost(compute_allocation, duration) + .nominal() + .get() as f64, + ), + ]), + ); } } diff --git a/rs/monitoring/metrics/src/registry.rs b/rs/monitoring/metrics/src/registry.rs index b591470f24dc..a72053d3eebc 100644 --- a/rs/monitoring/metrics/src/registry.rs +++ b/rs/monitoring/metrics/src/registry.rs @@ -1,8 +1,8 @@ use crate::adapter_metrics_registry::AdapterMetricsRegistry; use ic_adapter_metrics_client::AdapterMetrics; use prometheus::{ - Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, IntGauge, - IntGaugeVec, Opts, core::Collector, + CounterVec, Gauge, GaugeVec, Histogram, HistogramOpts, HistogramVec, IntCounter, IntCounterVec, + IntGauge, IntGaugeVec, Opts, core::Collector, }; /// A wrapper around `prometheus::Registry` with helpers for creating metrics @@ -101,6 +101,16 @@ impl MetricsRegistry { self.register(GaugeVec::new(Opts::new(name, help), label_names).unwrap()) } + /// Create and register a `CounterVec`. + pub fn counter_vec>( + &self, + name: S, + help: S, + label_names: &[&str], + ) -> CounterVec { + self.register(CounterVec::new(Opts::new(name, help), label_names).unwrap()) + } + /// Create and register an `IntCounter`. 
pub fn int_counter>(&self, name: S, help: S) -> IntCounter { self.register(IntCounter::new(name, help).unwrap()) diff --git a/rs/protobuf/def/state/canister_state_bits/v1/canister_state_bits.proto b/rs/protobuf/def/state/canister_state_bits/v1/canister_state_bits.proto index 8b18c943b1b6..214a3c5e589f 100644 --- a/rs/protobuf/def/state/canister_state_bits/v1/canister_state_bits.proto +++ b/rs/protobuf/def/state/canister_state_bits/v1/canister_state_bits.proto @@ -486,6 +486,11 @@ message CanisterStateBits { // the respective amount is added to the consumed amount while when a refund // happens the refund amount is subtracted from consumed amount. repeated ConsumedCyclesByUseCase consumed_cycles_by_use_cases = 36; + // Consumed cycles by use case presented as counters. The consumed amount is + // only updated once the refund is known to perform a single accounting step. + // These counters facilitate programmatic retrieval of metrics and performing + // various aggregations on them more easily than their gauge counterparts. + repeated ConsumedCyclesByUseCase consumed_cycles_by_use_cases_as_counters = 65; CanisterHistory canister_history = 37; // Resource reservation cycles. 
state.queues.v1.Cycles reserved_balance = 38; diff --git a/rs/protobuf/def/state/metadata/v1/metadata.proto b/rs/protobuf/def/state/metadata/v1/metadata.proto index a00dd2fe0152..210167a5ffda 100644 --- a/rs/protobuf/def/state/metadata/v1/metadata.proto +++ b/rs/protobuf/def/state/metadata/v1/metadata.proto @@ -322,6 +322,7 @@ message SubnetMetrics { optional uint64 canister_state_bytes = 9; optional uint64 update_transactions_total = 10; repeated ThresholdSignatureAgreementsEntry threshold_signature_agreements = 11; + repeated canister_state_bits.v1.ConsumedCyclesByUseCase consumed_cycles_by_use_case_as_counters = 12; } message BitcoinGetSuccessorsFollowUpResponses { diff --git a/rs/protobuf/src/gen/state/state.canister_state_bits.v1.rs b/rs/protobuf/src/gen/state/state.canister_state_bits.v1.rs index e97e720494bc..111d6beb203d 100644 --- a/rs/protobuf/src/gen/state/state.canister_state_bits.v1.rs +++ b/rs/protobuf/src/gen/state/state.canister_state_bits.v1.rs @@ -736,6 +736,12 @@ pub struct CanisterStateBits { /// happens the refund amount is subtracted from consumed amount. #[prost(message, repeated, tag = "36")] pub consumed_cycles_by_use_cases: ::prost::alloc::vec::Vec, + /// Consumed cycles by use case presented as counters. The consumed amount is + /// only updated once the refund is known to perform a single accounting step. + /// These counters facilitate programmatic retrieval of metrics and performing + /// various aggregations on them more easily than their gauge counterparts. + #[prost(message, repeated, tag = "65")] + pub consumed_cycles_by_use_cases_as_counters: ::prost::alloc::vec::Vec, #[prost(message, optional, tag = "37")] pub canister_history: ::core::option::Option, /// Resource reservation cycles. 
diff --git a/rs/protobuf/src/gen/state/state.metadata.v1.rs b/rs/protobuf/src/gen/state/state.metadata.v1.rs index 72cfce0a2222..b496283936ae 100644 --- a/rs/protobuf/src/gen/state/state.metadata.v1.rs +++ b/rs/protobuf/src/gen/state/state.metadata.v1.rs @@ -475,6 +475,9 @@ pub struct SubnetMetrics { pub update_transactions_total: ::core::option::Option, #[prost(message, repeated, tag = "11")] pub threshold_signature_agreements: ::prost::alloc::vec::Vec, + #[prost(message, repeated, tag = "12")] + pub consumed_cycles_by_use_case_as_counters: + ::prost::alloc::vec::Vec, } #[derive(Clone, PartialEq, ::prost::Message)] pub struct BitcoinGetSuccessorsFollowUpResponses { diff --git a/rs/replicated_state/src/canister_state/system_state.rs b/rs/replicated_state/src/canister_state/system_state.rs index 99c504e6fc28..168631ca825d 100644 --- a/rs/replicated_state/src/canister_state/system_state.rs +++ b/rs/replicated_state/src/canister_state/system_state.rs @@ -153,6 +153,7 @@ pub struct CanisterMetrics { load_metrics: LoadMetrics, consumed_cycles: NominalCycles, consumed_cycles_by_use_cases: BTreeMap, + consumed_cycles_by_use_cases_as_counters: BTreeMap, } impl CanisterMetrics { @@ -163,6 +164,7 @@ impl CanisterMetrics { interrupted_during_execution: u64, consumed_cycles: NominalCycles, consumed_cycles_by_use_cases: BTreeMap, + consumed_cycles_by_use_cases_as_counters: BTreeMap, instructions_executed: NumInstructions, load_metrics: LoadMetrics, ) -> Self { @@ -173,6 +175,7 @@ impl CanisterMetrics { interrupted_during_execution, consumed_cycles, consumed_cycles_by_use_cases, + consumed_cycles_by_use_cases_as_counters, instructions_executed, load_metrics, } @@ -206,6 +209,12 @@ impl CanisterMetrics { &self.consumed_cycles_by_use_cases } + pub fn consumed_cycles_by_use_cases_as_counters( + &self, + ) -> &BTreeMap { + &self.consumed_cycles_by_use_cases_as_counters + } + pub fn observe_round_scheduled(&mut self) { self.rounds_scheduled += 1; } @@ -1944,6 +1953,17 @@ impl 
SystemState { self.consume_cycles(CompoundCycles::::new(balance, cost_schedule)); } + /// Observes the consumed cycles for HTTPS outcalls. This should only be + /// called to update the counter metric on the canister level, as the gauge + /// metric for HTTPS outcalls is updated on the subnet level only. + pub fn observe_consumed_cycles_for_https_outcall(&mut self, amount: NominalCycles) { + *self + .canister_metrics + .consumed_cycles_by_use_cases_as_counters + .entry(CyclesUseCase::HTTPOutcalls) + .or_insert_with(NominalCycles::zero) += amount; + } + fn observe_consumed_cycles_with_use_case( &mut self, prepayment: NominalCycles, @@ -1978,26 +1998,53 @@ impl SystemState { ConsumingCycles::Refund => {} } - // Skip if the amounts are zero and no metric updates are needed. - if (consuming_cycles == ConsumingCycles::Prepayment && prepayment.is_zero()) - || (consuming_cycles == ConsumingCycles::Refund && refund.is_zero()) - { + // Skip if the consumed cycles are zero and no metric updates are needed. + if prepayment - refund == NominalCycles::zero() { return; } let metric: &mut BTreeMap = &mut self.canister_metrics.consumed_cycles_by_use_cases; - let use_case_consumption = metric.entry(use_case).or_insert_with(NominalCycles::zero); + let metric: &mut BTreeMap = &mut self + .canister_metrics + .consumed_cycles_by_use_cases_as_counters; + let use_case_consumption_as_counter = + metric.entry(use_case).or_insert_with(NominalCycles::zero); match consuming_cycles { ConsumingCycles::Prepayment => { *use_case_consumption += prepayment; self.canister_metrics.consumed_cycles += prepayment; + match use_case { + CyclesUseCase::Instructions | CyclesUseCase::RequestAndResponseTransmission => { + // These use cases are accounted for during refund + // for the counter metrics. 
+ } + CyclesUseCase::Memory + | CyclesUseCase::ComputeAllocation + | CyclesUseCase::Uninstall + | CyclesUseCase::IngressInduction + | CyclesUseCase::CanisterCreation + | CyclesUseCase::BurnedCycles => { + *use_case_consumption_as_counter += prepayment; + } + + CyclesUseCase::ECDSAOutcalls + | CyclesUseCase::SchnorrOutcalls + | CyclesUseCase::VetKd + | CyclesUseCase::HTTPOutcalls + | CyclesUseCase::DeletedCanisters + | CyclesUseCase::DroppedMessages + | CyclesUseCase::NonConsumed => { + // These use cases should not be tracked on the canister level. + } + } } ConsumingCycles::Refund => { *use_case_consumption -= refund; self.canister_metrics.consumed_cycles -= refund; + *use_case_consumption_as_counter += prepayment - refund; } } } diff --git a/rs/replicated_state/src/metadata_state.rs b/rs/replicated_state/src/metadata_state.rs index ea69dba62e11..86097c804842 100644 --- a/rs/replicated_state/src/metadata_state.rs +++ b/rs/replicated_state/src/metadata_state.rs @@ -396,6 +396,7 @@ pub struct SubnetMetrics { consumed_cycles_http_outcalls: NominalCycles, consumed_cycles_ecdsa_outcalls: NominalCycles, consumed_cycles_by_use_case: BTreeMap, + consumed_cycles_by_use_case_as_counters: BTreeMap, pub threshold_signature_agreements: BTreeMap, /// The number of canisters that exist on this subnet. 
pub num_canisters: u64, @@ -420,6 +421,10 @@ impl SubnetMetrics { .consumed_cycles_by_use_case .entry(use_case) .or_insert_with(NominalCycles::zero) += cycles; + *self + .consumed_cycles_by_use_case_as_counters + .entry(use_case) + .or_insert_with(NominalCycles::zero) += cycles; } pub fn observe_consumed_cycles_by_deleted_canisters(&mut self, cycles: NominalCycles) { @@ -450,6 +455,12 @@ impl SubnetMetrics { &self.consumed_cycles_by_use_case } + pub fn get_consumed_cycles_by_use_case_as_counters( + &self, + ) -> &BTreeMap { + &self.consumed_cycles_by_use_case_as_counters + } + pub fn consumed_cycles_total(&self) -> NominalCycles { let mut total = NominalCycles::zero(); diff --git a/rs/replicated_state/src/metadata_state/proto.rs b/rs/replicated_state/src/metadata_state/proto.rs index 1a1c61add50d..dffc26832d54 100644 --- a/rs/replicated_state/src/metadata_state/proto.rs +++ b/rs/replicated_state/src/metadata_state/proto.rs @@ -241,6 +241,15 @@ impl From<&SubnetMetrics> for pb_metadata::SubnetMetrics { cycles: Some((&cycles).into()), }) .collect(), + consumed_cycles_by_use_case_as_counters: item + .consumed_cycles_by_use_case_as_counters + .clone() + .into_iter() + .map(|(use_case, cycles)| ConsumedCyclesByUseCase { + use_case: pbCyclesUseCase::from(use_case).into(), + cycles: Some((&cycles).into()), + }) + .collect(), num_canisters: Some(item.num_canisters), canister_state_bytes: Some(item.canister_state_bytes.get()), update_transactions_total: Some(item.update_transactions_total), @@ -263,6 +272,20 @@ impl TryFrom for SubnetMetrics { NominalCycles::try_from(x.cycles.unwrap_or_default()).unwrap_or_default(), ); } + + let mut consumed_cycles_by_use_case_as_counters = BTreeMap::new(); + for x in item.consumed_cycles_by_use_case_as_counters.into_iter() { + consumed_cycles_by_use_case_as_counters.insert( + CyclesUseCase::try_from(pbCyclesUseCase::try_from(x.use_case).map_err(|_| { + ProxyDecodeError::ValueOutOfRange { + typ: "CyclesUseCase", + err: format!("Unexpected 
value of cycles use case: {}", x.use_case), + } + })?)?, + NominalCycles::try_from(x.cycles.unwrap_or_default()).unwrap_or_default(), + ); + } + let mut threshold_signature_agreements = BTreeMap::new(); for x in item.threshold_signature_agreements.into_iter() { threshold_signature_agreements.insert( @@ -273,6 +296,7 @@ impl TryFrom for SubnetMetrics { x.count, ); } + Ok(Self { consumed_cycles_by_deleted_canisters: try_from_option_field( item.consumed_cycles_by_deleted_canisters, @@ -290,6 +314,7 @@ impl TryFrom for SubnetMetrics { .unwrap_or_else(|_| NominalCycles::zero()), threshold_signature_agreements, consumed_cycles_by_use_case, + consumed_cycles_by_use_case_as_counters, num_canisters: try_from_option_field( item.num_canisters, "SubnetMetrics::num_canisters", diff --git a/rs/replicated_state/src/metrics.rs b/rs/replicated_state/src/metrics.rs index d555d03d8538..e28d45d7ed7e 100644 --- a/rs/replicated_state/src/metrics.rs +++ b/rs/replicated_state/src/metrics.rs @@ -13,7 +13,7 @@ use ic_types::{ Height, MAX_STABLE_MEMORY_IN_BYTES, MAX_WASM_MEMORY_IN_BYTES, NumBytes, NumInstructions, Time, }; use ic_types_cycles::{Cycles, CyclesUseCase, NominalCycles}; -use prometheus::{Gauge, GaugeVec, Histogram, HistogramVec, IntGauge, IntGaugeVec}; +use prometheus::{CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntGauge, IntGaugeVec}; use std::collections::BTreeMap; use std::time::Duration; @@ -42,6 +42,7 @@ pub struct ReplicatedStateMetrics { available_canister_ids: IntGauge, consumed_cycles: Gauge, consumed_cycles_by_use_case: GaugeVec, + consumed_cycles_by_use_case_as_counters: CounterVec, input_queue_messages: IntGaugeVec, input_queues_size_bytes: IntGaugeVec, queues_response_bytes: IntGauge, @@ -127,6 +128,11 @@ impl ReplicatedStateMetrics { "Number of cycles consumed by use cases.", &["use_case"], ), + consumed_cycles_by_use_case_as_counters: metrics_registry.counter_vec( + "replicated_state_consumed_cycles_from_replica_start_as_counters", + "Number of cycles 
consumed by use cases.", + &["use_case"], + ), input_queue_messages: metrics_registry.int_gauge_vec( "execution_input_queue_messages", "Count of messages currently enqueued in input queues, by message kind.", @@ -241,6 +247,20 @@ impl ReplicatedStateMetrics { } } + fn observe_consumed_cycles_by_use_case_as_counters( + &self, + consumed_cycles_by_use_case_as_counters: &BTreeMap, + ) { + for (use_case, cycles) in consumed_cycles_by_use_case_as_counters.iter() { + self.consumed_cycles_by_use_case_as_counters + .with_label_values(&[use_case.as_str()]) + .reset(); + self.consumed_cycles_by_use_case_as_counters + .with_label_values(&[use_case.as_str()]) + .inc_by(cycles.get() as f64); + } + } + fn observe_input_messages(&self, kind: &str, message_count: usize) { self.input_queue_messages .with_label_values(&[kind]) @@ -306,6 +326,7 @@ impl ReplicatedStateMetrics { let mut consumed_cycles_total = NominalCycles::zero(); let mut consumed_cycles_total_by_use_case = BTreeMap::new(); + let mut consumed_cycles_total_by_use_case_as_counters = BTreeMap::new(); let mut ingress_queue_message_count = 0; let mut ingress_queue_size_bytes = 0; @@ -368,6 +389,20 @@ impl ReplicatedStateMetrics { .canister_metrics() .consumed_cycles_by_use_cases(), ); + // For the purpose of exporting the total counters to prometheus, filter out HTTPS + // outcalls from canister level metrics as they will be added later from the subnet level metrics. + // This only applies for the counter version of metrics as the gauge version only updates + // the subnet level part. 
+ let mut counter_metrics_map = canister + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .clone(); + counter_metrics_map.remove(&CyclesUseCase::HTTPOutcalls); + join_consumed_cycles_by_use_case( + &mut consumed_cycles_total_by_use_case_as_counters, + &counter_metrics_map, + ); let queues = canister.system_state.queues(); ingress_queue_message_count += queues.ingress_queue_message_count(); ingress_queue_size_bytes += queues.ingress_queue_size_bytes(); @@ -435,6 +470,13 @@ impl ReplicatedStateMetrics { .subnet_metrics .get_consumed_cycles_by_use_case(), ); + join_consumed_cycles_by_use_case( + &mut consumed_cycles_total_by_use_case_as_counters, + state + .metadata + .subnet_metrics + .get_consumed_cycles_by_use_case(), + ); // Add the consumed cycles in ecdsa outcalls. consumed_cycles_total += state @@ -451,6 +493,9 @@ impl ReplicatedStateMetrics { self.consumed_cycles.set(consumed_cycles_total.get() as f64); self.observe_consumed_cycles_by_use_case(&consumed_cycles_total_by_use_case); + self.observe_consumed_cycles_by_use_case_as_counters( + &consumed_cycles_total_by_use_case_as_counters, + ); for (key_id, count) in &state.metadata.subnet_metrics.threshold_signature_agreements { self.threshold_signature_agreements diff --git a/rs/state_layout/src/state_layout.rs b/rs/state_layout/src/state_layout.rs index 6a3ac4ce6bd3..c81c3aa23c4a 100644 --- a/rs/state_layout/src/state_layout.rs +++ b/rs/state_layout/src/state_layout.rs @@ -195,6 +195,7 @@ pub struct CanisterStateBits { pub global_timer_nanos: Option, pub canister_version: u64, pub consumed_cycles_by_use_cases: BTreeMap, + pub consumed_cycles_by_use_cases_as_counters: BTreeMap, pub instructions_executed: NumInstructions, pub ingress_messages_executed: u64, pub remote_subnet_messages_executed: u64, diff --git a/rs/state_layout/src/state_layout/proto.rs b/rs/state_layout/src/state_layout/proto.rs index 4df4cf22f2b6..8865281bf0d2 100644 --- 
a/rs/state_layout/src/state_layout/proto.rs +++ b/rs/state_layout/src/state_layout/proto.rs @@ -54,6 +54,16 @@ impl From for pb_canister_state_bits::CanisterStateBits { }, ) .collect(), + consumed_cycles_by_use_cases_as_counters: item + .consumed_cycles_by_use_cases_as_counters + .into_iter() + .map( + |(use_case, cycles)| pb_canister_state_bits::ConsumedCyclesByUseCase { + use_case: pb_canister_state_bits::CyclesUseCase::from(use_case).into(), + cycles: Some((&cycles).into()), + }, + ) + .collect(), canister_history: Some((&item.canister_history).into()), wasm_chunk_store_metadata: Some((&item.wasm_chunk_store_metadata).into()), total_query_stats: Some((&item.total_query_stats).into()), @@ -131,6 +141,21 @@ impl TryFrom for CanisterStateBits { ); } + let mut consumed_cycles_by_use_cases_as_counters = BTreeMap::new(); + for x in value.consumed_cycles_by_use_cases_as_counters.into_iter() { + consumed_cycles_by_use_cases_as_counters.insert( + CyclesUseCase::try_from( + pb_canister_state_bits::CyclesUseCase::try_from(x.use_case).map_err(|_| { + ProxyDecodeError::ValueOutOfRange { + typ: "CyclesUseCase", + err: format!("Unexpected value of cycles use case: {}", x.use_case), + } + })?, + )?, + NominalCycles::try_from(x.cycles.unwrap_or_default()).unwrap_or_default(), + ); + } + let tasks: pb_canister_state_bits::TaskQueue = try_from_option_field(value.tasks, "CanisterStateBits::tasks").unwrap_or_default(); let task_queue = TaskQueue::try_from(tasks)?; @@ -179,6 +204,7 @@ impl TryFrom for CanisterStateBits { global_timer_nanos: value.global_timer_nanos, canister_version: value.canister_version, consumed_cycles_by_use_cases, + consumed_cycles_by_use_cases_as_counters, // TODO(MR-412): replace `unwrap_or_default` by returning an error on missing canister_history field canister_history: try_from_option_field( value.canister_history, diff --git a/rs/state_layout/src/state_layout/tests.rs b/rs/state_layout/src/state_layout/tests.rs index 5d1b70a2572f..49050248e775 100644 --- 
a/rs/state_layout/src/state_layout/tests.rs +++ b/rs/state_layout/src/state_layout/tests.rs @@ -56,6 +56,7 @@ fn default_canister_state_bits() -> CanisterStateBits { global_timer_nanos: None, canister_version: 0, consumed_cycles_by_use_cases: BTreeMap::new(), + consumed_cycles_by_use_cases_as_counters: BTreeMap::new(), canister_history: CanisterHistory::default(), wasm_chunk_store_metadata: WasmChunkStoreMetadata::default(), total_query_stats: TotalQueryStats::default(), diff --git a/rs/state_manager/src/checkpoint.rs b/rs/state_manager/src/checkpoint.rs index 53b8347f3eda..53bafdce8742 100644 --- a/rs/state_manager/src/checkpoint.rs +++ b/rs/state_manager/src/checkpoint.rs @@ -813,6 +813,7 @@ pub fn load_canister_state( canister_state_bits.interrupted_during_execution, canister_state_bits.consumed_cycles, canister_state_bits.consumed_cycles_by_use_cases, + canister_state_bits.consumed_cycles_by_use_cases_as_counters, canister_state_bits.instructions_executed, LoadMetrics::new( canister_state_bits.ingress_messages_executed, diff --git a/rs/state_manager/src/tip.rs b/rs/state_manager/src/tip.rs index bda1e9772647..cad78e849969 100644 --- a/rs/state_manager/src/tip.rs +++ b/rs/state_manager/src/tip.rs @@ -1268,6 +1268,11 @@ fn serialize_canister_protos_to_checkpoint_readwrite( .canister_metrics() .consumed_cycles_by_use_cases() .clone(), + consumed_cycles_by_use_cases_as_counters: canister_state + .system_state + .canister_metrics() + .consumed_cycles_by_use_cases_as_counters() + .clone(), canister_history: canister_state.system_state.get_canister_history().clone(), wasm_chunk_store_metadata: canister_state .system_state