diff --git a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet index a85e9f8..a353eff 100644 --- a/assets/kube-burner-report-ocp-wrapper/queries.libsonnet +++ b/assets/kube-burner-report-ocp-wrapper/queries.libsonnet @@ -126,6 +126,123 @@ local elasticsearch = g.query.elasticsearch; + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: containerNetworkSetupLatency') + elasticsearch.withTimeField('timestamp'), }, + bgpRouteLatenciesSummary: { /* Max of the $latencyPercentile field, bucketed by quantileName.keyword terms and a timestamp date histogram, over raLatencyQuantilesMeasurement documents of the selected $uuid. */ + query(): + elasticsearch.withAlias('$latencyPercentile {{term quantileName.keyword}}') + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField('quantileName.keyword') + + elasticsearch.bucketAggs.Terms.withId('3') + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize('10'), + elasticsearch.bucketAggs.DateHistogram.withField('timestamp') + + elasticsearch.bucketAggs.DateHistogram.withId('2') + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('auto') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount(0) + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges(0), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Max.withField('$latencyPercentile') + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withType('max'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: raLatencyQuantilesMeasurement') + + elasticsearch.withTimeField('timestamp'), + }, + bgpRouteExportLatency: { /* Raw documents (raw_data metric, size 500, no bucket aggregations) of raLatencyMeasurement with scenario ExportRoutes for the selected $uuid. */ + query(): + elasticsearch.withBucketAggs([]) + 
elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize('500') + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: raLatencyMeasurement AND scenario.keyword: ExportRoutes') + + elasticsearch.withQueryType('randomWalk') + + elasticsearch.withTimeField('timestamp'), + }, + bgpRouteImportLatency: { /* Same shape as bgpRouteExportLatency, but filtered on scenario ImportRoutes. */ + query(): + elasticsearch.withBucketAggs([]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.settings.withSize('500') + + elasticsearch.metrics.MetricAggregationWithSettings.RawData.withType('raw_data'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: raLatencyMeasurement AND scenario.keyword: ImportRoutes') + + elasticsearch.withQueryType('randomWalk') + + elasticsearch.withTimeField('timestamp'), + }, + egressIPLatency: { /* Max of the "value" field per metricName.keyword term, over eipStartupLatencyTotal documents of the selected $uuid; no date histogram is used. */ + query(): + elasticsearch.withAlias('') + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField('metricName.keyword') + + elasticsearch.bucketAggs.Terms.withId('2') + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize('10'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Max.withField('value') + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.Max.withType('max'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName.keyword: eipStartupLatencyTotal') + 
elasticsearch.withTimeField('timestamp'), + }, + frrk8sPodStats: { /* Sum of "value" in 10s UTC buckets. base(alias, query) additionally splits by labels.pod.keyword (terms, size 5, ordered desc by metric 1). queries(metric) returns two targets: the per-pod base() query and an 'Aggregated' query using the same filter without the pod split. */ + base(alias, query): + elasticsearch.withAlias(alias) + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.Terms.withField('labels.pod.keyword') + + elasticsearch.bucketAggs.Terms.withId('3') + + elasticsearch.bucketAggs.Terms.withType('terms') + + elasticsearch.bucketAggs.Terms.settings.withOrder('desc') + + elasticsearch.bucketAggs.Terms.settings.withOrderBy('1') + + elasticsearch.bucketAggs.Terms.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.Terms.settings.withSize('5'), + elasticsearch.bucketAggs.DateHistogram.withField('timestamp') + + elasticsearch.bucketAggs.DateHistogram.withId('2') + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('10s') + + elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone('utc') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges('0'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField('value') + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery(query) + + elasticsearch.withTimeField('timestamp'), + queries(metric): [ /* metric is spliced into the Lucene filter as the quoted metricName value */ + self.base('{{labels.pod.keyword}}', 'uuid.keyword: $uuid AND metricName: "' + metric + '" AND labels.namespace.keyword: "openshift-frr-k8s" AND labels.pod.keyword: /frr-k8s.*/'), + elasticsearch.withAlias('Aggregated') + + elasticsearch.withBucketAggs([ + elasticsearch.bucketAggs.DateHistogram.withField('timestamp') + + elasticsearch.bucketAggs.DateHistogram.withId('2') + + elasticsearch.bucketAggs.DateHistogram.withType('date_histogram') + + elasticsearch.bucketAggs.DateHistogram.settings.withInterval('10s') + + 
elasticsearch.bucketAggs.DateHistogram.settings.withMinDocCount('1') + + elasticsearch.bucketAggs.DateHistogram.settings.withTimeZone('utc') + + elasticsearch.bucketAggs.DateHistogram.settings.withTrimEdges('0'), + ]) + + elasticsearch.withMetrics([ + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withField('value') + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withId('1') + + elasticsearch.metrics.MetricAggregationWithSettings.Sum.withType('sum'), + ]) + + elasticsearch.withQuery('uuid.keyword: $uuid AND metricName: "' + metric + '" AND labels.namespace.keyword: "openshift-frr-k8s" AND labels.pod.keyword: /frr-k8s.*/') + + elasticsearch.withTimeField('timestamp'), + ], + }, schedulingThroughput: { query(): elasticsearch.withAlias('') diff --git a/assets/kube-burner-report-ocp-wrapper/variables.libsonnet b/assets/kube-burner-report-ocp-wrapper/variables.libsonnet index 033e0e8..aac8c9a 100644 --- a/assets/kube-burner-report-ocp-wrapper/variables.libsonnet +++ b/assets/kube-burner-report-ocp-wrapper/variables.libsonnet @@ -24,7 +24,7 @@ local var = g.dashboard.variable; + var.query.generalOptions.withLabel('SDN type'), job: - var.query.new('job', '{"find": "terms", "field": "jobConfig.name.keyword", "query": "platform.keyword: $platform AND sdnType.keyword: $sdn" AND NOT jobConfig.name.keyword: garbage-collection}') + var.query.new('job', '{"find": "terms", "field": "jobConfig.name.keyword", "query": "platform.keyword: $platform AND sdnType.keyword: $sdn AND NOT jobConfig.name.keyword: garbage-collection"}') /* the NOT garbage-collection clause sits inside the JSON "query" string, so it is part of the filter */ + var.query.withDatasourceFromVariable(self.Datasource) + var.query.withRefresh(1) + var.query.selectionOptions.withMulti(false) diff --git a/assets/ovn-monitoring/queries.libsonnet b/assets/ovn-monitoring/queries.libsonnet index 3459111..c565f52 100644 --- a/assets/ovn-monitoring/queries.libsonnet +++ b/assets/ovn-monitoring/queries.libsonnet @@ -35,6 +35,33 @@ local generateTimeSeriesQuery(query, legend) = [ 
generateTimeSeriesQuery('container_memory_rss{pod=~"(ovnkube-master|ovnkube-control-plane).+",namespace="openshift-ovn-kubernetes",container!~"POD|"}', '{{pod}} - {{node}}'), }, + topFrrContainerCPU: { /* topk(10) of per-(pod,node) summed irate(container_cpu_usage_seconds_total[2m])*100 for container "frr" in namespace openshift-frr-k8s */ + query(): + generateTimeSeriesQuery('topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"frr-k8s.*",namespace="openshift-frr-k8s",container="frr"}[2m])*100) by (pod,node) )', '{{pod}} - {{node}}'), + }, + topFrrContainerMem: { /* topk(10) of per-(pod,node) summed container_memory_rss for container "frr"; NOTE(review): pod matcher here is "frr-k8s-.*" while the CPU query uses "frr-k8s.*" - confirm the difference is intended */ + query(): + generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"frr-k8s-.*",namespace="openshift-frr-k8s",container="frr"}) by (pod,node))', '{{pod}} - {{node}}'), + }, + + topFrrControllerContainerCPU: { /* same CPU expression as topFrrContainerCPU, for container "controller" */ + query(): + generateTimeSeriesQuery('topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"frr-k8s.*",namespace="openshift-frr-k8s",container="controller"}[2m])*100) by (pod,node) )', '{{pod}} - {{node}}'), + }, + topFrrControllerContainerMem: { /* same memory expression as topFrrContainerMem, for container "controller" */ + query(): + generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"frr-k8s-.*",namespace="openshift-frr-k8s",container="controller"}) by (pod,node))', '{{pod}} - {{node}}'), + }, + + topFrrReloaderContainerCPU: { /* same CPU expression as topFrrContainerCPU, for container "reloader" */ + query(): + generateTimeSeriesQuery('topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"frr-k8s.*",namespace="openshift-frr-k8s",container="reloader"}[2m])*100) by (pod,node) )', '{{pod}} - {{node}}'), + }, + topFrrReloaderContainerMem: { /* same memory expression as topFrrContainerMem, for container "reloader" */ + query(): + generateTimeSeriesQuery('topk(10, sum(container_memory_rss{pod=~"frr-k8s-.*",namespace="openshift-frr-k8s",container="reloader"}) by (pod,node))', '{{pod}} - {{node}}'), + }, + topOvnControllerCPU: { query(): generateTimeSeriesQuery('topk(10, sum( irate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}[2m])*100) by (pod,node) )', '{{pod}} - {{node}}'), diff --git a/templates/CPT/kube-burner-report-ocp-wrapper.jsonnet b/templates/CPT/kube-burner-report-ocp-wrapper.jsonnet index 15f766e..7abdeca 100644 --- 
a/templates/CPT/kube-burner-report-ocp-wrapper.jsonnet +++ b/templates/CPT/kube-burner-report-ocp-wrapper.jsonnet @@ -75,6 +75,12 @@ g.dashboard.new('Kube-burner Report - OCP wrapper') panels.timeSeries.withMeanMax('Aggregated OVNKube-master containers memory', 'bytes', queries.aggregatedOVNKubeMasterStats.queries('containerMemory'), { x: 12, y: 49, w: 12, h: 14 }, null), panels.timeSeries.withMeanMax('Aggregated OVNKube-node containers CPU', 'percent', queries.aggregatedOVNKubeNodeStats.query('containerCPU-AggregatedWorkers'), { x: 0, y: 63, w: 12, h: 14 }, null), panels.timeSeries.sortByMeanCommon('Aggregated OVNKube-node containers Memory', 'bytes', queries.aggregatedOVNKubeNodeStats.query('containerMemory-AggregatedWorkers'), { x: 12, y: 63, w: 12, h: 14 }, null), + panels.stat.withMeanThresholds('Route latencies summary $latencyPercentile', 'ms', queries.bgpRouteLatenciesSummary.query(), { x: 12, y: 15, w: 12, h: 8 }), /* BGP / EgressIP / frr-k8s panels start here */ + panels.table.withLatencyTableOverrides('BGP Routes Export latency', 'ms', queries.bgpRouteExportLatency.query(), { x: 0, y: 23, w: 24, h: 10 }), /* NOTE(review): this and the next two tables share the same gridPos - confirm the overlap is intended */ + panels.table.withLatencyTableOverrides('BGP Routes Import latency', 'ms', queries.bgpRouteImportLatency.query(), { x: 0, y: 23, w: 24, h: 10 }), + panels.table.withLatencyTableOverrides('EgressIP latency', 'ms', queries.egressIPLatency.query(), { x: 0, y: 23, w: 24, h: 10 }), + panels.timeSeries.sortByMean('frr-k8s pods CPU Usage', 'percent', queries.frrk8sPodStats.queries('containerCPU'), { x: 0, y: 33, w: 12, h: 8 }, null), + panels.timeSeries.sortByMean('frr-k8s pods Memory Usage', 'bytes', queries.frrk8sPodStats.queries('containerMemory'), { x: 12, y: 33, w: 12, h: 8 }, null), ]), g.panel.row.new('etcd') + g.panel.row.withGridPos({ x: 0, y: 14, w: 24, h: 1 }) diff --git a/templates/General/ovn-dashboard.jsonnet b/templates/General/ovn-dashboard.jsonnet index 8fe8fcd..bd55bd6 100644 --- a/templates/General/ovn-dashboard.jsonnet +++ b/templates/General/ovn-dashboard.jsonnet @@ -33,6 +33,12 @@ 
g.dashboard.new('OVN-Monitoring-dashboard') panels.timeSeries.genericTimeSeriesLegendPanel('ovnkube-control-plane Memory Usage', 'bytes', queries.ovnKubeControlPlaneMem.query(), { x: 12, y: 4, w: 12, h: 10 }), panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 ovn-controller CPU Usage', 'percent', queries.topOvnControllerCPU.query(), { x: 0, y: 12, w: 12, h: 10 }), panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 ovn-controller Memory Usage', 'bytes', queries.topOvnControllerMem.query(), { x: 12, y: 12, w: 12, h: 10 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr container CPU Usage', 'percent', queries.topFrrContainerCPU.query(), { x: 0, y: 12, w: 12, h: 10 }), /* NOTE(review): the six frr panels reuse the y: 12 gridPos of the ovn-controller panels above - confirm the overlap is intended */ + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr container Memory Usage', 'bytes', queries.topFrrContainerMem.query(), { x: 12, y: 12, w: 12, h: 10 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr controller container CPU Usage', 'percent', queries.topFrrControllerContainerCPU.query(), { x: 0, y: 12, w: 12, h: 10 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr controller container Memory Usage', 'bytes', queries.topFrrControllerContainerMem.query(), { x: 12, y: 12, w: 12, h: 10 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr reloader container CPU Usage', 'percent', queries.topFrrReloaderContainerCPU.query(), { x: 0, y: 12, w: 12, h: 10 }), + panels.timeSeries.genericTimeSeriesLegendPanel('Top 10 frr reloader container Memory Usage', 'bytes', queries.topFrrReloaderContainerMem.query(), { x: 12, y: 12, w: 12, h: 10 }), ]), g.panel.row.new('Latency Monitoring') + g.panel.row.withCollapsed(true)