Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.

Commit dc9f9a7

Browse files
committed
scaling: add used memory to PDF report table
Currently the PDF report shows how much free memory is consumed by scaling up. This number is not comparable between cluster nodes, or even between test runs, because RAM used for OS caches/buffers/slab is counted as consumed. As a consequence, the consumed free memory depends heavily on the initial memory conditions of a node rather than on the memory used by k8s and the pods. This patch adds "memory used" to the report in order to have a less node-dependent and more reproducible memory figure. Using /proc/meminfo "MemAvailable" was also tried out, but it varies almost as much as the "MemFree" value that is currently reported. Signed-off-by: Antti Kervinen <antti.kervinen@intel.com>
1 parent 6298cf2 commit dc9f9a7

2 files changed

Lines changed: 70 additions & 19 deletions

File tree

metrics/report/report_dockerfile/collectd_scaling.R

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ testnames=c(
2020
podbootdata=c() # Track per-launch data
2121
cpuidledata=c() # Track cpu idle data per nodes
2222
memfreedata=c() # Track mem free data for nodes
23+
memuseddata=c() # Track mem used data for nodes
2324
inodefreedata=c() # Track inode free data for nodes
2425
ifpacketdata=c() # Track interface packet data for nodes
2526
ifoctetdata=c() # Track interface octets data for nodes
@@ -104,11 +105,13 @@ for (currentdir in resultdirs) {
104105
# Get a list of all the nodes from the schedule data list
105106
nodes=names(node_sched_data)
106107

107-
memtotal=0
108+
memfreedelta_total=0
109+
memuseddelta_total=0
108110
cputotal=0
109111
inodetotal=0
110112
cpu_idle_data=c()
111113
mem_free_data=c()
114+
mem_used_data=c()
112115
inode_free_data=c()
113116
interface_packets_data=c()
114117
interface_octets_data=c()
@@ -127,7 +130,7 @@ for (currentdir in resultdirs) {
127130
localhost_dir=paste(node_dir, "localhost", sep="/")
128131

129132
# grab memory data
130-
memory_dir=paste(localhost_dir, "memory", sep="/")
133+
memory_dir=paste(localhost_dir, "memory", sep="/")
131134
# filename has date on the end, so look for the right file name
132135
freemem_pattern='^memory\\-free'
133136
files=list.files(memory_dir, pattern=freemem_pattern)
@@ -136,13 +139,26 @@ for (currentdir in resultdirs) {
136139
mem_free_csv=paste(memory_dir, file, sep="/")
137140
node_mem_free_data=read.csv(mem_free_csv, header=TRUE, sep=",")
138141
node_mem_free_data=cbind(node_mem_free_data,
139-
node=rep(n, length(node_mem_free_data$value)))
142+
node=rep(n, length(node_mem_free_data$value)))
140143
node_mem_free_data=cbind(node_mem_free_data,
141-
testname=rep(testname, length(node_mem_free_data$value)))
144+
testname=rep(testname, length(node_mem_free_data$value)))
142145
node_mem_free_data$s_offset = node_mem_free_data$epoch - local_bootdata[1,]$epoch
143-
144146
mem_free_data=rbind(mem_free_data, node_mem_free_data)
145147
}
148+
# filename has date on the end, so look for the right file name
149+
usedmem_pattern='^memory\\-used'
150+
files=list.files(memory_dir, pattern=usedmem_pattern)
151+
# collectd csv plugin starts a new file for each day of data collected
152+
for(file in files) {
153+
mem_used_csv=paste(memory_dir, file, sep="/")
154+
node_mem_used_data=read.csv(mem_used_csv, header=TRUE, sep=",")
155+
node_mem_used_data=cbind(node_mem_used_data,
156+
node=rep(n, length(node_mem_used_data$value)))
157+
node_mem_used_data=cbind(node_mem_used_data,
158+
testname=rep(testname, length(node_mem_used_data$value)))
159+
node_mem_used_data$s_offset = node_mem_used_data$epoch - local_bootdata[1,]$epoch
160+
mem_used_data=rbind(mem_used_data, node_mem_used_data)
161+
}
146162

147163
# grab CPU data
148164
cpu_dir=paste(localhost_dir, "aggregation-cpu-average", sep="/")
@@ -273,6 +289,8 @@ for (currentdir in resultdirs) {
273289
end_time=local_bootdata$epoch[length(local_bootdata$epoch)]
274290

275291
# get value closest to first pod launch
292+
# memory-free and memory-used data share exactly the same timestamps,
293+
# so the same start/end indexes work for both.
276294
mem_start_index=Position(function(x) x > start_time, node_mem_free_data$epoch)
277295
# take the reading previous to the index, as long as the index is valid
278296
if (is.na(mem_start_index)) {
@@ -281,6 +299,7 @@ for (currentdir in resultdirs) {
281299
mem_start_index = mem_start_index - 1
282300
}
283301
max_free_mem=node_mem_free_data$value[mem_start_index]
302+
min_used_mem=node_mem_used_data$value[mem_start_index]
284303

285304
# get value closest to last pod launch
286305
mem_end_index=Position(function(x) x > end_time, node_mem_free_data$epoch)
@@ -291,8 +310,10 @@ for (currentdir in resultdirs) {
291310
mem_end_index = mem_end_index - 1
292311
}
293312
min_free_mem=node_mem_free_data$value[mem_end_index]
313+
max_used_mem=node_mem_used_data$value[mem_end_index]
294314

295-
memtotal = memtotal + (max_free_mem - min_free_mem)
315+
memfreedelta_total = memfreedelta_total + (max_free_mem - min_free_mem)
316+
memuseddelta_total = memuseddelta_total + (max_used_mem - min_used_mem)
296317

297318
# get value closest to first pod launch
298319
cpu_start_index=Position(function(x) x > start_time, node_cpu_idle_data$epoch)
@@ -342,17 +363,21 @@ for (currentdir in resultdirs) {
342363
num_pods = local_bootdata$n_pods[length(local_bootdata$n_pods)]
343364

344365
# We get data in bytes, but want the graphs in gigabytes (bytes / 1024^3).
345-
memtotal = memtotal / (1024*1024*1024)
346-
gb_per_pod = memtotal/num_pods
347-
pod_per_gb = 1/gb_per_pod
366+
memfreedelta_total = memfreedelta_total / (1024*1024*1024)
367+
memuseddelta_total = memuseddelta_total / (1024*1024*1024)
368+
gb_nonfree_per_pod = memfreedelta_total/num_pods
369+
gb_used_per_pod = memuseddelta_total/num_pods
370+
pod_per_nonfree_gb = 1/gb_nonfree_per_pod
371+
pod_per_used_gb = 1/gb_used_per_pod
348372

349373
# Memory usage stats.
350374
local_mems = c(
351375
"Test"=testname,
352376
"n"=num_pods,
353-
"Tot_Gb"=round(memtotal, 3),
354-
"avg_Gb"=round(gb_per_pod, 4),
355-
"n_per_Gb"=round(pod_per_gb, 2)
377+
"Free_GB_delta"=round(memfreedelta_total, 3),
378+
"Used_GB_delta"=round(memuseddelta_total, 3),
379+
"n_per_nonfree_GB"=round(pod_per_nonfree_gb, 2),
380+
"n_per_used_GB"=round(pod_per_used_gb, 2)
356381
)
357382
memstats=rbind(memstats, local_mems)
358383

@@ -393,6 +418,7 @@ for (currentdir in resultdirs) {
393418
podbootdata=rbind(podbootdata, local_bootdata, make.row.names=FALSE)
394419
cpuidledata=rbind(cpuidledata, cpu_idle_data)
395420
memfreedata=rbind(memfreedata, mem_free_data)
421+
memuseddata=rbind(memuseddata, mem_used_data)
396422
inodefreedata=rbind(inodefreedata, inode_free_data)
397423
ifpacketdata=rbind(ifpacketdata, interface_packets_data)
398424
ifoctetdata=rbind(ifoctetdata, interface_octets_data)
@@ -404,6 +430,7 @@ for (currentdir in resultdirs) {
404430
# It's nice to show the graphs in Gb, at least for any decent sized test
405431
# run, so make a new column with that pre-divided data in it for us to use.
406432
memfreedata$mem_free_gb = memfreedata$value/(1024*1024*1024)
433+
memuseddata$mem_used_gb = memuseddata$value/(1024*1024*1024)
407434
# And show the boot times in seconds, not ms
408435
podbootdata$launch_time_s = podbootdata$launch_time/1000.0
409436

@@ -414,8 +441,9 @@ mem_stats_plot = suppressWarnings(ggtexttable(data.frame(memstats),
414441
rows=NULL
415442
))
416443

417-
mem_scale = (max(memfreedata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
418-
mem_line_plot <- ggplot() +
444+
mem_free_scale = (max(memfreedata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
445+
mem_used_scale = (max(memuseddata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
446+
mem_free_line_plot <- ggplot() +
419447
geom_line(data=memfreedata,
420448
aes(s_offset, mem_free_gb, colour=interaction(testname, node),
421449
group=interaction(testname, node)),
@@ -425,21 +453,44 @@ mem_line_plot <- ggplot() +
425453
group=interaction(testname, node)),
426454
alpha=0.5, size=0.5) +
427455
geom_line(data=podbootdata,
428-
aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname),
456+
aes(x=s_offset, y=n_pods*mem_free_scale, colour=interaction(testname,"pod count"), group=testname),
429457
alpha=0.2) +
430458
geom_point(data=podbootdata,
431-
aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname),
459+
aes(x=s_offset, y=n_pods*mem_free_scale, colour=interaction(testname,"pod count"), group=testname),
432460
alpha=0.3, size=0.5) +
433461
labs(colour="") +
434462
xlab("seconds") +
435463
ylab("System Avail (Gb)") +
436-
scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_scale, name="pods")) +
464+
scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_free_scale, name="pods")) +
437465
ggtitle("System Memory free") +
438466
theme(legend.position="bottom") +
439467
theme(axis.text.x=element_text(angle=90))
440468

469+
mem_used_line_plot <- ggplot() +
470+
geom_line(data=memuseddata,
471+
aes(s_offset, mem_used_gb, colour=interaction(testname, node),
472+
group=interaction(testname, node)),
473+
alpha=0.3) +
474+
geom_point(data=memuseddata,
475+
aes(s_offset, mem_used_gb, colour=interaction(testname, node),
476+
group=interaction(testname, node)),
477+
alpha=0.5, size=0.5) +
478+
geom_line(data=podbootdata,
479+
aes(x=s_offset, y=n_pods*mem_used_scale, colour=interaction(testname,"pod count"), group=testname),
480+
alpha=0.2) +
481+
geom_point(data=podbootdata,
482+
aes(x=s_offset, y=n_pods*mem_used_scale, colour=interaction(testname,"pod count"), group=testname),
483+
alpha=0.3, size=0.5) +
484+
labs(colour="") +
485+
xlab("seconds") +
486+
ylab("System Used (Gb)") +
487+
scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_used_scale, name="pods")) +
488+
ggtitle("System Memory used, not counting Cached, Buffered and SLAB") +
489+
theme(axis.text.x=element_text(angle=90))
490+
441491
page1 = grid.arrange(
442-
mem_line_plot,
492+
mem_free_line_plot,
493+
mem_used_line_plot,
443494
mem_stats_plot,
444495
ncol=1
445496
)

metrics/report/report_dockerfile/metrics_report.Rmd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ source('parallel.R')
4040

4141
# Runtime scaling rapid
4242
This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s_scale_fast.sh)
43-
uses collectd to asynchronously measure CPU idle %, free memory, pod boot time, free inodes,
43+
uses collectd to asynchronously measure CPU idle %, free and used memory, pod boot time, free inodes,
4444
and interface stats as it launches more and more idle `busybox` pods on a Kubernetes cluster.
4545

4646
> Note: CPU % is measured for the system as a whole - 100% represents *all* CPUs on the node.

0 commit comments

Comments
 (0)