Skip to content
This repository was archived by the owner on Aug 7, 2025. It is now read-only.

Commit ed5a499

Browse files
committed
Add timestamps to master-worker messaging
Timestamps enable matching traced system calls in the worker to observed latencies. This is essential when finding out explanations for anomalies in response times.
1 parent eec37a5 commit ed5a499

1 file changed

Lines changed: 19 additions & 7 deletions

File tree

metrics/scaling/k8s_scale_nc.sh

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ nc_port=33101
2424
nc_req_msg=$(head -c $nc_req_msg_len /dev/zero | tr '\0' 'x')
2525
nc_percentiles=(0 1 5 25 50 75 95 99 100)
2626

27-
pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"/bin/cat\"]"
27+
pod_command="[\"nc\", \"-lk\", \"-p\", \"${nc_port}\", \"-e\", \"sh\", \"-c\", \"echo \${EPOCHREALTIME/./}; cat; echo \${EPOCHREALTIME/./}\"]"
2828

2929
# Set some default metrics env vars
3030
TEST_ARGS="runtime=${RUNTIME}"
@@ -336,7 +336,7 @@ run() {
336336

337337
if [[ ${nc_reqs_per_pod} -ge 1 ]]; then
338338
mkdir -p "$RESULT_DIR" 2>/dev/null || true
339-
local latency_raw_output="$RESULT_DIR/${TEST_NAME// /-}.t_pods_round_ip_latency_fail_rx.raw"
339+
local latency_raw_output="$RESULT_DIR/${TEST_NAME// /-}.tmaster_tworker_pods_r_ipord_ipaddr_lattot_latconn_latio_latdisconn_rx.raw"
340340
pod_ips=$(kubectl get pods --selector ${LABEL}=${LABELVALUE} -o json | jq -r '.items[].status.podIP')
341341
if [[ ${reqs} != $(echo $pod_ips | wc -w) ]]; then
342342
info "WARNING: pod IP count mismatch expected ${reqs} found $(echo $pod_ips | wc -w)"
@@ -345,18 +345,30 @@ run() {
345345
local latency_failures=0
346346
local latency_pod_array=()
347347
for latency_round in $(seq ${nc_reqs_per_pod}); do
348+
local pod_ip_ord=0
348349
for pod_ip in ${pod_ips}; do
350+
pod_ip_ord=$(( pod_ip_ord + 1 ))
349351
local latency_failed=0
350-
local latency_pod_start_time=$(date +%s%N)
351-
local latency_pod_response=$(echo ${nc_req_msg} | nc ${pod_ip} ${nc_port})
352-
local latency_pod_end_time=$(date +%s%N)
353-
local latency_response_microseconds=$(( (latency_pod_end_time - latency_pod_start_time) / 1000 ))
352+
local latency_pod_start_time=${EPOCHREALTIME/./}
353+
local latency_pod_start_response_end=$(echo ${latency_pod_start_time} ${nc_req_msg} | nc ${pod_ip} ${nc_port})
354+
# start_response_end contents: <worker_start_ts> <master_ts> <nc_req_msg> <worker_end_ts>
355+
local latency_pod_end_time=${EPOCHREALTIME/./}
356+
local latency_response_microseconds=$(( latency_pod_end_time - latency_pod_start_time ))
357+
local latency_pod_response=$(echo $latency_pod_start_response_end | awk '{print $3}')
354358
if [[ "$latency_pod_response" != "${nc_req_msg}" ]]; then
355359
latency_failures=$(( latency_failures + 1 ))
360+
local latency_pod_first_t=000000000000000
361+
local latency_pod_last_t=000000000000000
356362
latency_failed=1
363+
else
364+
local latency_pod_first_t=$(echo $latency_pod_start_response_end | awk '{print $1}')
365+
local latency_pod_last_t=$(echo $latency_pod_start_response_end | awk '{print $4}')
357366
fi
367+
local latency_pod_local_io=$(( latency_pod_last_t - latency_pod_first_t ))
368+
local latency_pod_conn=$(( latency_pod_first_t - latency_pod_start_time ))
369+
local latency_pod_disconn=$(( latency_pod_end_time - latency_pod_last_t ))
358370
latency_pod_array+=($latency_response_microseconds)
359-
echo "$latency_pod_start_time $reqs $latency_round $pod_ip $latency_response_microseconds $latency_failed $(echo $latency_pod_response | wc -c)" >> $latency_raw_output
371+
echo "$latency_pod_start_time $latency_pod_first_t $reqs $latency_round $pod_ip_ord $pod_ip $latency_response_microseconds $latency_pod_conn $latency_pod_local_io $latency_pod_disconn $(echo $latency_pod_start_response_end | wc -c)" >> $latency_raw_output
360372
done
361373
done
362374
IFS=$'\n'

0 commit comments

Comments
 (0)