Skip to content

Commit 15f8a65

Browse files
committed
agent: fix Recalc strategy with req. rate
1 parent 41419bb commit 15f8a65

4 files changed

Lines changed: 85 additions & 121 deletions

File tree

dfaasagent/agent/loadbalancer/dbglogging.go

Lines changed: 2 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -206,31 +206,6 @@ func debugPromRAMusagePerFunction(timeSpan time.Duration, data map[string]float6
206206
logger.Debug(b.String())
207207
}
208208

209-
func debugHAProxyUserRates(data map[string]float64) {
210-
if !logging.GetDebugMode() {
211-
return
212-
}
213-
214-
keys := make([]string, 0, len(data))
215-
for k := range data {
216-
keys = append(keys, k)
217-
}
218-
219-
sort.Strings(keys)
220-
221-
var b strings.Builder
222-
b.WriteString("Invocation rates of requests from users only (calculated from HAProxy stick-table):")
223-
if len(keys) == 0 {
224-
b.WriteString("empty")
225-
} else {
226-
for _, funcName := range keys {
227-
b.WriteString("\n")
228-
b.WriteString(fmt.Sprintf(" - FUNC %s: %.2f req/s\n", funcName, data[funcName]))
229-
}
230-
}
231-
logging.Logger().Debug(b.String())
232-
}
233-
234209
func debugFuncs(data map[string]uint) {
235210
if !logging.GetDebugMode() {
236211
return
@@ -248,7 +223,7 @@ func debugFuncs(data map[string]uint) {
248223
if len(keys) > 0 {
249224
b.WriteString(" (limit req/s) ")
250225
for _, funcName := range keys {
251-
b.WriteString(fmt.Sprintf("%q (%s) ", funcName, data[funcName]))
226+
b.WriteString(fmt.Sprintf("%q (%d) ", funcName, data[funcName]))
252227
}
253228
}
254229
logging.Logger().Debug(b.String())
@@ -363,7 +338,7 @@ func debugStickTable(stName string, stContent map[string]*hasock.STEntry) {
363338
b.WriteString("\n")
364339
for _, key := range clients {
365340
stEntry := stContent[key]
366-
b.WriteString(fmt.Sprintf(" - key=%s: cnt=%d rate=%d\n", key, stEntry.HTTPReqCnt, stEntry.HTTPReqRate))
341+
b.WriteString(fmt.Sprintf(" - key=%s cnt=%d rate=%d\n", key, stEntry.HTTPReqCnt, stEntry.HTTPReqRate))
367342
}
368343
}
369344
logging.Logger().Debug(b.String())

dfaasagent/agent/loadbalancer/haproxycfgrecalc.tmpl

Lines changed: 68 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -21,44 +21,34 @@ defaults
2121
timeout connect 60s
2222
timeout server 60s
2323

24-
{{/* Warning: The order of plain comments ("#") in the HAProxy config is not
25-
preserved. From this point on, the configuration is managed by the DFaaS agent.
26-
*/}}
27-
28-
{{/* #################### STICK-TABLES #################### */ -}}
29-
3024
{{range $funcName, $func := .Functions -}}
31-
{{/* Stick-Table for specific function invocations:
32-
- not denied
33-
- from users only
34-
- only one row, for all clients
35-
The size is set to 10, but setting it to 1 should be already enough. */ -}}
25+
# Stick table for invocations of function {{$funcName}} from users, not from
26+
# other DFaaS nodes. We just use one row for all clients. Denied requests are
27+
# not counted here.
28+
#
29+
# Among all stick tables, this is the only one used by the DFaaS agent to
30+
# calculate forwarding weights.
3631
backend st_users_func_{{$funcName}}
37-
stick-table type integer size 10 expire {{$.StrRecalc}} store http_req_cnt,http_req_rate(1s)
38-
{{/* Stick-Table for specific function invocations:
39-
- not denied
40-
- to local OpenFaaS instance only
41-
- only one row, for all clients
42-
The size is set to 10, but setting it to 1 should be already enough. */ -}}
32+
stick-table type integer size 10 expire {{$.SecsRecalc}}s store http_req_cnt,http_req_rate(1s)
33+
34+
# Stick table for invocations of function {{$funcName}} from all sources (users
35+
# and other DFaaS nodes). Same structure as st_users_func_{{$funcName}}.
4336
backend st_local_func_{{$funcName}}
44-
stick-table type integer size 10 expire {{$.StrRecalc}} store http_req_cnt,http_req_rate(1s)
45-
46-
{{/* [NEW] Stick-Table for counting request from others agents:
47-
- One table for each tuple (node, function).
48-
- Key is destination port (80).
49-
- Counting request num and rate.
50-
- RecalTime for expiring why LimitIn are updated every "RecalPeriod" time.
51-
- Count number of requests and check rate for periods of 1 sec.
52-
*/ -}}
37+
stick-table type integer size 10 expire {{$.SecsRecalc}}s store http_req_cnt,http_req_rate(1s)
38+
5339
{{range $nodeID, $_ := $func.LimitsIn -}}
40+
# Stick table for invocation of function {{$funcName}} from the DFaaS node with
41+
# id {{$nodeID}}.
42+
#
43+
# We use destination port (80) as key and we count requests num and rate.
44+
# Entries expire after "RecalPeriod" because LimitIn is updated at that interval. Count
45+
# number of requests and check rate for periods of 1 sec.
5446
backend st_other_node_{{$funcName}}_{{$nodeID}}
55-
stick-table type integer size 10 expire {{$.StrRecalc}} store http_req_cnt,http_req_rate(1s)
47+
stick-table type integer size 10 expire {{$.SecsRecalc}}s store http_req_cnt,http_req_rate(1s)
5648
{{end}}
5749

5850
{{end}}
5951

60-
{{/* #################### FRONTEND #################### */ -}}
61-
6252
frontend fe_main
6353
bind *:80
6454

@@ -179,100 +169,93 @@ frontend fe_main
179169

180170
{{end}}
181171

172+
# Note: for all backends we enable HTTP health checks and HTTP connection closing
173+
# on the server side. This provides the lowest latency on the client side (slow
174+
# network) and the fastest session reuse on the server side to save server
175+
# resources.
176+
182177
# Backend for simple 200 OK responses.
183178
backend be_ok
184179
http-request return status 200 content-type "text/plain" string "This is a DFaaS node. Call a function with /function/<funcname>\n"
185180

186181
# Backend for health check.
187182
backend be_healthz
188-
# Perform HTTP health checks.
189183
option httpchk GET /healthz
190-
191-
# Enable HTTP connection closing on the server side. This provides the
192-
# lowest latency on the client side (slow network) and the fastest session
193-
# reuse on the server side to save server resources.
194184
option http-server-close
195-
196185
server healthz dfaas-agent:80 check
197186

198-
{{/* #################### BACKEND FOR MANAGING OPENFAAS FUNCTIONS #################### */ -}}
199-
187+
# Backend for OpenFaaS Administrative API (/system/functions)
200188
backend be_system_funcs
201-
{{/* Perform HTTP health checks */ -}}
202189
option httpchk GET /
203-
{{/* Enable HTTP connection closing on the server side. This provides the lowest latency
204-
on the client side (slow network) and the fastest session reuse on the server side
205-
to save server resources */ -}}
206190
option http-server-close
207191
server system_funcs {{.OpenFaaSHost}}:{{.OpenFaaSPort}} check
208192

209-
{{/* #################### BACKEND FOR SELF OPENFAAS INSTANCE #################### */ -}}
210-
193+
# Backend for the local OpenFaaS instance. Requests (functions) arriving here
194+
# will be handled locally.
195+
#
196+
# Requests may originate directly from a client or from another DFaaS node. We
197+
# differentiate these cases using the DFaaS-Node-ID header.
198+
#
199+
# When the OpenFaaS instance responds, we add an X-Server header containing the
200+
# node's IP address and a DFaaS-Node-ID header with the agent's ID (libp2p ID).
201+
# This allows clients to identify which DFaaS node processed the request.
211202
backend be_myself
212-
{{/* Perform HTTP health checks (with the OPTIONS method by default) */ -}}
213203
option httpchk GET /
214-
{{/* Enable insertion of the X-Forwarded-For header to requests sent to servers */ -}}
215-
option forwardfor
216-
{{/* Enable HTTP connection closing on the server side. This provides the lowest latency
217-
on the client side (slow network) and the fastest session reuse on the server side
218-
to save server resources */ -}}
219204
option http-server-close
220205

221-
{{/* Add DFaaS-Node-ID header (in any case) */ -}}
206+
# Automatically add the X-Forwarded-For header to inform the local
207+
# OpenFaaS instance of the original client's IP address.
208+
option forwardfor
209+
210+
# Always include the DFaaS-Node-ID header containing the node's ID.
222211
http-request add-header DFaaS-Node-ID {{.MyNodeID}}
223212

224-
{{/* [NEW] Replicated ACLs for visibility. */}}
225-
acl has_nodeid_hdr var(req.hdrcnt_nodeid),add(0) gt 0 {{- /* The ",add(0)" is needed here, for some reason (maybe haproxy bug? like int/str conversion or something... if you remove it there will be problems!!!) */}}
213+
# Replicate here some ACLs from the frontend. They will be used to track
214+
# statistics.
215+
acl has_nodeid_hdr var(req.hdrcnt_nodeid),add(0) gt 0
226216
{{range $nodeID, $_ := $.Nodes}}
227217
acl is_node_{{$nodeID}} req.hdr(DFaaS-Node-ID) -m str {{$nodeID}}
228218
{{end}}
229219

220+
# For each function, record statistics in a dedicated stick table named
221+
# st_local_func_<funcname>. Additionally, at the neighbor node level, track
222+
# each function/node pair in a dedicated stick table named
223+
# st_other_node_<funcname>_<nodeid>.
230224
{{range $funcName, $func := .Functions -}}
231225
acl is_func_{{$funcName}} path_beg /function/{{$funcName}}
232-
{{/* Track all clients (this works because dst_port is 80 for every possible request) */ -}}
233-
http-request track-sc2 dst_port table st_local_func_{{$funcName}} if is_func_{{$funcName}} {{- /* Using Sticky-Counter #2 */}}
234-
235-
{{/* [NEW] ########### TRACKING WITH STICK TABLES FWD MESSAGED ########## */ -}}
236-
{{/* How it works?
237-
- If a message has a header "DFaaSNode..." it comes from another DFaaS node, indeed the message
238-
has been forwarded.
239-
- If a message has been forwarded from another node, increment row in a specific stick table.
240-
If the above condition are satisfied increment specific table.
241-
*/ -}}
226+
http-request track-sc2 dst_port table st_local_func_{{$funcName}} if is_func_{{$funcName}}
227+
242228
{{range $nodeID, $_ := $func.LimitsIn -}}
243-
http-request track-sc0 dst_port table st_other_node_{{$funcName}}_{{$nodeID}} if is_func_{{$funcName}} has_nodeid_hdr is_node_{{$nodeID}} {{/*is_hdr_nodeID_known*/ -}} {{- /* Using Sticky-Counter #0 */}}
229+
http-request track-sc0 dst_port table st_other_node_{{$funcName}}_{{$nodeID}} if is_func_{{$funcName}} has_nodeid_hdr is_node_{{$nodeID}}
244230
{{end}}
245231

246232
{{end}}
247233

248-
# Add X-Server (IP:port) and DFaaS-Node-ID (libp2p's ID) headers to response
249-
# to let clients know which DFaaS server served the request.
250234
http-response set-header X-Server %s
251235
http-response set-header DFaaS-Node-ID {{$.MyNodeID}}
252236

253237
server {{$.NodeIP}} {{.OpenFaaSHost}}:{{.OpenFaaSPort}} check
254238

255-
{{/* #################### BACKEND FOR OTHER NODES' HAPROXIES #################### */ -}}
256-
257239
{{range $funcName, $func := .Functions -}}
240+
# Backend responsible for forwarding incoming user requests to other DFaaS
241+
# nodes. This backend is specific to the {{$funcName}} function.
242+
#
243+
# Forwarding follows a round-robin method with custom weights.
244+
#
245+
# As with the be_myself backend, the reply will have X-Server with the DFaaS
246+
# agent IP address that served the requests and its node ID in DFaaS-Node-ID
247+
# header.
248+
#
249+
# Neighbor nodes with a weight of 0 will be excluded.
258250
backend be_others_func_{{$funcName}}
259-
{{/* Enable load-balancing using custom weights */ -}}
260-
balance roundrobin
261-
262-
{{/* Perform HTTP health checks (with the OPTIONS method by default) */ -}}
263251
option httpchk GET /
264-
{{/* Enable insertion of the X-Forwarded-For header to requests sent to servers */ -}}
265252
option forwardfor
266-
{{/* Enable HTTP connection closing on the server side. This provides the lowest latency
267-
on the client side (slow network) and the fastest session reuse on the server side
268-
to save server resources */ -}}
269253
option http-server-close
270254

271-
{{/* Add DFaaS-Node-ID header (in any case) */ -}}
272-
http-request add-header DFaaS-Node-ID {{$.MyNodeID}}
255+
balance roundrobin
273256

274-
{{/* Add X-Server header to response to know which server served the request */ -}}
275257
http-response set-header X-Server %s
258+
http-request add-header DFaaS-Node-ID {{$.MyNodeID}}
276259

277260
{{range $nodeID, $weight := $func.Weights -}}
278261
{{if (gt $weight 0) -}}
@@ -281,11 +264,14 @@ backend be_others_func_{{$funcName}}
281264
{{end}}
282265
{{end}}
283266

284-
{{/* [NEW] #################### BACKEND FOR DENY EXCEEDING LIMIT IN REQUESTS #################### */ -}}
285-
{{/* Note: It could be divided in alla specific be for nodes and functions and returns a specific error message */ -}}
286-
{{/* 503 (Service Unavailable) status code can be used.
287-
At the moment 429 has been used for test purpose. */}}
288267
# Backend for deny exceeding limit in requests.
268+
#
269+
# As with the be_myself backend, the reply will have X-Server with the local
270+
# DFaaS node IP and its node ID in DFaaS-Node-ID.
271+
#
272+
# TODO: It could be divided into specific backends for nodes and functions, each of which
273+
# returns a specific error message.
289274
backend be_limitInExceeds
275+
http-response set-header X-Server {{$.NodeIP}}
290276
http-response set-header DFaaS-Node-ID {{$.MyNodeID}}
291277
http-request deny deny_status 429

dfaasagent/agent/loadbalancer/recalcstrategy.go

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -164,10 +164,7 @@ func (strategy *RecalcStrategy) recalcStep1() error {
164164
// Purpose: Track requests forwarded from other DFaaS nodes for each
165165
// function, per node.
166166
//
167-
// These stick tables are used for rate limiting, forwarding logic, LimitIn
168-
// enforcement and tracking.
169-
//
170-
// Only st_users_func_<funcName> data are used to calculate weights!
167+
// Warning: only st_users_func_<funcName> data are used to calculate weights!
171168

172169
// Get stats from HAProxy stick tables (st_users_func_<funcName>).
173170
strategy.userRates = map[string]float64{}
@@ -182,17 +179,23 @@ func (strategy *RecalcStrategy) recalcStep1() error {
182179
}
183180

184181
for _, stEntry := range stContent {
185-
// There should be only one line, with key "80", which is the port
186-
// of the HAProxy frontend
182+
// This stick table contains a single key, "80", which tracks both
183+
// the number of requests within the given time window and the rate
184+
// per second during the recalculation period. We only use the rate.
185+
//
186+
// Note: Do not use http_req_cnt, as HAProxy restarts every
187+
// recalculation period, causing all counters to reset.
187188
//
188-
// Note: the whole formula is multiplied by two at the end because
189-
// we know we restarted HAProxy at the end of recalcStep2
190-
strategy.userRates[funcName] = float64(stEntry.HTTPReqCnt) / float64(_config.RecalcPeriod/time.Second) * 2
189+
// FIXME: The http_req_rate value is taken from the previous
190+
// 1-second period, not averaged over the entire recalculation
191+
// period. This is a known limitation of the current strategy.
192+
strategy.userRates[funcName] = float64(stEntry.HTTPReqRate)
191193
}
192194

193195
debugStickTable(stName, stContent)
194196
}
195-
debugHAProxyUserRates(strategy.userRates)
197+
198+
/* Just for debugging purposes, not really used to calculate weights.
196199
197200
// Get stats from HAProxy stick tables (st_local_func_<funcName>).
198201
for funcName := range strategy.funcs {
@@ -222,6 +225,8 @@ func (strategy *RecalcStrategy) recalcStep1() error {
222225
}
223226
}
224227
228+
*/
229+
225230
// Set overload/underload state for each function.
226231
strategy.overloads = map[string]bool{}
227232
for funcName, maxRate := range strategy.funcs {

dfaasagent/agent/nodestbl/nodestblrecalc.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,6 @@ type TableRecalc struct {
5454

5555
// InitTable initializes a TableRecalc's fields if they are empty
5656
func (tbl *TableRecalc) initTable() {
57-
logger := logging.Logger()
58-
5957
if tbl.entries == nil {
6058
tbl.entries = map[string]*EntryRecalc{}
6159
}

0 commit comments

Comments
 (0)