Skip to content

Commit 5c5ab1a

Browse files
committed
Improved cold start probability estimation
1 parent 3982a80 commit 5c5ab1a

3 files changed

Lines changed: 28 additions & 26 deletions

File tree

internal/metrics/metrics.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ var (
6161
)
6262

6363
type RetrievedMetrics struct {
64+
EdgeColdStartProbability map[string]map[string]float64
6465
RemoteColdStartProbability map[string]float64
6566
AvgRemoteExecutionTime map[string]float64
6667
AvgEdgeExecutionTime map[string]map[string]float64
@@ -72,6 +73,8 @@ type RetrievedMetrics struct {
7273

7374
func (r RetrievedMetrics) String() string {
7475
s := ""
76+
s += "EDGE COLD START PROB:\n"
77+
s += fmt.Sprintf(" %v\n\n", r.EdgeColdStartProbability)
7578
s += "REMOTE COLD START PROB:\n"
7679
s += fmt.Sprintf(" %v\n\n", r.RemoteColdStartProbability)
7780
s += "REMOTE EXEC TIMES:\n"

internal/metrics/retriever.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,14 @@ func MetricsRetriever() {
203203
}
204204
retrievedMetrics.AvgEdgeInitTime = avgInitTimeAllNodes
205205

206+
query = fmt.Sprintf("%s{node=~\"\\\\(%s\\\\).*\"}/%s{node=~\"\\\\(%s\\\\).*\"}",
207+
COLD_STARTS, localArea, COMPLETIONS, localArea)
208+
coldStartProbPerFunction, err := retrieveByFunctionAndNode(query, api, ctx)
209+
if err != nil {
210+
log.Printf("Error in retrieveByFunction: %v", err)
211+
}
212+
retrievedMetrics.EdgeColdStartProbability = coldStartProbPerFunction
213+
206214
// CLOUD
207215
cloudArea := config.GetString(config.REGISTRY_REMOTE_AREA, "")
208216
if cloudArea != "" {

internal/workflow/remote_offloading_policy.go

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -285,8 +285,6 @@ func prepareParameters(r *Request, p *Progress) *remotePolicyParams {
285285
params.DSBandwidth[CLOUD] = config.GetFloat(config.WORKFLOW_OFFLOADING_POLICY_CLOUD_TO_DATA_STORE_BANDWIDTH, dsBandwidth*10)
286286
}
287287

288-
localWarmStatus := node.WarmStatus()
289-
290288
// Execution Times
291289
retrievedMetrics := metrics.GetMetrics()
292290
for tid, task := range r.W.Tasks {
@@ -312,38 +310,32 @@ func prepareParameters(r *Request, p *Progress) *remotePolicyParams {
312310
params.ExecTime[tupleKey(string(tid), n)] = 0.01 // no data: just guessing
313311
}
314312
}
313+
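// Cold start probability of this function on this edge node (falls back to 1.0 when no valid data is available)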
314+
coldStartProb := 1.0
315+
nodeProbs, found := retrievedMetrics.EdgeColdStartProbability[nId.String()]
316+
if !found {
317+
coldStartProb = 1.0
318+
} else {
319+
coldStartProb, found = nodeProbs[f.Name]
320+
if !found || math.IsNaN(coldStartProb) || math.IsInf(coldStartProb, 1) {
321+
coldStartProb = 1.0
322+
}
323+
}
315324

316325
// Init Times
317326
initTimes, found := retrievedMetrics.AvgEdgeInitTime[nId.String()]
318327
if !found {
319328
// Unknown node
320-
params.InitTime[tupleKey(string(tid), n)] = 0.01 // no data: just guessing
329+
params.InitTime[tupleKey(string(tid), n)] = 0.01 * coldStartProb // no data: just guessing
321330
continue
322331
}
323-
324-
coldStart := false
325-
if n == LOCAL {
326-
warmCount, ok := localWarmStatus[f.Name]
327-
if !ok || warmCount < 1 {
328-
coldStart = true
329-
}
332+
t, found := initTimes[f.Name]
333+
if found {
334+
params.InitTime[tupleKey(string(tid), n)] = t * coldStartProb
330335
} else {
331-
warmCount, ok := nearbyServers[n].AvailableWarmContainers[f.Name]
332-
if !ok || warmCount < 1 {
333-
coldStart = true
334-
}
336+
params.InitTime[tupleKey(string(tid), n)] = 0.01 * coldStartProb // no data about init time: just guessing
335337
}
336338

337-
if !coldStart {
338-
params.InitTime[tupleKey(string(tid), n)] = 0
339-
} else {
340-
t, found := initTimes[f.Name]
341-
if found {
342-
params.InitTime[tupleKey(string(tid), n)] = t
343-
} else {
344-
params.InitTime[tupleKey(string(tid), n)] = 0.01 // no data: just guessing
345-
}
346-
}
347339
}
348340

349341
if len(params.CloudNodes) > 0 {
@@ -358,9 +350,9 @@ func prepareParameters(r *Request, p *Progress) *remotePolicyParams {
358350
// Init Time
359351
coldStartProb := retrievedMetrics.RemoteColdStartProbability[f.Name]
360352
if math.IsNaN(coldStartProb) || math.IsInf(coldStartProb, 1) {
353+
log.Printf("Cloud cold start probability is invalid for %s, setting 1", f.Name)
361354
coldStartProb = 1.0
362355
} else if coldStartProb < 0.0 {
363-
log.Printf("Cold start probability is negative: %f", coldStartProb)
364356
coldStartProb = 0.0
365357
}
366358
t, found = retrievedMetrics.AvgRemoteInitTime[f.Name]
@@ -476,6 +468,5 @@ func computeDecisionFromPlacement(placement taskPlacement, p *Progress, r *Reque
476468
}
477469

478470
decision := OffloadingDecision{true, remoteNodeReg.APIUrl(), plan}
479-
log.Printf("Decision: %v\n", decision)
480471
return decision
481472
}

0 commit comments

Comments
 (0)