forked from kubernetes-sigs/gateway-api-inference-extension
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplugin.go
More file actions
86 lines (70 loc) · 2.69 KB
/
plugin.go
File metadata and controls
86 lines (70 loc) · 2.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
package dynamo_cleanup
import (
"context"
"encoding/json"
log "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/plugins"
rc "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/requestcontrol"
schedtypes "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/types"
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
dynamo "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins/dynamo_kv_scorer"
)
// Identifiers used to build the plugin's default TypedName.
const (
	// PluginType is the type identifier stored in the plugin's TypedName.
	PluginType = "dynamo-cleanup"
	// PluginName is the default instance name given to a new plugin; it
	// stays in effect until replaced through WithName.
	PluginName = "dynamo-cleanup"
)
// DynamoCleanupPlugin is a PostResponse plugin that cleans up router state
// when a request completes. Its PostResponse method calls
// dynamo.CallFreeRequest with the request ID to release the bookkeeping
// resources associated with the request.
//
// NOTE(review): the earlier comment named dynamo_router_free_request as the
// callee; presumably CallFreeRequest wraps it — confirm in dynamo_kv_scorer.
type DynamoCleanupPlugin struct {
	// typedName holds the plugin's type and instance name, as returned by
	// TypedName and overridden (name only) by WithName.
	typedName plugins.TypedName
}
// Compile-time checks that *DynamoCleanupPlugin satisfies the interfaces it
// is registered under.
var (
	_ plugins.Plugin  = (*DynamoCleanupPlugin)(nil)
	_ rc.PostResponse = (*DynamoCleanupPlugin)(nil)
)
// NewDynamoCleanupPlugin returns a DynamoCleanupPlugin whose TypedName is
// initialised to the package defaults (PluginType / PluginName).
func NewDynamoCleanupPlugin() *DynamoCleanupPlugin {
	defaults := plugins.TypedName{
		Type: PluginType,
		Name: PluginName,
	}
	return &DynamoCleanupPlugin{typedName: defaults}
}
// WithName replaces the plugin's instance name, leaving its type untouched.
// The receiver is modified in place and returned so the call can be chained.
func (p *DynamoCleanupPlugin) WithName(name string) *DynamoCleanupPlugin {
	identity := &p.typedName
	identity.Name = name
	return p
}
// DynamoCleanupPluginFactory builds a DynamoCleanupPlugin carrying the given
// instance name. The raw configuration and the plugin handle are accepted to
// match the factory signature but are not consulted. The returned error is
// always nil.
func DynamoCleanupPluginFactory(name string, _ json.RawMessage, _ plugins.Handle) (plugins.Plugin, error) {
	plugin := NewDynamoCleanupPlugin()
	plugin.WithName(name)
	return plugin, nil
}
// TypedName returns the plugin's type and name as stored on the receiver:
// the defaults set by NewDynamoCleanupPlugin, with the name possibly replaced
// by WithName.
func (p *DynamoCleanupPlugin) TypedName() plugins.TypedName {
	return p.typedName
}
// PostResponse is called after a response is received from the model server
// and releases the router bookkeeping held for the completed request.
//
// Cleanup is skipped (with a DEBUG log line) when request is nil or carries
// an empty RequestId. Otherwise the ID is handed to dynamo.CallFreeRequest;
// a failure is logged as an error and a success is logged at VERBOSE level.
// The response and targetPod arguments are not used.
func (p *DynamoCleanupPlugin) PostResponse(
	ctx context.Context,
	request *schedtypes.LLMRequest,
	response *rc.Response,
	targetPod *backend.Pod,
) {
	logger := log.FromContext(ctx)

	// Guards are checked in order, so RequestId is only read once request is
	// known to be non-nil.
	switch {
	case request == nil:
		logger.V(logutil.DEBUG).Info("DynamoCleanupPlugin: request is nil, skipping cleanup")
		return
	case request.RequestId == "":
		logger.V(logutil.DEBUG).Info("DynamoCleanupPlugin: no request ID, skipping cleanup")
		return
	}

	id := request.RequestId

	// Ask the dynamo router to drop its bookkeeping for this request.
	freeErr := dynamo.CallFreeRequest(id)
	if freeErr != nil {
		logger.V(logutil.DEFAULT).Error(freeErr, "DynamoCleanupPlugin: failed to free request",
			"requestID", id)
		return
	}

	logger.V(logutil.VERBOSE).Info("DynamoCleanupPlugin: freed request from router",
		"requestID", id)
}