Skip to content

Commit f11df57

Browse files
committed
fix(openvino): explicit ov::Tensor frees in ggml_backend_openvino_free
1 parent 5957f77 commit f11df57

2 files changed

Lines changed: 21 additions & 1 deletion

File tree

ggml/src/ggml-openvino/ggml-openvino.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,15 @@ bool ggml_backend_buft_is_openvino_host(ggml_backend_buffer_type_t buft) {
603603

604604
// Tears down an OpenVINO backend instance: deletes the backend's context object
// and then the backend object itself.
// When a runtime context is attached, its backend_count is decremented first and,
// once it reaches zero, the runtime context's caches are cleared explicitly before
// the context is deleted.
// NOTE(review): backend_count is incremented in ggml_backend_openvino_init; it is a
// plain int, so this presumably assumes init/free are not called concurrently — confirm.
static void ggml_backend_openvino_free(ggml_backend_t backend) {
605605
ggml_backend_openvino_context * ctx = (ggml_backend_openvino_context *) backend->context;
606+
607+
// Nothing to release on the runtime side if no runtime context was attached.
if (ctx->runtime_context) {
608+
auto r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context);
609+
// This backend no longer counts as a user of the runtime context.
r_ctx->backend_count--;
610+
// Last user gone: clear the caches now rather than relying on destruction order.
// NOTE(review): presumably the runtime context can be shared by several backends — confirm.
if (r_ctx->backend_count == 0) {
611+
r_ctx->clear_caches();
612+
}
613+
}
614+
606615
delete ctx;
607616
delete backend;
608617
}
@@ -672,6 +681,7 @@ GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device) {
672681
std::shared_ptr<ov_runtime_context> r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context);
673682
r_ctx->device = ggml_openvino_get_device_name();
674683
r_ctx->stateful = is_stateful_enabled() && !ggml_openvino_is_npu();
684+
r_ctx->backend_count++;
675685

676686
ggml_backend_t openvino_backend = new ggml_backend{
677687
/* .guid = */ ggml_backend_openvino_guid(),

ggml/src/ggml-openvino/utils.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,21 @@ struct ov_runtime_context {
5959
// Simultaneous stateful inference request support to be added.
6060
size_t stateful_kv_size;
6161
std::map<std::string, std::string> kv_state_input_name_map;
62+
int backend_count;
6263

6364
ov_runtime_context() :
6465
device("CPU"),
6566
stateful(false),
66-
stateful_kv_size(0) {}
67+
stateful_kv_size(0),
68+
backend_count(0) {}
69+
70+
// Empties every cache held by this runtime context by calling clear() on the
// decoder cache, both infer-request caches (regular and prefill), and the
// OpenVINO input/output name caches.
// Called from ggml_backend_openvino_free once backend_count drops to zero.
// NOTE(review): presumably this is what releases the cached OpenVINO objects
// (e.g. tensors held by infer requests) — confirm against the cache value types.
void clear_caches() {
71+
decoder_cache.clear();
72+
infer_request_cache.clear();
73+
infer_request_cache_prefill.clear();
74+
ov_input_names_cache.clear();
75+
ov_output_names_cache.clear();
76+
}
6777
};
6878

6979
enum ggml_status ov_graph_compute(struct ggml_cgraph * cgraph, ggml_backend_t backend);

0 commit comments

Comments
 (0)