File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -603,6 +603,15 @@ bool ggml_backend_buft_is_openvino_host(ggml_backend_buffer_type_t buft) {
603603
604604static void ggml_backend_openvino_free (ggml_backend_t backend) {
605605 ggml_backend_openvino_context * ctx = (ggml_backend_openvino_context *) backend->context ;
606+
607+ if (ctx->runtime_context ) {
608+ auto r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context );
609+ r_ctx->backend_count --;
610+ if (r_ctx->backend_count == 0 ) {
611+ r_ctx->clear_caches ();
612+ }
613+ }
614+
606615 delete ctx;
607616 delete backend;
608617}
@@ -672,6 +681,7 @@ GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device) {
672681 std::shared_ptr<ov_runtime_context> r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context );
673682 r_ctx->device = ggml_openvino_get_device_name ();
674683 r_ctx->stateful = is_stateful_enabled () && !ggml_openvino_is_npu ();
684+ r_ctx->backend_count ++;
675685
676686 ggml_backend_t openvino_backend = new ggml_backend{
677687 /* .guid = */ ggml_backend_openvino_guid (),
Original file line number Diff line number Diff line change @@ -59,11 +59,21 @@ struct ov_runtime_context {
5959 // Simultaneous stateful inference request support to be added.
6060 size_t stateful_kv_size;
6161 std::map<std::string, std::string> kv_state_input_name_map;
62+ int backend_count;
6263
6364 ov_runtime_context () :
6465 device (" CPU" ),
6566 stateful (false ),
66- stateful_kv_size (0 ) {}
67+ stateful_kv_size (0 ),
68+ backend_count (0 ) {}
69+
70+ void clear_caches () {
71+ decoder_cache.clear ();
72+ infer_request_cache.clear ();
73+ infer_request_cache_prefill.clear ();
74+ ov_input_names_cache.clear ();
75+ ov_output_names_cache.clear ();
76+ }
6777};
6878
6979enum ggml_status ov_graph_compute (struct ggml_cgraph * cgraph, ggml_backend_t backend);
You can’t perform that action at this time.
0 commit comments