99#include " ggml.h"
1010
1111#include < atomic>
12+ #include < cstdlib>
1213#include < cstdint>
1314#include < cstring>
1415#include < memory>
@@ -593,36 +594,6 @@ bool ggml_backend_buft_is_openvino_host(ggml_backend_buffer_type_t buft) {
593594 return buft->iface .get_name == ggml_backend_openvino_host_buffer_type_get_name;
594595}
595596
596- // =====================================================
597- // OpenVINO Backend Context and Interface
598- // =====================================================
599-
600- struct ggml_backend_openvino_context {
601- int device; // the device ID currently in use
602- std::string name; // context Name
603- std::string description; // context description
604-
605- // OpenVINO core components
606- ov::Core core; // OpenVINO core interface
607- std::shared_ptr<ov::CompiledModel> model; // compiled Model
608- ov::InferRequest infer_request; // inference Request
609-
610- // OpenVINO Multi-stream support
611- static const int MAX_STREAMS = 8 ; // define the maximum number of flows
612- std::vector<ov::InferRequest> streams; // used to support multi-stream reasoning
613- int current_stream; // the currently active stream index
614-
615- // state Management
616- bool is_initialized; // initialize
617-
618- ggml_backend_openvino_context () :
619- device (0 ),
620- name (" OpenVINO" ),
621- description (" OpenVINO Backend Context" ),
622- current_stream (0 ),
623- is_initialized (false ) {}
624- };
625-
626597static void ggml_backend_openvino_free (ggml_backend_t backend) {
627598 ggml_backend_openvino_context * ctx = (ggml_backend_openvino_context *) backend->context ;
628599 delete ctx;
@@ -635,7 +606,7 @@ static const char * ggml_backend_openvino_get_name(ggml_backend_t backend) {
635606}
636607
637608static enum ggml_status ggml_backend_openvino_graph_compute (ggml_backend_t backend, ggml_cgraph * cgraph) {
638- return ov_graph_compute (cgraph);
609+ return ov_graph_compute (cgraph, backend );
639610 GGML_UNUSED (backend);
640611}
641612
@@ -657,7 +628,7 @@ static const ggml_backend_i ggml_backend_openvino_interface = {
657628};
658629
659630int ggml_backend_openvino_get_device_count () {
660- return ggml_openvino_info (). device_count ;
631+ return 1 ;
661632}
662633
663634static ggml_guid_t ggml_backend_openvino_guid (void ) {
@@ -679,6 +650,17 @@ GGML_BACKEND_API ggml_backend_t ggml_backend_openvino_init(int device) {
679650 return nullptr ;
680651 }
681652
653+ ctx->runtime_context = std::make_shared<ov_runtime_context>();
654+ if (ctx->runtime_context == nullptr ) {
655+ GGML_LOG_ERROR (" %s: failed to allocate runtime context\n " , __func__);
656+ delete ctx;
657+ return nullptr ;
658+ }
659+
660+ std::shared_ptr<ov_runtime_context> r_ctx = std::static_pointer_cast<ov_runtime_context>(ctx->runtime_context );
661+ r_ctx->device = ggml_openvino_get_device_name ();
662+ r_ctx->stateful = getenv (" GGML_OPENVINO_STATEFUL_EXECUTION" ) && !ggml_openvino_is_npu ();
663+
682664 ggml_backend_t openvino_backend = new ggml_backend{
683665 /* .guid = */ ggml_backend_openvino_guid (),
684666 /* .interface = */ ggml_backend_openvino_interface,
@@ -1059,7 +1041,7 @@ static const char * ggml_backend_openvino_reg_get_name(ggml_backend_reg_t reg) {
10591041
10601042static size_t ggml_backend_openvino_reg_get_device_count (ggml_backend_reg_t reg) {
10611043 GGML_UNUSED (reg);
1062- return ggml_openvino_info (). device_count ;
1044+ return ( size_t ) ggml_backend_openvino_get_device_count () ;
10631045}
10641046
10651047static ggml_backend_dev_t ggml_backend_openvino_reg_get_device (ggml_backend_reg_t reg, size_t index) {
@@ -1068,36 +1050,17 @@ static ggml_backend_dev_t ggml_backend_openvino_reg_get_device(ggml_backend_reg_
10681050 return ctx->devices [index];
10691051}
10701052
1071- static void * ggml_backend_openvino_get_proc_address (ggml_backend_reg_t reg, const char * name) {
1072- GGML_UNUSED (reg);
1073- GGML_UNUSED (name);
1074- return nullptr ;
1075- }
1076-
10771053static const struct ggml_backend_reg_i ggml_backend_openvino_reg_interface = {
10781054 /* .get_name = */ ggml_backend_openvino_reg_get_name,
10791055 /* .get_device_count = */ ggml_backend_openvino_reg_get_device_count,
10801056 /* .get_device = */ ggml_backend_openvino_reg_get_device,
1081- /* .get_proc_address = */ ggml_backend_openvino_get_proc_address ,
1057+ /* .get_proc_address = */ NULL ,
10821058};
10831059
1084- static int get_openvino_device_count () {
1085- return 1 ;
1086- }
1087-
1088- static ggml_openvino_device_info ggml_openvino_init () {
1060+ static void ggml_openvino_init () {
10891061 // Initialize device config singleton from env var
10901062 ggml_openvino_init_device_config ();
10911063 GGML_LOG_INFO (" OpenVINO: using device %s\n " , ggml_openvino_get_device_name ().c_str ());
1092-
1093- ggml_openvino_device_info info = {};
1094- info.device_count = get_openvino_device_count ();
1095- return info;
1096- }
1097-
1098- const ggml_openvino_device_info & ggml_openvino_info () {
1099- static ggml_openvino_device_info info = ggml_openvino_init ();
1100- return info;
11011064}
11021065
11031066GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg (void ) {
@@ -1108,9 +1071,11 @@ GGML_BACKEND_API ggml_backend_reg_t ggml_backend_openvino_reg(void) {
11081071 static std::mutex mutex;
11091072 std::lock_guard<std::mutex> lock (mutex);
11101073 if (!initialized) {
1074+ ggml_openvino_init ();
1075+
11111076 ggml_backend_openvino_reg_context * ctx = new ggml_backend_openvino_reg_context;
11121077
1113- for (int i = 0 ; i < ggml_openvino_info (). device_count ; i++) {
1078+ for (int i = 0 ; i < ggml_backend_openvino_get_device_count () ; i++) {
11141079 ggml_backend_openvino_device_context * dev_ctx = new ggml_backend_openvino_device_context;
11151080 dev_ctx->device = i;
11161081 dev_ctx->name = GGML_OPENVINO_NAME + std::to_string (i);
0 commit comments