Skip to content

Commit e70bd11

Browse files
rgerganov authored and rsenthilkumar6 committed
rpc : keep last_graph_uid in the device context (ggml-org#23273)
With the introduction of MTP, we can have multiple compute contexts for the same RPC device. In this case, last_graph_uid is not updated properly when switching between contexts. This patch fixes the problem by moving last_graph_uid to the device context, ensuring that it is always updated.
Closes: ggml-org#23242
1 parent fc459f0 commit e70bd11

1 file changed

Lines changed: 17 additions & 17 deletions

File tree

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,14 @@ static ggml_guid_t ggml_backend_rpc_guid() {
199199
return &guid;
200200
}
201201

202+
struct ggml_backend_rpc_device_context {
203+
std::string endpoint;
204+
uint32_t device;
205+
std::string name;
206+
std::string description;
207+
uint64_t last_graph_uid;
208+
};
209+
202210
struct ggml_backend_rpc_buffer_type_context {
203211
std::string endpoint;
204212
uint32_t device;
@@ -211,7 +219,6 @@ struct ggml_backend_rpc_context {
211219
std::string endpoint;
212220
uint32_t device;
213221
std::string name;
214-
uint64_t last_graph_uid;
215222
};
216223

217224
struct ggml_backend_rpc_buffer_context {
@@ -691,17 +698,19 @@ static void serialize_graph(uint32_t device, const ggml_cgraph * cgraph, std::ve
691698

692699
static enum ggml_status ggml_backend_rpc_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) {
693700
ggml_backend_rpc_context * rpc_ctx = (ggml_backend_rpc_context *)backend->context;
701+
ggml_backend_dev_t rpc_dev = ggml_backend_get_device(backend);
702+
ggml_backend_rpc_device_context * rpc_dev_ctx = (ggml_backend_rpc_device_context *)rpc_dev->context;
694703

695704
GGML_ASSERT(cgraph->n_nodes > 0);
696-
bool reuse = cgraph->uid != 0 && rpc_ctx->last_graph_uid == cgraph->uid;
705+
bool reuse = cgraph->uid != 0 && rpc_dev_ctx->last_graph_uid == cgraph->uid;
697706
if (reuse) {
698707
rpc_msg_graph_recompute_req request;
699708
request.device = rpc_ctx->device;
700709
auto sock = get_socket(rpc_ctx->endpoint);
701710
bool status = send_rpc_cmd(sock, RPC_CMD_GRAPH_RECOMPUTE, &request, sizeof(request));
702711
RPC_STATUS_ASSERT(status);
703712
} else {
704-
rpc_ctx->last_graph_uid = cgraph->uid;
713+
rpc_dev_ctx->last_graph_uid = cgraph->uid;
705714
std::vector<uint8_t> input;
706715
serialize_graph(rpc_ctx->device, cgraph, input);
707716
auto sock = get_socket(rpc_ctx->endpoint);
@@ -770,7 +779,6 @@ ggml_backend_t ggml_backend_rpc_init(const char * endpoint, uint32_t device) {
770779
/* .endpoint = */ endpoint,
771780
/* .device = */ device,
772781
/* .name = */ dev_name,
773-
/* .last_graph_uid = */ 0,
774782
};
775783
auto reg = ggml_backend_rpc_add_server(endpoint);
776784
ggml_backend_t backend = new ggml_backend {
@@ -1757,15 +1765,6 @@ void ggml_backend_rpc_start_server(const char * endpoint, const char * cache_dir
17571765
}
17581766
}
17591767

1760-
// device interface
1761-
1762-
struct ggml_backend_rpc_device_context {
1763-
std::string endpoint;
1764-
uint32_t device;
1765-
std::string name;
1766-
std::string description;
1767-
};
1768-
17691768
static const char * ggml_backend_rpc_device_get_name(ggml_backend_dev_t dev) {
17701769
ggml_backend_rpc_device_context * ctx = (ggml_backend_rpc_device_context *)dev->context;
17711770

@@ -1947,10 +1946,11 @@ ggml_backend_reg_t ggml_backend_rpc_add_server(const char * endpoint) {
19471946
std::string dev_name = "RPC" + std::to_string(dev_id);
19481947
std::string dev_desc = std::string(endpoint);
19491948
ggml_backend_rpc_device_context * dev_ctx = new ggml_backend_rpc_device_context {
1950-
/* .endpoint = */ endpoint,
1951-
/* .device = */ ind,
1952-
/* .name = */ dev_name,
1953-
/* .description = */ dev_desc
1949+
/* .endpoint = */ endpoint,
1950+
/* .device = */ ind,
1951+
/* .name = */ dev_name,
1952+
/* .description = */ dev_desc,
1953+
/* .last_graph_uid = */ 0,
19541954
};
19551955

19561956
ggml_backend_dev_t dev = new ggml_backend_device {

0 commit comments

Comments
 (0)