Skip to content

Commit 6eee1ff

Browse files
committed
Use persistent per-suboperator HIP streams to allow truly async composite gen operators
1 parent fd326ce commit 6eee1ff

2 files changed

Lines changed: 32 additions & 13 deletions

File tree

backends/hip-gen/ceed-hip-gen-operator.c

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,20 @@
2222
static int CeedOperatorDestroy_Hip_gen(CeedOperator op) {
2323
Ceed ceed;
2424
CeedOperator_Hip_gen *impl;
25+
bool is_composite;
2526

2627
CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
2728
CeedCallBackend(CeedOperatorGetData(op, &impl));
29+
CeedCallBackend(CeedOperatorIsComposite(op, &is_composite));
30+
if (is_composite) {
31+
CeedInt num_suboperators;
32+
33+
CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
34+
for (CeedInt i = 0; i < num_suboperators; i++) {
35+
if (impl->streams[i]) CeedCallHip(ceed, hipStreamDestroy(impl->streams[i]));
36+
impl->streams[i] = NULL;
37+
}
38+
}
2839
if (impl->module) CeedCallHip(ceed, hipModuleUnload(impl->module));
2940
if (impl->points.num_per_elem) CeedCallHip(ceed, hipFree((void **)impl->points.num_per_elem));
3041
CeedCallBackend(CeedFree(&impl));
@@ -239,28 +250,35 @@ static int CeedOperatorApplyAdd_Hip_gen(CeedOperator op, CeedVector input_vec, C
239250
}
240251

241252
static int CeedOperatorApplyAddComposite_Hip_gen(CeedOperator op, CeedVector input_vec, CeedVector output_vec, CeedRequest *request) {
242-
bool is_run_good[CEED_COMPOSITE_MAX] = {false};
243-
CeedInt num_suboperators;
244-
const CeedScalar *input_arr = NULL;
245-
CeedScalar *output_arr = NULL;
246-
Ceed ceed;
247-
CeedOperator *sub_operators;
253+
bool is_run_good[CEED_COMPOSITE_MAX] = {true};
254+
CeedInt num_suboperators;
255+
const CeedScalar *input_arr = NULL;
256+
CeedScalar *output_arr;
257+
Ceed ceed;
258+
CeedOperator_Hip_gen *impl;
259+
CeedOperator *sub_operators;
248260

249261
CeedCallBackend(CeedOperatorGetCeed(op, &ceed));
250-
CeedCall(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
251-
CeedCall(CeedCompositeOperatorGetSubList(op, &sub_operators));
262+
CeedCallBackend(CeedOperatorGetData(op, &impl));
263+
CeedCallBackend(CeedCompositeOperatorGetNumSub(op, &num_suboperators));
264+
CeedCallBackend(CeedCompositeOperatorGetSubList(op, &sub_operators));
252265
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArrayRead(input_vec, CEED_MEM_DEVICE, &input_arr));
253266
if (output_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorGetArray(output_vec, CEED_MEM_DEVICE, &output_arr));
254267
for (CeedInt i = 0; i < num_suboperators; i++) {
255268
CeedInt num_elem = 0;
256269

257-
CeedCall(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
270+
CeedCallBackend(CeedOperatorGetNumElements(sub_operators[i], &num_elem));
258271
if (num_elem > 0) {
259-
hipStream_t stream = NULL;
272+
if (!impl->streams[i]) CeedCallHip(ceed, hipStreamCreate(&impl->streams[i]));
273+
CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], impl->streams[i], input_arr, output_arr, &is_run_good[i], request));
274+
} else {
275+
is_run_good[i] = true;
276+
}
277+
}
260278

261-
CeedCallHip(ceed, hipStreamCreate(&stream));
262-
CeedCallBackend(CeedOperatorApplyAddCore_Hip_gen(sub_operators[i], stream, input_arr, output_arr, &is_run_good[i], request));
263-
CeedCallHip(ceed, hipStreamDestroy(stream));
279+
for (CeedInt i = 0; i < num_suboperators; i++) {
280+
if (impl->streams[i]) {
281+
if (is_run_good[i]) CeedCallHip(ceed, hipStreamSynchronize(impl->streams[i]));
264282
}
265283
}
266284
if (input_vec != CEED_VECTOR_NONE) CeedCallBackend(CeedVectorRestoreArrayRead(input_vec, &input_arr));

backends/hip-gen/ceed-hip-gen.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ typedef struct {
1717
CeedInt Q, Q_1d;
1818
CeedInt max_P_1d;
1919
CeedInt thread_1d;
20+
hipStream_t streams[CEED_COMPOSITE_MAX];
2021
hipModule_t module;
2122
hipFunction_t op;
2223
FieldsInt_Hip indices;

0 commit comments

Comments
 (0)