diff --git a/backends/cuda-ref/ceed-cuda-ref-operator.c b/backends/cuda-ref/ceed-cuda-ref-operator.c index 808816986f..025d5b6501 100644 --- a/backends/cuda-ref/ceed-cuda-ref-operator.c +++ b/backends/cuda-ref/ceed-cuda-ref-operator.c @@ -750,7 +750,7 @@ static int CeedOperatorSetupAtPoints_Cuda(CeedOperator op) { //------------------------------------------------------------------------------ static inline int CeedOperatorInputBasisAtPoints_Cuda(CeedOperatorField op_input_field, CeedQFunctionField qf_input_field, CeedInt input_field, CeedVector in_vec, CeedVector active_e_vec, CeedInt num_elem, const CeedInt *num_points, - const bool skip_active, CeedOperator_Cuda *impl) { + const bool skip_active, const bool skip_passive, CeedOperator_Cuda *impl) { bool is_active = false; CeedEvalMode eval_mode; CeedVector l_vec, e_vec = impl->e_vecs_in[input_field], q_vec = impl->q_vecs_in[input_field]; @@ -758,7 +758,11 @@ static inline int CeedOperatorInputBasisAtPoints_Cuda(CeedOperatorField op_input // Skip active input CeedCallBackend(CeedOperatorFieldGetVector(op_input_field, &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; - if (is_active && skip_active) return CEED_ERROR_SUCCESS; + if (skip_active && is_active) return CEED_ERROR_SUCCESS; + if (skip_passive && !is_active) { + CeedCallBackend(CeedVectorDestroy(&l_vec)); + return CEED_ERROR_SUCCESS; + } if (is_active) { l_vec = in_vec; if (!e_vec) e_vec = active_e_vec; @@ -842,7 +846,7 @@ static int CeedOperatorApplyAddAtPoints_Cuda(CeedOperator op, CeedVector in_vec, CeedCallBackend( CeedOperatorInputRestrict_Cuda(op_input_fields[field], qf_input_fields[field], field, in_vec, active_e_vec, false, impl, request)); CeedCallBackend(CeedOperatorInputBasisAtPoints_Cuda(op_input_fields[field], qf_input_fields[field], field, in_vec, active_e_vec, num_elem, - num_points, false, impl)); + num_points, false, false, impl)); } // Output pointers, as necessary @@ -1845,19 +1849,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C // Process inputs for (CeedInt i = 0; i < num_input_fields; i++) { CeedCallBackend(CeedOperatorInputRestrict_Cuda(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, true, impl, request)); - CeedCallBackend(CeedOperatorInputBasisAtPoints_Cuda(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, num_elem, num_points, true, impl)); - } - - // Clear active input Qvecs - for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active = false; - CeedVector l_vec; - - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); - is_active = l_vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; - CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); + CeedCallBackend( + CeedOperatorInputBasisAtPoints_Cuda(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, num_elem, num_points, true, false, impl)); } // Output pointers, as necessary @@ -1876,19 +1869,19 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C // Loop over active fields for (CeedInt i = 0; i < num_input_fields; i++) { bool is_active = false, is_active_at_points = true; - CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0; + CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0, field_in = impl->input_field_order[i]; CeedRestrictionType rstr_type; CeedVector l_vec; CeedElemRestriction elem_rstr; // -- Skip non-active input - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[field_in], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; + if (!is_active || impl->skip_rstr_in[field_in]) continue; // -- Get active restriction type - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[field_in], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); is_active_at_points = rstr_type == CEED_RESTRICTION_POINTS; if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); @@ -1897,16 +1890,9 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); e_vec_size = elem_size * num_comp_active; + CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); for (CeedInt s = 0; s < e_vec_size; s++) { - bool is_active = false; - CeedEvalMode eval_mode; - CeedVector l_vec, q_vec = impl->q_vecs_in[i]; - - // Skip non-active input - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); - is_active = l_vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; + CeedVector q_vec = impl->q_vecs_in[field_in]; // Update unit vector { @@ -1915,8 +1901,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C CeedSize start = node * 1 + comp * (elem_size * num_elem); CeedSize stop = (comp + 1) * (elem_size * num_elem); - if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); - else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); + if (s != 0) CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); node = s % elem_size, comp = s / elem_size; start = node * 1 + comp * (elem_size * num_elem); @@ -1925,29 +1910,11 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C } // Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); - switch (eval_mode) { - case CEED_EVAL_NONE: { - const CeedScalar *e_vec_array; - - CeedCallBackend(CeedVectorGetArrayRead(active_e_vec_in, CEED_MEM_DEVICE, &e_vec_array)); - CeedCallBackend(CeedVectorSetArray(q_vec, CEED_MEM_DEVICE, CEED_USE_POINTER, (CeedScalar *)e_vec_array)); - break; - } - case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: - case CEED_EVAL_DIV: - case CEED_EVAL_CURL: { - CeedBasis basis; + for (CeedInt j = 0; j < num_input_fields; j++) { + CeedInt field = impl->input_field_order[j]; - CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - CeedCallBackend( - CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, active_e_vec_in, q_vec)); - CeedCallBackend(CeedBasisDestroy(&basis)); - break; - } - case CEED_EVAL_WEIGHT: - break; // No action + CeedCallBackend(CeedOperatorInputBasisAtPoints_Cuda(op_input_fields[field], qf_input_fields[field], field, NULL, active_e_vec_in, num_elem, + num_points, false, true, impl)); } // Q function @@ -1957,20 +1924,21 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C for (CeedInt j = 0; j < num_output_fields; j++) { bool is_active = false; CeedInt elem_size = 0; + CeedInt field_out = impl->output_field_order[j]; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[j], q_vec = impl->q_vecs_out[j]; + CeedVector l_vec, e_vec = impl->e_vecs_out[field_out], q_vec = impl->q_vecs_out[field_out]; CeedElemRestriction elem_rstr; // ---- Skip non-active output - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field_out], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&l_vec)); if (!is_active) continue; if (!e_vec) e_vec = active_e_vec_out; // ---- Check if elem size matches - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field_out], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); if (is_active_at_points && rstr_type != CEED_RESTRICTION_POINTS) continue; if (rstr_type == CEED_RESTRICTION_POINTS) { @@ -1986,7 +1954,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C } // Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[j], &eval_mode)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[field_out], &eval_mode)); switch (eval_mode) { case CEED_EVAL_NONE: { CeedScalar *e_vec_array; @@ -2001,8 +1969,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C case CEED_EVAL_CURL: { CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); - CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field_out], &basis)); + if (impl->apply_add_basis_out[field_out]) { + CeedCallBackend( + CeedBasisApplyAddAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + } else { + CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + } CeedCallBackend(CeedBasisDestroy(&basis)); break; } @@ -2014,6 +1987,10 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Cuda(CeedOperator op, C } // Mask output e-vec + if (impl->skip_rstr_out[field_out]) { + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + continue; + } CeedCallBackend(CeedVectorPointwiseMult(e_vec, active_e_vec_in, e_vec)); // Restrict diff --git a/backends/hip-ref/ceed-hip-ref-operator.c b/backends/hip-ref/ceed-hip-ref-operator.c index 2d28627a72..b9a7231247 100644 --- a/backends/hip-ref/ceed-hip-ref-operator.c +++ b/backends/hip-ref/ceed-hip-ref-operator.c @@ -748,7 +748,7 @@ static int CeedOperatorSetupAtPoints_Hip(CeedOperator op) { //------------------------------------------------------------------------------ static inline int CeedOperatorInputBasisAtPoints_Hip(CeedOperatorField op_input_field, CeedQFunctionField qf_input_field, CeedInt input_field, CeedVector in_vec, CeedVector active_e_vec, CeedInt num_elem, const CeedInt *num_points, - const bool skip_active, CeedOperator_Hip *impl) { + const bool skip_active, const bool skip_passive, CeedOperator_Hip *impl) { bool is_active = false; CeedEvalMode eval_mode; CeedVector l_vec, e_vec = impl->e_vecs_in[input_field], q_vec = impl->q_vecs_in[input_field]; @@ -756,7 +756,11 @@ static inline int CeedOperatorInputBasisAtPoints_Hip(CeedOperatorField op_input_ // Skip active input CeedCallBackend(CeedOperatorFieldGetVector(op_input_field, &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; - if (is_active && skip_active) return CEED_ERROR_SUCCESS; + if (skip_active && is_active) return CEED_ERROR_SUCCESS; + if (skip_passive && !is_active) { + CeedCallBackend(CeedVectorDestroy(&l_vec)); + return CEED_ERROR_SUCCESS; + } if (is_active) { l_vec = in_vec; if (!e_vec) e_vec = active_e_vec; @@ -839,7 +843,7 @@ static int CeedOperatorApplyAddAtPoints_Hip(CeedOperator op, CeedVector in_vec, CeedCallBackend(CeedOperatorInputRestrict_Hip(op_input_fields[field], qf_input_fields[field], field, in_vec, active_e_vec, false, impl, request)); CeedCallBackend(CeedOperatorInputBasisAtPoints_Hip(op_input_fields[field], qf_input_fields[field], field, in_vec, active_e_vec, num_elem, - num_points, false, impl)); + num_points, false, false, impl)); } // Output pointers, as necessary @@ -1842,19 +1846,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce // Process inputs for (CeedInt i = 0; i < num_input_fields; i++) { CeedCallBackend(CeedOperatorInputRestrict_Hip(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, true, impl, request)); - CeedCallBackend(CeedOperatorInputBasisAtPoints_Hip(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, num_elem, num_points, true, impl)); - } - - // Clear active input Qvecs - for (CeedInt i = 0; i < num_input_fields; i++) { - bool is_active = false; - CeedVector l_vec; - - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); - is_active = l_vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; - CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); + CeedCallBackend( + CeedOperatorInputBasisAtPoints_Hip(op_input_fields[i], qf_input_fields[i], i, NULL, NULL, num_elem, num_points, true, false, impl)); } // Output pointers, as necessary @@ -1873,19 +1866,19 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce // Loop over active fields for (CeedInt i = 0; i < num_input_fields; i++) { bool is_active = false, is_active_at_points = true; - CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0; + CeedInt elem_size = 1, num_comp_active = 1, e_vec_size = 0, field_in = impl->input_field_order[i]; CeedRestrictionType rstr_type; CeedVector l_vec; CeedElemRestriction elem_rstr; // -- Skip non-active input - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); + CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[field_in], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; + if (!is_active || impl->skip_rstr_in[field_in]) continue; // -- Get active restriction type - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[field_in], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); is_active_at_points = rstr_type == CEED_RESTRICTION_POINTS; if (!is_active_at_points) CeedCallBackend(CeedElemRestrictionGetElementSize(elem_rstr, &elem_size)); @@ -1894,16 +1887,9 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); e_vec_size = elem_size * num_comp_active; + CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); for (CeedInt s = 0; s < e_vec_size; s++) { - bool is_active = false; - CeedEvalMode eval_mode; - CeedVector l_vec, q_vec = impl->q_vecs_in[i]; - - // Skip non-active input - CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &l_vec)); - is_active = l_vec == CEED_VECTOR_ACTIVE; - CeedCallBackend(CeedVectorDestroy(&l_vec)); - if (!is_active) continue; + CeedVector q_vec = impl->q_vecs_in[field_in]; // Update unit vector { @@ -1912,8 +1898,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce CeedSize start = node * 1 + comp * (elem_size * num_elem); CeedSize stop = (comp + 1) * (elem_size * num_elem); - if (s == 0) CeedCallBackend(CeedVectorSetValue(active_e_vec_in, 0.0)); - else CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); + if (s != 0) CeedCallBackend(CeedVectorSetValueStrided(active_e_vec_in, start, stop, elem_size, 0.0)); node = s % elem_size, comp = s / elem_size; start = node * 1 + comp * (elem_size * num_elem); @@ -1922,29 +1907,11 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce } // Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); - switch (eval_mode) { - case CEED_EVAL_NONE: { - const CeedScalar *e_vec_array; - - CeedCallBackend(CeedVectorGetArrayRead(active_e_vec_in, CEED_MEM_DEVICE, &e_vec_array)); - CeedCallBackend(CeedVectorSetArray(q_vec, CEED_MEM_DEVICE, CEED_USE_POINTER, (CeedScalar *)e_vec_array)); - break; - } - case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: - case CEED_EVAL_DIV: - case CEED_EVAL_CURL: { - CeedBasis basis; + for (CeedInt j = 0; j < num_input_fields; j++) { + CeedInt field = impl->input_field_order[j]; - CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - CeedCallBackend( - CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, active_e_vec_in, q_vec)); - CeedCallBackend(CeedBasisDestroy(&basis)); - break; - } - case CEED_EVAL_WEIGHT: - break; // No action + CeedCallBackend(CeedOperatorInputBasisAtPoints_Hip(op_input_fields[field], qf_input_fields[field], field, NULL, active_e_vec_in, num_elem, + num_points, false, true, impl)); } // Q function @@ -1954,20 +1921,21 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce for (CeedInt j = 0; j < num_output_fields; j++) { bool is_active = false; CeedInt elem_size = 0; + CeedInt field_out = impl->output_field_order[j]; CeedRestrictionType rstr_type; CeedEvalMode eval_mode; - CeedVector l_vec, e_vec = impl->e_vecs_out[j], q_vec = impl->q_vecs_out[j]; + CeedVector l_vec, e_vec = impl->e_vecs_out[field_out], q_vec = impl->q_vecs_out[field_out]; CeedElemRestriction elem_rstr; // ---- Skip non-active output - CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &l_vec)); + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[field_out], &l_vec)); is_active = l_vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&l_vec)); if (!is_active) continue; if (!e_vec) e_vec = active_e_vec_out; // ---- Check if elem size matches - CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); + CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[field_out], &elem_rstr)); CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); if (is_active_at_points && rstr_type != CEED_RESTRICTION_POINTS) continue; if (rstr_type == CEED_RESTRICTION_POINTS) { @@ -1983,7 +1951,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce } // Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[j], &eval_mode)); + CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[field_out], &eval_mode)); switch (eval_mode) { case CEED_EVAL_NONE: { CeedScalar *e_vec_array; @@ -1998,8 +1966,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce case CEED_EVAL_CURL: { CeedBasis basis; - CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); - CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[field_out], &basis)); + if (impl->apply_add_basis_out[field_out]) { + CeedCallBackend( + CeedBasisApplyAddAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + } else { + CeedCallBackend(CeedBasisApplyAtPoints(basis, num_elem, num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, q_vec, e_vec)); + } CeedCallBackend(CeedBasisDestroy(&basis)); break; } @@ -2010,6 +1983,12 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Hip(CeedOperator op, Ce } } + // Continue if a field that is summed into + if (impl->skip_rstr_out[field_out]) { + CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); + continue; + } + // Mask output e-vec CeedCallBackend(CeedVectorPointwiseMult(e_vec, active_e_vec_in, e_vec)); diff --git a/backends/ref/ceed-ref-operator.c b/backends/ref/ceed-ref-operator.c index 151316f83d..9adc6e4c3c 100644 --- a/backends/ref/ceed-ref-operator.c +++ b/backends/ref/ceed-ref-operator.c @@ -886,8 +886,8 @@ static int CeedOperatorSetupAtPoints_Ref(CeedOperator op) { //------------------------------------------------------------------------------ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_points_offset, CeedInt num_points, CeedQFunctionField *qf_input_fields, CeedOperatorField *op_input_fields, CeedInt num_input_fields, CeedVector in_vec, - CeedVector point_coords_elem, bool skip_active, CeedScalar *e_data[2 * CEED_FIELD_MAX], - CeedOperator_Ref *impl, CeedRequest *request) { + CeedVector point_coords_elem, bool skip_active, bool skip_passive, + CeedScalar *e_data[2 * CEED_FIELD_MAX], CeedOperator_Ref *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_input_fields; i++) { bool is_active; CeedInt elem_size, size, num_comp; @@ -902,6 +902,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin is_active = vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&vec)); if (skip_active && is_active) continue; + if (skip_passive && !is_active) continue; // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -909,7 +910,8 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); CeedCallBackend(CeedQFunctionFieldGetSize(qf_input_fields[i], &size)); // Restrict block active input - if (is_active && !impl->skip_rstr_in[i]) { + // When skipping passive inputs, we're doing assembly and should not restrict + if (is_active && !impl->skip_rstr_in[i] && !skip_passive) { if (rstr_type == CEED_RESTRICTION_POINTS) { CeedCallBackend(CeedElemRestrictionApplyAtPointsInElement(elem_rstr, e, CEED_NOTRANSPOSE, in_vec, impl->e_vecs_in[i], request)); } else { @@ -952,7 +954,7 @@ static inline int CeedOperatorInputBasisAtPoints_Ref(CeedInt e, CeedInt num_poin static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_points_offset, CeedInt num_points, CeedQFunctionField *qf_output_fields, CeedOperatorField *op_output_fields, CeedInt num_input_fields, CeedInt num_output_fields, bool *apply_add_basis, bool *skip_rstr, CeedOperator op, CeedVector out_vec, - CeedVector point_coords_elem, CeedOperator_Ref *impl, CeedRequest *request) { + CeedVector point_coords_elem, bool skip_passive, CeedOperator_Ref *impl, CeedRequest *request) { for (CeedInt i = 0; i < num_output_fields; i++) { bool is_active; CeedRestrictionType rstr_type; @@ -961,6 +963,12 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi CeedElemRestriction elem_rstr; CeedBasis basis; + // Skip active input + CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); + is_active = vec == CEED_VECTOR_ACTIVE; + CeedCallBackend(CeedVectorDestroy(&vec)); + if (skip_passive && !is_active) continue; + // Get elem_size, eval_mode, size CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[i], &elem_rstr)); CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[i], &eval_mode)); @@ -989,7 +997,8 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi } } // Restrict output block - if (skip_rstr[i]) { + // When skipping passive outputs, we're doing assembly and should not restrict + if (skip_rstr[i] || skip_passive) { CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); continue; } @@ -997,7 +1006,6 @@ static inline int CeedOperatorOutputBasisAtPoints_Ref(CeedInt e, CeedInt num_poi // Get output vector CeedCallBackend(CeedElemRestrictionGetType(elem_rstr, &rstr_type)); CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[i], &vec)); - is_active = vec == CEED_VECTOR_ACTIVE; if (is_active) vec = out_vec; // Restrict if (rstr_type == CEED_RESTRICTION_POINTS) { @@ -1049,7 +1057,7 @@ static int CeedOperatorApplyAddAtPoints_Ref(CeedOperator op, CeedVector in_vec, // Input basis apply CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, in_vec, - impl->point_coords_elem, false, e_data, impl, request)); + impl->point_coords_elem, false, false, e_data, impl, request)); // Q function if (!impl->is_identity_qf) { @@ -1059,7 +1067,7 @@ static int CeedOperatorApplyAddAtPoints_Ref(CeedOperator op, CeedVector in_vec, // Output basis apply and restriction CeedCallBackend(CeedOperatorOutputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_output_fields, op_output_fields, num_input_fields, num_output_fields, impl->apply_add_basis_out, impl->skip_rstr_out, op, out_vec, - impl->point_coords_elem, impl, request)); + impl->point_coords_elem, false, impl, request)); num_points_offset += num_points; } @@ -1202,7 +1210,7 @@ static inline int CeedOperatorLinearAssembleQFunctionAtPointsCore_Ref(CeedOperat // Input basis apply CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, NULL, - impl->point_coords_elem, true, e_data_full, impl, request)); + impl->point_coords_elem, true, false, e_data_full, impl, request)); // Assemble QFunction for (CeedInt i = 0; i < num_input_fields; i++) { @@ -1360,7 +1368,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedVectorSetValue(out_vec, 0.0)); } - // Clear input Qvecs + // Clear input Evecs for (CeedInt i = 0; i < num_input_fields; i++) { bool is_active; CeedVector vec; @@ -1368,8 +1376,8 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); is_active = vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&vec)); - if (!is_active) continue; - CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); + if (!is_active || impl->skip_rstr_in[i]) continue; + CeedCallBackend(CeedVectorSetValue(impl->e_vecs_in[i], 0.0)); } // Input Evecs and Restriction @@ -1385,7 +1393,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce // Input basis apply for non-active bases CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, in_vec, - impl->point_coords_elem, true, e_data, impl, request)); + impl->point_coords_elem, true, false, e_data, impl, request)); // Loop over points on element for (CeedInt i = 0; i < num_input_fields; i++) { @@ -1399,7 +1407,7 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedOperatorFieldGetVector(op_input_fields[i], &vec)); is_active = vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&vec)); - if (!is_active) continue; + if (!is_active || impl->skip_rstr_in[i]) continue; // -- Get active restriction type CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_input_fields[i], &elem_rstr)); @@ -1412,37 +1420,18 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce e_vec_size = elem_size_active * num_comp_active; for (CeedInt s = 0; s < e_vec_size; s++) { - CeedEvalMode eval_mode; - CeedBasis basis; - // -- Update unit vector { CeedScalar *array; - if (s == 0) CeedCallBackend(CeedVectorSetValue(impl->e_vecs_in[i], 0.0)); CeedCallBackend(CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, &array)); array[s] = 1.0; if (s > 0) array[s - 1] = 0.0; CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_in[i], &array)); } - // -- Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_input_fields[i], &eval_mode)); - switch (eval_mode) { - case CEED_EVAL_NONE: - break; - // Note - these basis eval modes require FEM fields - case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: - case CEED_EVAL_DIV: - case CEED_EVAL_CURL: - CeedCallBackend(CeedOperatorFieldGetBasis(op_input_fields[i], &basis)); - CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_NOTRANSPOSE, eval_mode, impl->point_coords_elem, impl->e_vecs_in[i], - impl->q_vecs_in[i])); - CeedCallBackend(CeedBasisDestroy(&basis)); - break; - case CEED_EVAL_WEIGHT: - break; // No action - } + // Input basis apply for active bases + CeedCallBackend(CeedOperatorInputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_input_fields, op_input_fields, num_input_fields, + in_vec, impl->point_coords_elem, false, true, e_data, impl, request)); // -- Q function if (!impl->is_identity_qf) { @@ -1452,23 +1441,21 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce // -- Output basis apply and restriction CeedCallBackend(CeedOperatorOutputBasisAtPoints_Ref(e, num_points_offset, num_points, qf_output_fields, op_output_fields, num_input_fields, num_output_fields, impl->apply_add_basis_out, impl->skip_rstr_out, op, out_vec, - impl->point_coords_elem, impl, request)); + impl->point_coords_elem, true, impl, request)); // -- Grab diagonal value for (CeedInt j = 0; j < num_output_fields; j++) { bool is_active; CeedInt elem_size = 0; CeedRestrictionType rstr_type; - CeedEvalMode eval_mode; CeedVector vec; CeedElemRestriction elem_rstr; - CeedBasis basis; // ---- Skip non-active output CeedCallBackend(CeedOperatorFieldGetVector(op_output_fields[j], &vec)); is_active = vec == CEED_VECTOR_ACTIVE; CeedCallBackend(CeedVectorDestroy(&vec)); - if (!is_active) continue; + if (!is_active || impl->skip_rstr_out[j]) continue; // ---- Check if elem size matches CeedCallBackend(CeedOperatorFieldGetElemRestriction(op_output_fields[j], &elem_rstr)); @@ -1491,27 +1478,6 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce continue; } } - - // ---- Basis action - CeedCallBackend(CeedQFunctionFieldGetEvalMode(qf_output_fields[j], &eval_mode)); - switch (eval_mode) { - case CEED_EVAL_NONE: - break; // No action - case CEED_EVAL_INTERP: - case CEED_EVAL_GRAD: - case CEED_EVAL_DIV: - case CEED_EVAL_CURL: - CeedCallBackend(CeedOperatorFieldGetBasis(op_output_fields[j], &basis)); - CeedCallBackend(CeedBasisApplyAtPoints(basis, 1, &num_points, CEED_TRANSPOSE, eval_mode, impl->point_coords_elem, impl->q_vecs_out[j], - impl->e_vecs_out[j])); - CeedCallBackend(CeedBasisDestroy(&basis)); - break; - // LCOV_EXCL_START - case CEED_EVAL_WEIGHT: { - return CeedError(CeedOperatorReturnCeed(op), CEED_ERROR_BACKEND, "CEED_EVAL_WEIGHT cannot be an output evaluation mode"); - // LCOV_EXCL_STOP - } - } // ---- Update output vector { CeedScalar *array, current_value = 0.0; @@ -1533,7 +1499,13 @@ static int CeedOperatorLinearAssembleAddDiagonalAtPoints_Ref(CeedOperator op, Ce CeedCallBackend(CeedElemRestrictionDestroy(&elem_rstr)); } // -- Reset unit vector - if (s == e_vec_size - 1) CeedCallBackend(CeedVectorSetValue(impl->q_vecs_in[i], 0.0)); + if (s == e_vec_size - 1) { + CeedScalar *array; + + CeedCallBackend(CeedVectorGetArray(impl->e_vecs_in[i], CEED_MEM_HOST, &array)); + array[s] = 0.0; + CeedCallBackend(CeedVectorRestoreArray(impl->e_vecs_in[i], &array)); + } } } num_points_offset += num_points;