Skip to content

Commit 17e38a8

Browse files
authored
Option to compile everything in single precision + explicit vectorization for ARM (#2819)
* add option
* define single precision type, mpi, avoid repeated instantiations in mixed precision
* use correct tecplot function
* option 1 to handle double precision literals: replace min/max with fmin/fmax
* Revert "option 1 to handle double precision literals: replace min/max with fmin/fmax"
  This reverts commit 4e4d9e1.
* option 2: overload std::min/max for float/double mix, handle blas and lapack functions, fix some FPEs
* fix some FPEs
* vectorization for floats
* arm vectorization
* arm fix and use cpu arch for arm, fix some pass by value warnings
* remove commented code
* remove debug stuff
* update regressions
1 parent f83bde4 commit 17e38a8

40 files changed

Lines changed: 571 additions & 256 deletions

.github/workflows/regression.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
- id: compute
3131
run: |
3232
if [[ "${{ inputs.runner || 'ubuntu-latest' }}" == *arm* ]]; then
33-
echo "flags=" >> $GITHUB_OUTPUT
33+
echo "flags=-Dcpu-arch=armv9-a+simd" >> $GITHUB_OUTPUT
3434
echo "werror=" >> $GITHUB_OUTPUT
3535
else
3636
echo "flags=-Dcpu-arch=skylake" >> $GITHUB_OUTPUT

Common/include/code_config.hpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#pragma once
2828

2929
#include <type_traits>
30+
#include <cmath>
3031

3132
#if defined(_MSC_VER)
3233
#define PRAGMIZE(X) __pragma(X)
@@ -94,6 +95,31 @@ FORCEINLINE Out su2staticcast_p(In ptr) {
9495
#define HAVE_OMP
9596
#endif
9697

98+
/*--- No full single precision for AD builds. ---*/
99+
#if (defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)) && defined(USE_SINGLE_PRECISION)
100+
#undef USE_SINGLE_PRECISION
101+
#endif
102+
103+
/*--- This type can be used for (rare) compatibility cases or for
104+
* computations that are intended to be (always) passive. ---*/
105+
#ifdef USE_SINGLE_PRECISION
106+
using passivedouble = float;
107+
#else
108+
using passivedouble = double;
109+
#endif
110+
111+
/*--- std::min/max do not compile if the arguments have inconsistent types, which
112+
* happens in single precision due to floating point literals (double by default).
113+
* These overloads delegate to fmin/fmax which do not have that problem. ---*/
114+
#ifdef USE_SINGLE_PRECISION
115+
namespace std {
116+
FORCEINLINE float min(const float& a, const double& b) {
  /*--- Narrow the double operand first, so that both arguments passed to fmin are float. ---*/
  const float narrowed = static_cast<float>(b);
  return fmin(a, narrowed);
}
117+
FORCEINLINE float min(const double& b, const float& a) {
  /*--- Mirrored operand order: the double comes first here, but it is still the one narrowed to float. ---*/
  const float narrowed = static_cast<float>(b);
  return fmin(a, narrowed);
}
118+
FORCEINLINE float max(const float& a, const double& b) {
  /*--- Narrow the double operand first, so that both arguments passed to fmax are float. ---*/
  const float narrowed = static_cast<float>(b);
  return fmax(a, narrowed);
}
119+
FORCEINLINE float max(const double& b, const float& a) {
  /*--- Mirrored operand order: the double comes first here, but it is still the one narrowed to float. ---*/
  const float narrowed = static_cast<float>(b);
  return fmax(a, narrowed);
}
120+
} // namespace std
121+
#endif
122+
97123
/*--- Depending on the datatype defined during the configuration,
98124
* include the correct definition, and create the main typedef. ---*/
99125

@@ -131,13 +157,9 @@ using su2double = codi::RealReverseTag;
131157
#include "codi.hpp"
132158
using su2double = codi::RealForward;
133159
#else // primal / direct / no AD
134-
using su2double = double;
160+
using su2double = passivedouble;
135161
#endif
136162

137-
/*--- This type can be used for (rare) compatibility cases or for
138-
* computations that are intended to be (always) passive. ---*/
139-
using passivedouble = double;
140-
141163
/*--- Define a type for potentially lower precision operations. ---*/
142164
#ifndef CODI_FORWARD_TYPE
143165
#ifdef USE_MIXED_PRECISION

Common/include/linear_algebra/CPastixWrapper.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,8 @@ class CPastixWrapper {
6262
vector<pastix_int_t> perm; /*!< \brief Ordering computed by PaStiX. */
6363
vector<su2mixedfloat> workvec; /*!< \brief RHS vector which then becomes the solution. */
6464

65-
pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */
66-
passivedouble dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */
65+
pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */
66+
double dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */
6767

6868
struct {
6969
unsigned long nVar = 0;

Common/include/linear_algebra/CSysSolve.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,13 +442,13 @@ class CSysSolve {
442442
* \brief Get the number of iterations.
443443
* \return The number of iterations done by Solve or Solve_b
444444
*/
445-
inline unsigned long GetIterations(void) const { return Iterations; }
445+
inline unsigned long GetIterations() const {
  /*--- Read-only access to the stored iteration counter. ---*/
  return Iterations;
}
446446

447447
/*!
448448
* \brief Get the final residual.
449449
* \return The residual at the end of Solve or Solve_b
450450
*/
451-
inline ScalarType GetResidual(void) const { return Residual; }
451+
inline ScalarType GetResidual() const {
  /*--- Read-only access to the stored residual. ---*/
  return Residual;
}
452452

453453
/*!
454454
* \brief Set the type of the tolerance for stopping the linear solvers (RELATIVE or ABSOLUTE).

Common/include/parallelization/mpi_structure.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ template class CBaseMPIWrapper<su2double>;
190190
#if defined CODI_REVERSE_TYPE
191191
template class CBaseMPIWrapper<passivedouble>;
192192
#endif
193-
#if defined USE_MIXED_PRECISION
193+
#if defined(USE_MIXED_PRECISION) && !defined(USE_SINGLE_PRECISION)
194194
template class CBaseMPIWrapper<su2mixedfloat>;
195195
#endif
196196

Common/include/parallelization/mpi_structure.hpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,11 @@
6161

6262
#ifdef HAVE_MPI
6363

64+
#ifdef USE_SINGLE_PRECISION
65+
#undef MPI_DOUBLE
66+
#define MPI_DOUBLE MPI_FLOAT
67+
#endif
68+
6469
/*--- Depending on the datatype used, the correct MPI wrapper class is defined.
6570
* For the default (double type) case this results in using the normal MPI routines. ---*/
6671
#if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE
@@ -71,10 +76,10 @@ using namespace medi;
7176
#include <codi/tools/mpi/codiMpiTypes.hpp>
7277

7378
class CMediMPIWrapper;
74-
typedef CMediMPIWrapper SU2_MPI;
79+
using SU2_MPI = CMediMPIWrapper;
7580

76-
typedef codi::CoDiMpiTypes<su2double> MediTypes;
77-
typedef MediTypes::Tool MediTool;
81+
using MediTypes = codi::CoDiMpiTypes<su2double>;
82+
using MediTool = MediTypes::Tool;
7883

7984
extern MediTypes* mediTypes;
8085
#define AMPI_ADOUBLE ((medi::MpiTypeInterface*)mediTypes->MPI_TYPE)
@@ -91,12 +96,12 @@ using SU2_MPI = CBaseMPIWrapper;
9196
*/
9297
class CBaseMPIWrapper {
9398
public:
94-
typedef MPI_Request Request;
95-
typedef MPI_Status Status;
96-
typedef MPI_Datatype Datatype;
97-
typedef MPI_Op Op;
98-
typedef MPI_Comm Comm;
99-
typedef MPI_Win Win;
99+
using Request = MPI_Request;
100+
using Status = MPI_Status;
101+
using Datatype = MPI_Datatype;
102+
using Op = MPI_Op;
103+
using Comm = MPI_Comm;
104+
using Win = MPI_Win;
100105

101106
protected:
102107
static int Rank, Size, MinRankError;
@@ -256,7 +261,7 @@ class CBaseMPIWrapper {
256261
/*--- Wall-clock time as reported by MPI_Wtime(), converted to passivedouble.
 * NOTE(review): MPI_Wtime() returns a double; when passivedouble is float
 * (USE_SINGLE_PRECISION) the value is narrowed on return, which presumably
 * reduces the resolution of time differences - confirm this is acceptable. ---*/
static inline passivedouble Wtime() { return MPI_Wtime(); }
257262
};
258263

259-
typedef MPI_Comm SU2_Comm;
264+
using SU2_Comm = MPI_Comm;
260265

261266
#if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE
262267

@@ -267,8 +272,8 @@ typedef MPI_Comm SU2_Comm;
267272

268273
class CMediMPIWrapper : public CBaseMPIWrapper {
269274
public:
270-
typedef AMPI_Request Request;
271-
typedef AMPI_Status Status;
275+
using Request = AMPI_Request;
276+
using Status = AMPI_Status;
272277

273278
static inline void Init(int* argc, char*** argv) {
274279
AMPI_Init(argc, argv);
@@ -492,10 +497,10 @@ class CMediMPIWrapper : public CBaseMPIWrapper {
492497
template <typename ScalarType>
493498
class CBaseMPIWrapper {
494499
public:
495-
typedef int Comm;
496-
typedef int Datatype;
497-
typedef int Request;
498-
typedef int Op;
500+
using Comm = int;
501+
using Datatype = int;
502+
using Request = int;
503+
using Op = int;
499504

500505
struct Status {
501506
int MPI_TAG;
@@ -632,7 +637,7 @@ struct SelectMPIWrapper<passivedouble> {
632637
#endif
633638

634639
/*--- Specialize for the low precision type. ---*/
635-
#if defined(USE_MIXED_PRECISION)
640+
#if defined(USE_MIXED_PRECISION) && !defined(USE_SINGLE_PRECISION)
636641
template <>
637642
struct SelectMPIWrapper<su2mixedfloat> {
638643
#if defined HAVE_MPI

0 commit comments

Comments
 (0)