su2code
diff --git a/‎.github/workflows/regression.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/regression.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Common/include/code_config.hpp‎
Lines changed: 27 additions & 5 deletions b/‎Common/include/code_config.hpp‎
Lines changed: 27 additions & 5 deletions
diff --git a/‎Common/include/linear_algebra/CPastixWrapper.hpp‎
Lines changed: 2 additions & 2 deletions b/‎Common/include/linear_algebra/CPastixWrapper.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎Common/include/linear_algebra/CSysSolve.hpp‎
Lines changed: 2 additions & 2 deletions b/‎Common/include/linear_algebra/CSysSolve.hpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎Common/include/parallelization/mpi_structure.cpp‎
Lines changed: 1 addition & 1 deletion b/‎Common/include/parallelization/mpi_structure.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎Common/include/parallelization/mpi_structure.hpp‎
Lines changed: 22 additions & 17 deletions b/‎Common/include/parallelization/mpi_structure.hpp‎
Lines changed: 22 additions & 17 deletions
@@ -30,7 +30,7 @@ jobs:
       - id: compute
         run: |
           if [[ "${{ inputs.runner || 'ubuntu-latest' }}" == *arm* ]]; then
-            echo "flags=" >> $GITHUB_OUTPUT
+            echo "flags=-Dcpu-arch=armv9-a+simd" >> $GITHUB_OUTPUT
             echo "werror=" >> $GITHUB_OUTPUT
           else
             echo "flags=-Dcpu-arch=skylake" >> $GITHUB_OUTPUT
 
@@ -27,6 +27,7 @@
 #pragma once
 
 #include <type_traits>
+#include <cmath>
 
 #if defined(_MSC_VER)
 #define PRAGMIZE(X) __pragma(X)
@@ -94,6 +95,31 @@ FORCEINLINE Out su2staticcast_p(In ptr) {
 #define HAVE_OMP
 #endif
 
+/*--- No full single precision for AD builds (CODI_*_TYPE defined): USE_SINGLE_PRECISION is dropped. ---*/
+#if (defined(CODI_REVERSE_TYPE) || defined(CODI_FORWARD_TYPE)) && defined(USE_SINGLE_PRECISION)
+#undef USE_SINGLE_PRECISION
+#endif
+
+/*--- Passive floating point type, for (rare) compatibility cases or for computations that are intended
+ * to be (always) passive. It is float if USE_SINGLE_PRECISION is defined and double otherwise. ---*/
+#ifdef USE_SINGLE_PRECISION
+using passivedouble = float;
+#else
+using passivedouble = double;
+#endif
+
+/*--- std::min/max do not compile with arguments of inconsistent types, which happens in single precision
+ * due to floating point literals (double by default). These overloads cast the double argument to float and
+ * call fmin/fmax. NOTE(review): overloading inside namespace std is formally undefined behavior -- confirm. ---*/
+#ifdef USE_SINGLE_PRECISION
+namespace std {
+FORCEINLINE float min(const float& a, const double& b) { return fmin(a, static_cast<float>(b)); }
+FORCEINLINE float min(const double& b, const float& a) { return fmin(a, static_cast<float>(b)); }
+FORCEINLINE float max(const float& a, const double& b) { return fmax(a, static_cast<float>(b)); }
+FORCEINLINE float max(const double& b, const float& a) { return fmax(a, static_cast<float>(b)); }
+}  // namespace std
+#endif
+
 /*--- Depending on the datatype defined during the configuration,
  * include the correct definition, and create the main typedef. ---*/
 
@@ -131,13 +157,9 @@ using su2double = codi::RealReverseTag;
 #include "codi.hpp"
 using su2double = codi::RealForward;
 #else  // primal / direct / no AD
-using su2double = double;
+using su2double = passivedouble;
 #endif
 
-/*--- This type can be used for (rare) compatibility cases or for
- * computations that are intended to be (always) passive. ---*/
-using passivedouble = double;
-
 /*--- Define a type for potentially lower precision operations. ---*/
 #ifndef CODI_FORWARD_TYPE
 #ifdef USE_MIXED_PRECISION
 
@@ -62,8 +62,8 @@ class CPastixWrapper {
   vector<pastix_int_t> perm;     /*!< \brief Ordering computed by PaStiX. */
   vector<su2mixedfloat> workvec; /*!< \brief RHS vector which then becomes the solution. */
 
-  pastix_int_t iparm[IPARM_SIZE];  /*!< \brief Integer parameters for PaStiX. */
-  passivedouble dparm[DPARM_SIZE]; /*!< \brief Floating point parameters for PaStiX. */
+  pastix_int_t iparm[IPARM_SIZE]; /*!< \brief Integer parameters for PaStiX. */
+  double dparm[DPARM_SIZE];       /*!< \brief Floating point parameters for PaStiX. */
 
   struct {
     unsigned long nVar = 0;
 
@@ -442,13 +442,13 @@ class CSysSolve {
    * \brief Get the number of iterations.
    * \return The number of iterations done by Solve or Solve_b
    */
-  inline unsigned long GetIterations(void) const { return Iterations; }
+  inline unsigned long GetIterations() const { return Iterations; }
 
   /*!
    * \brief Get the final residual.
    * \return The residual at the end of Solve or Solve_b
    */
-  inline ScalarType GetResidual(void) const { return Residual; }
+  inline ScalarType GetResidual() const { return Residual; }
 
   /*!
    * \brief Set the type of the tolerance for stopping the linear solvers (RELATIVE or ABSOLUTE).
 
@@ -190,7 +190,7 @@ template class CBaseMPIWrapper<su2double>;
 #if defined CODI_REVERSE_TYPE
 template class CBaseMPIWrapper<passivedouble>;
 #endif
-#if defined USE_MIXED_PRECISION
+#if defined(USE_MIXED_PRECISION) && !defined(USE_SINGLE_PRECISION)
 template class CBaseMPIWrapper<su2mixedfloat>;
 #endif
 
 
@@ -61,6 +61,11 @@
 
 #ifdef HAVE_MPI
 
+#ifdef USE_SINGLE_PRECISION  // single precision build: from here on MPI_DOUBLE expands to MPI_FLOAT
+#undef MPI_DOUBLE
+#define MPI_DOUBLE MPI_FLOAT
+#endif  // USE_SINGLE_PRECISION
+
 /*--- Depending on the datatype used, the correct MPI wrapper class is defined.
  * For the default (double type) case this results in using the normal MPI routines. ---*/
 #if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE
@@ -71,10 +76,10 @@ using namespace medi;
 #include <codi/tools/mpi/codiMpiTypes.hpp>
 
 class CMediMPIWrapper;
-typedef CMediMPIWrapper SU2_MPI;
+using SU2_MPI = CMediMPIWrapper;
 
-typedef codi::CoDiMpiTypes<su2double> MediTypes;
-typedef MediTypes::Tool MediTool;
+using MediTypes = codi::CoDiMpiTypes<su2double>;
+using MediTool = MediTypes::Tool;
 
 extern MediTypes* mediTypes;
 #define AMPI_ADOUBLE ((medi::MpiTypeInterface*)mediTypes->MPI_TYPE)
@@ -91,12 +96,12 @@ using SU2_MPI = CBaseMPIWrapper;
  */
 class CBaseMPIWrapper {
  public:
-  typedef MPI_Request Request;
-  typedef MPI_Status Status;
-  typedef MPI_Datatype Datatype;
-  typedef MPI_Op Op;
-  typedef MPI_Comm Comm;
-  typedef MPI_Win Win;
+  using Request = MPI_Request;
+  using Status = MPI_Status;
+  using Datatype = MPI_Datatype;
+  using Op = MPI_Op;
+  using Comm = MPI_Comm;
+  using Win = MPI_Win;
 
  protected:
   static int Rank, Size, MinRankError;
@@ -256,7 +261,7 @@ class CBaseMPIWrapper {
   static inline passivedouble Wtime(void) { return MPI_Wtime(); }
 };
 
-typedef MPI_Comm SU2_Comm;
+using SU2_Comm = MPI_Comm;
 
 #if defined CODI_REVERSE_TYPE || defined CODI_FORWARD_TYPE
 
@@ -267,8 +272,8 @@ typedef MPI_Comm SU2_Comm;
 
 class CMediMPIWrapper : public CBaseMPIWrapper {
  public:
-  typedef AMPI_Request Request;
-  typedef AMPI_Status Status;
+  using Request = AMPI_Request;
+  using Status = AMPI_Status;
 
   static inline void Init(int* argc, char*** argv) {
     AMPI_Init(argc, argv);
@@ -492,10 +497,10 @@ class CMediMPIWrapper : public CBaseMPIWrapper {
 template <typename ScalarType>
 class CBaseMPIWrapper {
  public:
-  typedef int Comm;
-  typedef int Datatype;
-  typedef int Request;
-  typedef int Op;
+  using Comm = int;
+  using Datatype = int;
+  using Request = int;
+  using Op = int;
 
   struct Status {
     int MPI_TAG;
@@ -632,7 +637,7 @@ struct SelectMPIWrapper<passivedouble> {
 #endif
 
 /*--- Specialize for the low precision type. ---*/
-#if defined(USE_MIXED_PRECISION)
+#if defined(USE_MIXED_PRECISION) && !defined(USE_SINGLE_PRECISION)
 template <>
 struct SelectMPIWrapper<su2mixedfloat> {
 #if defined HAVE_MPI