@@ -566,6 +566,99 @@ namespace xsimd
566566 xcr0_reg_t m_low {};
567567 };
568568
569+ /* *
570+ * Orchestrator for `CPUID` calls.
571+ *
572+ * This class orchestrates `CPUID` and `XCR0` calls so that they are made in the appropriate
573+ * order. It also implements lazy calling and a caching mechanism around those calls.
574+ * Works on all platforms, and returns all zeros on non `x86` platforms.
575+ */
576+ class x86_cpu_features_backend_cpuid
577+ {
578+ public:
579+ x86_cpu_features_backend_cpuid () noexcept = default ;
580+
581+ inline x86_xcr0 const & xcr0 () const noexcept ;
582+ inline x86_cpuid_leaf0 const & leaf0 () const ;
583+ inline x86_cpuid_leaf80000000 const & leaf80000000 () const ;
584+ inline x86_cpuid_leaf1 const & leaf1 () const ;
585+ inline x86_cpuid_leaf7 const & leaf7 () const ;
586+ inline x86_cpuid_leaf7sub1 const & leaf7sub1 () const ;
587+ inline x86_cpuid_leaf80000001 const & leaf80000001 () const ;
588+
589+ private:
590+ enum class status
591+ {
592+ leaf0_valid = 0 ,
593+ leaf1_valid = 1 ,
594+ leaf7_valid = 2 ,
595+ leaf7sub1_valid = 3 ,
596+ leaf80000000_valid = 4 ,
597+ leaf80000001_valid = 5 ,
598+ xcr0_valid = 6 ,
599+ };
600+
601+ using status_bitset = utils::uint_bitset<status, std::uint32_t >;
602+
603+ mutable x86_cpuid_leaf0 m_leaf0 {}; // Caches are `mutable` so that the const accessors can fill them lazily.
604+ mutable x86_cpuid_leaf1 m_leaf1 {};
605+ mutable x86_cpuid_leaf7 m_leaf7 {};
606+ mutable x86_cpuid_leaf7sub1 m_leaf7sub1 {};
607+ mutable x86_cpuid_leaf80000000 m_leaf80000000 {};
608+ mutable x86_cpuid_leaf80000001 m_leaf80000001 {};
609+ mutable x86_xcr0 m_xcr0 {};
610+ mutable status_bitset m_status {}; // One `status` bit per cache above; set once the value was read or deliberately left zeroed.
611+
612+ inline bool osxsave () const noexcept ;
613+
614+ /* *
615+ * Internal utility to lazily read and cache a CPUID leaf.
616+ *
617+ * @tparam status_id The status bit tracking whether this leaf has been read and cached.
618+ * @tparam L The CPUID leaf type (e.g. x86_cpuid_leaf1, x86_cpuid_leaf7).
619+ * @param leaf_cache A non-const reference to the class member that stores the leaf
620+ * value. It must be non-const because this function may write to it on first
621+ * call. It is passed explicitly (rather than accessed via `this`) to allow
622+ * factoring the caching logic across different leaf members.
623+ * @return A const reference to `leaf_cache`. The non-const input / const-ref output
624+ * asymmetry is intentional: callers must not modify the cached value, but
625+ * this function needs write access to populate it.
626+ *
627+ * On first call, checks whether the leaf number is within the range advertised as
628+ * supported by CPUID (via leaf 0 for the standard range, leaf 0x80000000 for the
629+ * extended range). If supported, reads the leaf from the CPU; otherwise leaves
630+ * `leaf_cache` at its zero-initialized default (all feature bits false). Either
631+ * way, `status_id` is set so subsequent calls return immediately.
632+ */
633+ template <status status_id, typename L>
634+ inline auto const & safe_read_leaf (L& leaf_cache) const ;
635+ };
636+
637+ /* *
638+ * No-Op orchestrator for `CPUID` calls.
639+ *
640+ * This does nothing and returns zero-constructed objects on all calls.
641+ * This is meant as an optimization on non `x86` platforms as the
642+ * `x86_cpu_features_backend_cpuid` can be slightly large (hundreds of bytes).
643+ */
644+ class x86_cpu_features_backend_noop
645+ {
646+ public:
647+ constexpr x86_xcr0 xcr0 () const noexcept { return {}; }
648+ constexpr x86_cpuid_leaf0 leaf0 () const { return {}; }
649+ constexpr x86_cpuid_leaf80000000 leaf80000000 () const { return {}; }
650+ constexpr x86_cpuid_leaf1 leaf1 () const { return {}; }
651+ constexpr x86_cpuid_leaf7 leaf7 () const { return {}; }
652+ constexpr x86_cpuid_leaf7sub1 leaf7sub1 () const { return {}; }
653+ constexpr x86_cpuid_leaf80000001 leaf80000001 () const { return {}; }
654+ };
655+
656+ #if XSIMD_TARGET_X86
657+ using x86_cpu_features_backend_default = x86_cpu_features_backend_cpuid;
658+ #else // !XSIMD_TARGET_X86: use the stateless backend that returns zero-constructed values
659+ using x86_cpu_features_backend_default = x86_cpu_features_backend_noop;
660+ #endif // XSIMD_TARGET_X86
661+
569662 /* *
570663 * An opinionated CPU feature detection utility for x86.
571664 *
@@ -576,7 +669,7 @@ namespace xsimd
576669 * This is well defined on all architectures. It will always return false on
577670 * non-x86 architectures.
578671 */
579- class x86_cpu_features
672+ class x86_cpu_features : private x86_cpu_features_backend_default
580673 {
581674 public:
582675 x86_cpu_features () noexcept = default ;
@@ -681,155 +774,122 @@ namespace xsimd
681774 inline bool avxvnni () const noexcept { return avx_enabled () && leaf7sub1 ().all_bits_set <x86_cpuid_leaf7sub1::eax::avxvnni>(); }
682775
683776 inline bool fma4 () const noexcept { return avx_enabled () && leaf80000001 ().all_bits_set <x86_cpuid_leaf80000001::ecx::fma4>(); }
777+ };
684778
685- private:
686- enum class status
687- {
688- leaf0_valid = 0 ,
689- leaf1_valid = 1 ,
690- leaf7_valid = 2 ,
691- leaf7sub1_valid = 3 ,
692- leaf80000000_valid = 4 ,
693- leaf80000001_valid = 5 ,
694- xcr0_valid = 6 ,
695- };
696-
697- using status_bitset = utils::uint_bitset<status, std::uint32_t >;
779+ /* *******************
780+ * Implementation *
781+ ********************/
698782
699- mutable x86_cpuid_leaf0 m_leaf0 {};
700- mutable x86_cpuid_leaf1 m_leaf1 {};
701- mutable x86_cpuid_leaf7 m_leaf7 {};
702- mutable x86_cpuid_leaf7sub1 m_leaf7sub1 {};
703- mutable x86_cpuid_leaf80000000 m_leaf80000000 {};
704- mutable x86_cpuid_leaf80000001 m_leaf80000001 {};
705- mutable x86_xcr0 m_xcr0 {};
706- mutable status_bitset m_status {};
707-
708- inline x86_xcr0 const & xcr0 () const noexcept
783+ template <x86_cpu_features_backend_cpuid::status status_id, typename L>
784+ inline auto const & x86_cpu_features_backend_cpuid::safe_read_leaf (L& leaf_cache) const
785+ {
786+ // Check if already initialized
787+ if (m_status.bit_is_set <status_id>())
709788 {
710- if (!m_status.bit_is_set <status::xcr0_valid>())
711- {
712- m_xcr0 = osxsave () ? x86_xcr0::read () : x86_xcr0::safe_default ();
713- m_status.set_bit <status::xcr0_valid>();
714- }
715- return m_xcr0;
789+ return leaf_cache;
716790 }
717791
718- inline x86_cpuid_leaf0 const & leaf0 () const
719- {
720- if (!m_status.bit_is_set <status::leaf0_valid>())
721- {
722- m_leaf0 = x86_cpuid_leaf0::read ();
723- m_status.set_bit <status::leaf0_valid>();
724- }
725- return m_leaf0;
726- }
792+ // Limit where we need to check leaf0 or leaf 80000000.
793+ constexpr auto extended_threshold = x86_cpuid_leaf80000000::leaf;
727794
728- inline x86_cpuid_leaf80000000 const & leaf80000000 () const
795+ // Check if it is safe to call CPUID with this value.
796+ // First we identify if the leaf is in the regular or extended range.
797+ // TODO(C++17): if constexpr
798+ if (L::leaf < extended_threshold)
729799 {
730- if (!m_status.bit_is_set <status::leaf80000000_valid>())
800+ // Check leaf0 in regular range
801+ if (L::leaf <= leaf0 ().highest_leaf ())
731802 {
732- m_leaf80000000 = x86_cpuid_leaf80000000::read ();
733- m_status.set_bit <status::leaf80000000_valid>();
803+ leaf_cache = L::read ();
734804 }
735- return m_leaf80000000;
736805 }
737-
738- /* *
739- * Internal utility to lazily read and cache a CPUID leaf.
740- *
741- * @tparam status_id The status bit tracking whether this leaf has been read and cached.
742- * @tparam L The CPUID leaf type (e.g. x86_cpuid_leaf1, x86_cpuid_leaf7).
743- * @param leaf_cache A non-const reference to the class member that stores the leaf
744- * value. It must be non-const because this function may write to it on first
745- * call. It is passed explicitly (rather than accessed via `this`) to allow
746- * factoring the caching logic across different leaf members.
747- * @return A const reference to `leaf_cache`. The non-const input / const-ref output
748- * asymmetry is intentional: callers must not modify the cached value, but
749- * this function needs write access to populate it.
750- *
751- * On first call, checks whether the leaf number is within the range advertised as
752- * supported by CPUID (via leaf 0 for the standard range, leaf 0x80000000 for the
753- * extended range). If supported, reads the leaf from the CPU; otherwise leaves
754- * `leaf_cache` at its zero-initialized default (all feature bits false). Either
755- * way, `status_id` is set so subsequent calls return immediately.
756- */
757- template <status status_id, typename L>
758- inline auto const & safe_read_leaf (L& leaf_cache) const
806+ else
759807 {
760- // Check if already initialized
761- if (m_status. bit_is_set <status_id> ())
808+ // Check leaf80000000 in extended range
809+ if (L::leaf <= leaf80000000 (). highest_leaf ())
762810 {
763- return leaf_cache;
811+ leaf_cache = L::read () ;
764812 }
813+ }
765814
766- // Limit where we need to check leaf0 or leaf 80000000.
767- constexpr auto extended_threshold = x86_cpuid_leaf80000000::leaf;
768-
769- // Check if it is safe to call CPUID with this value.
770- // First we identify if the leaf is in the regular or extended range.
771- // TODO(C++17): if constexpr
772- if (L::leaf < extended_threshold)
773- {
774- // Check leaf0 in regular range
775- if (L::leaf <= leaf0 ().highest_leaf ())
776- {
777- leaf_cache = L::read ();
778- }
779- }
780- else
781- {
782- // Check leaf80000000 in extended range
783- if (L::leaf <= leaf80000000 ().highest_leaf ())
784- {
785- leaf_cache = L::read ();
786- }
787- }
815+ // Mark as valid in all cases, including if it was not read.
816+ // In this case it will be filled with zeros (all false).
817+ m_status.set_bit <status_id>();
818+ return leaf_cache;
819+ }
788820
789- // Mark as valid in all cases, including if it was not read.
790- // In this case it will be filled with zeros (all false).
791- m_status.set_bit <status_id>();
792- return leaf_cache;
821+ inline x86_xcr0 const & x86_cpu_features_backend_cpuid::xcr0 () const noexcept
822+ {
823+ if (!m_status.bit_is_set <status::xcr0_valid>())
824+ {
825+ m_xcr0 = osxsave () ? x86_xcr0::read () : x86_xcr0::safe_default ();
826+ m_status.set_bit <status::xcr0_valid>();
793827 }
828+ return m_xcr0;
829+ }
794830
795- inline x86_cpuid_leaf1 const & leaf1 () const
831+ inline x86_cpuid_leaf0 const & x86_cpu_features_backend_cpuid::leaf0 () const
832+ {
833+ if (!m_status.bit_is_set <status::leaf0_valid>())
796834 {
797- return safe_read_leaf<status::leaf1_valid>(m_leaf1);
835+ m_leaf0 = x86_cpuid_leaf0::read ();
836+ m_status.set_bit <status::leaf0_valid>();
798837 }
838+ return m_leaf0;
839+ }
799840
800- inline x86_cpuid_leaf7 const & leaf7 () const
841+ inline x86_cpuid_leaf80000000 const & x86_cpu_features_backend_cpuid::leaf80000000 () const
842+ {
843+ if (!m_status.bit_is_set <status::leaf80000000_valid>())
801844 {
802- return safe_read_leaf<status::leaf7_valid>(m_leaf7);
845+ m_leaf80000000 = x86_cpuid_leaf80000000::read ();
846+ m_status.set_bit <status::leaf80000000_valid>();
803847 }
848+ return m_leaf80000000;
849+ }
804850
805- inline x86_cpuid_leaf7sub1 const & leaf7sub1 () const
806- {
807- // Check if already initialized
808- if (m_status.bit_is_set <status::leaf7sub1_valid>())
809- {
810- return m_leaf7sub1;
811- }
851+ inline x86_cpuid_leaf1 const & x86_cpu_features_backend_cpuid::leaf1 () const
852+ {
853+ return safe_read_leaf<status::leaf1_valid>(m_leaf1); // Lazy, cached read; m_leaf1 stays zeroed if leaf0 () reports a lower highest leaf.
854+ }
812855
813- // Check if safe to call CPUID with this value as subleaf.
814- constexpr auto start = x86_cpuid_leaf7::eax::highest_subleaf_start;
815- constexpr auto end = x86_cpuid_leaf7::eax::highest_subleaf_end;
816- const auto highest_subleaf7 = leaf7 ().get_range <start, end>();
817- if (x86_cpuid_leaf7sub1::subleaf <= highest_subleaf7)
818- {
819- m_leaf7sub1 = x86_cpuid_leaf7sub1::read ();
820- }
856+ inline x86_cpuid_leaf7 const & x86_cpu_features_backend_cpuid::leaf7 () const
857+ {
858+ return safe_read_leaf<status::leaf7_valid>(m_leaf7); // Lazy, cached read; m_leaf7 stays zeroed if leaf0 () reports a lower highest leaf.
859+ }
821860
822- // Mark as valid in all cases, including if it was not read.
823- // In this case it will be filled with zeros (all false).
824- m_status.set_bit <status::leaf7sub1_valid>();
861+ inline x86_cpuid_leaf7sub1 const & x86_cpu_features_backend_cpuid::leaf7sub1 () const
862+ {
863+ // Check if already initialized
864+ if (m_status.bit_is_set <status::leaf7sub1_valid>())
865+ {
825866 return m_leaf7sub1;
826867 }
827868
828- inline x86_cpuid_leaf80000001 const & leaf80000001 () const
869+ // Check if safe to call CPUID with this value as subleaf.
870+ constexpr auto start = x86_cpuid_leaf7::eax::highest_subleaf_start;
871+ constexpr auto end = x86_cpuid_leaf7::eax::highest_subleaf_end;
872+ const auto highest_subleaf7 = leaf7 ().get_range <start, end>();
873+ if (x86_cpuid_leaf7sub1::subleaf <= highest_subleaf7)
829874 {
830- return safe_read_leaf<status::leaf80000001_valid>(m_leaf80000001 );
875+ m_leaf7sub1 = x86_cpuid_leaf7sub1::read ( );
831876 }
832- };
877+
878+ // Mark as valid in all cases, including if it was not read.
879+ // In this case it will be filled with zeros (all false).
880+ m_status.set_bit <status::leaf7sub1_valid>();
881+ return m_leaf7sub1;
882+ }
883+
884+ inline x86_cpuid_leaf80000001 const & x86_cpu_features_backend_cpuid::leaf80000001 () const
885+ {
886+ return safe_read_leaf<status::leaf80000001_valid>(m_leaf80000001); // Lazy, cached read; stays zeroed if leaf80000000 () reports a lower highest leaf.
887+ }
888+
889+ inline bool x86_cpu_features_backend_cpuid::osxsave () const noexcept
890+ {
891+ return leaf1 ().all_bits_set <x86_cpuid_leaf1::ecx::osxsave>(); // OSXSAVE bit of leaf 1 ECX; xcr0 () uses it to choose between x86_xcr0::read () and x86_xcr0::safe_default ().
892+ }
833893
834894 namespace detail
835895 {
0 commit comments