|
| 1 | +/*************************** asmlib.h *************************************** |
| 2 | +* Author: Agner Fog |
| 3 | +* Date created: 2003-12-12 |
| 4 | +* Last modified: 2013-10-04 |
| 5 | +* Project: asmlib.zip |
| 6 | +* Source URL: www.agner.org/optimize |
| 7 | +* |
| 8 | +* Description: |
| 9 | +* Header file for the asmlib function library. |
| 10 | +* This library is available in many versions for different platforms. |
| 11 | +* See asmlib-instructions.pdf for details. |
| 12 | +* |
| 13 | +* (c) Copyright 2003 - 2013 by Agner Fog. |
| 14 | +* GNU General Public License http://www.gnu.org/licenses/gpl.html |
| 15 | +*****************************************************************************/ |
| 16 | + |
| 17 | + |
| 18 | +#ifndef ASMLIB_H |
| 19 | +#define ASMLIB_H |
| 20 | + |
| 21 | + |
| 22 | +/*********************************************************************** |
| 23 | +Define compiler-specific types and directives |
| 24 | +***********************************************************************/ |
| 25 | + |
// Define type size_t.
// The standard header is named with angle brackets so that a stray file
// called "stddef.h" next to the including source cannot shadow it.
#ifndef _SIZE_T_DEFINED
#include <stddef.h>
#endif

// Define integer types with known size: int16_t, uint16_t, int32_t, uint32_t,
// int64_t, uint64_t.
// If this doesn't work then insert compiler-specific definitions here:
#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1600) \
    || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) \
    || (defined(__cplusplus) && __cplusplus >= 201103L)
    // Compilers supporting C99 or C++11 have stdint.h defining these integer types.
    // Preferring it over the hand-written typedefs below avoids conflicting
    // redefinitions when the program includes stdint.h as well.
    #include <stdint.h>
    #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#elif defined(_MSC_VER)
    // Older Microsoft compilers have their own definition
    typedef signed   __int16  int16_t;
    typedef unsigned __int16 uint16_t;
    typedef signed   __int32  int32_t;
    typedef unsigned __int32 uint32_t;
    typedef signed   __int64  int64_t;
    typedef unsigned __int64 uint64_t;
    #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#else
    // This works with most compilers
    typedef signed   short int  int16_t;
    typedef unsigned short int uint16_t;
    typedef signed   int        int32_t;
    typedef unsigned int       uint32_t;
    typedef long long           int64_t;
    typedef unsigned long long uint64_t;
    #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#endif
| 56 | + |
| 57 | + |
| 58 | +// Turn off name mangling |
| 59 | +#ifdef __cplusplus |
| 60 | +extern "C" { |
| 61 | +#endif |
| 62 | + |
| 63 | +/*********************************************************************** |
| 64 | +Function prototypes, memory and string functions |
| 65 | +***********************************************************************/ |
// ---- Memory block functions ----

// Copy count bytes from src to dest
void *A_memcpy(void *dest, const void *src, size_t count);

// Same as A_memcpy, but src and dest are allowed to overlap
void *A_memmove(void *dest, const void *src, size_t count);

// Set count bytes in dest to (char)c
void *A_memset(void *dest, int c, size_t count);

// Compare two blocks of memory of num bytes each
int A_memcmp(const void *buf1, const void *buf2, size_t num);

// ---- Cache-bypass thresholds ----

// Data blocks bigger than this limit are copied uncached by memcpy and memmove
size_t GetMemcpyCacheLimit(void);

// Change the limit reported by GetMemcpyCacheLimit
void SetMemcpyCacheLimit(size_t);

// Data blocks bigger than this limit are stored uncached by memset
size_t GetMemsetCacheLimit(void);

// Change the limit reported by GetMemsetCacheLimit
void SetMemsetCacheLimit(size_t);

// ---- Zero-terminated string functions ----

// Concatenate strings dest and src; the result is stored in dest
char *A_strcat(char *dest, const char *src);

// Copy string src to dest
char *A_strcpy(char *dest, const char *src);

// Get length of zero-terminated string
size_t A_strlen(const char *str);

// Compare strings, case sensitive
int A_strcmp(const char *a, const char *b);

// Compare strings, case insensitive for A-Z only
int A_stricmp(const char *string1, const char *string2);

// Search for substring needle in string haystack
char *A_strstr(char *haystack, const char *needle);

// Convert string to lower case, for A-Z only
void A_strtolower(char *string);

// Convert string to upper case, for a-z only
void A_strtoupper(char *string);

// Copy a substring from source into dest
size_t A_substring(char *dest, const char *source, size_t pos, size_t len);

// Find span of characters that belong to set
size_t A_strspn(const char *str, const char *set);

// Find span of characters that don't belong to set
size_t A_strcspn(const char *str, const char *set);

// Count characters that belong to set
size_t strCountInSet(const char *str, const char *set);

// Count the number of characters in a UTF-8 encoded string
size_t strcount_UTF8(const char *str);
| 87 | + |
| 88 | + |
| 89 | +/*********************************************************************** |
| 90 | +Function prototypes, miscellaneous functions |
| 91 | +***********************************************************************/ |
// Count 1-bits in a 32-bit integer
uint32_t A_popcount(uint32_t x);

// Round double to integer: nearest, or even
int RoundD(double x);

// Round float to integer: nearest, or even
int RoundF(float x);

// Tell which instruction set is supported
int InstructionSet(void);

// ASCIIZ text describing the microprocessor
char *ProcessorName(void);

// Get CPU vendor, family and model through the three pointers
void CpuType(int *vendor, int *family, int *model);

// Get size of the data cache at the given level
size_t DataCacheSize(int level);

// Make a debug breakpoint
void A_DebugBreak(void);

// Read microprocessor internal clock
#if !defined(INT64_SUPPORTED)
uint32_t ReadTSC(void);   // only 32 bits supported by compiler
#else
uint64_t ReadTSC(void);   // full 64 bits
#endif
// Call the CPUID instruction with the given eax and ecx values.
// NOTE(review): abcd presumably receives the four result registers - confirm
// against the library documentation.
void cpuid_ex(int abcd[4], int eax, int ecx);

// Shorthand for cpuid_ex with ecx fixed to 0
static inline void cpuid_abcd(int abcd[4], int eax)
{
    const int ecx = 0;
    cpuid_ex(abcd, eax, ecx);
}
| 108 | + |
| 109 | +#ifdef __cplusplus |
| 110 | +} // end of extern "C" |
| 111 | + |
| 112 | +// Define overloaded versions if compiling as C++ |
| 113 | + |
| 114 | +static inline int Round (double x) { // Overload name Round |
| 115 | + return RoundD(x);} |
| 116 | +static inline int Round (float x) { // Overload name Round |
| 117 | + return RoundF(x);} |
// Overload of A_strstr for a const haystack: forwards to the char * version
// and hands the result back as const char *
static inline const char *A_strstr(const char *haystack, const char *needle)
{
    char *writable_haystack = const_cast<char *>(haystack);
    return A_strstr(writable_haystack, needle);
}
| 120 | + |
| 121 | +#endif // __cplusplus |
| 122 | + |
| 123 | + |
| 124 | +/*********************************************************************** |
| 125 | +Function prototypes, integer division functions |
| 126 | +***********************************************************************/ |
| 127 | + |
| 128 | +// Turn off name mangling |
| 129 | +#ifdef __cplusplus |
| 130 | +extern "C" { |
| 131 | +#endif |
| 132 | + |
// Signed 32-bit: set divisor d for repeated division; buffer holds the setup
void setdivisori32(int buffer[2], int d);

// Signed 32-bit: fast division of x with the previously set divisor
int dividefixedi32(const int buffer[2], int x);

// Unsigned 32-bit: set divisor d for repeated division; buffer holds the setup
void setdivisoru32(uint32_t buffer[2], uint32_t d);

// Unsigned 32-bit: fast division of x with the previously set divisor
uint32_t dividefixedu32(const uint32_t buffer[2], uint32_t x);
| 137 | + |
// Test if emmintrin.h is included and __m128i defined.
// The check relies on the include-guard macro of emmintrin.h, which differs
// between compilers: _INCLUDED_EMM, _EMMINTRIN_H_INCLUDED (GCC) and
// __EMMINTRIN_H (Clang).
#if defined(__GNUC__) && defined(_EMMINTRIN_H_INCLUDED) && !defined(__SSE2__)
#error Please compile with -msse2 or higher
#endif

#if defined(_INCLUDED_EMM) \
    || ((defined(_EMMINTRIN_H_INCLUDED) || defined(__EMMINTRIN_H)) && defined(__SSE2__))
#define VECTORDIVISIONDEFINED

// Integer vector division functions. These functions divide an integer vector by a scalar:

// Set divisor for repeated integer vector division
void setdivisorV8i16(__m128i buf[2], int16_t d);             // 8 x signed 16-bit
void setdivisorV8u16(__m128i buf[2], uint16_t d);            // 8 x unsigned 16-bit
void setdivisorV4i32(__m128i buf[2], int32_t d);             // 4 x signed 32-bit
void setdivisorV4u32(__m128i buf[2], uint32_t d);            // 4 x unsigned 32-bit

// Fast division of vector by previously set divisor
__m128i dividefixedV8i16(const __m128i buf[2], __m128i x);   // 8 x signed 16-bit
__m128i dividefixedV8u16(const __m128i buf[2], __m128i x);   // 8 x unsigned 16-bit
__m128i dividefixedV4i32(const __m128i buf[2], __m128i x);   // 4 x signed 32-bit
__m128i dividefixedV4u32(const __m128i buf[2], __m128i x);   // 4 x unsigned 32-bit

#endif // emmintrin.h included and SSE2 available
| 161 | + |
| 162 | +#ifdef __cplusplus |
| 163 | +} // end of extern "C" |
| 164 | +#endif // __cplusplus |
| 165 | + |
| 166 | +#ifdef __cplusplus |
| 167 | + |
| 168 | +// Define classes and operator '/' for fast division with fixed divisor |
// Forward declarations. The division operators are announced ahead of the
// classes, which name them as friends further down.
class div_i32;
class div_u32;
static inline int32_t  operator/(int32_t x, const div_i32 &D);
static inline uint32_t operator/(uint32_t x, const div_u32 &D);
| 173 | + |
| 174 | +class div_i32 { // Signed 32 bit integer division |
| 175 | +public: |
| 176 | + div_i32() { // Default constructor |
| 177 | + buffer[0] = buffer[1] = 0; |
| 178 | + } |
| 179 | + div_i32(int d) { // Constructor with divisor |
| 180 | + setdivisor(d); |
| 181 | + } |
| 182 | + void setdivisor(int d) { // Set divisor |
| 183 | + setdivisori32(buffer, d); |
| 184 | + } |
| 185 | +protected: |
| 186 | + int buffer[2]; // Internal memory |
| 187 | + friend int32_t operator / (int32_t x, div_i32 const & D); |
| 188 | +}; |
| 189 | + |
| 190 | +static inline int32_t operator / (int32_t x, div_i32 const &D){// Overloaded operator '/' |
| 191 | + return dividefixedi32(D.buffer, x); |
| 192 | +} |
| 193 | + |
| 194 | +static inline int32_t operator /= (int32_t &x, div_i32 const &D){// Overloaded operator '/=' |
| 195 | + return x = x / D; |
| 196 | +} |
| 197 | + |
| 198 | +class div_u32 { // Unsigned 32 bit integer division |
| 199 | +public: |
| 200 | + div_u32() { // Default constructor |
| 201 | + buffer[0] = buffer[1] = 0; |
| 202 | + } |
| 203 | + div_u32(uint32_t d) { // Constructor with divisor |
| 204 | + setdivisor(d); |
| 205 | + } |
| 206 | + void setdivisor(uint32_t d) { // Set divisor |
| 207 | + setdivisoru32(buffer, d); |
| 208 | + } |
| 209 | +protected: |
| 210 | + uint32_t buffer[2]; // Internal memory |
| 211 | + friend uint32_t operator / (uint32_t x, div_u32 const & D); |
| 212 | +}; |
| 213 | + |
| 214 | +static inline uint32_t operator / (uint32_t x, div_u32 const & D){ // Overloaded operator '/' |
| 215 | + return dividefixedu32(D.buffer, x); |
| 216 | +} |
| 217 | + |
| 218 | +static inline uint32_t operator /= (uint32_t &x, div_u32 const &D){// Overloaded operator '/=' |
| 219 | + return x = x / D; |
| 220 | +} |
| 221 | + |
| 222 | +#endif // __cplusplus |
| 223 | + |
| 224 | +#endif // ASMLIB_H |
0 commit comments