Skip to content

Commit a533c96

Browse files
committed
feat: adding asmlib
1 parent cdfcc65 commit a533c96

38 files changed

Lines changed: 10235 additions & 0 deletions
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
cmake_minimum_required(VERSION 3.16)

# Builds the 64-bit asmlib NASM sources into the library target `asm`.
# To see the NASM command line, build verbosely
# (e.g. `cmake --build <dir> --verbose`, or configure with
# -DCMAKE_VERBOSE_MAKEFILE=ON).

# NOTE(review): ASMLIB_SOURCE_DIR is not defined in this file and is joined to
# the file names without a separator, so it is presumably either empty or ends
# in "/" -- confirm against the including project.
set(ASM_SOURCES
  ${ASMLIB_SOURCE_DIR}instrset64.asm
  ${ASMLIB_SOURCE_DIR}procname64.asm
  ${ASMLIB_SOURCE_DIR}memcpy64.asm
  ${ASMLIB_SOURCE_DIR}memcmp64.asm
  ${ASMLIB_SOURCE_DIR}memset64.asm
  ${ASMLIB_SOURCE_DIR}cputype64.asm
  ${ASMLIB_SOURCE_DIR}unalignedisfaster64.asm
  ${ASMLIB_SOURCE_DIR}cachesize64.asm
)

# The object format has to be chosen BEFORE enable_language(ASM_NASM): CMake
# expands CMAKE_ASM_NASM_OBJECT_FORMAT into the "-f <format>" part of the NASM
# compile rule while the language is being enabled, so assigning it afterwards
# does not change the format that is actually used.
if(WIN32)
  set(CMAKE_ASM_NASM_OBJECT_FORMAT win64)
elseif(APPLE)
  set(CMAKE_ASM_NASM_OBJECT_FORMAT macho64)
else()
  set(CMAKE_ASM_NASM_OBJECT_FORMAT elf64)
endif()

enable_language(ASM_NASM)

# Flags are appended (not overwritten) so that anything supplied by the user,
# the environment or a toolchain file is preserved. This must happen after
# enable_language(), which initialises CMAKE_ASM_NASM_FLAGS.
if(WIN32)
  string(APPEND CMAKE_ASM_NASM_FLAGS " -dWINDOWS")
elseif(APPLE)
  # Prefix global symbols with "_"; the Mach-O linker needs the leading
  # underscore.
  string(APPEND CMAKE_ASM_NASM_FLAGS " -dUNIX --prefix _")
else()
  string(APPEND CMAKE_SHARED_LINKER_FLAGS " -z noexecstack")
  # call instructions to be re-written as call %1 WRT ..plt for ELF
  string(APPEND CMAKE_ASM_NASM_FLAGS " -dUNIX -dELF")
endif()

string(APPEND CMAKE_ASM_NASM_FLAGS " -dPOSITIONINDEPENDENT")

add_library(asm ${ASM_SOURCES})

target_include_directories(asm PRIVATE ${CMAKE_CURRENT_LIST_DIR})
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
/*************************** asmlib.h ***************************************
2+
* Author: Agner Fog
3+
* Date created: 2003-12-12
4+
* Last modified: 2013-10-04
5+
* Project: asmlib.zip
6+
* Source URL: www.agner.org/optimize
7+
*
8+
* Description:
9+
* Header file for the asmlib function library.
10+
* This library is available in many versions for different platforms.
11+
* See asmlib-instructions.pdf for details.
12+
*
13+
* (c) Copyright 2003 - 2013 by Agner Fog.
14+
* GNU General Public License http://www.gnu.org/licenses/gpl.html
15+
*****************************************************************************/
16+
17+
18+
#ifndef ASMLIB_H
19+
#define ASMLIB_H
20+
21+
22+
/***********************************************************************
23+
Define compiler-specific types and directives
24+
***********************************************************************/
25+
26+
// Define type size_t.
// <stddef.h> is a standard system header, so it is included with angle
// brackets; the quoted form searches the including file's directory first and
// could pick up an unrelated project-local "stddef.h".
#ifndef _SIZE_T_DEFINED
#include <stddef.h>
#endif
30+
31+
// Define integer types with known size:
// int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t.
// If this doesn't work then insert compiler-specific definitions here:
#if defined(__GNUC__) || (defined(_MSC_VER) && _MSC_VER >= 1600) \
 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L)   \
 || (defined(__cplusplus) && __cplusplus >= 201103L)
   // Compilers supporting C99 or C++11 have stdint.h defining these integer
   // types. The language-version checks cover conforming compilers that are
   // neither GNU-compatible nor Microsoft, so they do not fall through to the
   // hand-written typedefs below (which could clash with <stdint.h>).
   #include <stdint.h>
   #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#elif defined(_MSC_VER)
   // Older Microsoft compilers have their own definition
   typedef signed   __int16  int16_t;
   typedef unsigned __int16 uint16_t;
   typedef signed   __int32  int32_t;
   typedef unsigned __int32 uint32_t;
   typedef signed   __int64  int64_t;
   typedef unsigned __int64 uint64_t;
   #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#else
   // This works with most compilers
   typedef signed   short int  int16_t;
   typedef unsigned short int uint16_t;
   typedef signed   int        int32_t;
   typedef unsigned int       uint32_t;
   typedef long long           int64_t;
   typedef unsigned long long uint64_t;
   #define INT64_SUPPORTED // Remove this if the compiler doesn't support 64-bit integers
#endif
56+
57+
58+
// Give the library functions C linkage when compiled as C++
#ifdef __cplusplus
extern "C" {
#endif

/***********************************************************************
Function prototypes, memory and string functions
***********************************************************************/

// Copy count bytes from src to dest
void * A_memcpy(void * dest, const void * src, size_t count);

// Same as memcpy, allows overlap between src and dest
void * A_memmove(void * dest, const void * src, size_t count);

// Set count bytes in dest to (char)c
void * A_memset(void * dest, int c, size_t count);

// Compares two blocks of memory
int A_memcmp(const void * buf1, const void * buf2, size_t num);

// Data blocks bigger than this will be copied uncached by memcpy and memmove
size_t GetMemcpyCacheLimit(void);

// Change limit in GetMemcpyCacheLimit
void SetMemcpyCacheLimit(size_t);

// Data blocks bigger than this will be stored uncached by memset
size_t GetMemsetCacheLimit(void);

// Change limit in GetMemsetCacheLimit
void SetMemsetCacheLimit(size_t);

// Concatenate strings dest and src. Store result in dest
char * A_strcat(char * dest, const char * src);

// Copy string src to dest
char * A_strcpy(char * dest, const char * src);

// Get length of zero-terminated string
size_t A_strlen(const char * str);

// Compare strings. Case sensitive
int A_strcmp(const char * a, const char * b);

// Compare strings. Case insensitive for A-Z only
int A_stricmp(const char *string1, const char *string2);

// Search for substring in string
char * A_strstr(char * haystack, const char * needle);

// Convert string to lower case for A-Z only
void A_strtolower(char * string);

// Convert string to upper case for a-z only
void A_strtoupper(char * string);

// Copy a substring of source into dest
size_t A_substring(char * dest, const char * source, size_t pos, size_t len);

// Find span of characters that belong to set
size_t A_strspn(const char * str, const char * set);

// Find span of characters that don't belong to set
size_t A_strcspn(const char * str, const char * set);

// Count characters that belong to set
size_t strCountInSet(const char * str, const char * set);

// Counts the number of characters in a UTF-8 encoded string
size_t strcount_UTF8(const char * str);


/***********************************************************************
Function prototypes, miscellaneous functions
***********************************************************************/

// Count 1-bits in 32-bit integer
uint32_t A_popcount(uint32_t x);

// Round to nearest or even
int RoundD(double x);

// Round to nearest or even
int RoundF(float x);

// Tell which instruction set is supported
int InstructionSet(void);

// ASCIIZ text describing microprocessor
char * ProcessorName(void);

// Get CPU vendor, family and model
void CpuType(int * vendor, int * family, int * model);

// Get size of data cache
size_t DataCacheSize(int level);

// Makes a debug breakpoint
void A_DebugBreak(void);

#ifdef INT64_SUPPORTED
// Read microprocessor internal clock (64 bits)
uint64_t ReadTSC(void);
#else
// Read microprocessor internal clock (only 32 bits supported by compiler)
uint32_t ReadTSC(void);
#endif

// call CPUID instruction
void cpuid_ex(int abcd[4], int eax, int ecx);

// Convenience wrapper: calls cpuid_ex with ecx = 0
static inline void cpuid_abcd(int abcd[4], int eax) {
   cpuid_ex(abcd, eax, 0);
}

#ifdef __cplusplus
}  // end of extern "C"

// Overloaded versions, available only when compiling as C++

// Round(double) forwards to RoundD
static inline int Round(double x) {
   return RoundD(x);
}

// Round(float) forwards to RoundF
static inline int Round(float x) {
   return RoundF(x);
}

// const char * version of A_strstr; forwards to the char * version
static inline const char * A_strstr(const char * haystack, const char * needle) {
   char * mutableHaystack = const_cast<char *>(haystack);
   return A_strstr(mutableHaystack, needle);
}

#endif // __cplusplus
122+
123+
124+
/***********************************************************************
Function prototypes, integer division functions
***********************************************************************/

// Turn off name mangling
#ifdef __cplusplus
extern "C" {
#endif

void     setdivisori32(int buffer[2], int d);                     // Set divisor for repeated division
int      dividefixedi32(const int buffer[2], int x);              // Fast division with previously set divisor
void     setdivisoru32(uint32_t buffer[2], uint32_t d);           // Set divisor for repeated division
uint32_t dividefixedu32(const uint32_t buffer[2], uint32_t x);    // Fast division with previously set divisor

// Test if emmintrin.h is included and __m128i defined.
// The GCC-style option that enables SSE2 is spelled -msse2, so the
// diagnostic names that flag.
#if defined(__GNUC__) && defined(_EMMINTRIN_H_INCLUDED) && !defined(__SSE2__)
#error Please compile with -msse2 or higher
#endif

#if defined(_INCLUDED_EMM) || (defined(_EMMINTRIN_H_INCLUDED) && defined(__SSE2__))
#define VECTORDIVISIONDEFINED

// Integer vector division functions. These functions divide an integer vector by a scalar:

// Set divisor for repeated integer vector division
void setdivisorV8i16(__m128i buf[2], int16_t  d);                 // Set divisor for repeated division
void setdivisorV8u16(__m128i buf[2], uint16_t d);                 // Set divisor for repeated division
void setdivisorV4i32(__m128i buf[2], int32_t  d);                 // Set divisor for repeated division
void setdivisorV4u32(__m128i buf[2], uint32_t d);                 // Set divisor for repeated division

// Fast division of vector by previously set divisor
__m128i dividefixedV8i16(const __m128i buf[2], __m128i x);        // Fast division with previously set divisor
__m128i dividefixedV8u16(const __m128i buf[2], __m128i x);        // Fast division with previously set divisor
__m128i dividefixedV4i32(const __m128i buf[2], __m128i x);        // Fast division with previously set divisor
__m128i dividefixedV4u32(const __m128i buf[2], __m128i x);        // Fast division with previously set divisor

#endif // defined(_INCLUDED_EMM) || (defined(_EMMINTRIN_H_INCLUDED) && defined(__SSE2__))

#ifdef __cplusplus
}  // end of extern "C"
#endif // __cplusplus
165+
166+
#ifdef __cplusplus
167+
168+
// Define classes and operator '/' for fast division with fixed divisor
169+
class div_i32;
170+
class div_u32;
171+
static inline int32_t operator / (int32_t x, div_i32 const &D);
172+
static inline uint32_t operator / (uint32_t x, div_u32 const & D);
173+
174+
class div_i32 { // Signed 32 bit integer division
175+
public:
176+
div_i32() { // Default constructor
177+
buffer[0] = buffer[1] = 0;
178+
}
179+
div_i32(int d) { // Constructor with divisor
180+
setdivisor(d);
181+
}
182+
void setdivisor(int d) { // Set divisor
183+
setdivisori32(buffer, d);
184+
}
185+
protected:
186+
int buffer[2]; // Internal memory
187+
friend int32_t operator / (int32_t x, div_i32 const & D);
188+
};
189+
190+
static inline int32_t operator / (int32_t x, div_i32 const &D){// Overloaded operator '/'
191+
return dividefixedi32(D.buffer, x);
192+
}
193+
194+
static inline int32_t operator /= (int32_t &x, div_i32 const &D){// Overloaded operator '/='
195+
return x = x / D;
196+
}
197+
198+
class div_u32 { // Unsigned 32 bit integer division
199+
public:
200+
div_u32() { // Default constructor
201+
buffer[0] = buffer[1] = 0;
202+
}
203+
div_u32(uint32_t d) { // Constructor with divisor
204+
setdivisor(d);
205+
}
206+
void setdivisor(uint32_t d) { // Set divisor
207+
setdivisoru32(buffer, d);
208+
}
209+
protected:
210+
uint32_t buffer[2]; // Internal memory
211+
friend uint32_t operator / (uint32_t x, div_u32 const & D);
212+
};
213+
214+
static inline uint32_t operator / (uint32_t x, div_u32 const & D){ // Overloaded operator '/'
215+
return dividefixedu32(D.buffer, x);
216+
}
217+
218+
static inline uint32_t operator /= (uint32_t &x, div_u32 const &D){// Overloaded operator '/='
219+
return x = x / D;
220+
}
221+
222+
#endif // __cplusplus
223+
224+
#endif // ASMLIB_H

0 commit comments

Comments
 (0)