Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

##### Fastest and most efficient **Run Length Encoding** library
- ARM NEON support
- RISC-V scalar fallback support
- 100% C (C++ compatible headers), without inline assembly
- Most efficient compression
- No other RLE library compresses or decompresses faster with better compression
Expand Down Expand Up @@ -150,6 +151,7 @@ for more info, see also: [Entropy Coding Benchmark](https://sites.google.com/sit
- Linux amd64: Clang (>=3.2)
- Linux arm64: 64 bits aarch64 ARMv8: gcc (>=6.3)
- Linux arm64: 64 bits aarch64 ARMv8: clang
- Linux riscv64: scalar fallback path with a C99 compiler
- macOS: Xcode (>=9)
- PowerPC ppc64le (incl. SIMD): gcc (>=8.0)

Expand Down
19 changes: 19 additions & 0 deletions include_/conf.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,24 @@ static ALWAYS_INLINE void stof64( void *cp, double
/* Unaligned read: copy one `unsigned` worth of bytes from cp into *x. */
static ALWAYS_INLINE void ltou32(unsigned *x, const void *cp)
{
    memcpy(x, cp, sizeof(unsigned));
}

/* Unaligned read: copy one `unsigned long long` worth of bytes from cp into *x. */
static ALWAYS_INLINE void ltou64(unsigned long long *x, const void *cp)
{
    memcpy(x, cp, sizeof(unsigned long long));
}

#elif defined(__riscv)
#include <string.h>
// RISC-V: every load/store helper goes through memcpy, so no misaligned or
// type-punned pointer is ever dereferenced. The helpers are real functions
// (like the preceding branch) instead of GNU statement-expression macros:
// arguments are evaluated exactly once, are type-checked, and cannot collide
// with a macro-local temporary of the same name.
#ifndef ALWAYS_INLINE
#define ALWAYS_INLINE inline   // fallback only; the macro is already used by the preceding branch
#endif

// ctoXX(cp): read a value of the given type from the (possibly unaligned) address cp.
static ALWAYS_INLINE unsigned short ctou16(const void *cp) { unsigned short x; memcpy(&x, cp, sizeof(x)); return x; }
static ALWAYS_INLINE unsigned       ctou32(const void *cp) { unsigned       x; memcpy(&x, cp, sizeof(x)); return x; }
static ALWAYS_INLINE uint64_t       ctou64(const void *cp) { uint64_t       x; memcpy(&x, cp, sizeof(x)); return x; }
static ALWAYS_INLINE float          ctof32(const void *cp) { float          x; memcpy(&x, cp, sizeof(x)); return x; }
static ALWAYS_INLINE double         ctof64(const void *cp) { double         x; memcpy(&x, cp, sizeof(x)); return x; }

// stoXX(cp, x): write x to the (possibly unaligned) address cp.
// stou8 stays a macro: a single byte store needs no temporary and no memcpy.
#define stou8(_cp_, _x_) (*((uint8_t *)(_cp_)) = (_x_))
static ALWAYS_INLINE void stou16(void *cp, unsigned short x) { memcpy(cp, &x, sizeof(x)); }
static ALWAYS_INLINE void stou32(void *cp, unsigned       x) { memcpy(cp, &x, sizeof(x)); }
static ALWAYS_INLINE void stou64(void *cp, uint64_t       x) { memcpy(cp, &x, sizeof(x)); }
static ALWAYS_INLINE void stof32(void *cp, float          x) { memcpy(cp, &x, sizeof(x)); }
static ALWAYS_INLINE void stof64(void *cp, double         x) { memcpy(cp, &x, sizeof(x)); }

// ltouXX(px, cp): read from cp into *px. Kept as macros so the copy size follows
// the pointee type of px (sizeof does not evaluate its operand, so px is evaluated once).
#define ltou32(_px_, _cp_) do { memcpy((_px_), (_cp_), sizeof(*(_px_))); } while(0)
#define ltou64(_px_, _cp_) do { memcpy((_px_), (_cp_), sizeof(*(_px_))); } while(0)

#elif defined(__i386__) || defined(__x86_64__) || \
defined(_M_IX86) || defined(_M_AMD64) || _MSC_VER ||\
defined(__powerpc__) || defined(__s390__) ||\
Expand Down Expand Up @@ -251,6 +269,7 @@ struct _PACKED doubleu { double d; };
defined(__x86_64__) || defined(_M_X64) ||\
defined(__ia64) || defined(_M_IA64) ||\
defined(__aarch64__) ||\
(defined(__riscv) && (__riscv_xlen == 64)) ||\
defined(__mips64) ||\
defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) ||\
defined(__s390x__)
Expand Down
10 changes: 8 additions & 2 deletions include_/time_.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
// time_.h : parameter free high precision time/benchmark functions
#include <time.h>
#include <float.h>
#ifdef _WIN32
#if defined(_WIN32) && !defined(__riscv)
#include <windows.h>
#ifndef sleep
#define sleep(n) Sleep((n) * 1000)
Expand Down Expand Up @@ -104,13 +104,19 @@ static int tmiszero(tm_t t) { return !t; }
#define TM_MBS "MB/s"
/* Throughput for l bytes handled in time t, scaled down by 10^6 (the unit named by TM_MBS). */
static double TMBS(unsigned l, double t)
{
    const double per_time_unit = l / t;
    return per_time_unit / 1000000.0;
}

#ifdef _WIN32 //-------- windows
#if defined(_WIN32) && !defined(__riscv) //-------- windows
static LARGE_INTEGER tps;

typedef unsigned __int64 tm_t;
static tm_t tmtime() { LARGE_INTEGER tm; tm_t t; QueryPerformanceCounter(&tm); return tm.QuadPart; }
static tm_t tminit() { tm_t t0,ts; QueryPerformanceFrequency(&tps); t0 = tmtime(); while((ts = tmtime())==t0) {}; return ts; }
static double tmdiff(tm_t start, tm_t stop) { return (double)(stop - start)/tps.QuadPart; }
static int tmiszero(tm_t t) { return !t; }
#elif defined(__riscv)
typedef clock_t tm_t;                         // timestamp as returned by clock()
// Current processor time as reported by clock().
static tm_t   tmtime(void)                    { return clock(); }
// Spin until clock() advances and return the first new reading.
// clock() reports (clock_t)-1 when no processor time is available; in that case
// the value would never change, so return immediately instead of looping forever.
static tm_t   tminit(void) {
  tm_t t0 = tmtime(), t;
  if(t0 == (tm_t)-1) return t0;
  while((t = tmtime()) == t0) {}
  return t;
}
// Elapsed time between two readings, in seconds.
static double tmdiff(tm_t start, tm_t stop)   { return (double)(stop - start) / CLOCKS_PER_SEC; }
// Returns 1 when t is zero, 0 otherwise.
static int    tmiszero(tm_t t)                { return !t; }
#else // Linux & compatible / MacOS
#ifdef __APPLE__
Expand Down
7 changes: 7 additions & 0 deletions makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ else

# Infer ARCH from the compiler name: the first substring found in $(CC) wins.
# ARCH is left untouched when none of the substrings match.
ifneq (,$(findstring aarch64,$(CC)))
ARCH = aarch64
else ifneq (,$(findstring riscv64,$(CC)))
ARCH = riscv64
else ifneq (,$(findstring riscv32,$(CC)))
ARCH = riscv32
else ifneq (,$(findstring powerpc64le,$(CC)))
ARCH = ppc64le
endif
Expand All @@ -43,6 +47,9 @@ else
CFLAGS+=-march=armv8-a
endif
MSSE=-march=armv8-a
else ifneq ($(filter riscv%,$(ARCH)),)
MARCH=
MSSE=
else ifeq ($(ARCH),$(filter $(ARCH),x86_64 ppc64le))
CFLAGS=-march=native
MSSE=-mssse3
Expand Down
6 changes: 3 additions & 3 deletions trle_.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@

// _vlput32(_op_, _x_, _act_): append _x_ at _op_ in a variable-length form, advance _op_,
// then run _act_. The form is chosen by comparing _x_ against VL_OFS1 / VL_OFS2 / VL_OFS3:
//   _x_ < VL_OFS1 : one byte holding _x_
//   _x_ < VL_OFS2 : two bytes holding bswap16((VL_OFS1<<8) + (_x_ - VL_OFS1))
//   _x_ < VL_OFS3 : one byte VL_BA2 + (bits above 16 of _x_ - VL_OFS2), then a 16-bit store of the rest
//   otherwise     : one byte VL_BA3 + (_b - 3), then a 32-bit store after which _op_ advances by only _b bytes
// _x_ is evaluated several times and is modified in the third case (VL_OFS2 is subtracted),
// so it must be a side-effect-free, modifiable lvalue.
#define _vlput32(_op_, _x_, _act_) {\
if(likely((_x_) < VL_OFS1)){ *_op_++ = (_x_); _act_;}\
else if ((_x_) < VL_OFS2) { ctou16(_op_) = bswap16((VL_OFS1<<8)+((_x_)-VL_OFS1)); _op_ += 2; _act_;}\
else if ((_x_) < VL_OFS3) { *_op_++ = VL_BA2 + (((_x_) -= VL_OFS2) >> 16); ctou16(_op_) = (_x_); _op_ += 2; _act_;}\
else { unsigned _b = (bsr32((_x_))+7)/8; *_op_++ = VL_BA3 + (_b - 3); ctou32(_op_) = (_x_); _op_ += _b; _act_;}\
else if ((_x_) < VL_OFS2) { stou16(_op_, bswap16((VL_OFS1<<8)+((_x_)-VL_OFS1))); _op_ += 2; _act_;}\
else if ((_x_) < VL_OFS3) { *_op_++ = VL_BA2 + (((_x_) -= VL_OFS2) >> 16); stou16(_op_, (_x_)); _op_ += 2; _act_;}\
else { unsigned _b = (bsr32((_x_))+7)/8; *_op_++ = VL_BA3 + (_b - 3); stou32(_op_, (_x_)); _op_ += _b; _act_;}\
}

#define _vlget32(_ip_, _x_, _act_) do { _x_ = *_ip_++;\
Expand Down
8 changes: 4 additions & 4 deletions trled.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,12 @@ unsigned _trled(const unsigned char *__restrict in, unsigned char *__restrict ou
while(op < out+(outlen-32)) {
#if __WORDSIZE == 64
uint64_t z = (uint64_t)rmap[ip[7]]<<56 | (uint64_t)rmap[ip[6]] << 48 | (uint64_t)rmap[ip[5]] << 40 | (uint64_t)rmap[ip[4]] << 32 | (uint32_t)rmap[ip[3]] << 24 | (uint32_t)rmap[ip[2]] << 16| (uint32_t)rmap[ip[1]] << 8| rmap[ip[0]];
ctou64(op) = ctou64(ip); if(z) goto a; ip += 8; op += 8;
stou64(op, ctou64(ip)); if(z) goto a; ip += 8; op += 8;
continue;
a: z = ctz64(z)>>3;
#else
uint32_t z = (uint32_t)rmap[ip[3]] << 24 | (uint32_t)rmap[ip[2]] << 16| (uint32_t)rmap[ip[1]] << 8| rmap[ip[0]];
ctou32(op) = ctou32(ip); if(z) goto a; ip += 4; op += 4;
stou32(op, ctou32(ip)); if(z) goto a; ip += 4; op += 4;
continue;
a: z = ctz32(z)>>3;
#endif
Expand Down Expand Up @@ -310,8 +310,8 @@ unsigned trled(const unsigned char *__restrict in, unsigned inlen, unsigned char
// rmemset(_op_, _c_, _i_): advance _op_ by _i_ bytes and fill the skipped span, starting at the
// old _op_, with USIZE-bit stores of _c_.
// The inner do/while runs at least once and only stops once _up has reached the new _op_, so it
// can write beyond the requested _i_ bytes; the destination buffer must have slack for that.
// NOTE(review): _cc is set through T2(_cset, USIZE) but the stores use _c_ directly - confirm
// that callers already pass a value in the form the store expects.
#define rmemset(_op_, _c_, _i_) do { uint64_t _cc; uint8_t *_up = (uint8_t *)_op_; _op_ +=_i_;\
T2(_cset, USIZE)(_cc,_c_);\
do {\
T2(ctou, USIZE)(_up) = _c_; _up += USIZE/8;\
T2(ctou, USIZE)(_up) = _c_; _up += USIZE/8;\
T2(stou, USIZE)(_up, _c_); _up += USIZE/8;\
T2(stou, USIZE)(_up, _c_); _up += USIZE/8;\
} while(_up < (uint8_t *)_op_);\
} while(0)
#endif
Expand Down