diff --git a/.gitignore b/.gitignore index 850bcc7..fe63609 100644 --- a/.gitignore +++ b/.gitignore @@ -262,3 +262,23 @@ paket-files/ /TEST/FastNoiseSIMD Preview /cellModel.skp /cellModel.skb + +# CMake +CMakeLists.txt.user +CMakeCache.txt +CMakeFiles +CMakeScripts +Testing +Makefile +cmake_install.cmake +install_manifest.txt +compile_commands.json +CTestTestfile.cmake +_deps +[Bb]uild + +# VSCode +.vscode/ + +# FastNoiseSIMD +FastNoiseSIMD_config.h diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..c4fd581 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,30 @@ +sudo: required + +# Enable C++ support +language: cpp + +# Use Linux by default +os: linux + +# Use Ubuntu 18.04 for GCC 7.4.0 +dist: bionic + +# Compiler selection +compiler: gcc + +install: + - DEPS_DIR="${TRAVIS_BUILD_DIR}/deps" + - mkdir -p ${DEPS_DIR} && cd ${DEPS_DIR} + - | + CMAKE_URL="https://cmake.org/files/v3.16/cmake-3.16.2-Linux-x86_64.tar.gz" + mkdir cmake && travis_retry wget --no-check-certificate --quiet -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake + export PATH=${DEPS_DIR}/cmake/bin:${PATH} + - ../install-catch2.sh + +# Build steps +script: + - cd ${TRAVIS_BUILD_DIR} + - mkdir build + - cd build + - cmake -DFN_COMPILE_AVX2=ON -DFN_COMPILE_AVX512=OFF -DBUILD_TESTING=ON .. && make + - ctest diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..671928a --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,158 @@ +cmake_minimum_required(VERSION 3.10) + +# detect if FastNoiseSIMD is being bundled, +# disable testsuite in that case +if(NOT DEFINED PROJECT_NAME) + set(NOT_SUBPROJECT ON) +endif() + +project(FastNoiseSIMD + VERSION 0.7.0 + DESCRIPTION "C++ SIMD Noise Library" + LANGUAGES CXX +) + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) + +if(BUILD_TESTING) + # Catch2 needed for testing + find_package(Catch2 CONFIG REQUIRED) +endif() + +# Do stuff depending on the compiler +option(FN_SET_c0rp3n_CXX_FLAGS + "Set CMAKE_CXX_FLAGS[_DEBUG][_RELEASE] defaults from the c0rp3n/fastnoise-simd fork." + OFF) +if(FN_SET_c0rp3n_CXX_FLAGS) +if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(CMAKE_CXX_FLAGS "-W -Wall -Wextra -Wpedantic -Wunused-value -Wold-style-cast") + set(CMAKE_CXX_FLAGS_DEBUG "-g -O0") + set(CMAKE_CXX_FLAGS_RELEASE "-O3") +elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC") + set(CMAKE_CXX_FLAGS "/W4") + set(CMAKE_CXX_FLAGS_DEBUG "/O0 /ZI") + set(CMAKE_CXX_FLAGS_RELEASE "/O2 /Ob2") +endif() +endif() + +set_property(DIRECTORY ${PROJECT_SOURCE_DIR} PROPERTY VS_STARTUP_PROJECT FastNoiseSIMD) + +if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm" OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + option(FN_COMPILE_NEON + "Only on arm or aarch64." + ON) +endif() + +option(FN_COMPILE_SSE2 "" ON) + +option(FN_COMPILE_SSE41 "" ON) + +option( + FN_COMPILE_AVX2 + "This does not break support for pre AVX CPUs, AVX code is only run if \ + support is detected." + OFF +) + +option(FN_COMPILE_AVX512 "Only the latest compilers will support this." OFF) + +option( + FN_USE_FMA + "Using FMA instructions with AVX(51)2/NEON provides a small performance \ + increase but can cause minute variations in noise output compared to other \ + SIMD levels due to higher calculation precision." + ON +) + +option( + FN_ALIGNED_SETS + "Using aligned sets of memory for float arrays allows faster storing of \ + SIMD data." + ON +) + +configure_file( + ${CMAKE_CURRENT_LIST_DIR}/cmake/FastNoiseSIMD_config.h.in + ${CMAKE_CURRENT_LIST_DIR}/include/FastNoiseSIMD/FastNoiseSIMD_config.h + @ONLY +) + +add_library(FastNoiseSIMD STATIC + src/FastNoiseSIMD.cpp + src/FastNoiseSIMD_avx2.cpp + src/FastNoiseSIMD_avx512.cpp + src/FastNoiseSIMD_internal.cpp + src/FastNoiseSIMD_neon.cpp + src/FastNoiseSIMD_sse2.cpp + src/FastNoiseSIMD_sse41.cpp +) + +set_target_properties(FastNoiseSIMD + PROPERTIES + CXX_STANDARD 11 + CXX_STANDARD_REQUIRED ON + CMAKE_DEBUG_POSTFIX "d" +) + +target_include_directories(FastNoiseSIMD PUBLIC + $ + $ +) + +set(FN_CXX_FLAGS) +if(${FN_COMPILE_AVX512}) + if(${MSVC}) + list(APPEND FN_CXX_FLAGS "/arch:AVX512") + else() + list(APPEND FN_CXX_FLAGS "-march=skylake-avx512") + endif() +elseif(${FN_COMPILE_AVX2}) + if(${MSVC}) + list(APPEND FN_CXX_FLAGS "/arch:AVX2") + else() + list(APPEND FN_CXX_FLAGS "-march=core-avx2") + endif() +endif() + +target_compile_options(FastNoiseSIMD PRIVATE "${FN_CXX_FLAGS}") + +if(BUILD_TESTING) + include(test/tests.cmake) +endif() + +# Only perform the installation steps when not being used as +# a subproject via `add_subdirectory`, or the destinations will break +if(NOT_SUBPROJECT) + set(FN_CMAKE_CONFIG_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/FastNoiseSIMD") + + configure_package_config_file( + ${CMAKE_CURRENT_LIST_DIR}/cmake/FastNoiseSIMDConfig.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/FastNoiseSIMDConfig.cmake + INSTALL_DESTINATION ${FN_CMAKE_CONFIG_DESTINATION} + ) + + # create and install an export set for FastNoiseSIMD target as FastNoiseSIMD + install( + TARGETS FastNoiseSIMD EXPORT FastNoiseSIMDTargets + DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + + + install(EXPORT FastNoiseSIMDTargets DESTINATION ${FN_CMAKE_CONFIG_DESTINATION}) + + write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/FastNoiseSIMDConfigVersion.cmake" + COMPATIBILITY SameMajorVersion + ) + + install(TARGETS FastNoiseSIMD LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") + install(DIRECTORY "include/" DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") + + install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/FastNoiseSIMDConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/FastNoiseSIMDConfigVersion.cmake" + DESTINATION ${FN_CMAKE_CONFIG_DESTINATION} + ) +endif() diff --git a/README.md b/README.md index 299cc23..1c61780 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,20 @@ -# FastNoise SIMD -FastNoise SIMD is the SIMD implementation of my noise library [FastNoise](https://github.com/Auburns/FastNoise). It aims to provide faster performance through the use of intrinsic(SIMD) CPU functions. Vectorisation of the code allows noise functions to process data in sets of 4/8/16 increasing performance by 700% in some cases (Simplex). - -After releasing FastNoise I got in contact with the author of [FastNoise SIMD](https://github.com/jackmott/FastNoise-SIMD) (naming is coincidence) and was inspired to work with SIMD functions myself. Through his code and discussions with him I created my implementation with even more optimisation thanks to the removal of lookup tables. +

+ FastNoise SIMD +

+

+ + Travis Build Status + + + Appveyor Build Status + +
+ FastNoise SIMD is the SIMD implementation of FastNoise. +

+ +FastNoise SIMD aims to provide faster performance through the use of intrinsic(SIMD) CPU functions. Vectorisation of the code allows noise functions to process data in sets of 4/8/16 increasing performance by 700% in some cases (Simplex). + +Inspired by [FastNoise SIMD](https://github.com/jackmott/FastNoise-SIMD) (naming is coincidence). FastNoise SIMD was created with even more optimisation thanks to the removal of lookup tables. Runtime detection of highest supported instruction set ensures the fastest possible performance with only 1 compile needed. If no support is found it will fallback to standard types (float/int). @@ -66,7 +79,7 @@ Timings below are x1000 ns to generate 32x32x32 points of noise on a single thre | Cellular | 851 | 1283 | 2679 | 2959 | 2979 | 58125 | | Cubic | 615 | 952 | 1970 | 3516 | 2979 | | -Comparision of fractals and sampling performance [here](https://github.com/Auburns/FastNoiseSIMD/wiki/In-depth-SIMD-level). +Comparison of fractals and sampling performance [here](https://github.com/Auburns/FastNoiseSIMD/wiki/In-depth-SIMD-level). # Examples ### Cellular Noise diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..6b50cd6 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,25 @@ +version: 1.0.{build} +image: Visual Studio 2019 +configuration: Debug +clone_depth: 50 +platform: x64 + +cache: + - c:\tools\vcpkg\installed + +install: + - vcpkg install Catch2:x64-windows + - vcpkg integrate install + +before_build: + - mkdir build + - cd build + - cmake -DFN_COMPILE_AVX2=ON -DFN_COMPILE_AVX512=OFF -DBUILD_TESTING=ON -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake -DVCPKG_TARGET_TRIPLET=x64-windows -G "Visual Studio 16 2019" .. + +build: + project: build/FastNoiseSIMD.sln + parallel: true + verbosity: minimal + +test_script: + - ctest diff --git a/cmake/FastNoiseSIMD.pc.in b/cmake/FastNoiseSIMD.pc.in new file mode 100644 index 0000000..c45a3d5 --- /dev/null +++ b/cmake/FastNoiseSIMD.pc.in @@ -0,0 +1,7 @@ +includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ + +Name: FastNoiseSIMD +Description: +URL: https://github.com/c0rp3n/fastnoise-simd +Version: @FastNoiseSIMD_VERSION@ +Cflags: -I${includedir} \ No newline at end of file diff --git a/cmake/FastNoiseSIMDConfig.cmake.in b/cmake/FastNoiseSIMDConfig.cmake.in new file mode 100644 index 0000000..45ffb2a --- /dev/null +++ b/cmake/FastNoiseSIMDConfig.cmake.in @@ -0,0 +1,6 @@ +@PACKAGE_INIT@ + +# Provide path for scripts +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}") + +include(${CMAKE_CURRENT_LIST_DIR}/FastNoiseSIMDTargets.cmake) \ No newline at end of file diff --git a/cmake/FastNoiseSIMD_config.h.in b/cmake/FastNoiseSIMD_config.h.in new file mode 100644 index 0000000..a2c5b3d --- /dev/null +++ b/cmake/FastNoiseSIMD_config.h.in @@ -0,0 +1,33 @@ +#ifndef FASTNOISE_SIMD_CONFIG_H +#define FASTNOISE_SIMD_CONFIG_H + +#pragma once + +#if defined(__arm__) || defined(__aarch64__) +#define FN_ARM +//#define FN_IOS +#cmakedefine FN_COMPILE_NEON +#else + +// Comment out lines to not compile for certain instruction sets +#cmakedefine FN_COMPILE_SSE2 +#cmakedefine FN_COMPILE_SSE41 + +// To compile AVX2 set C++ code generation to use /arch:AVX(2) on FastNoiseSIMD_avx2.cpp +// Note: This does not break support for pre AVX CPUs, AVX code is only run if support is detected +#cmakedefine FN_COMPILE_AVX2 + +// Only the latest compilers will support this +#cmakedefine FN_COMPILE_AVX512 + +// Using FMA instructions with AVX(51)2/NEON provides a small performance increase but can cause +// minute variations in noise output compared to other SIMD levels due to higher calculation precision +// Intel compiler will always generate FMA instructions, use /Qfma- or -no-fma to disable +#cmakedefine FN_USE_FMA +#endif + +// Using aligned sets of memory for float arrays allows faster storing of SIMD data +// Comment out to allow unaligned float arrays to be used as sets +#cmakedefine FN_ALIGNED_SETS + +#endif diff --git a/FastNoiseSIMD/ARM/cpu-features.h b/include/FastNoiseSIMD/ARM/cpu-features.h similarity index 100% rename from FastNoiseSIMD/ARM/cpu-features.h rename to include/FastNoiseSIMD/ARM/cpu-features.h diff --git a/FastNoiseSIMD/FastNoiseSIMD.h b/include/FastNoiseSIMD/FastNoiseSIMD.h similarity index 93% rename from FastNoiseSIMD/FastNoiseSIMD.h rename to include/FastNoiseSIMD/FastNoiseSIMD.h index 22a3945..89141f0 100644 --- a/FastNoiseSIMD/FastNoiseSIMD.h +++ b/include/FastNoiseSIMD/FastNoiseSIMD.h @@ -31,32 +31,7 @@ #ifndef FASTNOISE_SIMD_H #define FASTNOISE_SIMD_H -#if defined(__arm__) || defined(__aarch64__) -#define FN_ARM -//#define FN_IOS -#define FN_COMPILE_NEON -#else - -// Comment out lines to not compile for certain instruction sets -#define FN_COMPILE_SSE2 -#define FN_COMPILE_SSE41 - -// To compile AVX2 set C++ code generation to use /arch:AVX(2) on FastNoiseSIMD_avx2.cpp -// Note: This does not break support for pre AVX CPUs, AVX code is only run if support is detected -#define FN_COMPILE_AVX2 - -// Only the latest compilers will support this -//#define FN_COMPILE_AVX512 - -// Using FMA instructions with AVX(51)2/NEON provides a small performance increase but can cause -// minute variations in noise output compared to other SIMD levels due to higher calculation precision -// Intel compiler will always generate FMA instructions, use /Qfma- or -no-fma to disable -#define FN_USE_FMA -#endif - -// Using aligned sets of memory for float arrays allows faster storing of SIMD data -// Comment out to allow unaligned float arrays to be used as sets -#define FN_ALIGNED_SETS +#include "FastNoiseSIMD_config.h" // SSE2/NEON support is guaranteed on 64bit CPUs so no fallback is needed #if !(defined(_WIN64) || defined(__x86_64__) || defined(__ppc64__) || defined(__aarch64__) || defined(FN_IOS)) || defined(_DEBUG) @@ -102,7 +77,7 @@ class FastNoiseSIMD { public: - enum NoiseType { Value, ValueFractal, Perlin, PerlinFractal, Simplex, SimplexFractal, WhiteNoise, Cellular, Cubic, CubicFractal }; + enum NoiseType { Value, ValueFractal, Perlin, PerlinFractal, Simplex, SimplexFractal, OpenSimplex2, OpenSimplex2Fractal, WhiteNoise, Cellular, Cubic, CubicFractal }; enum FractalType { FBM, Billow, RigidMulti }; enum PerturbType { None, Gradient, GradientFractal, Normalise, Gradient_Normalise, GradientFractal_Normalise }; @@ -279,6 +254,13 @@ class FastNoiseSIMD virtual void FillSimplexSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0; virtual void FillSimplexFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0; + float* GetOpenSimplex2Set(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f); + float* GetOpenSimplex2FractalSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f); + virtual void FillOpenSimplex2Set(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0; + virtual void FillOpenSimplex2FractalSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0; + virtual void FillOpenSimplex2Set(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0; + virtual void FillOpenSimplex2FractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0; + float* GetCellularSet(int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f); virtual void FillCellularSet(float* noiseSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) = 0; virtual void FillCellularSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) = 0; diff --git a/install-catch2.sh b/install-catch2.sh new file mode 100755 index 0000000..960b6ac --- /dev/null +++ b/install-catch2.sh @@ -0,0 +1,6 @@ +#!/bin/sh +git clone https://github.com/catchorg/Catch2.git +mkdir Catch2/build +cd Catch2/build +cmake -DBUILD_TESTING=OFF .. +make && sudo make install diff --git a/FastNoiseSIMD/ARM/cpu-features.c b/src/ARM/cpu-features.c similarity index 100% rename from FastNoiseSIMD/ARM/cpu-features.c rename to src/ARM/cpu-features.c diff --git a/FastNoiseSIMD/FastNoiseSIMD.cpp b/src/FastNoiseSIMD.cpp similarity index 95% rename from FastNoiseSIMD/FastNoiseSIMD.cpp rename to src/FastNoiseSIMD.cpp index cbac32a..4825f0b 100644 --- a/FastNoiseSIMD/FastNoiseSIMD.cpp +++ b/src/FastNoiseSIMD.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" #include #include #include @@ -67,7 +67,7 @@ #include #elif defined(FN_ARM) #if !defined(__aarch64__) && !defined(FN_IOS) -#include "ARM/cpu-features.h" +#include "FastNoiseSIMD/ARM/cpu-features.h" #endif #else #include @@ -422,6 +422,12 @@ void FastNoiseSIMD::FillNoiseSet(float* noiseSet, int xStart, int yStart, int zS case SimplexFractal: FillSimplexFractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier); break; + case OpenSimplex2: + FillOpenSimplex2Set(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier); + break; + case OpenSimplex2Fractal: + FillOpenSimplex2FractalSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier); + break; case WhiteNoise: FillWhiteNoiseSet(noiseSet, xStart, yStart, zStart, xSize, ySize, zSize, scaleModifier); break; @@ -461,6 +467,12 @@ void FastNoiseSIMD::FillNoiseSet(float* noiseSet, FastNoiseVectorSet* vectorSet, case SimplexFractal: FillSimplexFractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset); break; + case OpenSimplex2: + FillOpenSimplex2Set(noiseSet, vectorSet, xOffset, yOffset, zOffset); + break; + case OpenSimplex2Fractal: + FillOpenSimplex2FractalSet(noiseSet, vectorSet, xOffset, yOffset, zOffset); + break; case WhiteNoise: FillWhiteNoiseSet(noiseSet, vectorSet, xOffset, yOffset, zOffset); break; @@ -508,6 +520,9 @@ GET_SET(PerlinFractal) GET_SET(Simplex) GET_SET(SimplexFractal) +GET_SET(OpenSimplex2) +GET_SET(OpenSimplex2Fractal) + GET_SET(Cellular) GET_SET(Cubic) diff --git a/FastNoiseSIMD/FastNoiseSIMD_avx2.cpp b/src/FastNoiseSIMD_avx2.cpp similarity index 97% rename from FastNoiseSIMD/FastNoiseSIMD_avx2.cpp rename to src/FastNoiseSIMD_avx2.cpp index e8b4d09..2e68e3a 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_avx2.cpp +++ b/src/FastNoiseSIMD_avx2.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" // DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC diff --git a/FastNoiseSIMD/FastNoiseSIMD_avx512.cpp b/src/FastNoiseSIMD_avx512.cpp similarity index 98% rename from FastNoiseSIMD/FastNoiseSIMD_avx512.cpp rename to src/FastNoiseSIMD_avx512.cpp index 78a00a7..2d52b29 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_avx512.cpp +++ b/src/FastNoiseSIMD_avx512.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" // DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC diff --git a/FastNoiseSIMD/FastNoiseSIMD_internal.cpp b/src/FastNoiseSIMD_internal.cpp similarity index 96% rename from FastNoiseSIMD/FastNoiseSIMD_internal.cpp rename to src/FastNoiseSIMD_internal.cpp index 5f850bc..6453773 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_internal.cpp +++ b/src/FastNoiseSIMD_internal.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" #include #if defined(SIMD_LEVEL) || defined(FN_COMPILE_NO_SIMD_FALLBACK) @@ -461,6 +461,7 @@ static float FUNC(INV_SQRT)(float x) #define SIMDf_XOR(a,b) SIMDf_CAST_TO_FLOAT(SIMDi_CAST_TO_INT(a) ^ SIMDi_CAST_TO_INT(b)) #define SIMDf_FLOOR(a) floorf(a) +#define SIMDf_ROUND(a) roundf(a) #define SIMDf_ABS(a) fabsf(a) #define SIMDf_BLENDV(a,b,mask) (mask ? (b) : (a)) #define SIMDf_GATHER(p,a) (*(reinterpret_cast(p)+(a))) @@ -566,15 +567,18 @@ static SIMDf SIMDf_NUM(10); static SIMDf SIMDf_NUM(15); static SIMDf SIMDf_NUM(32); static SIMDf SIMDf_NUM(999999); +static SIMDf SIMDf_NUM(_1); static SIMDf SIMDf_NUM(0_5); static SIMDf SIMDf_NUM(0_6); static SIMDf SIMDf_NUM(15_5); static SIMDf SIMDf_NUM(511_5); +static SIMDf SIMDf_NUM(32768_5); //static SIMDf SIMDf_NUM(cellJitter); static SIMDf SIMDf_NUM(F3); static SIMDf SIMDf_NUM(G3); +static SIMDf SIMDf_NUM(R3); static SIMDf SIMDf_NUM(G33); static SIMDf SIMDf_NUM(hash2Float); static SIMDf SIMDf_NUM(vectorSize); @@ -632,15 +636,18 @@ void FUNC(InitSIMDValues)() SIMDf_NUM(15) = SIMDf_SET(15.0f); SIMDf_NUM(32) = SIMDf_SET(32.0f); SIMDf_NUM(999999) = SIMDf_SET(999999.0f); + SIMDf_NUM(_1) = SIMDf_SET(-1.0f); SIMDf_NUM(0_5) = SIMDf_SET(0.5f); SIMDf_NUM(0_6) = SIMDf_SET(0.6f); SIMDf_NUM(15_5) = SIMDf_SET(15.5f); SIMDf_NUM(511_5) = SIMDf_SET(511.5f); + SIMDf_NUM(32768_5) = SIMDf_SET(32768.5f); //SIMDf_NUM(cellJitter) = SIMDf_SET(0.39614f); SIMDf_NUM(F3) = SIMDf_SET(1.f / 3.f); SIMDf_NUM(G3) = SIMDf_SET(1.f / 6.f); + SIMDf_NUM(R3) = SIMDf_SET(2.f / 3.f); SIMDf_NUM(G33) = SIMDf_SET((3.f / 6.f) - 1.f); SIMDf_NUM(hash2Float) = SIMDf_SET(1.f / 2147483648.f); SIMDf_NUM(vectorSize) = SIMDf_SET(VECTOR_SIZE); @@ -922,6 +929,65 @@ static SIMDf VECTORCALL FUNC(SimplexSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf return SIMDf_MUL(SIMDf_NUM(32), SIMDf_MASK_ADD(n0, SIMDf_MASK_ADD(n1, SIMDf_MASK_ADD(n2, v3, v2), v1), v0)); } +static SIMDf VECTORCALL FUNC(OpenSimplex2Single)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z) +{ + SIMDf f = SIMDf_MUL(SIMDf_NUM(R3), SIMDf_ADD(SIMDf_ADD(x, y), z)); + SIMDf xr = SIMDf_SUB(f, x); + SIMDf yr = SIMDf_SUB(f, y); + SIMDf zr = SIMDf_SUB(f, z); + + SIMDf val = SIMDf_NUM(0); + for (int i = 0; i < 2; i++) + { + SIMDf v0xr = SIMDf_FLOOR(SIMDf_ADD(xr, SIMDf_NUM(0_5))); + SIMDf v0yr = SIMDf_FLOOR(SIMDf_ADD(yr, SIMDf_NUM(0_5))); + SIMDf v0zr = SIMDf_FLOOR(SIMDf_ADD(zr, SIMDf_NUM(0_5))); + SIMDf d0xr = SIMDf_SUB(xr, v0xr); + SIMDf d0yr = SIMDf_SUB(yr, v0yr); + SIMDf d0zr = SIMDf_SUB(zr, v0zr); + + SIMDf score0xr = SIMDf_ABS(d0xr); + SIMDf score0yr = SIMDf_ABS(d0yr); + SIMDf score0zr = SIMDf_ABS(d0zr); + MASK dir0xr = SIMDf_LESS_EQUAL(SIMDf_MAX(score0yr, score0zr), score0xr); + MASK dir0yr = SIMDi_AND_NOT(dir0xr, SIMDf_LESS_EQUAL(SIMDf_MAX(score0zr, score0xr), score0yr)); + MASK dir0zr = SIMDi_NOT(SIMDi_OR(dir0xr, dir0yr)); + SIMDf v1xr = SIMDf_ADD(v0xr, SIMDf_BLENDV(SIMDf_NUM(0), SIMDf_BLENDV(SIMDf_NUM(1), SIMDf_NUM(_1), SIMDf_LESS_THAN(d0xr, SIMDf_NUM(0))), dir0xr)); + SIMDf v1yr = SIMDf_ADD(v0yr, SIMDf_BLENDV(SIMDf_NUM(0), SIMDf_BLENDV(SIMDf_NUM(1), SIMDf_NUM(_1), SIMDf_LESS_THAN(d0yr, SIMDf_NUM(0))), dir0yr)); + SIMDf v1zr = SIMDf_ADD(v0zr, SIMDf_BLENDV(SIMDf_NUM(0), SIMDf_BLENDV(SIMDf_NUM(1), SIMDf_NUM(_1), SIMDf_LESS_THAN(d0zr, SIMDf_NUM(0))), dir0zr)); + SIMDf d1xr = SIMDf_SUB(xr, v1xr); + SIMDf d1yr = SIMDf_SUB(yr, v1yr); + SIMDf d1zr = SIMDf_SUB(zr, v1zr); + + SIMDi hv0xr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v0xr), SIMDi_NUM(xPrime)); + SIMDi hv0yr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v0yr), SIMDi_NUM(yPrime)); + SIMDi hv0zr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v0zr), SIMDi_NUM(zPrime)); + SIMDi hv1xr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v1xr), SIMDi_NUM(xPrime)); + SIMDi hv1yr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v1yr), SIMDi_NUM(yPrime)); + SIMDi hv1zr = SIMDi_MUL(SIMDi_CONVERT_TO_INT(v1zr), SIMDi_NUM(zPrime)); + + SIMDf t0 = SIMDf_NMUL_ADD(d0zr, d0zr, SIMDf_NMUL_ADD(d0yr, d0yr, SIMDf_NMUL_ADD(d0xr, d0xr, SIMDf_NUM(0_6)))); + SIMDf t1 = SIMDf_NMUL_ADD(d1zr, d1zr, SIMDf_NMUL_ADD(d1yr, d1yr, SIMDf_NMUL_ADD(d1xr, d1xr, SIMDf_NUM(0_6)))); + MASK n0 = SIMDf_GREATER_THAN(t0, SIMDf_NUM(0)); + MASK n1 = SIMDf_GREATER_THAN(t1, SIMDf_NUM(0)); + t0 = SIMDf_MUL(t0, t0); + t1 = SIMDf_MUL(t1, t1); + + SIMDf v0 = SIMDf_MUL(SIMDf_MUL(t0, t0), FUNC(GradCoord)(seed, hv0xr, hv0yr, hv0zr, d0xr, d0yr, d0zr)); + SIMDf v1 = SIMDf_MUL(SIMDf_MUL(t1, t1), FUNC(GradCoord)(seed, hv1xr, hv1yr, hv1zr, d1xr, d1yr, d1zr)); + + val = SIMDf_MASK_ADD(n0, SIMDf_MASK_ADD(n1, val, v1), v0); + + if (i == 0) { + xr = SIMDf_ADD(xr, SIMDf_NUM(32768_5)); + yr = SIMDf_ADD(yr, SIMDf_NUM(32768_5)); + zr = SIMDf_ADD(zr, SIMDf_NUM(32768_5)); + } + } + + return SIMDf_MUL(SIMDf_NUM(32), val); +} + static SIMDf VECTORCALL FUNC(CubicSingle)(SIMDi seed, SIMDf x, SIMDf y, SIMDf z) { SIMDf xf1 = SIMDf_FLOOR(x); @@ -1376,6 +1442,9 @@ FILL_FRACTAL_SET(Perlin) FILL_SET(Simplex) FILL_FRACTAL_SET(Simplex) +FILL_SET(OpenSimplex2) +FILL_FRACTAL_SET(OpenSimplex2) + //FILL_SET(WhiteNoise) FILL_SET(Cubic) @@ -1491,6 +1560,9 @@ void SIMD_LEVEL_CLASS::Fill##func##FractalSet(float* noiseSet, FastNoiseVectorSe FILL_VECTOR_SET(Simplex) FILL_FRACTAL_VECTOR_SET(Simplex) + FILL_VECTOR_SET(OpenSimplex2) + FILL_FRACTAL_VECTOR_SET(OpenSimplex2) + FILL_VECTOR_SET(WhiteNoise) FILL_VECTOR_SET(Cubic) @@ -1765,6 +1837,12 @@ static SIMDf VECTORCALL FUNC(CellularLookup##distanceFunc##Single)(SIMDi seedV, case FastNoiseSIMD::SimplexFractal:\ CELLULAR_LOOKUP_FRACTAL_VALUE(Simplex);\ break; \ + case FastNoiseSIMD::OpenSimplex2:\ + result = FUNC(OpenSimplex2Single)(seedV, xF, yF, zF); \ + break;\ + case FastNoiseSIMD::OpenSimplex2Fractal:\ + CELLULAR_LOOKUP_FRACTAL_VALUE(OpenSimplex2);\ + break; \ case FastNoiseSIMD::Cubic:\ result = FUNC(CubicSingle)(seedV, xF, yF, zF); \ break;\ diff --git a/FastNoiseSIMD/FastNoiseSIMD_internal.h b/src/FastNoiseSIMD_internal.h similarity index 89% rename from FastNoiseSIMD/FastNoiseSIMD_internal.h rename to src/FastNoiseSIMD_internal.h index a20e02d..ba37c1f 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_internal.h +++ b/src/FastNoiseSIMD_internal.h @@ -64,6 +64,11 @@ namespace FastNoiseSIMD_internal void FillSimplexSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override; void FillSimplexFractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override; + void FillOpenSimplex2Set(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override; + void FillOpenSimplex2FractalSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override; + void FillOpenSimplex2Set(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override; + void FillOpenSimplex2FractalSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override; + void FillCellularSet(float* floatSet, int xStart, int yStart, int zStart, int xSize, int ySize, int zSize, float scaleModifier = 1.0f) override; void FillCellularSet(float* noiseSet, FastNoiseVectorSet* vectorSet, float xOffset = 0.0f, float yOffset = 0.0f, float zOffset = 0.0f) override; diff --git a/FastNoiseSIMD/FastNoiseSIMD_neon.cpp b/src/FastNoiseSIMD_neon.cpp similarity index 97% rename from FastNoiseSIMD/FastNoiseSIMD_neon.cpp rename to src/FastNoiseSIMD_neon.cpp index cd44b61..f11fdca 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_neon.cpp +++ b/src/FastNoiseSIMD_neon.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" #ifdef FN_COMPILE_NEON #define SIMD_LEVEL_H FN_NEON diff --git a/FastNoiseSIMD/FastNoiseSIMD_sse2.cpp b/src/FastNoiseSIMD_sse2.cpp similarity index 97% rename from FastNoiseSIMD/FastNoiseSIMD_sse2.cpp rename to src/FastNoiseSIMD_sse2.cpp index 21ac5a0..c366674 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_sse2.cpp +++ b/src/FastNoiseSIMD_sse2.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" // DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC diff --git a/FastNoiseSIMD/FastNoiseSIMD_sse41.cpp b/src/FastNoiseSIMD_sse41.cpp similarity index 97% rename from FastNoiseSIMD/FastNoiseSIMD_sse41.cpp rename to src/FastNoiseSIMD_sse41.cpp index a61c22c..0cf7133 100644 --- a/FastNoiseSIMD/FastNoiseSIMD_sse41.cpp +++ b/src/FastNoiseSIMD_sse41.cpp @@ -26,7 +26,7 @@ // off every 'zix'.) // -#include "FastNoiseSIMD.h" +#include "FastNoiseSIMD/FastNoiseSIMD.h" // DISABLE WHOLE PROGRAM OPTIMIZATION for this file when using MSVC diff --git a/test/main.cpp b/test/main.cpp new file mode 100644 index 0000000..0fac309 --- /dev/null +++ b/test/main.cpp @@ -0,0 +1,2 @@ +#define CATCH_CONFIG_MAIN // This tells Catch to provide a main() +#include diff --git a/test/simplex_noise.cpp b/test/simplex_noise.cpp new file mode 100644 index 0000000..984a278 --- /dev/null +++ b/test/simplex_noise.cpp @@ -0,0 +1,19 @@ +#include + +#include "FastNoiseSIMD/FastNoiseSIMD.h" + +TEST_CASE("simplex", "[FastNoiseSIMD]") +{ + FastNoiseSIMD* noise = FastNoiseSIMD::NewFastNoiseSIMD(); + + int x = 16; + int y = 16; + int z = 16; + int x_size = 8; + int y_size = 8; + int z_size = 8; + float* simplex_set = noise->GetSimplexSet(x, y, z, x_size, y_size, z_size); + + noise->FreeNoiseSet(simplex_set); + delete noise; +} diff --git a/test/tests.cmake b/test/tests.cmake new file mode 100644 index 0000000..8fc2ad7 --- /dev/null +++ b/test/tests.cmake @@ -0,0 +1,13 @@ +add_executable(FastNoiseSIMD_tests + test/simplex_noise.cpp + test/main.cpp +) + +target_link_libraries(FastNoiseSIMD_tests + FastNoiseSIMD + Catch2::Catch2 +) + +include(CTest) +include(Catch) +catch_discover_tests(FastNoiseSIMD_tests)