Skip to content

Commit 251f075

Browse files
committed
Added VCL SIMD header wrapper for aliasing between simd, tuple and scalar arithmetic
Signed-off-by: Nick Avramoussis <4256455+Idclip@users.noreply.github.com>
1 parent 9a74efe commit 251f075

10 files changed

Lines changed: 882 additions & 52 deletions

File tree

CMakeLists.txt

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ required.]=] ${USE_EXR})
131131
option(USE_PNG "Use PNG while building openvdb components." OFF)
132132
option(USE_AX "Use OpenVDB AX while building openvdb components." ${OPENVDB_BUILD_AX})
133133
option(USE_NANOVDB "Use NanoVDB while building openvdb components." ${OPENVDB_BUILD_NANOVDB})
134+
# Opt-in switch for the bundled vectorclass (VCL) SIMD wrappers. Defaults to
# OFF; the help text below is what users see in cmake-gui/ccmake.
option(USE_VCL [=[
Use the internally supplied copy of Agner Fog's vectorclass (VCL) SIMD wrapper library to instrument OpenVDB tools.
This also requires VCL to be installed along with OpenVDB headers. Only applicable when compiling for x86 targets.]=] OFF)
134137

135138
cmake_dependent_option(OPENVDB_DISABLE_BOOST_IMPLICIT_LINKING
136139
"Disable the implicit linking of Boost libraries on Windows" ON "WIN32" OFF)
@@ -175,7 +178,7 @@ endif()
175178
set(_OPENVDB_SIMD_OPTIONS None SSE42 AVX)
176179
if(NOT OPENVDB_SIMD)
177180
set(OPENVDB_SIMD None CACHE STRING
178-
"Choose whether to enable SIMD compiler flags or not, options are: None SSE42 AVX.
181+
"Choose whether to enable x86 SIMD compiler flags or not, options are: None SSE42 AVX.
179182
Although not required, it is strongly recommended to enable SIMD. AVX implies SSE42.
180183
None is the default." FORCE
181184
)
@@ -404,20 +407,24 @@ if(CONCURRENT_MALLOC STREQUAL "Auto")
404407
endif()
405408
endif()
406409

407-
# Configure SIMD. AVX implies SSE 4.2.
410+
# Configure SIMD. AVX implies SSE 4.2 (for our builds)
411+
# @note You can also provide target specific flags to CXX_FLAGS, however
412+
# OpenVDB may not detect this usage correctly in all places. To ensure
413+
# the library is instrumented fully with your desired ISA, it's best to
414+
# define these macros.
408415

409416
if(OPENVDB_SIMD STREQUAL "AVX")
410417
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
411418
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-mavx>")
412419
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-msse4.2>")
413420
endif()
414-
add_compile_definitions("$<$<COMPILE_LANGUAGE:CXX>:OPENVDB_USE_AVX>")
415-
add_compile_definitions("$<$<COMPILE_LANGUAGE:CXX>:OPENVDB_USE_SSE42>")
421+
set(OPENVDB_USE_AVX ON)
422+
set(OPENVDB_USE_SSE42 ON)
416423
elseif(OPENVDB_SIMD STREQUAL "SSE42")
417424
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
418425
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-msse4.2>")
419426
endif()
420-
add_compile_definitions("$<$<COMPILE_LANGUAGE:CXX>:OPENVDB_USE_SSE42>")
427+
set(OPENVDB_USE_SSE42 ON)
421428
endif()
422429

423430
#########################################################################

cmake/OpenVDBUtils.cmake

Lines changed: 99 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ function(OPENVDB_GET_VERSION_DEFINE HEADER KEY VALUE)
8383
endfunction()
8484

8585

86-
########################################################################
87-
########################################################################
86+
###############################################################################
87+
###############################################################################
8888

8989

9090
function(OPENVDB_VERSION_FROM_HEADER OPENVDB_VERSION_FILE)
@@ -120,8 +120,8 @@ function(OPENVDB_VERSION_FROM_HEADER OPENVDB_VERSION_FILE)
120120
endfunction()
121121

122122

123-
########################################################################
124-
########################################################################
123+
###############################################################################
124+
###############################################################################
125125

126126

127127
function(OPENVDB_ABI_VERSION_FROM_PRINT OPENVDB_PRINT)
@@ -162,3 +162,98 @@ function(OPENVDB_ABI_VERSION_FROM_PRINT OPENVDB_PRINT)
162162
set(${_VDB_ABI} ${_OpenVDB_ABI} PARENT_SCOPE)
163163
endif()
164164
endfunction()
165+
166+
###############################################################################
## @brief  Compute the enumerated ISA value for x86/x86_64 architectures that
##   VCL will use during compilation of OpenVDB. The value is written to the
##   variable named by VCL_INSTRSET in the caller's scope. Possible values are:
##      0: Unknown or error
##      1: SSE
##      2: SSE2
##      3: SSE3
##      4: SSSE3
##      5: SSE4.1
##      6: SSE4.2
##      7: AVX
##      8: AVX2
##      9: AVX512F
##     10: AVX512BW/DQ/VL
## @param VCL_INSTRSET  Name of the output variable to set in the parent scope.
## @note  Each probe is a check_cxx_source_compiles() call whose result is
##   stored in a COMPUTE_VCL_INSTRSET__<ISA>__ cache entry, so a probe is not
##   re-run on later configures unless that cache entry is removed.
function(OPENVDB_COMPUTE_X86_INSTRSET_FROM_VCL VCL_INSTRSET)
  # Append project specific flags. CMAKE_REQUIRED_FLAGS has to be a single
  # space-delimited string (a ;-list does not work). This function has its own
  # variable scope, so the caller's CMAKE_REQUIRED_FLAGS is left untouched and
  # no explicit restore is needed on any return path.
  if(OPENVDB_SIMD STREQUAL "AVX")
    string(APPEND CMAKE_REQUIRED_FLAGS " -mavx -msse4.2")
  elseif(OPENVDB_SIMD STREQUAL "SSE42")
    string(APPEND CMAKE_REQUIRED_FLAGS " -msse4.2")
  endif()

  # Figure out what the INSTRSET macro would expand to.
  # @todo would be much faster with try_run
  include(CheckCXXSourceCompiles)

  set(_vcl_instrset_header
    "${CMAKE_SOURCE_DIR}/ext/vcl/openvdb/ext/vcl/instrset.h")

  # Probe from the highest ISA level downwards; the first static_assert that
  # compiles identifies the value of INSTRSET. The list is ordered so that its
  # first entry corresponds to level 10 and its last entry to level 1.
  set(_vcl_isa_names
    AVX512VL AVX512F AVX2 AVX SSE4_2 SSE4_1 SSSE3 SSE3 SSE2 SSE)
  set(_vcl_isa_level 10)

  foreach(_vcl_isa_name IN LISTS _vcl_isa_names)
    set(_vcl_check_var "COMPUTE_VCL_INSTRSET__${_vcl_isa_name}__")
    check_cxx_source_compiles("
#include \"${_vcl_instrset_header}\"
int main() { static_assert(INSTRSET == ${_vcl_isa_level}, \"INSTRSET mismatch\"); return 0; }"
      ${_vcl_check_var})
    if(${_vcl_check_var})
      set(${VCL_INSTRSET} ${_vcl_isa_level} PARENT_SCOPE)
      return()
    endif()
    math(EXPR _vcl_isa_level "${_vcl_isa_level} - 1")
  endforeach()

  # Unknown or error
  set(${VCL_INSTRSET} 0 PARENT_SCOPE)
endfunction()

doc/dependencies.txt

Lines changed: 38 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ Reference Platform, but for those that do, their specified versions are
3636

3737
Component | Requirements | Optional
3838
----------------------- | ----------------------------------------------- | --------
39-
OpenVDB Core Library | CMake, C++17 compiler, TBB::tbb | Blosc, ZLib, Log4cplus, Imath::Imath, Boost::iostream
39+
OpenVDB Core Library | CMake, C++17 compiler, TBB::tbb | Blosc, ZLib, Log4cplus, Imath::Imath (included), Boost::iostream, VCL (included)
4040
OpenVDB Print | Core Library dependencies | -
4141
OpenVDB LOD | Core Library dependencies | -
4242
OpenVDB Render | Core Library dependencies | OpenEXR, Imath::Imath, libpng
@@ -45,41 +45,51 @@ OpenVDB Python | Core Library dependencies, Python, nanobind | -
4545
OpenVDB AX | Core Library dependencies, LLVM | Bison, Flex
4646
NanoVDB | - | Core Library, CUDA, TBB, Blosc, ZLib
4747
OpenVDB UnitTests | Core Library dependencies, GoogleTest, numpy* | -
48-
OpenVDB Documentation | Doxygen | -
48+
OpenVDB Documentation | Doxygen, Doxygen Awesome (included) | -
4949

5050
- @b * GLEW is only required for building the vdb_view binary on Windows.
5151
- @b * numpy only required for python binding unit tests
5252
- @b Note - Bison and Flex are only required for re-generation of the AX
5353
grammar which is not typically necessary for the majority of users.
5454

55+
Note that OpenVDB comes bundled with the following dependencies. Please see
56+
their respective license documentation in `ext/THIRD-PARTY.md` for license
57+
information.
58+
59+
- Doxygen Awesome
60+
- Imath::Imath
61+
- VCL
62+
5563
@subsection depDependencyTable Dependency Table
5664

57-
Package | Minimum | Recommended | Description | apt-get | Homebrew | Source
58-
-------------- | ------- | ----------- | ----------------------------------------------------------------- | ------- | -------- | ------
59-
CMake | 3.24 | Latest | Cross-platform family of tools designed to help build software | Y | Y | https://cmake.org
60-
GCC | 11.2.1 | 11.2.1 | C++ 17 Compiler: The GNU Compiler Collection | Y | Y | https://www.gnu.org/software/gcc
61-
Clang | 5.0 | Latest | C++ 17 Compiler: A C language family frontend for LLVM | Y | Y | https://clang.llvm.org
62-
Intel ICC | 19 | Latest | C++ 17 Compiler: Intels C++ Compiler | Y | Y | https://software.intel.com/en-us/c-compilers
63-
MSVC | 19.30 | 19.30 | C++ 17 Compiler: Microsoft Visual C++ Compiler | Y | Y | https://visualstudio.microsoft.com/vs
64-
Imath | 3.2 | >= 3.3 | Half precision floating points | Y | Y | http://www.openexr.com
65-
OpenEXR | 3.2 | >= 3.3 | EXR serialization support | Y | Y | http://www.openexr.com
66-
TBB | 2020.3 | >= 2021 | Threading Building Blocks - template library for task parallelism | Y | Y | https://www.threadingbuildingblocks.org
67-
ZLIB | 1.2.7 | Latest | Compression library for disk serialization compression | Y | Y | https://www.zlib.net
68-
Boost | 1.82 | >= 1.85 | Components: iostreams | Y | Y | https://www.boost.org
69-
LLVM | 15.0.0* | 18.0.0 | Target-independent code generation | Y | Y | https://llvm.org/
70-
Bison | 3.7.0 | 3.7.0 | General-purpose parser generator | Y | Y | https://www.gnu.org/software/gcc
71-
Flex | 2.6.4 | 2.6.4 | Fast lexical analyzer generator | Y | Y | https://github.com/westes/flex
72-
Python | 3.10 | 3.11 | The python interpreter and libraries | Y | Y | https://www.python.org
73-
nanobind** | 2.0.0 | >= 2.5.0 | C++/python bindings | Y | Y | https://nanobind.readthedocs.io
74-
GoogleTest | 1.10 | Latest | A unit testing framework module for C++ | Y | Y | https://github.com/google/googletest
75-
Blosc | 1.17.0* | Latest | Recommended dependency for improved disk compression | Y | Y | https://github.com/Blosc/c-blosc/releases
76-
Log4cplus | 1.1.2 | Latest | An optional dependency for improved OpenVDB Logging | Y | Y | https://github.com/log4cplus/log4cplus
77-
libpng | - | Latest | Library for manipulating PNG images | Y | Y | http://www.libpng.org/pub/png/libpng.html
78-
GLFW | 3.3 | Latest | Simple API for OpenGL development | Y | Y | https://www.glfw.org
79-
OpenGL | 3.2 | Latest | Environment for developing portable graphics applications | Y | Y | https://www.opengl.org
80-
GLEW | 1.0.0 | Latest | A cross-platform OpenGL extension loading library. | Y | Y | http://glew.sourceforge.net
81-
CUDA | - | Latest | Parallel computing platform for graphical processing units. | Y | N | https://developer.nvidia.com/cuda-downloads
82-
Doxygen | 1.8.8 | <= 1.14.0 | Documentation generation from C++ | Y | Y | http://www.doxygen.nl
65+
Package | Minimum | Recommended | Description | apt-get | Homebrew | Source
66+
--------------- | ------- | ----------- | ----------------------------------------------------------------- | ------- | -------- | ------
67+
CMake | 3.24 | Latest | Cross-platform family of tools designed to help build software | Y | Y | https://cmake.org
68+
GCC | 11.2.1 | 11.2.1 | C++ 17 Compiler: The GNU Compiler Collection | Y | Y | https://www.gnu.org/software/gcc
69+
Clang | 5.0 | Latest | C++ 17 Compiler: A C language family frontend for LLVM | Y | Y | https://clang.llvm.org
70+
Intel ICC       | 19      | Latest      | C++ 17 Compiler: Intel's C++ Compiler                             | Y       | Y        | https://software.intel.com/en-us/c-compilers
71+
MSVC | 19.30 | 19.30 | C++ 17 Compiler: Microsoft Visual C++ Compiler | Y | Y | https://visualstudio.microsoft.com/vs
72+
Imath | 3.2 | >= 3.3 | Half precision floating points | Y | Y | http://www.openexr.com
73+
OpenEXR | 3.2 | >= 3.3 | EXR serialization support | Y | Y | http://www.openexr.com
74+
TBB | 2020.3 | >= 2021 | Threading Building Blocks - template library for task parallelism | Y | Y | https://www.threadingbuildingblocks.org
75+
ZLIB | 1.2.7 | Latest | Compression library for disk serialization compression | Y | Y | https://www.zlib.net
76+
Boost | 1.82 | >= 1.85 | Components: iostreams | Y | Y | https://www.boost.org
77+
VCL             | 2.02.0  | >= 2.02.0   | x86 intrinsics and SIMD class wrappers                            | Y       | Y        | https://github.com/vectorclass/version2
78+
LLVM | 15.0.0* | 18.0.0 | Target-independent code generation | Y | Y | https://llvm.org/
79+
Bison | 3.7.0 | 3.7.0 | General-purpose parser generator | Y | Y | https://www.gnu.org/software/gcc
80+
Flex | 2.6.4 | 2.6.4 | Fast lexical analyzer generator | Y | Y | https://github.com/westes/flex
81+
Python | 3.10 | 3.11 | The python interpreter and libraries | Y | Y | https://www.python.org
82+
nanobind** | 2.0.0 | >= 2.5.0 | C++/python bindings | Y | Y | https://nanobind.readthedocs.io
83+
GoogleTest | 1.10 | Latest | A unit testing framework module for C++ | Y | Y | https://github.com/google/googletest
84+
Blosc | 1.17.0* | Latest | Recommended dependency for improved disk compression | Y | Y | https://github.com/Blosc/c-blosc/releases
85+
Log4cplus | 1.1.2 | Latest | An optional dependency for improved OpenVDB Logging | Y | Y | https://github.com/log4cplus/log4cplus
86+
libpng | - | Latest | Library for manipulating PNG images | Y | Y | http://www.libpng.org/pub/png/libpng.html
87+
GLFW | 3.3 | Latest | Simple API for OpenGL development | Y | Y | https://www.glfw.org
88+
OpenGL | 3.2 | Latest | Environment for developing portable graphics applications | Y | Y | https://www.opengl.org
89+
GLEW | 1.0.0 | Latest | A cross-platform OpenGL extension loading library. | Y | Y | http://glew.sourceforge.net
90+
CUDA | - | Latest | Parallel computing platform for graphical processing units. | Y | N | https://developer.nvidia.com/cuda-downloads
91+
Doxygen | 1.8.8 | <= 1.14.0 | Documentation generation from C++ | Y | Y | http://www.doxygen.nl
92+
Doxygen Awesome | 2.0.0   | >= 2.0.0    | Custom CSS theme for Doxygen HTML documentation                   | Y       | Y        | https://github.com/jothepro/doxygen-awesome-css
8393

8494
- @b * See the note in [known issues](@ref depKnownIssues) regarding supported blosc/llvm versions.
8595
- @b ** See the note in [known issues](@ref depKnownIssues) regarding supported nanobind build issues.

ext/THIRD-PARTY.md

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,32 @@ SOFTWARE.
7676

7777
-------------------------------------------------------------------------
7878

79+
Apache 2.0
80+
81+
* VCL (c) Copyright 2012-2022 Agner Fog.
82+
https://github.com/vectorclass/version2
83+
84+
OpenVDB includes a copy of vectorclass for x86 SIMD intrinsic usage. Usage
85+
of VCL is optional (the USE_VCL CMake option defaults to OFF) and, when enabled, requires the VCL headers to be shipped with
86+
OpenVDB installations. This behaviour and usage of VCL can be disabled
87+
should license issues be of a concern. See the full LICENSE terms in:
88+
89+
vcl/openvdb/ext/vcl/LICENSE
90+
91+
Licensed under the Apache License, Version 2.0 (the "License");
92+
you may not use this file except in compliance with the License.
93+
You may obtain a copy of the License at
94+
95+
http://www.apache.org/licenses/LICENSE-2.0
96+
97+
Unless required by applicable law or agreed to in writing, software
98+
distributed under the License is distributed on an "AS IS" BASIS,
99+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
100+
See the License for the specific language governing permissions and
101+
limitations under the License.
102+
103+
-------------------------------------------------------------------------
104+
79105
Public domain
80106

81107
* The simplex noise implementation included in OpenVDB AX is Copyright (c)

0 commit comments

Comments
 (0)