Skip to content

Commit cce1e27

Browse files
Some build requirement updates
1. Make OpenMP a build requirement, because non-OpenMP builds (a) do not help with anything, (b) add a code burden that would require special testing, and (c) do not have acceptable performance. External code such as rawspeed and LibRaw is not touched. 2. With current compilers we can assume proper vectorization support, so DT_NO_VECTORIZATION and DT_NO_SIMD_HINTS can be removed. 3. As OpenMP and SSE2 are no longer build options, don't show them in the log file.
1 parent b3c41a1 commit cce1e27

29 files changed

Lines changed: 52 additions & 272 deletions

CMakeLists.txt

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -75,21 +75,19 @@ else()
7575
set(RPATH_ORIGIN $ORIGIN)
7676
endif()
7777

78-
if (USE_OPENMP)
79-
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
80-
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
78+
if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR
79+
CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
8180

82-
# Clang has an option to specify the OpenMP standard to use. Specify it.
83-
# FIXME: Implement this in FindOpenMP.cmake
84-
set(OPENMP_VERSION_SPECIFIER "-fopenmp-version=51")
81+
# Clang has an option to specify the OpenMP standard to use. Specify it.
82+
# FIXME: Implement this in FindOpenMP.cmake
83+
set(OPENMP_VERSION_SPECIFIER "-fopenmp-version=51")
8584

86-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
87-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
88-
endif()
89-
90-
find_package(OpenMP 4.5 REQUIRED)
85+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
86+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_VERSION_SPECIFIER}")
9187
endif()
9288

89+
find_package(OpenMP 4.5 REQUIRED)
90+
9391
# Check for base threading library
9492
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
9593
find_package(Threads REQUIRED)

DefineOptions.cmake

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ option(DONT_USE_INTERNAL_LUA "Never fall back to the intree copy of lua" ON)
66
option(USE_KWALLET "Build kwallet password storage back-end" ON)
77
option(USE_LIBSECRET "Build libsecret password storage back-end" ON)
88
option(USE_UNITY "Use libunity to report progress in the launcher" OFF)
9-
option(USE_OPENMP "Use OpenMP threading support." ON)
109
option(USE_OPENCL "Use OpenCL support." ON)
1110
option(USE_GRAPHICSMAGICK "Use GraphicsMagick library for image import." ON)
1211
option(USE_IMAGEMAGICK "Use ImageMagick library for image import." OFF)

build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ CMAKE_OPTIONS_FROM_CMDLINE=""
3535

3636
PRINT_HELP=0
3737

38-
FEATURES="AI CAMERA COLORD GMIC GRAPHICSMAGICK IMAGEMAGICK JXL KWALLET LIBSECRET LUA MAC_INTEGRATION MAP OPENCL OPENEXR OPENMP UNITY WEBP"
38+
FEATURES="AI CAMERA COLORD GMIC GRAPHICSMAGICK IMAGEMAGICK JXL KWALLET LIBSECRET LUA MAC_INTEGRATION MAP OPENCL OPENEXR UNITY WEBP"
3939

4040
# prepare a lowercase version with a space before and after
4141
# it's very important for parse_feature, has no impact in for loop expansions

src/CMakeLists.txt

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -273,25 +273,23 @@ list(APPEND LIBS "${CMAKE_THREAD_LIBS_INIT}")
273273
# Need to explicitly link against math library.
274274
list(APPEND LIBS "-lm")
275275

276-
if(USE_OPENMP)
277-
if(OpenMP_C_FLAGS)
278-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
279-
endif()
276+
if(OpenMP_C_FLAGS)
277+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
278+
endif()
280279

281-
if(OpenMP_CXX_FLAGS)
282-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
283-
endif()
280+
if(OpenMP_CXX_FLAGS)
281+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
282+
endif()
284283

285-
if(OpenMP_C_INCLUDE_DIRS)
286-
include_directories(${OpenMP_C_INCLUDE_DIRS})
287-
endif()
284+
if(OpenMP_C_INCLUDE_DIRS)
285+
include_directories(${OpenMP_C_INCLUDE_DIRS})
286+
endif()
288287

289-
if(OpenMP_CXX_INCLUDE_DIRS)
290-
include_directories(${OpenMP_CXX_INCLUDE_DIRS})
291-
endif()
288+
if(OpenMP_CXX_INCLUDE_DIRS)
289+
include_directories(${OpenMP_CXX_INCLUDE_DIRS})
290+
endif()
292291

293-
list(APPEND LIBS ${OpenMP_C_LIBRARIES} ${OpenMP_CXX_LIBRARIES})
294-
endif(USE_OPENMP)
292+
list(APPEND LIBS ${OpenMP_C_LIBRARIES} ${OpenMP_CXX_LIBRARIES})
295293

296294
if(USE_DARKTABLE_PROFILING)
297295
add_definitions(-DUSE_DARKTABLE_PROFILING)
@@ -1134,9 +1132,6 @@ set(CPACK_COMPONENTS_ALL DTApplication DTDebugSymbols DTDocuments)
11341132

11351133
if(USE_LIBRAW AND NOT (DONT_USE_INTERNAL_LIBRAW AND libraw_FOUND))
11361134
set(LIBRAW_PATH "${CMAKE_CURRENT_SOURCE_DIR}/external/LibRaw" CACHE STRING "Relative path to libraw directory (default=CMAKE_CURRENT_SOURCE_DIR)")
1137-
if(NOT USE_OPENMP)
1138-
set(ENABLE_OPENMP OFF CACHE BOOL "")
1139-
endif()
11401135
set(ENABLE_LCMS OFF CACHE BOOL "")
11411136
set(ENABLE_JASPER OFF CACHE BOOL "")
11421137

src/chart/main.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1910,9 +1910,7 @@ int main(int argc, char *argv[])
19101910
SetErrorMode(SEM_FAILCRITICALERRORS);
19111911
#endif
19121912

1913-
#ifdef _OPENMP
19141913
omp_set_num_threads(omp_get_num_procs());
1915-
#endif
19161914

19171915
int res = 1;
19181916
dt_lut_t *self = (dt_lut_t *)calloc(1, sizeof(dt_lut_t));

src/common/ai/restore_rgb.c

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -738,13 +738,7 @@ void dt_restore_apply_detail_recovery(const float *original_4ch,
738738
= dt_alloc_align_float(npix);
739739
if(!lum_residual) return;
740740

741-
#ifdef _OPENMP
742-
#pragma omp parallel for simd default(none) \
743-
dt_omp_firstprivate(original_4ch, denoised_4ch, \
744-
lum_residual, npix) \
745-
schedule(simd:static) \
746-
aligned(original_4ch, denoised_4ch, lum_residual:64)
747-
#endif
741+
DT_OMP_FOR_SIMD(aligned(original_4ch, denoised_4ch, lum_residual:64))
748742
for(size_t i = 0; i < npix; i++)
749743
{
750744
const size_t p = i * 4;
@@ -764,13 +758,7 @@ void dt_restore_apply_detail_recovery(const float *original_4ch,
764758
dwt_denoise(lum_residual, width, height,
765759
DWT_DETAIL_BANDS, noise);
766760

767-
#ifdef _OPENMP
768-
#pragma omp parallel for simd default(none) \
769-
dt_omp_firstprivate(denoised_4ch, lum_residual, \
770-
npix, alpha) \
771-
schedule(simd:static) \
772-
aligned(denoised_4ch, lum_residual:64)
773-
#endif
761+
DT_OMP_FOR_SIMD(aligned(denoised_4ch, lum_residual:64))
774762
for(size_t i = 0; i < npix; i++)
775763
{
776764
const size_t p = i * 4;

src/common/darktable.c

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,7 @@
119119
#include <sys/varargs.h>
120120
#endif
121121

122-
#ifdef _OPENMP
123122
#include <omp.h>
124-
#endif
125123

126124
#ifdef USE_LUA
127125
#include "lua/configuration.h"
@@ -826,18 +824,6 @@ char *version = g_strdup_printf(
826824
" Debug -> DISABLED\n"
827825
#endif
828826

829-
#if defined(__SSE2__) && defined(__SSE__)
830-
" SSE2 optimizations -> ENABLED\n"
831-
#else
832-
" SSE2 optimizations -> DISABLED\n"
833-
#endif
834-
835-
#ifdef _OPENMP
836-
" OpenMP -> ENABLED\n"
837-
#else
838-
" OpenMP -> DISABLED\n"
839-
#endif
840-
841827
#ifdef HAVE_OPENCL
842828
" OpenCL -> ENABLED\n"
843829
#else
@@ -1270,12 +1256,10 @@ int dt_init(int argc,
12701256
dt_print(DT_DEBUG_ALWAYS,
12711257
"[dt_init --threads] requested %d ompthreads restricted to %d",
12721258
desired, possible);
1273-
#ifdef _OPENMP
12741259
dt_print(DT_DEBUG_ALWAYS,
12751260
"[dt_init --threads] using %d threads of %d for openmp parallel sections %s",
12761261
darktable.num_openmp_threads, (int)dt_get_num_procs(),
12771262
omp_get_dynamic() ? "(dynamic)" : "(static)");
1278-
#endif
12791263
k++;
12801264
argv[k-1] = NULL;
12811265
argv[k] = NULL;
@@ -1540,10 +1524,8 @@ int dt_init(int argc,
15401524
darktable.gui = (dt_gui_gtk_t *)calloc(1, sizeof(dt_gui_gtk_t));
15411525
}
15421526

1543-
#ifdef _OPENMP
15441527
omp_set_num_threads(darktable.num_openmp_threads);
15451528
omp_set_dynamic(FALSE);
1546-
#endif
15471529

15481530
#ifdef USE_LUA
15491531
dt_lua_init_early(L);

src/common/darktable.h

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,7 @@ typedef unsigned int u_int;
7878
#include <sys/sysctl.h>
7979
#endif
8080

81-
#ifdef _OPENMP
82-
# include <omp.h>
81+
#include <omp.h>
8382

8483
/* See https://redmine.darktable.org/issues/12568#note-14 */
8584
# ifdef HAVE_OMP_FIRSTPRIVATE_WITH_CONST
@@ -91,41 +90,30 @@ typedef unsigned int u_int;
9190
# endif/* HAVE_OMP_FIRSTPRIVATE_WITH_CONST */
9291

9392
#ifndef dt_omp_sharedconst
94-
#ifdef _OPENMP
9593
#if defined(__clang__) || __GNUC__ > 8
96-
# define dt_omp_sharedconst(...) shared(__VA_ARGS__)
94+
#define dt_omp_sharedconst(...) shared(__VA_ARGS__)
9795
#else
9896
// GCC 8.4 throws string of errors "'x' is predetermined 'shared' for 'shared'" if we explicitly declare
9997
// 'const' variables as shared
100-
# define dt_omp_sharedconst(var, ...)
98+
#define dt_omp_sharedconst(var, ...)
10199
#endif
102-
#endif /* _OPENMP */
103100
#endif /* dt_omp_sharedconst */
104101

105102
#ifndef dt_omp_nontemporal
106103
// Clang 10+ supports the nontemporal() OpenMP directive
107104
// GCC 9 recognizes it as valid, but does not do anything with it
108105
// GCC 10+ ???
109106
#if (__clang__+0 >= 10 || __GNUC__ >= 9)
110-
# define dt_omp_nontemporal(...) nontemporal(__VA_ARGS__)
107+
#define dt_omp_nontemporal(...) nontemporal(__VA_ARGS__)
111108
#else
112109
// GCC7/8 only support OpenMP 4.5, which does not have the nontemporal() directive.
113-
# define dt_omp_nontemporal(var, ...)
110+
#define dt_omp_nontemporal(var, ...)
114111
#endif
115112
#endif /* dt_omp_nontemporal */
116113

117114
#define DT_OMP_STRINGIFY(...) #__VA_ARGS__
118115
#define DT_OMP_PRAGMA(...) _Pragma(DT_OMP_STRINGIFY(omp __VA_ARGS__))
119116

120-
#else /* _OPENMP */
121-
122-
# define omp_get_max_threads() 1
123-
# define omp_get_thread_num() 0
124-
125-
#define DT_OMP_PRAGMA(...)
126-
127-
#endif /* _OPENMP */
128-
129117
#define DT_OMP_SIMD(clauses) DT_OMP_PRAGMA(simd clauses)
130118
#define DT_OMP_DECLARE_SIMD(clauses) DT_OMP_PRAGMA(declare simd clauses)
131119
#define DT_OMP_FOR(clauses) DT_OMP_PRAGMA(parallel for default(firstprivate) schedule(static) clauses)
@@ -778,29 +766,17 @@ gboolean dt_supported_image(const gchar *filename);
778766

779767
static inline size_t dt_get_num_threads()
780768
{
781-
#ifdef _OPENMP
782769
return (size_t)CLAMP(omp_get_num_procs(), 1, darktable.num_openmp_threads);
783-
#else
784-
return 1;
785-
#endif
786770
}
787771

788772
static inline size_t dt_get_num_procs()
789773
{
790-
#ifdef _OPENMP
791774
return (size_t)MAX(1, omp_get_num_procs());
792-
#else
793-
return 1;
794-
#endif
795775
}
796776

797777
static inline int dt_get_thread_num()
798778
{
799-
#ifdef _OPENMP
800779
return omp_get_thread_num();
801-
#else
802-
return 0;
803-
#endif
804780
}
805781

806782
#define DT_INITHASH 5381
@@ -871,20 +847,6 @@ static inline float *dt_calloc_perthread_float(const size_t n,
871847
#define dt_get_bythread(buf, padsize, tnum) \
872848
DT_IS_ALIGNED((buf) + ((padsize) * (tnum)))
873849

874-
// Most code in dt assumes that the compiler is capable of
875-
// auto-vectorization. In some cases, this will yield suboptimal code
876-
// if the compiler in fact does NOT auto-vectorize. Uncomment the
877-
// following line for such a compiler.
878-
879-
//#define DT_NO_VECTORIZATION
880-
881-
// For some combinations of compiler and architecture, the compiler
882-
// may actually emit inferior code if given a hint to vectorize a
883-
// loop. Uncomment the following line if such a combination is the
884-
// compilation target.
885-
886-
//#define DT_NO_SIMD_HINTS
887-
888850
// copy the RGB channels of a pixel; includes the 'alpha' channel as
889851
// well if faster due to vectorization, but subsequent code should
890852
// ignore the value of the alpha unless explicitly set afterwards

src/common/densecrf.cc

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@
2121

2222
#include <exception>
2323
#include <math.h>
24-
#ifdef _OPENMP
2524
#include <omp.h>
26-
#endif
2725

2826
#include "common/darktable.h"
2927
#include "common/densecrf.h"

src/common/dttypes.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,7 @@ typedef DT_ALIGNED_PIXEL float dt_aligned_pixel_t[4];
6262
// a 3x3 matrix, padded to permit SSE instructions to be used for multiplication and addition
6363
typedef float DT_ALIGNED_ARRAY dt_colormatrix_t[4][4];
6464

65-
// To be able to vectorize per-pixel loops, we need to operate on all four channels, but if the compiler does
66-
// not auto-vectorize, doing so increases computation by 1/3 for a channel which typically is ignored anyway.
67-
// Select the appropriate number of channels over which to loop to produce the fastest code.
68-
#ifdef DT_NO_VECTORIZATION
69-
#define DT_PIXEL_SIMD_CHANNELS 3
70-
#else
7165
#define DT_PIXEL_SIMD_CHANNELS 4
72-
#endif
7366

7467
// A function to compute how many pixels each thread should process in a parallelized for loop.
7568
// For very small RoIs on a CPU with lots of threads, the last one or two hardware threads can end
@@ -91,7 +84,7 @@ static inline size_t dt_cacheline_chunks(const size_t npixels, const size_t nthr
9184
// {
9285
// src[k] = dest[k] / 3.0f;
9386
// }
94-
#if defined(_OPENMP) && defined(OPENMP_SIMD_) && !defined(DT_NO_SIMD_HINTS)
87+
#if defined(OPENMP_SIMD_)
9588
//https://stackoverflow.com/questions/45762357/how-to-concatenate-strings-in-the-arguments-of-pragma
9689
#define _DT_Pragma_(x) _Pragma(#x)
9790
#define _DT_Pragma(x) _DT_Pragma_(x)

0 commit comments

Comments
 (0)