Skip to content

Commit 9ac93d8

Browse files
committed
Run clang-format
Signed-off-by: Rémi Achard <remiachard@gmail.com>
1 parent e991d25 commit 9ac93d8

453 files changed

Lines changed: 32808 additions & 30548 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

include/OpenColorIO/OpenColorAppHelpers.h

Lines changed: 95 additions & 95 deletions
Large diffs are not rendered by default.

include/OpenColorIO/OpenColorIO.h

Lines changed: 676 additions & 634 deletions
Large diffs are not rendered by default.

include/OpenColorIO/OpenColorTransforms.h

Lines changed: 299 additions & 290 deletions
Large diffs are not rendered by default.

include/OpenColorIO/OpenColorTypes.h

Lines changed: 91 additions & 103 deletions
Large diffs are not rendered by default.

src/OpenColorIO/AVX.h

Lines changed: 65 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// SPDX-License-Identifier: BSD-3-Clause
22
// Copyright Contributors to the OpenColorIO Project.
33

4-
54
#ifndef INCLUDED_OCIO_AVX_H
65
#define INCLUDED_OCIO_AVX_H
76

@@ -10,8 +9,8 @@
109

1110
#include <immintrin.h>
1211

13-
#include <OpenColorIO/OpenColorIO.h>
1412
#include "BitDepthUtils.h"
13+
#include <OpenColorIO/OpenColorIO.h>
1514

1615
// Macros for alignment declarations
1716
#define AVX_SIMD_BYTES 32
@@ -31,14 +30,21 @@ inline __m256 avx_movehl_ps(__m256 a, __m256 b)
3130
return _mm256_castpd_ps(_mm256_unpackhi_pd(_mm256_castps_pd(b), _mm256_castps_pd(a)));
3231
}
3332

34-
inline __m256 avx_clamp(__m256 value, const __m256& maxValue)
33+
inline __m256 avx_clamp(__m256 value, const __m256 & maxValue)
3534
{
3635
value = _mm256_max_ps(value, _mm256_setzero_ps());
3736
return _mm256_min_ps(value, maxValue);
3837
}
3938

40-
inline void avxRGBATranspose_4x4_4x4(__m256 row0, __m256 row1, __m256 row2, __m256 row3,
41-
__m256 &out_r, __m256 &out_g, __m256 &out_b, __m256 &out_a )
39+
inline void avxRGBATranspose_4x4_4x4(
40+
__m256 row0,
41+
__m256 row1,
42+
__m256 row2,
43+
__m256 row3,
44+
__m256 & out_r,
45+
__m256 & out_g,
46+
__m256 & out_b,
47+
__m256 & out_a)
4248
{
4349
// the rgba transpose result will look like this
4450
//
@@ -61,14 +67,13 @@ inline void avxRGBATranspose_4x4_4x4(__m256 row0, __m256 row1, __m256 row2, __m2
6167
out_g = avx_movehl_ps(tmp2, tmp0);
6268
out_b = avx_movelh_ps(tmp1, tmp3);
6369
out_a = avx_movehl_ps(tmp3, tmp1);
64-
6570
}
6671

6772
inline __m256i avx_load_u8(__m128i a)
6873
{
69-
__m128i b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1,0,3,1));
70-
b = _mm_cvtepu8_epi32(b);
71-
a = _mm_cvtepu8_epi32(a);
74+
__m128i b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 1));
75+
b = _mm_cvtepu8_epi32(b);
76+
a = _mm_cvtepu8_epi32(a);
7277

7378
return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1);
7479
}
@@ -93,31 +98,34 @@ inline __m128i avx_pack_u8(__m256i a, __m256i b)
9398

9499
// Note: packing functions perform no 0.0 - 1.0 normalization
95100
// but perform 0 - max value clamping for integer formats
96-
template<BitDepth BD> struct AVXRGBAPack {};
101+
template <BitDepth BD> struct AVXRGBAPack
102+
{
103+
};
97104

98-
template <>
99-
struct AVXRGBAPack<BIT_DEPTH_UINT8>
105+
template <> struct AVXRGBAPack<BIT_DEPTH_UINT8>
100106
{
101-
static inline void Load(const uint8_t *in, __m256& r, __m256& g, __m256& b, __m256& a)
107+
static inline void Load(const uint8_t * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
102108
{
103-
__m256i rgba_00_07 = _mm256_loadu_si256((const __m256i*)in);
109+
__m256i rgba_00_07 = _mm256_loadu_si256((const __m256i *)in);
104110

105-
__m128i rgba_00_03 =_mm256_castsi256_si128(rgba_00_07);
106-
__m128i rgba_04_07 =_mm256_extractf128_si256(rgba_00_07, 1);
111+
__m128i rgba_00_03 = _mm256_castsi256_si128(rgba_00_07);
112+
__m128i rgba_04_07 = _mm256_extractf128_si256(rgba_00_07, 1);
107113

108114
// : 0, 1, 2, 3 | 4, 5, 6, 7 | 8, 9, 10, 11 | 12, 13, 14, 15
109115
// rgba_x03 : r0, g0, b0, a0 | r1, g1, b1, a1 | r2, g2, b2, a2 | r3, g3, b3, a3
110116
// rgba_x47 : r4, g4, b4, a4 | r5, g5, b5, a5 | r6, g6, b6, a6 | r7, g7, b7, a7
111117

112118
__m256 rgba0 = _mm256_cvtepi32_ps(avx_load_u8(rgba_00_03));
113-
__m256 rgba1 = _mm256_cvtepi32_ps(avx_load_u8(_mm_shuffle_epi32(rgba_00_03, _MM_SHUFFLE(3, 2, 3, 2))));
119+
__m256 rgba1 = _mm256_cvtepi32_ps(
120+
avx_load_u8(_mm_shuffle_epi32(rgba_00_03, _MM_SHUFFLE(3, 2, 3, 2))));
114121

115122
__m256 rgba2 = _mm256_cvtepi32_ps(avx_load_u8(rgba_04_07));
116-
__m256 rgba3 = _mm256_cvtepi32_ps(avx_load_u8(_mm_shuffle_epi32(rgba_04_07, _MM_SHUFFLE(3, 2, 3, 2))));
123+
__m256 rgba3 = _mm256_cvtepi32_ps(
124+
avx_load_u8(_mm_shuffle_epi32(rgba_04_07, _MM_SHUFFLE(3, 2, 3, 2))));
117125

118126
avxRGBATranspose_4x4_4x4(rgba0, rgba1, rgba2, rgba3, r, g, b, a);
119127
}
120-
static inline void Store(uint8_t *out, __m256 r, __m256 g, __m256 b, __m256 a)
128+
static inline void Store(uint8_t * out, __m256 r, __m256 g, __m256 b, __m256 a)
121129
{
122130
__m256 rgba0, rgba1, rgba2, rgba3;
123131
const __m256 maxValue = _mm256_set1_ps(255.0f);
@@ -129,7 +137,8 @@ struct AVXRGBAPack<BIT_DEPTH_UINT8>
129137
rgba2 = avx_clamp(rgba2, maxValue);
130138
rgba3 = avx_clamp(rgba3, maxValue);
131139

132-
// NOTE note using cvtps which will round based on MXCSR register defaults to _MM_ROUND_NEAREST
140+
// NOTE: cvtps rounds according to the MXCSR rounding mode, which defaults to
141+
// _MM_ROUND_NEAREST
133142
__m256i rgba01 = _mm256_cvtps_epi32(rgba0);
134143
__m256i rgba23 = _mm256_cvtps_epi32(rgba1);
135144
__m256i rgba45 = _mm256_cvtps_epi32(rgba2);
@@ -140,15 +149,15 @@ struct AVXRGBAPack<BIT_DEPTH_UINT8>
140149

141150
__m256i rgba = _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
142151

143-
_mm256_storeu_si256((__m256i*)out, rgba);
152+
_mm256_storeu_si256((__m256i *)out, rgba);
144153
}
145154
};
146155

147156
inline __m256i avx_unpack_u16(__m128i a)
148157
{
149-
__m128i b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1,0,3,2));
150-
b = _mm_cvtepu16_epi32(b);
151-
a = _mm_cvtepu16_epi32(a);
158+
__m128i b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 0, 3, 2));
159+
b = _mm_cvtepu16_epi32(b);
160+
a = _mm_cvtepu16_epi32(a);
152161

153162
return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1);
154163
}
@@ -176,16 +185,15 @@ inline __m128i avx_pack_u16(__m256i a)
176185
return _mm_or_si128(lo, hi);
177186
}
178187

179-
template<BitDepth BD>
180-
struct AVXRGBAPack16
188+
template <BitDepth BD> struct AVXRGBAPack16
181189
{
182190
typedef typename BitDepthInfo<BD>::Type Type;
183191

184-
static inline void Load(const Type *in, __m256& r, __m256& g, __m256& b, __m256& a)
192+
static inline void Load(const Type * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
185193
{
186194
// const __m256 scale = _mm256_set1_ps(1.0f / (float)BitDepthInfo<BD>::maxValue);
187-
__m256i rgba_00_03 = _mm256_loadu_si256((const __m256i*)(in + 0));
188-
__m256i rgba_04_07 = _mm256_loadu_si256((const __m256i*)(in + 16));
195+
__m256i rgba_00_03 = _mm256_loadu_si256((const __m256i *)(in + 0));
196+
__m256i rgba_04_07 = _mm256_loadu_si256((const __m256i *)(in + 16));
189197

190198
__m256 rgba0 = _mm256_cvtepi32_ps(avx_unpack_u16(_mm256_castsi256_si128(rgba_00_03)));
191199
__m256 rgba1 = _mm256_cvtepi32_ps(avx_unpack_u16(_mm256_extractf128_si256(rgba_00_03, 1)));
@@ -195,7 +203,7 @@ struct AVXRGBAPack16
195203
avxRGBATranspose_4x4_4x4(rgba0, rgba1, rgba2, rgba3, r, g, b, a);
196204
}
197205

198-
static inline void Store(Type *out, __m256 r, __m256 g, __m256 b, __m256 a)
206+
static inline void Store(Type * out, __m256 r, __m256 g, __m256 b, __m256 a)
199207
{
200208
__m256 rgba0, rgba1, rgba2, rgba3;
201209
__m128i lo, hi;
@@ -209,7 +217,8 @@ struct AVXRGBAPack16
209217
rgba2 = avx_clamp(rgba2, maxValue);
210218
rgba3 = avx_clamp(rgba3, maxValue);
211219

212-
// NOTE note using cvtps which will round based on MXCSR register defaults to _MM_ROUND_NEAREST
220+
// NOTE: cvtps rounds according to the MXCSR rounding mode, which defaults to
221+
// _MM_ROUND_NEAREST
213222
__m256i rgba01 = _mm256_cvtps_epi32(rgba0);
214223
__m256i rgba23 = _mm256_cvtps_epi32(rgba1);
215224
__m256i rgba45 = _mm256_cvtps_epi32(rgba2);
@@ -219,65 +228,61 @@ struct AVXRGBAPack16
219228
hi = avx_pack_u16(rgba23);
220229

221230
rgba = _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
222-
_mm256_storeu_si256((__m256i*)(out+0), rgba);
231+
_mm256_storeu_si256((__m256i *)(out + 0), rgba);
223232

224233
lo = avx_pack_u16(rgba45);
225234
hi = avx_pack_u16(rgba67);
226235

227236
rgba = _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
228-
_mm256_storeu_si256((__m256i*)(out+16), rgba);
237+
_mm256_storeu_si256((__m256i *)(out + 16), rgba);
229238
}
230239
};
231240

232-
template <>
233-
struct AVXRGBAPack<BIT_DEPTH_UINT10>
241+
template <> struct AVXRGBAPack<BIT_DEPTH_UINT10>
234242
{
235-
static inline void Load(const uint16_t *in, __m256& r, __m256& g, __m256& b, __m256& a)
243+
static inline void Load(const uint16_t * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
236244
{
237245
AVXRGBAPack16<BIT_DEPTH_UINT10>::Load(in, r, g, b, a);
238246
}
239-
static inline void Store(uint16_t *out, __m256 r, __m256 g, __m256 b, __m256 a)
247+
static inline void Store(uint16_t * out, __m256 r, __m256 g, __m256 b, __m256 a)
240248
{
241249
AVXRGBAPack16<BIT_DEPTH_UINT10>::Store(out, r, g, b, a);
242250
}
243251
};
244252

245-
template <>
246-
struct AVXRGBAPack<BIT_DEPTH_UINT12>
253+
template <> struct AVXRGBAPack<BIT_DEPTH_UINT12>
247254
{
248-
static inline void Load(const uint16_t *in, __m256& r, __m256& g, __m256& b, __m256& a)
255+
static inline void Load(const uint16_t * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
249256
{
250257
AVXRGBAPack16<BIT_DEPTH_UINT12>::Load(in, r, g, b, a);
251258
}
252-
static inline void Store(uint16_t *out, __m256 r, __m256 g, __m256 b, __m256 a)
259+
static inline void Store(uint16_t * out, __m256 r, __m256 g, __m256 b, __m256 a)
253260
{
254261
AVXRGBAPack16<BIT_DEPTH_UINT12>::Store(out, r, g, b, a);
255262
}
256263
};
257264

258-
template <>
259-
struct AVXRGBAPack<BIT_DEPTH_UINT16>
265+
template <> struct AVXRGBAPack<BIT_DEPTH_UINT16>
260266
{
261-
static inline void Load(const uint16_t *in, __m256& r, __m256& g, __m256& b, __m256& a)
267+
static inline void Load(const uint16_t * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
262268
{
263269
AVXRGBAPack16<BIT_DEPTH_UINT16>::Load(in, r, g, b, a);
264270
}
265-
static inline void Store(uint16_t *out, __m256 r, __m256 g, __m256 b, __m256 a)
271+
static inline void Store(uint16_t * out, __m256 r, __m256 g, __m256 b, __m256 a)
266272
{
267273
AVXRGBAPack16<BIT_DEPTH_UINT16>::Store(out, r, g, b, a);
268274
}
269275
};
270276

271277
#if OCIO_USE_F16C
272278

273-
template <>
274-
struct AVXRGBAPack<BIT_DEPTH_F16>
279+
template <> struct AVXRGBAPack<BIT_DEPTH_F16>
275280
{
276-
static inline void Load(const half *in, __m256& r, __m256& g, __m256& b, __m256& a)
281+
static inline void Load(const half * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
277282
{
278283

279-
__m256i rgba_00_03 = _mm256_loadu_si256((const __m256i*)(in + 0));
280-
__m256i rgba_04_07 = _mm256_loadu_si256((const __m256i*)(in + 16));
284+
__m256i rgba_00_03 = _mm256_loadu_si256((const __m256i *)(in + 0));
285+
__m256i rgba_04_07 = _mm256_loadu_si256((const __m256i *)(in + 16));
281286

282287
__m256 rgba0 = _mm256_cvtph_ps(_mm256_castsi256_si128(rgba_00_03));
283288
__m256 rgba1 = _mm256_cvtph_ps(_mm256_extractf128_si256(rgba_00_03, 1));
@@ -287,7 +292,7 @@ struct AVXRGBAPack<BIT_DEPTH_F16>
287292
avxRGBATranspose_4x4_4x4(rgba0, rgba1, rgba2, rgba3, r, g, b, a);
288293
}
289294

290-
static inline void Store(half *out, __m256 r, __m256 g, __m256 b, __m256 a)
295+
static inline void Store(half * out, __m256 r, __m256 g, __m256 b, __m256 a)
291296
{
292297
__m256 rgba0, rgba1, rgba2, rgba3;
293298
__m256i rgba;
@@ -300,36 +305,34 @@ struct AVXRGBAPack<BIT_DEPTH_F16>
300305
__m128i rgba12_16 = _mm256_cvtps_ph(rgba3, 0);
301306

302307
rgba = _mm256_insertf128_si256(_mm256_castsi128_si256(rgba00_03), rgba04_07, 1);
303-
_mm256_storeu_si256((__m256i*)(out+0), rgba);
308+
_mm256_storeu_si256((__m256i *)(out + 0), rgba);
304309

305310
rgba = _mm256_insertf128_si256(_mm256_castsi128_si256(rgba08_11), rgba12_16, 1);
306-
_mm256_storeu_si256((__m256i*)(out+16), rgba);
311+
_mm256_storeu_si256((__m256i *)(out + 16), rgba);
307312
}
308313
};
309314

310315
#endif
311316

312-
template <>
313-
struct AVXRGBAPack<BIT_DEPTH_F32>
317+
template <> struct AVXRGBAPack<BIT_DEPTH_F32>
314318
{
315-
static inline void Load(const float *in, __m256& r, __m256& g, __m256& b, __m256& a)
319+
static inline void Load(const float * in, __m256 & r, __m256 & g, __m256 & b, __m256 & a)
316320
{
317-
__m256 rgba0 = _mm256_loadu_ps(in + 0);
318-
__m256 rgba1 = _mm256_loadu_ps(in + 8);
321+
__m256 rgba0 = _mm256_loadu_ps(in + 0);
322+
__m256 rgba1 = _mm256_loadu_ps(in + 8);
319323
__m256 rgba2 = _mm256_loadu_ps(in + 16);
320324
__m256 rgba3 = _mm256_loadu_ps(in + 24);
321325

322326
avxRGBATranspose_4x4_4x4(rgba0, rgba1, rgba2, rgba3, r, g, b, a);
323-
324327
}
325328

326-
static inline void Store(float *out, __m256 r, __m256 g, __m256 b, __m256 a)
329+
static inline void Store(float * out, __m256 r, __m256 g, __m256 b, __m256 a)
327330
{
328331
__m256 rgba0, rgba1, rgba2, rgba3;
329332
avxRGBATranspose_4x4_4x4(r, g, b, a, rgba0, rgba1, rgba2, rgba3);
330333

331-
_mm256_storeu_ps(out + 0, rgba0);
332-
_mm256_storeu_ps(out + 8, rgba1);
334+
_mm256_storeu_ps(out + 0, rgba0);
335+
_mm256_storeu_ps(out + 8, rgba1);
333336
_mm256_storeu_ps(out + 16, rgba2);
334337
_mm256_storeu_ps(out + 24, rgba3);
335338
}

0 commit comments

Comments
 (0)