Skip to content

Commit a125f4a

Browse files
committed
Float improvements
1 parent 4f842f4 commit a125f4a

2 files changed

Lines changed: 50 additions & 52 deletions

File tree

include/math/brdf.hpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ static constexpr uint8 calcGaussCoeffCount(uint8 kernelWidth) noexcept
4040
}
4141

4242
static constexpr uint32 ggxKernelWidth = 21;
43-
static constexpr float ggxSigma0 = (ggxKernelWidth + 1) / 6.0f;
43+
static constexpr double ggxSigma0 = (ggxKernelWidth + 1) / 6.0;
4444
static constexpr auto ggxCoeffCount = calcGaussCoeffCount(ggxKernelWidth);
4545

4646
/**
@@ -63,7 +63,7 @@ static float calcGgxLodOffset(uint2 bufferSize, float fieldOfView) noexcept
6363
{
6464
constexpr float d = 1.0f; // Note: Texel size of the blur buffer in world units at 1 meter.
6565
auto texelSizeAtOneMeter = (d * std::tan(fieldOfView * 0.5f)) / bufferSize.y;
66-
return -std::log2((M_SQRT2 * ggxSigma0) * texelSizeAtOneMeter);
66+
return -std::log2(float(M_SQRT2 * ggxSigma0) * texelSizeAtOneMeter);
6767
}
6868

6969
/***********************************************************************************************************************
@@ -74,12 +74,14 @@ static float calcGgxLodOffset(uint2 bufferSize, float fieldOfView) noexcept
7474
* degrees of roughness, such as metals, plastics, and other materials with glossy or shiny finishes.
7575
*
7676
* @param noh dot product between the surface normal (n) and the half-vector (h)
77-
* @param roughness spread of microfacets on a surface (0.0-1.0 / smooth-rough)
77+
* @param linearRoughness spread of microfacets on a surface (0.0-1.0 / smooth-rough)
7878
*/
79-
static constexpr float ggx(float noh, float roughness) noexcept
79+
static constexpr float ggx(float noh, float linearRoughness) noexcept
8080
{
81-
auto f = (roughness - 1.0f) * ((roughness + 1.0f) * (noh * noh)) + 1.0f;
82-
return (roughness * roughness) / ((float)M_PI * f * f);
81+
auto oneMinusNohSquared = 1.0f - noh * noh;
82+
auto a = noh * linearRoughness;
83+
auto k = linearRoughness / (a * a + oneMinusNohSquared);
84+
return k * k * M_1_PI;
8385
}
8486

8587
/**
@@ -115,14 +117,16 @@ static constexpr float2 hammersley(uint32 index, float invSampleCount) noexcept
115117
* @param u spherical coordinates
116118
* @param linearRoughness roughness value
117119
*/
118-
static f32x4 importanceSamplingNdfDggx(float2 u, float a) noexcept
120+
static f32x4 importanceSamplingNdfDggx(float2 u, float linearRoughness) noexcept
119121
{
120-
auto phi = (float)(2.0 * M_PI) * u.x;
121-
auto cosTheta2 = (1.0f - u.y) / (1.0f + (a + 1.0f) * ((a - 1.0f) * u.y));
122+
auto a2 = linearRoughness * linearRoughness;
123+
auto phi = u.x * float(M_PI * 2.0);
124+
auto cosTheta2 = (1.0f - u.y) / std::fma(a2 - 1.0f, u.y, 1.0f);
122125
auto cosTheta = std::sqrt(cosTheta2);
123126
auto sinTheta = std::sqrt(1.0f - cosTheta2);
124-
return f32x4(sinTheta * std::cos(phi), sinTheta * std::sin(phi), cosTheta);
127+
return f32x4(std::cos(phi) * sinTheta, std::sin(phi) * sinTheta, cosTheta);
125128
}
129+
// TODO: use a faster algorithm (+7.5%): https://arxiv.org/pdf/2306.05044
126130

127131
/**
128132
* @brief Computes diffuse irradiance from spherical harmonics (SH) using a 3rd-order.

include/math/color-space.hpp

Lines changed: 36 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -46,66 +46,60 @@ static f32x4 srgbToRgb(f32x4 sRGB) noexcept
4646
return r;
4747
}
4848

49+
static const f32x4x4 rgbToXyzMat = f32x4x4
50+
(
51+
0.41239079926595934f, 0.21263900587151027f, 0.01933081871559182f, 0.0f,
52+
0.35758433938387800f, 0.71516867876775600f, 0.11919477979462598f, 0.0f,
53+
0.18048078840183430f, 0.07219231536073371f, 0.95053215224966070f, 0.0f,
54+
0.0f , 0.0f , 0.0f , 0.0f
55+
);
56+
static const f32x4x4 xyzToRgbMat = f32x4x4
57+
(
58+
3.2409699419045226f, -0.96924363628087960f, 0.05563007969699366f, 0.0f,
59+
-1.5373831775700940f, 1.87596750150772020f, -0.20397695888897652f, 0.0f,
60+
-0.4986107602930034f, 0.04155505740717559f, 1.05697151424287860f, 0.0f,
61+
0.0f , 0.0f , 0.0f , 0.0f
62+
);
63+
4964
/**
50-
* @brief Converts linear RGB color to the XYZ color space. (CIE 1931)
51-
* @param rgb target linear RGB color
65+
* @brief Converts linear sRGB color to the CIE XYZ color space.
66+
* @param rgb target linear sRGB color
5267
*/
53-
static f32x4 rgbToXyz(f32x4 rgb) noexcept
54-
{
55-
static const auto m = f32x4x4
56-
(
57-
0.4124564f, 0.2126729f, 0.0193339f, 0.0f,
58-
0.3575761f, 0.7151522f, 0.1191920f, 0.0f,
59-
0.1804375f, 0.0721750f, 0.9503041f, 0.0f,
60-
0.0f , 0.0f , 0.0f , 0.0f
61-
);
62-
return multiply3x3(m, rgb);
63-
}
68+
static f32x4 rgbToXyz(f32x4 rgb) noexcept { return multiply3x3(rgbToXyzMat, rgb); }
6469
/**
65-
* @brief Converts XYZ color to the linear RGB color space. (CIE 1931)
66-
* @param xyz target XYZ color
70+
* @brief Converts CIE XYZ color to the linear sRGB color space.
71+
* @param xyz target CIE XYZ color
6772
*/
68-
static f32x4 xyzToRgb(f32x4 xyz) noexcept
69-
{
70-
static const auto m = f32x4x4
71-
(
72-
3.2404542f, -0.9692660f, 0.0556434f, 0.0f,
73-
-1.5371385f, 1.8760108f, -0.2040259f, 0.0f,
74-
-0.4985314f, 0.0415560f, 1.0572252f, 0.0f,
75-
0.0f , 0.0f , 0.0f , 0.0f
76-
);
77-
return multiply3x3(m, xyz);
78-
}
73+
static f32x4 xyzToRgb(f32x4 xyz) noexcept { return multiply3x3(xyzToRgbMat, xyz); }
7974

8075
/**
81-
* @brief Converts XYZ color to the YXY color space.
82-
* @param xyz target XYZ color
76+
* @brief Converts CIE XYZ color to the CIE xyY color space.
77+
* @param xyz target CIE XYZ color
8378
*/
84-
static f32x4 xyzToYxy(f32x4 xyz) noexcept
79+
static f32x4 xyzToXyy(f32x4 xyz) noexcept
8580
{
86-
auto y = xyz.getY();
87-
auto inv = 1.0f / dot3(xyz, f32x4::one);
88-
return f32x4(y, xyz.getX() * inv, y * inv);
81+
auto a = std::max(xyz.getX() + xyz.getY() + xyz.getZ(), 1e-5f);
82+
return f32x4(xyz.getX() / a, xyz.getY() / a, xyz.getY());
8983
}
9084
/**
91-
* @brief Converts YXY color to the XYZ color space.
92-
* @param yxy target YXY color
85+
* @brief Converts CIE xyY color to the CIE XYZ color space.
86+
* @param xyy target CIE xyY color
9387
*/
94-
static f32x4 yxyToXyz(f32x4 yxy) noexcept
88+
static f32x4 xyyToXyz(f32x4 xyy) noexcept
9589
{
96-
auto x = yxy.getX(), y = yxy.getY(), z = yxy.getZ();
97-
return f32x4(x * y / z, x, x * (1.0f - y - z) / z);
90+
float a = xyy.getZ() / std::max(xyy.getY(), 1e-5f);
91+
return f32x4(xyy.getX() * a, xyy.getZ(), (1.0f - xyy.getX() - xyy.getY()) * a);
9892
}
9993

10094
/**
101-
* @brief Converts linear RGB color to the YXY color space.
95+
* @brief Converts linear sRGB color to the CIE xyY color space.
10296
* @param rgb target linear sRGB color
10397
*/
104-
static f32x4 rgbToYxy(f32x4 rgb) noexcept { return xyzToYxy(rgbToXyz(rgb)); }
98+
static f32x4 rgbToXyy(f32x4 rgb) noexcept { return xyzToXyy(rgbToXyz(rgb)); }
10599
/**
106-
* @brief Converts YXY color to the linear RGB color space.
107-
* @param yxy target YXY color
100+
* @brief Converts CIE xyY color to the linear sRGB color space.
101+
* @param xyy target CIE xyY color
108102
*/
109-
static f32x4 yxyToRgb(f32x4 yxy) noexcept { return xyzToRgb(yxyToXyz(yxy)); }
103+
static f32x4 xyyToRgb(f32x4 xyy) noexcept { return xyzToRgb(xyyToXyz(xyy)); }
110104

111105
} // namespace math

0 commit comments

Comments
 (0)