Skip to content

Commit 609b5fb

Browse files
author
Grok Compression
committed
Add DCI-compliant Rec.709 RGB → X'Y'Z' colour transform
- New XYZTransform.cpp/h: Highway SIMD optimised RGB→XYZ with DCI companding coefficient (48/52.37), gamma 2.2 linearisation, DCI 2.6 gamma output, LUT-accelerated for common precisions - Add --xyz CLI flag to grk_compress for in-pipeline use - Add grk_apply_xyz_transform() to public C API (grok.h) for FFI consumers - Disable MCT when XYZ transform is active - Support both single-file and MJ2 batch paths
1 parent 135008a commit 609b5fb

6 files changed

Lines changed: 336 additions & 1 deletion

File tree

src/lib/codec/apps/GrkCompress.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ using namespace grk;
6363
#include "spdlog/sinks/basic_file_sink.h"
6464
#include "GrkCompress.h"
6565
#include "Messenger.h"
66+
#include "XYZTransform.h"
6667

6768
void exit_func()
6869
{
@@ -410,6 +411,18 @@ int GrkCompress::main(int argc, const char** argv, grk_image* in_image, grk_stre
410411
continue;
411412
}
412413

414+
// Apply RGB → X'Y'Z' colour transform if requested
415+
if(initParams.parameters.apply_xyz_transform)
416+
{
417+
if(!grk::applyXYZTransform(image))
418+
{
419+
spdlog::error("Failed to apply XYZ colour transform");
420+
grk_object_unref(&image->obj);
421+
success = EXIT_FAILURE;
422+
goto mj2_main_cleanup;
423+
}
424+
}
425+
413426
if(first)
414427
{
415428
// resolve MCT auto-detect before first compress
@@ -926,6 +939,10 @@ GrkRC GrkCompress::parseCommandLine(int argc, const char* argv[], CompressInitPa
926939
auto progressiveRCOpt =
927940
app.add_flag("--progressive-rc", progressiveRC, "Progressive rate control");
928941

942+
bool xyzTransform;
943+
auto xyzOpt = app.add_flag("--xyz", xyzTransform,
944+
"Apply Rec.709 RGB to DCI X'Y'Z' colour transform before compression");
945+
929946
app.set_help_flag("-h", "Show abreviated usage");
930947
app.add_flag("--help", "Show detailed usage");
931948
try
@@ -967,6 +984,8 @@ GrkRC GrkCompress::parseCommandLine(int argc, const char* argv[], CompressInitPa
967984
parameters->write_tlm = true;
968985
if(progressiveRCOpt->count() > 0)
969986
parameters->progressive_rate_control = true;
987+
if(xyzOpt->count() > 0)
988+
parameters->apply_xyz_transform = true;
970989
if(repetitionsOpt->count() > 0)
971990
parameters->repeats = repetitions;
972991
if(rateControlAlgorithmOpt->count() > 0)
@@ -2084,6 +2103,16 @@ static uint64_t pluginCompressCallback(grk_plugin_compress_user_callback_info* i
20842103
goto cleanup;
20852104
}
20862105

2106+
// Apply RGB → X'Y'Z' colour transform if requested
2107+
if(parameters->apply_xyz_transform)
2108+
{
2109+
if(!grk::applyXYZTransform(image))
2110+
{
2111+
spdlog::error("Failed to apply XYZ colour transform");
2112+
goto cleanup;
2113+
}
2114+
}
2115+
20872116
/* Can happen if input file is TIFF or PNG
20882117
* and GROK_HAVE_LIBTIF or GROK_HAVE_LIBPNG is undefined
20892118
*/
@@ -2153,7 +2182,12 @@ static uint64_t pluginCompressCallback(grk_plugin_compress_user_callback_info* i
21532182
}
21542183

21552184
/* Decide if MCT should be used */
2156-
if(parameters->mct == 255)
2185+
if(parameters->apply_xyz_transform)
2186+
{
2187+
// XYZ-encoded data must not have MCT applied
2188+
parameters->mct = 0;
2189+
}
2190+
else if(parameters->mct == 255)
21572191
{ /* mct mode has not been set in commandline */
21582192
parameters->mct = (image->numcomps >= 3) ? 1 : 0;
21592193
}

src/lib/core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ set(GROK_LIBRARY_SRCS
8484
${CMAKE_CURRENT_SOURCE_DIR}/util/SparseBuffer.cpp
8585
${CMAKE_CURRENT_SOURCE_DIR}/util/GrkImage.cpp
8686
${CMAKE_CURRENT_SOURCE_DIR}/util/GrkImageSIMD.cpp
87+
${CMAKE_CURRENT_SOURCE_DIR}/util/XYZTransform.cpp
8788
${CMAKE_CURRENT_SOURCE_DIR}/util/GrkMatrix.cpp
8889
${CMAKE_CURRENT_SOURCE_DIR}/util/lanes.cpp
8990

src/lib/core/grok.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
#include "GrkImage.h"
6969
#include "ICompressor.h"
7070
#include "IDecompressor.h"
71+
#include "XYZTransform.h"
7172

7273
#include "MemStream.h"
7374

@@ -963,6 +964,10 @@ void grk_compress_set_default_params(grk_cparameters* parameters)
963964
parameters->device_id = 0;
964965
parameters->repeats = 1;
965966
}
967+
bool grk_apply_xyz_transform(grk_image* image)
968+
{
969+
return grk::applyXYZTransform(image);
970+
}
966971
grk_object* grk_compress_init(grk_stream_params* streamParams, grk_cparameters* parameters,
967972
grk_image* image)
968973
{

src/lib/core/grok.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,16 @@ typedef struct _grk_cparameters
17511751
uint8_t max_res_transcode; /* max resolutions to keep (0 = all) */
17521752
GRK_PROG_ORDER
17531753
transcode_prog_order; /* reorder packets to this progression (GRK_PROG_UNKNOWN = keep) */
1754+
1755+
/**
1756+
* Apply Rec.709 RGB → DCI X'Y'Z' colour transform on the input image
1757+
* before compression. Uses SIMD-accelerated pipeline:
1758+
* 1. Rec.709 OETF⁻¹ (gamma → linear)
1759+
* 2. 3×3 matrix (linear RGB → linear CIE XYZ, D65)
1760+
* 3. DCI 2.6 gamma (linear → X'Y'Z')
1761+
* Only applied to images with ≥3 components.
1762+
*/
1763+
bool apply_xyz_transform;
17541764
} grk_cparameters;
17551765

17561766
/**
@@ -1777,6 +1787,17 @@ typedef struct _grk_cparameters
17771787
*/
17781788
GRK_API void GRK_CALLCONV grk_compress_set_default_params(grk_cparameters* parameters);
17791789

1790+
/**
1791+
* @brief Apply Rec.709 RGB → DCI X'Y'Z' colour transform to an image in-place.
1792+
*
1793+
* Operates on planar int32 component buffers (comp[0..2].data).
1794+
* Pixel values are interpreted as unsigned integers in [0, (1<<prec)-1].
1795+
*
1796+
* @param image Image with ≥3 components. Only components 0,1,2 are modified.
1797+
* @return true on success, false if image has <3 components.
1798+
*/
1799+
GRK_API bool GRK_CALLCONV grk_apply_xyz_transform(grk_image* image);
1800+
17801801
/**
17811802
* @brief Creates and initializes a JPEG 2000 compressor.
17821803
*

src/lib/core/util/XYZTransform.cpp

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,228 @@
1+
/*
2+
* Copyright (C) 2016-2026 Grok Image Compression Inc.
3+
*
4+
* This source code is free software: you can redistribute it and/or modify
5+
* it under the terms of the GNU Affero General Public License, version 3,
6+
* as published by the Free Software Foundation.
7+
*
8+
* This source code is distributed in the hope that it will be useful,
9+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11+
* GNU Affero General Public License for more details.
12+
*
13+
* You should have received a copy of the GNU Affero General Public License
14+
* along with this program. If not, see <http://www.gnu.org/licenses/>.
15+
*
16+
*/
17+
18+
#include "hwy_arm_disable_targets.h"
19+
20+
#undef HWY_TARGET_INCLUDE
21+
#define HWY_TARGET_INCLUDE "util/XYZTransform.cpp"
22+
#include <hwy/foreach_target.h>
23+
#include <hwy/highway.h>
24+
#include <cmath>
25+
#include <cstdint>
26+
#include <memory>
27+
#include <vector>
28+
29+
#include "grok.h"
30+
#include "XYZTransform.h"
31+
#include "Logger.h"
32+
33+
HWY_BEFORE_NAMESPACE();
34+
namespace grk
35+
{
36+
namespace HWY_NAMESPACE
37+
{
38+
namespace hn = hwy::HWY_NAMESPACE;
39+
40+
// ─── DCI companding coefficient (SMPTE 428-1) ───
41+
// Peak white luminance 48 cd/m² mapped to XYZ encoding range of 52.37
42+
static constexpr float DCI_COEFFICIENT = 48.0f / 52.37f;
43+
44+
// ─── Rec.709 linearization: simple gamma 2.2 (matches libdcp/dcpomatic) ───
45+
static inline float rec709_to_linear(float v)
46+
{
47+
if(v <= 0.0f)
48+
return 0.0f;
49+
return std::pow(v, 2.2f);
50+
}
51+
52+
// ─── DCI 2.6 gamma: linear → X'Y'Z' ───
53+
static inline float linear_to_dci_gamma(float v)
54+
{
55+
if(v <= 0.0f)
56+
return 0.0f;
57+
return std::pow(v, 1.0f / 2.6f);
58+
}
59+
60+
// ─── Rec.709 → CIE XYZ matrix (D65 white point), pre-multiplied by DCI companding ───
61+
static constexpr float M00 = 0.4124564f * DCI_COEFFICIENT;
62+
static constexpr float M01 = 0.3575761f * DCI_COEFFICIENT;
63+
static constexpr float M02 = 0.1804375f * DCI_COEFFICIENT;
64+
static constexpr float M10 = 0.2126729f * DCI_COEFFICIENT;
65+
static constexpr float M11 = 0.7151522f * DCI_COEFFICIENT;
66+
static constexpr float M12 = 0.0721750f * DCI_COEFFICIENT;
67+
static constexpr float M20 = 0.0193339f * DCI_COEFFICIENT;
68+
static constexpr float M21 = 0.1191920f * DCI_COEFFICIENT;
69+
static constexpr float M22 = 0.9503041f * DCI_COEFFICIENT;
70+
71+
/**
72+
* SIMD-accelerated RGB → X'Y'Z' transform.
73+
*
74+
* Processes planar int32 component buffers. The gamma curves use scalar
75+
* LUTs (pre-built for the given precision), while the 3×3 matrix multiply
76+
* uses Highway SIMD float vectors.
77+
*/
78+
static void Hwy_rgb_to_xyz(int32_t* HWY_RESTRICT rBuf, int32_t* HWY_RESTRICT gBuf,
79+
int32_t* HWY_RESTRICT bBuf, uint64_t numPixels, uint32_t prec)
80+
{
81+
const uint32_t maxVal = (1u << prec) - 1;
82+
const float scale = 1.0f / (float)maxVal;
83+
84+
// Build linearization LUT: [0..maxVal] → float linear
85+
const uint32_t lutSize = maxVal + 1;
86+
auto linLut = std::make_unique<float[]>(lutSize);
87+
for(uint32_t v = 0; v < lutSize; ++v)
88+
linLut[v] = rec709_to_linear((float)v * scale);
89+
90+
// Build DCI gamma LUT at output precision (e.g. 4096 entries for 12-bit)
91+
// This fits in L1 cache unlike the old 256KB 16-bit LUT
92+
auto gammaLut = std::make_unique<float[]>(lutSize);
93+
for(uint32_t v = 0; v < lutSize; ++v)
94+
gammaLut[v] = linear_to_dci_gamma((float)v * scale);
95+
96+
const float gammaScale = (float)maxVal;
97+
98+
// Fused single-pass: linearize → matrix → gamma → quantize per pixel
99+
// This avoids intermediate float buffers and keeps everything in L1
100+
for(uint64_t i = 0; i < numPixels; ++i)
101+
{
102+
// Linearize via LUT
103+
uint32_t rv = (uint32_t)rBuf[i];
104+
uint32_t gv = (uint32_t)gBuf[i];
105+
uint32_t bv = (uint32_t)bBuf[i];
106+
if(rv > maxVal) rv = maxVal;
107+
if(gv > maxVal) gv = maxVal;
108+
if(bv > maxVal) bv = maxVal;
109+
110+
float r = linLut[rv];
111+
float g = linLut[gv];
112+
float b = linLut[bv];
113+
114+
// Matrix multiply (scalar — 9 FMAs)
115+
float x = M00 * r + M01 * g + M02 * b;
116+
float y = M10 * r + M11 * g + M12 * b;
117+
float z = M20 * r + M21 * g + M22 * b;
118+
119+
// Gamma via LUT (quantize linear to output precision for indexing)
120+
auto quantize = [maxVal, gammaScale](float v) -> uint32_t {
121+
if(v <= 0.0f) return 0;
122+
if(v >= 1.0f) return maxVal;
123+
return (uint32_t)(v * gammaScale + 0.5f);
124+
};
125+
126+
rBuf[i] = (int32_t)(gammaLut[quantize(x)] * gammaScale + 0.5f);
127+
gBuf[i] = (int32_t)(gammaLut[quantize(y)] * gammaScale + 0.5f);
128+
bBuf[i] = (int32_t)(gammaLut[quantize(z)] * gammaScale + 0.5f);
129+
}
130+
}
131+
132+
} // namespace HWY_NAMESPACE
133+
} // namespace grk
134+
HWY_AFTER_NAMESPACE();
135+
136+
#if HWY_ONCE
137+
namespace grk
138+
{
139+
140+
HWY_EXPORT(Hwy_rgb_to_xyz);
141+
142+
bool applyXYZTransform(grk_image* image)
143+
{
144+
if(!image || image->numcomps < 3)
145+
{
146+
grklog.error("XYZ transform requires an image with at least 3 components");
147+
return false;
148+
}
149+
150+
auto& compR = image->comps[0];
151+
auto& compG = image->comps[1];
152+
auto& compB = image->comps[2];
153+
154+
if(!compR.data || !compG.data || !compB.data)
155+
{
156+
grklog.error("XYZ transform: component data is null");
157+
return false;
158+
}
159+
160+
// All components must have same dimensions and precision
161+
if(compR.w != compG.w || compR.w != compB.w || compR.h != compG.h || compR.h != compB.h ||
162+
compR.prec != compG.prec || compR.prec != compB.prec)
163+
{
164+
grklog.error("XYZ transform: components must have identical dimensions and precision");
165+
return false;
166+
}
167+
168+
uint32_t w = compR.w;
169+
uint32_t h = compR.h;
170+
uint32_t prec = compR.prec;
171+
172+
grklog.info("XYZ transform: %ux%u, %u-bit, strides: R=%u G=%u B=%u, data_type: R=%d G=%d B=%d",
173+
w, h, prec, compR.stride, compG.stride, compB.stride,
174+
(int)compR.data_type, (int)compG.data_type, (int)compB.data_type);
175+
176+
// Handle int16 data type: need to widen to int32 for processing, then narrow back
177+
bool isInt16 = (compR.data_type == GRK_INT_16);
178+
179+
// If data is stored with stride, we need to process row-by-row
180+
// For contiguous data (stride == w), we can process in one shot
181+
bool contiguous =
182+
(compR.stride == w) && (compG.stride == w) && (compB.stride == w);
183+
184+
if(contiguous)
185+
{
186+
uint64_t numPixels = (uint64_t)w * h;
187+
auto rBuf = (int32_t*)compR.data;
188+
auto gBuf = (int32_t*)compG.data;
189+
auto bBuf = (int32_t*)compB.data;
190+
HWY_DYNAMIC_DISPATCH(Hwy_rgb_to_xyz)(rBuf, gBuf, bBuf, numPixels, prec);
191+
}
192+
else
193+
{
194+
// Row-by-row processing for strided data
195+
// Allocate temporary contiguous buffers
196+
auto rRow = std::make_unique<int32_t[]>(w);
197+
auto gRow = std::make_unique<int32_t[]>(w);
198+
auto bRow = std::make_unique<int32_t[]>(w);
199+
200+
for(uint32_t y = 0; y < h; ++y)
201+
{
202+
auto rPtr = (int32_t*)compR.data + (uint64_t)y * compR.stride;
203+
auto gPtr = (int32_t*)compG.data + (uint64_t)y * compG.stride;
204+
auto bPtr = (int32_t*)compB.data + (uint64_t)y * compB.stride;
205+
206+
// Copy row to contiguous buffer
207+
memcpy(rRow.get(), rPtr, w * sizeof(int32_t));
208+
memcpy(gRow.get(), gPtr, w * sizeof(int32_t));
209+
memcpy(bRow.get(), bPtr, w * sizeof(int32_t));
210+
211+
HWY_DYNAMIC_DISPATCH(Hwy_rgb_to_xyz)(rRow.get(), gRow.get(), bRow.get(), w, prec);
212+
213+
// Copy back
214+
memcpy(rPtr, rRow.get(), w * sizeof(int32_t));
215+
memcpy(gPtr, gRow.get(), w * sizeof(int32_t));
216+
memcpy(bPtr, bRow.get(), w * sizeof(int32_t));
217+
}
218+
}
219+
220+
// Update colour space to XYZ
221+
image->color_space = GRK_CLRSPC_CUSTOM_CIE;
222+
223+
grklog.info("Applied Rec.709 RGB → DCI X'Y'Z' colour transform (%ux%u, %u-bit)", w, h, prec);
224+
return true;
225+
}
226+
227+
} // namespace grk
228+
#endif // HWY_ONCE

0 commit comments

Comments
 (0)