Skip to content

Commit 8480db0

Browse files
committed
Add kanzi processor
1 parent d8996fb commit 8480db0

9 files changed

Lines changed: 194 additions & 0 deletions

File tree

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,6 @@
100100
[submodule "third_party/woff2"]
101101
path = third_party/woff2
102102
url = https://github.com/google/woff2.git
103+
[submodule "third_party/kanzi-cpp"]
104+
path = third_party/kanzi-cpp
105+
url = https://github.com/flanglet/kanzi-cpp.git

CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,17 @@ cmake_minimum_required(VERSION 3.5...4.1.2)
22

33
project(chisel VERSION 1.4.2 LANGUAGES CXX C)
44

5+
# Prevents redefinition of the 'uninstall' target.
#
# Every add_custom_target() call is routed through this macro. The call is
# forwarded unchanged unless it would create a second 'uninstall' target, in
# which case it is silently dropped.
#
# NOTE(review): _add_custom_target is the built-in command that CMake keeps
# reachable under an underscore-prefixed name once a command is overridden.
# This is undocumented behaviour and only survives a single level of
# override -- confirm no bundled project overrides add_custom_target too.
macro(add_custom_target target_name)
    if(NOT ("${target_name}" STREQUAL "uninstall" AND TARGET uninstall))
        _add_custom_target(${ARGV})
    endif()
endmacro()
15+
516
option(CHISEL_BUILD_CLI "Build the chisel command-line executable" ON)
617
option(ENABLE_OPTIVORBIS "Enable rust optivorbis bridge" ON)
718
option(ENABLE_MP3PACKER "Enable OCaml mp3packer integration" ON)
@@ -622,6 +633,10 @@ if(APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
622633
endif ()
623634
add_subdirectory(third_party/woff2 EXCLUDE_FROM_ALL)
624635

636+
# kanzi-cpp (built with multithreading disabled)
637+
set(CONCURRENCY_DISABLED ON CACHE BOOL "Disable multithreading in Kanzi" FORCE)
638+
add_subdirectory(third_party/kanzi-cpp EXCLUDE_FROM_ALL)
639+
625640
# common libraries for all platforms (exposed to libchisel/chisel_cli)
626641
set(CHISEL_LIBS
627642
FLAC
@@ -656,6 +671,7 @@ set(CHISEL_LIBS
656671
woff2dec
657672
woff2enc
658673
liblzma
674+
libkanzi
659675
)
660676

661677
if(ENABLE_MATROSKA)

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ Extending `chisel` with a new encoder or format requires just a few operations:
299299
| Archives | Java EE | application/java-archive | .war, .ear | libarchive (ZIP-based) |
300300
| Archives | Android Bundle | application/vnd.android.package-archive | .aab | libarchive (ZIP-based) |
301301
| Archives | Tencent Resource DB | application/x-rdb | .rdb | internal |
302+
| Archives | Kanzi | application/x-kanzi | .knz | kanzi |
302303
| Fonts | WOFF | font/woff | .woff | zlib |
303304
| Fonts | WOFF2 | font/woff2 | .woff2 | woff2 |
304305
| Scientific | MSEED | application/vnd.fdsn.mseed | .mseed | libmseed |

libchisel/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ set(LIBCHISEL_SOURCES
99
include/gif_processor.hpp
1010
include/jpeg_processor.hpp
1111
include/jxl_processor.hpp
12+
include/kanzi_processor.hpp
1213
include/log_sink.hpp
1314
include/logger.hpp
1415
include/mime_detector.hpp
@@ -35,6 +36,7 @@ set(LIBCHISEL_SOURCES
3536
src/processors/gif_processor.cpp
3637
src/processors/jpeg_processor.cpp
3738
src/processors/jxl_processor.cpp
39+
src/processors/kanzi_processor.cpp
3840
src/processors/mkv_processor.cpp
3941
src/processors/mseed_processor.cpp
4042
src/processors/odf_processor.cpp
@@ -113,6 +115,10 @@ set(LIBCHISEL_SOURCES
113115
src/processors/icns_processor.cpp
114116
include/woff_processor.hpp
115117
src/processors/woff_processor.cpp
118+
119+
${CMAKE_SOURCE_DIR}/third_party/kanzi-cpp/src/app/BlockCompressor.cpp
120+
${CMAKE_SOURCE_DIR}/third_party/kanzi-cpp/src/app/BlockDecompressor.cpp
121+
${CMAKE_SOURCE_DIR}/third_party/kanzi-cpp/src/app/InfoPrinter.cpp
116122
)
117123
if(ENABLE_OPTIVORBIS)
118124
corrosion_import_crate(MANIFEST_PATH "rust_bridge/Cargo.toml")
@@ -122,6 +128,7 @@ add_library(libchisel STATIC ${LIBCHISEL_SOURCES})
122128

123129
target_compile_definitions(libchisel PRIVATE
124130
CHISEL_VERSION="${PROJECT_VERSION}"
131+
CONCURRENCY_DISABLED
125132
)
126133

127134
target_include_directories(libchisel
@@ -160,6 +167,7 @@ target_include_directories(libchisel
160167
${CMAKE_SOURCE_DIR}/third_party/xz/src/liblzma/api
161168
${ZSTD_INCLUDE_DIR}
162169
${BZIP2_DIR}
170+
${CMAKE_SOURCE_DIR}/third_party/kanzi-cpp/src
163171
)
164172

165173
if(NOT WIN32)
@@ -241,6 +249,7 @@ add_dependencies(libchisel libgifsicle)
241249
add_dependencies(libchisel mseed_static)
242250
add_dependencies(libchisel brotlicommon brotlidec brotlienc)
243251
add_dependencies(libchisel woff2common woff2dec woff2enc)
252+
add_dependencies(libchisel libkanzi)
244253

245254
if(NOT WIN32)
246255
add_dependencies(libchisel LIBMAGIC)

libchisel/include/file_type.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ enum class ContainerFormat {
5858
Cpio,
5959
Ar,
6060
Zstd,
61+
Kanzi,
6162
Unknown
6263
};
6364

@@ -108,6 +109,7 @@ inline const std::unordered_map<std::string, ContainerFormat> mime_to_format = {
108109
{ "application/zip", ContainerFormat::Zip },
109110
{ "application/java-archive", ContainerFormat::Zip },
110111
{ "application/vnd.android.package-archive", ContainerFormat::Apk },
112+
{ "application/x-kanzi", ContainerFormat::Kanzi }
111113
};
112114

113115
/**
@@ -148,6 +150,7 @@ inline std::string container_format_to_string(const ContainerFormat fmt) {
148150
case ContainerFormat::Cpio: return "cpio";
149151
case ContainerFormat::Ar: return "a";
150152
case ContainerFormat::Zstd: return "zst";
153+
case ContainerFormat::Kanzi: return "knz";
151154
default: return "unknown";
152155
}
153156
}
@@ -203,6 +206,7 @@ inline std::optional<ContainerFormat> parse_container_format(const std::string &
203206
if (s == "nupkg") return ContainerFormat::Zip;
204207
if (s == "war" || s == "ear") return ContainerFormat::Jar;
205208
if (s == "aab") return ContainerFormat::Apk;
209+
if (s == "knz") return ContainerFormat::Kanzi;
206210
return std::nullopt;
207211
}
208212

@@ -255,6 +259,7 @@ inline bool can_write_format(const ContainerFormat fmt) {
255259
case ContainerFormat::Cpio:
256260
case ContainerFormat::Ar:
257261
case ContainerFormat::Zstd:
262+
case ContainerFormat::Kanzi:
258263
return true;
259264
default:
260265
return false;
@@ -295,6 +300,7 @@ static const std::unordered_map<std::string, std::string> ext_to_mime = {
295300
{".war", "application/java-archive"},
296301
{".ear", "application/java-archive"},
297302
{".aab", "application/vnd.android.package-archive"},
303+
{".knz", "application/x-kanzi"},
298304

299305
// images
300306
{".jpg", "image/jpeg"},
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
//
2+
// Created by Giuseppe Francione on 20/05/2026.
3+
//
4+
5+
#ifndef CHISEL_KANZI_PROCESSOR_HPP
6+
#define CHISEL_KANZI_PROCESSOR_HPP
7+
8+
#include "processor.hpp"
9+
10+
namespace chisel {
11+
12+
class KanziProcessor : public IProcessor {
13+
public:
14+
[[nodiscard]] std::string_view get_name() const noexcept override {
15+
return "KanziProcessor";
16+
}
17+
18+
[[nodiscard]] std::span<const std::string_view> get_supported_mime_types() const noexcept override {
19+
static constexpr std::array<std::string_view, 1> mimes = {
20+
"application/x-kanzi"
21+
};
22+
return mimes;
23+
}
24+
25+
[[nodiscard]] std::span<const std::string_view> get_supported_extensions() const noexcept override {
26+
static constexpr std::array<std::string_view, 1> exts = {
27+
".knz"
28+
};
29+
return exts;
30+
}
31+
32+
[[nodiscard]] bool can_recompress() const noexcept override {
33+
return false; // stream container, actual data is recompressed by pipeline
34+
}
35+
36+
[[nodiscard]] bool can_extract_contents() const noexcept override {
37+
return true;
38+
}
39+
40+
void recompress(const fs::path& /*input_path*/, const fs::path& /*output_path*/, const ProcessingOptions& /*options*/) override {
41+
throw std::logic_error("KanziProcessor does not support direct recompression.");
42+
}
43+
44+
std::optional<ExtractedContent> prepare_extraction(const std::filesystem::path& input_path) override;
45+
46+
std::filesystem::path finalize_extraction(const ExtractedContent& content,
47+
const ProcessingOptions& options) override;
48+
49+
[[nodiscard]] std::string get_raw_checksum(const std::filesystem::path& file_path) const override;
50+
};
51+
52+
} // namespace chisel
53+
54+
#endif // CHISEL_KANZI_PROCESSOR_HPP
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
//
2+
// Created by Giuseppe Francione on 20/05/2026.
3+
//
4+
5+
#include "../../include/kanzi_processor.hpp"
#include "../../include/logger.hpp"
#include "../../include/file_utils.hpp"
#include "../../include/random_utils.hpp"
#include "../../include/file_type.hpp"

#include <array>
#include <cstdint>
#include <exception>
#include <filesystem>
#include <optional>
#include <stdexcept>
#include <string>
#include <system_error>

#include <Context.hpp>
#include "app/BlockCompressor.hpp"
#include "app/BlockDecompressor.hpp"
14+
15+
namespace chisel {
16+
17+
namespace fs = std::filesystem;
18+
19+
20+
/**
 * @brief Decompresses a Kanzi stream into a fresh temporary directory.
 *
 * The decompressed bytes are written to "stream.raw" inside the temporary
 * directory, and that file is exposed as the only extracted file.
 *
 * @param input_path Path of the Kanzi-compressed file to decompress.
 * @return The extraction descriptor on success. std::nullopt if kanzi reports
 *         a non-zero status or throws a std::exception; in that case the
 *         temporary directory is removed before returning, because the caller
 *         receives no handle through which it could clean it up.
 */
std::optional<ExtractedContent> KanziProcessor::prepare_extraction(const fs::path& input_path) {
    Logger::log(LogLevel::Debug, "Starting kanzi decompression", get_name());

    ExtractedContent content;
    content.original_path = input_path;
    content.temp_dir = make_temp_dir_for(input_path, "kanzi");
    content.format = ContainerFormat::Kanzi;

    const fs::path raw_bin = content.temp_dir / "stream.raw";

    bool decompressed = false;
    try {
        kanzi::Context ctx;
        ctx.putString("inputName", input_path.string());
        ctx.putString("outputName", raw_bin.string());
        ctx.putInt("jobs", 1);       // single job: the bundled kanzi is built with CONCURRENCY_DISABLED
        ctx.putInt("overwrite", 1);
        ctx.putInt("verbosity", 0);  // presumably silences kanzi's own console output -- TODO confirm

        kanzi::BlockDecompressor bd(ctx);
        uint64_t bytes_read = 0;

        decompressed = (bd.decompress(bytes_read) == 0);
        if (!decompressed) {
            Logger::log(LogLevel::Error, "Kanzi decompression returned non-zero status", get_name());
        }
    } catch (const std::exception& e) {
        Logger::log(LogLevel::Error, std::string("Kanzi exception: ") + e.what(), get_name());
    }

    if (!decompressed) {
        // Returning nullopt discards `content`, the only reference to the
        // temporary directory, so it must be removed here or it is leaked.
        cleanup_temp_dir(content.temp_dir, get_name());
        return std::nullopt;
    }

    // expose the uncompressed stream to the processing pipeline
    content.extracted_files.push_back(raw_bin);
    return content;
}
55+
56+
/**
 * @brief Recompresses the processed raw stream into a new ".knz" file.
 *
 * The first entry of @p content.extracted_files is compressed into a uniquely
 * named file in the system temporary directory. On success the extraction
 * temporary directory is cleaned up and the path of the new file is returned.
 *
 * @param content Extraction descriptor produced by prepare_extraction().
 * @return Path of the newly written Kanzi file.
 * @throws std::runtime_error if @p content has no extracted files or kanzi
 *         reports a non-zero status.
 * @throws std::exception any exception raised by kanzi is logged and rethrown
 *         unchanged.
 *
 * On every failure after the output path is chosen, the partially written
 * output file is removed on a best-effort basis.
 */
fs::path KanziProcessor::finalize_extraction(const ExtractedContent& content, const ProcessingOptions& /*options*/) {
    if (content.extracted_files.empty()) {
        Logger::log(LogLevel::Error, "No raw stream found for kanzi repacking", get_name());
        throw std::runtime_error("KanziProcessor: empty extracted files");
    }

    Logger::log(LogLevel::Debug, "Repacking kanzi", get_name());

    const fs::path& processed_bin = content.extracted_files.front();
    fs::path out_knz = fs::temp_directory_path() /
        (content.original_path.stem().string() + "_tmp" + RandomUtils::random_suffix() + ".knz");

    // Best-effort removal of a partial output file. The error_code overload
    // never throws, so it cannot replace the failure that is being reported.
    const auto discard_output = [&out_knz]() noexcept {
        std::error_code ignored;
        fs::remove(out_knz, ignored);
    };

    int status = 0;
    try {
        kanzi::Context ctx;
        ctx.putString("inputName", processed_bin.string());
        ctx.putString("outputName", out_knz.string());

        // force extreme compression (level 9 maps to EXE+RLT+TEXT+UTF+DNA&TPAQX)
        ctx.putInt("level", 9);
        ctx.putInt("autoBlock", 1);
        ctx.putInt("jobs", 1);
        ctx.putInt("overwrite", 1);
        ctx.putInt("checksum", 64); // enforce integrity

        kanzi::BlockCompressor bc(ctx);
        uint64_t written = 0;

        status = bc.compress(written);
    } catch (const std::exception& e) {
        Logger::log(LogLevel::Error, std::string("Kanzi exception: ") + e.what(), get_name());
        discard_output();
        throw;
    }

    // Checked outside the try block so this error is logged exactly once and
    // is not mistaken for an exception raised by kanzi itself.
    if (status != 0) {
        Logger::log(LogLevel::Error, "Kanzi compression returned non-zero status", get_name());
        discard_output();
        throw std::runtime_error("KanziProcessor: compression failed");
    }

    cleanup_temp_dir(content.temp_dir, get_name());
    return out_knz;
}
97+
98+
/**
 * @brief Raw-content checksum hook; not implemented for Kanzi.
 *
 * The path argument is ignored.
 *
 * @return Always an empty string.
 *         NOTE(review): presumably callers treat an empty string as
 *         "no checksum available" -- confirm against the pipeline.
 */
std::string KanziProcessor::get_raw_checksum(const fs::path& /*file_path*/) const {
    return std::string{};
}
101+
102+
} // namespace chisel

libchisel/src/utils/processor_registry.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#include "rdb_processor.hpp"
4848
#include "icns_processor.hpp"
4949
#include "woff_processor.hpp"
50+
#include "kanzi_processor.hpp"
5051
#include <algorithm>
5152
#include <cctype>
5253

@@ -97,6 +98,7 @@ ProcessorRegistry::ProcessorRegistry() {
9798
processors_.push_back(std::make_unique<RdbProcessor>());
9899
processors_.push_back(std::make_unique<IcnsProcessor>());
99100
processors_.push_back(std::make_unique<WoffProcessor>());
101+
processors_.push_back(std::make_unique<KanziProcessor>());
100102
}
101103

102104
std::vector<IProcessor*> ProcessorRegistry::find_by_mime(const std::string& mime) const {

third_party/kanzi-cpp

Submodule kanzi-cpp added at 7665342

0 commit comments

Comments
 (0)