Skip to content

Commit 5a7cc6a

Browse files
feat: replace zlib with custom lightweight deflate/inflate implementation
Remove the zlib dependency entirely. Implement deflate compression (fixed Huffman + LZ77 with hash-chain matching) and inflate decompression (fixed/dynamic/stored block support) in ~600 lines of header-only C++23.

Changes:
- New deflate.hpp: BitWriter with MSB-first Huffman encoding, LZ77 matcher
- New inflate.hpp: BitReader with accumulator-based peek/read, Huffman table builder supporting fixed and dynamic tables
- Rewrite compress.hpp: gzip format (RFC 1952) using our own deflate
- Update unzip.cpp: use raw_inflate instead of zlib
- Remove zlib from CMakeLists.txt (no more CPM fetch)
- Add 15 compression tests (round-trip, edge cases, corruption)

Verified: output is compatible with system gunzip; all 331 tests pass.
1 parent 21da138 commit 5a7cc6a

6 files changed

Lines changed: 706 additions & 66 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,7 @@ include(cmake/third_party/CPM.cmake)
1616
include(cmake/compile/CompilerFlag.cmake)
1717

1818
# ── Dependencies ──────────────────────────────────────────────
19-
CPMAddPackage(
20-
NAME zlib
21-
GITHUB_REPOSITORY madler/zlib
22-
GIT_TAG v1.3.1
23-
OPTIONS "CMAKE_POSITION_INDEPENDENT_CODE ON"
24-
)
19+
# (zlib removed — using built-in deflate/inflate implementation)
2520

2621
# ── Applet configuration ─────────────────────────────────────
2722
include(cmake/Config.cmake)
@@ -60,13 +55,6 @@ add_executable(cfbox src/main.cpp ${CFBOX_APPLET_SOURCES})
6055
target_include_directories(cfbox PUBLIC include)
6156
target_include_directories(cfbox PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/include)
6257
target_link_libraries(cfbox PRIVATE cfbox_compiler_flags)
63-
if(zlib_ADDED)
64-
target_link_libraries(cfbox PRIVATE zlibstatic)
65-
target_include_directories(cfbox SYSTEM PRIVATE ${zlib_SOURCE_DIR} ${zlib_BINARY_DIR})
66-
else()
67-
find_package(ZLIB REQUIRED)
68-
target_link_libraries(cfbox PRIVATE ZLIB::ZLIB)
69-
endif()
7058

7159
# ── GTest via CPM (FetchContent) ──────────────────────────────
7260
if(NOT CMAKE_CROSSCOMPILING)
@@ -89,12 +77,6 @@ if(GTest_ADDED)
8977
cfbox_compiler_flags
9078
GTest::gtest_main
9179
)
92-
if(zlib_ADDED)
93-
target_link_libraries(cfbox_tests PRIVATE zlibstatic)
94-
target_include_directories(cfbox_tests SYSTEM PRIVATE ${zlib_SOURCE_DIR} ${zlib_BINARY_DIR})
95-
else()
96-
target_link_libraries(cfbox_tests PRIVATE ZLIB::ZLIB)
97-
endif()
9880

9981
include(GoogleTest)
10082
gtest_discover_tests(cfbox_tests)

include/cfbox/compress.hpp

Lines changed: 100 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,118 @@
11
#pragma once
22

3+
#include <cstddef>
34
#include <cstdint>
5+
#include <cstring>
46
#include <string>
57
#include <string_view>
6-
#include <vector>
7-
#include <zlib.h>
8+
9+
#include <cfbox/checksum.hpp>
10+
#include <cfbox/deflate.hpp>
11+
#include <cfbox/inflate.hpp>
812

913
namespace cfbox::compress {
1014

15+
// Append `val` to `out` as four bytes, least-significant byte first.
inline auto write_le32(std::uint32_t val, std::string& out) -> void {
    for (unsigned shift = 0; shift < 32; shift += 8) {
        out.push_back(static_cast<char>((val >> shift) & 0xFFu));
    }
}
22+
23+
// Decode the four bytes starting at `p` as a little-endian 32-bit unsigned
// value. The caller must guarantee that p[0..3] are readable.
inline auto read_le32(const std::uint8_t* p) -> std::uint32_t {
    std::uint32_t value = 0;
    // Fold from the most-significant byte (p[3]) down to p[0].
    for (int i = 3; i >= 0; --i) {
        value = (value << 8) | static_cast<std::uint32_t>(p[i]);
    }
    return value;
}
30+
31+
// Gzip compress: RFC 1952 header + deflate + CRC32 + size trailer
1132
inline auto gzip_compress(std::string_view data) -> std::string {
12-
z_stream strm{};
13-
deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY);
33+
std::string out;
1434

15-
std::string output;
16-
output.resize(data.size() + data.size() / 10 + 256);
35+
// Gzip header (10 bytes)
36+
out += static_cast<char>(0x1F); // ID1
37+
out += static_cast<char>(0x8B); // ID2
38+
out += static_cast<char>(8); // CM = deflate
39+
out += static_cast<char>(0); // FLG
40+
out += static_cast<char>(0); // MTIME (4 bytes)
41+
out += static_cast<char>(0);
42+
out += static_cast<char>(0);
43+
out += static_cast<char>(0);
44+
out += static_cast<char>(0); // XFL
45+
out += static_cast<char>(255); // OS = unknown
1746

18-
strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(data.data()));
19-
strm.avail_in = static_cast<uInt>(data.size());
20-
strm.next_out = reinterpret_cast<Bytef*>(output.data());
21-
strm.avail_out = static_cast<uInt>(output.size());
47+
// Deflate compressed data
48+
auto compressed = deflate::deflate_compress(
49+
reinterpret_cast<const std::uint8_t*>(data.data()), data.size());
50+
out.append(reinterpret_cast<const char*>(compressed.data()),
51+
static_cast<std::size_t>(compressed.size()));
2252

23-
deflate(&strm, Z_FINISH);
24-
output.resize(strm.total_out);
25-
deflateEnd(&strm);
26-
return output;
53+
// Trailer: CRC32 + ISIZE
54+
auto crc = checksum::crc32(data);
55+
write_le32(crc, out);
56+
write_le32(static_cast<std::uint32_t>(data.size() & 0xFFFFFFFF), out);
57+
58+
return out;
2759
}
2860

61+
// Gzip decompress: parse RFC 1952 header + inflate + verify CRC32
2962
inline auto gzip_decompress(std::string_view data) -> std::string {
30-
z_stream strm{};
31-
inflateInit2(&strm, 15 + 16);
32-
33-
std::string output;
34-
output.resize(data.size() * 4 + 4096);
35-
36-
strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(data.data()));
37-
strm.avail_in = static_cast<uInt>(data.size());
38-
39-
int ret;
40-
do {
41-
if (output.size() - strm.total_out < 4096) {
42-
output.resize(output.size() * 2);
43-
}
44-
strm.next_out = reinterpret_cast<Bytef*>(output.data() + strm.total_out);
45-
strm.avail_out = static_cast<uInt>(output.size() - strm.total_out);
46-
ret = inflate(&strm, Z_NO_FLUSH);
47-
} while (ret == Z_OK);
48-
49-
output.resize(strm.total_out);
50-
inflateEnd(&strm);
51-
return output;
63+
if (data.size() < 18) return {};
64+
auto* p = reinterpret_cast<const std::uint8_t*>(data.data());
65+
66+
// Check gzip magic
67+
if (p[0] != 0x1F || p[1] != 0x8B || p[2] != 8) return {};
68+
69+
std::uint8_t flg = p[3];
70+
std::size_t offset = 10;
71+
72+
// Skip optional fields based on FLG
73+
if (flg & 0x04) { // FEXTRA
74+
auto xlen = static_cast<std::size_t>(p[offset]) |
75+
(static_cast<std::size_t>(p[offset + 1]) << 8);
76+
offset += 2 + xlen;
77+
}
78+
if (flg & 0x08) { // FNAME
79+
while (offset < data.size() && p[offset] != 0) ++offset;
80+
++offset; // skip null terminator
81+
}
82+
if (flg & 0x10) { // FCOMMENT
83+
while (offset < data.size() && p[offset] != 0) ++offset;
84+
++offset;
85+
}
86+
if (flg & 0x02) { // FHCRC
87+
offset += 2;
88+
}
89+
90+
if (offset + 8 > data.size()) return {};
91+
92+
// Compressed data is between offset and (end - 8)
93+
std::size_t compressed_size = data.size() - offset - 8;
94+
95+
// Read trailer
96+
auto* trailer = p + data.size() - 8;
97+
auto expected_crc = read_le32(trailer);
98+
auto expected_size = read_le32(trailer + 4);
99+
100+
// Inflate
101+
auto result = deflate::inflate(p + offset, compressed_size, expected_size);
102+
103+
// Verify
104+
auto actual_crc = checksum::crc32(result);
105+
if (actual_crc != expected_crc) return {};
106+
if ((result.size() & 0xFFFFFFFF) != expected_size) return {};
107+
108+
return result;
109+
}
110+
111+
// Raw deflate decompression (for unzip method 8)
112+
inline auto raw_inflate(std::string_view compressed, std::size_t expected_size) -> std::string {
113+
return deflate::inflate(
114+
reinterpret_cast<const std::uint8_t*>(compressed.data()),
115+
compressed.size(), expected_size);
52116
}
53117

54118
} // namespace cfbox::compress

0 commit comments

Comments
 (0)