|
1 | 1 | #pragma once |
2 | 2 |
|
| 3 | +#include <cstddef> |
3 | 4 | #include <cstdint> |
| 5 | +#include <cstring> |
4 | 6 | #include <string> |
5 | 7 | #include <string_view> |
6 | | -#include <vector> |
7 | | -#include <zlib.h> |
| 8 | + |
| 9 | +#include <cfbox/checksum.hpp> |
| 10 | +#include <cfbox/deflate.hpp> |
| 11 | +#include <cfbox/inflate.hpp> |
8 | 12 |
|
9 | 13 | namespace cfbox::compress { |
10 | 14 |
|
/// Append `val` to `out` as four bytes, least-significant byte first.
///
/// @param val  32-bit value to serialize.
/// @param out  String that receives the four bytes at its end.
inline auto write_le32(std::uint32_t val, std::string& out) -> void {
    // Walk the value one octet at a time, starting at bit 0.
    for (unsigned shift = 0; shift < 32; shift += 8) {
        out.push_back(static_cast<char>((val >> shift) & 0xFF));
    }
}
| 22 | + |
/// Decode four bytes starting at `p` as a little-endian 32-bit value.
///
/// @param p  Pointer to the first of the four bytes that are read (p[0]..p[3]).
/// @return   p[0] in bits 0-7, p[1] in bits 8-15, p[2] in bits 16-23,
///           p[3] in bits 24-31.
inline auto read_le32(const std::uint8_t* p) -> std::uint32_t {
    std::uint32_t value = 0;
    // Fold from the most-significant byte down so each step shifts the
    // accumulator left by one octet.
    for (int idx = 3; idx >= 0; --idx) {
        value = (value << 8) | static_cast<std::uint32_t>(p[idx]);
    }
    return value;
}
| 30 | + |
| 31 | +// Gzip compress: RFC 1952 header + deflate + CRC32 + size trailer |
11 | 32 | inline auto gzip_compress(std::string_view data) -> std::string { |
12 | | - z_stream strm{}; |
13 | | - deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, 15 + 16, 8, Z_DEFAULT_STRATEGY); |
| 33 | + std::string out; |
14 | 34 |
|
15 | | - std::string output; |
16 | | - output.resize(data.size() + data.size() / 10 + 256); |
| 35 | + // Gzip header (10 bytes) |
| 36 | + out += static_cast<char>(0x1F); // ID1 |
| 37 | + out += static_cast<char>(0x8B); // ID2 |
| 38 | + out += static_cast<char>(8); // CM = deflate |
| 39 | + out += static_cast<char>(0); // FLG |
| 40 | + out += static_cast<char>(0); // MTIME (4 bytes) |
| 41 | + out += static_cast<char>(0); |
| 42 | + out += static_cast<char>(0); |
| 43 | + out += static_cast<char>(0); |
| 44 | + out += static_cast<char>(0); // XFL |
| 45 | + out += static_cast<char>(255); // OS = unknown |
17 | 46 |
|
18 | | - strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(data.data())); |
19 | | - strm.avail_in = static_cast<uInt>(data.size()); |
20 | | - strm.next_out = reinterpret_cast<Bytef*>(output.data()); |
21 | | - strm.avail_out = static_cast<uInt>(output.size()); |
| 47 | + // Deflate compressed data |
| 48 | + auto compressed = deflate::deflate_compress( |
| 49 | + reinterpret_cast<const std::uint8_t*>(data.data()), data.size()); |
| 50 | + out.append(reinterpret_cast<const char*>(compressed.data()), |
| 51 | + static_cast<std::size_t>(compressed.size())); |
22 | 52 |
|
23 | | - deflate(&strm, Z_FINISH); |
24 | | - output.resize(strm.total_out); |
25 | | - deflateEnd(&strm); |
26 | | - return output; |
| 53 | + // Trailer: CRC32 + ISIZE |
| 54 | + auto crc = checksum::crc32(data); |
| 55 | + write_le32(crc, out); |
| 56 | + write_le32(static_cast<std::uint32_t>(data.size() & 0xFFFFFFFF), out); |
| 57 | + |
| 58 | + return out; |
27 | 59 | } |
28 | 60 |
|
| 61 | +// Gzip decompress: parse RFC 1952 header + inflate + verify CRC32 |
29 | 62 | inline auto gzip_decompress(std::string_view data) -> std::string { |
30 | | - z_stream strm{}; |
31 | | - inflateInit2(&strm, 15 + 16); |
32 | | - |
33 | | - std::string output; |
34 | | - output.resize(data.size() * 4 + 4096); |
35 | | - |
36 | | - strm.next_in = const_cast<Bytef*>(reinterpret_cast<const Bytef*>(data.data())); |
37 | | - strm.avail_in = static_cast<uInt>(data.size()); |
38 | | - |
39 | | - int ret; |
40 | | - do { |
41 | | - if (output.size() - strm.total_out < 4096) { |
42 | | - output.resize(output.size() * 2); |
43 | | - } |
44 | | - strm.next_out = reinterpret_cast<Bytef*>(output.data() + strm.total_out); |
45 | | - strm.avail_out = static_cast<uInt>(output.size() - strm.total_out); |
46 | | - ret = inflate(&strm, Z_NO_FLUSH); |
47 | | - } while (ret == Z_OK); |
48 | | - |
49 | | - output.resize(strm.total_out); |
50 | | - inflateEnd(&strm); |
51 | | - return output; |
| 63 | + if (data.size() < 18) return {}; |
| 64 | + auto* p = reinterpret_cast<const std::uint8_t*>(data.data()); |
| 65 | + |
| 66 | + // Check gzip magic |
| 67 | + if (p[0] != 0x1F || p[1] != 0x8B || p[2] != 8) return {}; |
| 68 | + |
| 69 | + std::uint8_t flg = p[3]; |
| 70 | + std::size_t offset = 10; |
| 71 | + |
| 72 | + // Skip optional fields based on FLG |
| 73 | + if (flg & 0x04) { // FEXTRA |
| 74 | + auto xlen = static_cast<std::size_t>(p[offset]) | |
| 75 | + (static_cast<std::size_t>(p[offset + 1]) << 8); |
| 76 | + offset += 2 + xlen; |
| 77 | + } |
| 78 | + if (flg & 0x08) { // FNAME |
| 79 | + while (offset < data.size() && p[offset] != 0) ++offset; |
| 80 | + ++offset; // skip null terminator |
| 81 | + } |
| 82 | + if (flg & 0x10) { // FCOMMENT |
| 83 | + while (offset < data.size() && p[offset] != 0) ++offset; |
| 84 | + ++offset; |
| 85 | + } |
| 86 | + if (flg & 0x02) { // FHCRC |
| 87 | + offset += 2; |
| 88 | + } |
| 89 | + |
| 90 | + if (offset + 8 > data.size()) return {}; |
| 91 | + |
| 92 | + // Compressed data is between offset and (end - 8) |
| 93 | + std::size_t compressed_size = data.size() - offset - 8; |
| 94 | + |
| 95 | + // Read trailer |
| 96 | + auto* trailer = p + data.size() - 8; |
| 97 | + auto expected_crc = read_le32(trailer); |
| 98 | + auto expected_size = read_le32(trailer + 4); |
| 99 | + |
| 100 | + // Inflate |
| 101 | + auto result = deflate::inflate(p + offset, compressed_size, expected_size); |
| 102 | + |
| 103 | + // Verify |
| 104 | + auto actual_crc = checksum::crc32(result); |
| 105 | + if (actual_crc != expected_crc) return {}; |
| 106 | + if ((result.size() & 0xFFFFFFFF) != expected_size) return {}; |
| 107 | + |
| 108 | + return result; |
| 109 | +} |
| 110 | + |
| 111 | +// Raw deflate decompression (for unzip method 8) |
| 112 | +inline auto raw_inflate(std::string_view compressed, std::size_t expected_size) -> std::string { |
| 113 | + return deflate::inflate( |
| 114 | + reinterpret_cast<const std::uint8_t*>(compressed.data()), |
| 115 | + compressed.size(), expected_size); |
52 | 116 | } |
53 | 117 |
|
54 | 118 | } // namespace cfbox::compress |
0 commit comments