Skip to content

Commit 083cc5f

Browse files
authored
[#124] feat(Vfs): support compressed VDF Union files
* feat(Vfs): support compressed VDF Union files * feat(Vfs): support for compressed VDF files depends on ZK_ENABLE_ZIPPED_VDF option * fix(Vfs): small fix in zipped stream and one more test * fix(Vfs): indentation style fix * fixup! feat(Vfs): support compressed VDF Union files
1 parent 475bd14 commit 083cc5f

8 files changed

Lines changed: 485 additions & 19 deletions

File tree

CMakeLists.txt

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ option(ZK_ENABLE_ASAN "ZenKit: Enable sanitizers in debug builds." ON)
1111
option(ZK_ENABLE_DEPRECATION "ZenKit: Enable deprecation warnings." ON)
1212
option(ZK_ENABLE_INSTALL "ZenKit: Enable CMake install target creation." ON)
1313
option(ZK_ENABLE_MMAP "ZenKit: Build ZenKit with memory-mapping support." ON)
14+
option(ZK_ENABLE_ZIPPED_VDF "ZenKit: Build with support for reading and writing compressed VDF files (Union ZippedStream format)." OFF)
1415
option(ZK_ENABLE_FUTURE "ZenKit: Enable breaking changes to be release in a future version" OFF)
1516

1617
add_subdirectory(vendor)
@@ -137,7 +138,14 @@ target_include_directories(zenkit PUBLIC include)
137138
target_compile_definitions(zenkit PRIVATE _ZKEXPORT=1 ZKNO_REM=1)
138139
target_compile_options(zenkit PRIVATE ${_ZK_COMPILE_FLAGS})
139140
target_link_options(zenkit PUBLIC ${_ZK_LINK_FLAGS})
140-
target_link_libraries(zenkit PUBLIC squish)
141+
if (ZK_ENABLE_ZIPPED_VDF)
142+
message(STATUS "ZenKit: Building with zipped VDF support")
143+
target_compile_definitions(zenkit PUBLIC _ZK_WITH_ZIPPED_VDF=1)
144+
target_link_libraries(zenkit PUBLIC miniz squish)
145+
else ()
146+
message(STATUS "ZenKit: Building WITHOUT zipped VDF support")
147+
target_link_libraries(zenkit PUBLIC squish)
148+
endif ()
141149
set_target_properties(zenkit PROPERTIES DEBUG_POSTFIX "d" VERSION ${PROJECT_VERSION})
142150

143151
if (ZK_ENABLE_INSTALL)

include/zenkit/Stream.hh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ namespace zenkit {
9999
virtual void seek(ssize_t off, Whence whence) noexcept = 0;
100100
[[nodiscard]] virtual size_t tell() const noexcept = 0;
101101
[[nodiscard]] virtual bool eof() const noexcept = 0;
102+
#ifdef _ZK_WITH_ZIPPED_VDF
103+
[[nodiscard]] static std::unique_ptr<Read> from_zipped(std::unique_ptr<Read> stream);
104+
#endif
102105

103106
[[nodiscard]] static std::unique_ptr<Read> from(FILE* stream);
104107
[[nodiscard]] static std::unique_ptr<Read> from(std::istream* stream);

include/zenkit/Vfs.hh

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ namespace zenkit {
3939
struct VfsFileDescriptor {
4040
std::byte const* memory;
4141
std::size_t size;
42+
std::size_t raw_size; ///< The catalog entry size (uncompressed size for zipped files).
43+
bool zipped; ///< Whether the file data is stored as a Union ZippedStream.
4244

43-
VfsFileDescriptor(std::byte const* mem, size_t len, bool del);
45+
VfsFileDescriptor(std::byte const* mem, size_t len, bool del, bool zipped = false, size_t raw_size = 0);
4446
VfsFileDescriptor(VfsFileDescriptor const& cpy);
4547
~VfsFileDescriptor() noexcept;
4648

@@ -186,10 +188,27 @@ namespace zenkit {
186188
/// \return The node with the given name or `nullptr` if no node with the given name was found.
187189
[[nodiscard]] ZKAPI VfsNode* find(std::string_view name) noexcept;
188190

191+
/// \brief Save the Vfs contents as an uncompressed VDF archive.
192+
/// \param w The output stream to write the VDF archive to.
193+
/// \param version The game version determining the VDF signature format.
194+
/// \param unix_t The timestamp to store in the VDF header. If 0, the current time is used.
189195
ZKAPI void save(Write* w, GameVersion version, time_t unix_t = 0) const;
190196

197+
/// \brief Save the Vfs contents as a compressed VDF archive (Union ZippedStream format).
198+
///
199+
/// File data is written as ZippedStream blocks (volume flag 0xA0).
200+
/// Audio files (.WAV, .OGG) are always stored uncompressed, following Union's convention.
201+
///
202+
/// \param w The output stream to write the VDF archive to.
203+
/// \param version The game version determining the VDF signature format.
204+
/// \param unix_t The timestamp to store in the VDF header. If 0, the current time is used.
205+
#ifdef _ZK_WITH_ZIPPED_VDF
206+
ZKAPI void save_compressed(Write* w, GameVersion version, time_t unix_t = 0) const;
207+
#endif
208+
191209
private:
192210
ZKINT void mount_disk(std::byte const* buf, std::size_t size, VfsOverwriteBehavior overwrite);
211+
ZKINT void save_internal(Write* w, GameVersion version, time_t unix_t, bool compressed) const;
193212

194213
VfsNode _m_root;
195214
std::vector<std::unique_ptr<std::byte[]>> _m_data;

src/Stream.cc

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
#include <algorithm>
99
#include <cstring>
1010
#include <fstream>
11+
#ifdef _ZK_WITH_ZIPPED_VDF
12+
#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
13+
#include <miniz.h>
14+
#endif
1115

1216
namespace zenkit {
1317
template <typename T>
@@ -477,4 +481,190 @@ namespace zenkit {
477481
std::unique_ptr<Write> Write::to(std::vector<std::byte>* vector) {
478482
return std::make_unique<detail::WriteDynamic>(vector);
479483
}
484+
// -----------------------------------------------------------------------------------------------------------------
485+
486+
#ifdef _ZK_WITH_ZIPPED_VDF
487+
namespace detail {
488+
/// Reads file data stored as a Union ZippedStream.
489+
///
490+
/// In a zipped VDF (VolumeHeader.Flags == 0xA0), the catalog/file table remains
491+
/// uncompressed, but each file's data at its catalog offset is stored as a
492+
/// ZippedStream — a block-compressed format used by Union's VDFS library.
493+
///
494+
/// ZippedStream layout (at the file's offset):
495+
/// Stream header: Length (4) | BlockSize (4) | BlocksCount (4)
496+
/// Per block (interleaved):
497+
/// Block header: LengthSource (4) | LengthCompressed (4) | BlockSize (4)
498+
/// Block data: [LengthCompressed bytes of zlib-compressed data]
499+
///
500+
/// Each block is independently zlib-compressed and can be decompressed on demand.
501+
class ReadZipped final : public Read {
502+
public:
503+
explicit ReadZipped(std::unique_ptr<Read> r) : _m_stream(std::move(r)) {
504+
_m_stream->seek(0, Whence::BEG);
505+
}
506+
507+
~ReadZipped() override = default;
508+
509+
size_t read(void* buf, size_t len) noexcept override {
510+
// Implementation of reading logic using blocks
511+
uint8_t* out = static_cast<uint8_t*>(buf);
512+
size_t total_read = 0;
513+
514+
while (len > 0) {
515+
if (_m_current_block >= _m_header.blocks_count) break;
516+
517+
// Ensure current block is cached/decompressed
518+
if (!_m_cache_valid || _m_cache_idx != _m_current_block) {
519+
if (!cache_block(_m_current_block)) {
520+
ZKLOGE("ReadZipped", "Failed to decompress block %u", _m_current_block);
521+
return total_read;
522+
}
523+
}
524+
525+
size_t offset_in_block = _m_position - (_m_current_block * _m_header.block_size);
526+
size_t available = _m_blocks[_m_current_block].len_src - offset_in_block;
527+
size_t to_copy = std::min(len, available);
528+
529+
memcpy(out, _m_cache.data() + offset_in_block, to_copy);
530+
531+
out += to_copy;
532+
len -= to_copy;
533+
total_read += to_copy;
534+
_m_position += to_copy;
535+
536+
if (to_copy == available) {
537+
_m_current_block++;
538+
}
539+
}
540+
return total_read;
541+
}
542+
543+
void seek(ssize_t off, Whence whence) noexcept override {
544+
// Update _m_position and _m_current_block
545+
ssize_t new_pos = 0;
546+
if (whence == Whence::BEG)
547+
new_pos = off;
548+
else if (whence == Whence::CUR)
549+
new_pos = static_cast<ssize_t>(_m_position) + off;
550+
else if (whence == Whence::END)
551+
new_pos = static_cast<ssize_t>(_m_header.length_uncompressed) + off;
552+
553+
// Clamp to [0, length_uncompressed]
554+
if (new_pos < 0) new_pos = 0;
555+
if (static_cast<size_t>(new_pos) > _m_header.length_uncompressed)
556+
new_pos = static_cast<ssize_t>(_m_header.length_uncompressed);
557+
558+
_m_position = static_cast<size_t>(new_pos);
559+
if (_m_header.block_size > 0) _m_current_block = _m_position / _m_header.block_size;
560+
}
561+
562+
[[nodiscard]] size_t tell() const noexcept override {
563+
return _m_position;
564+
}
565+
566+
[[nodiscard]] bool eof() const noexcept override {
567+
return _m_position >= _m_header.length_uncompressed;
568+
}
569+
570+
/// Reads the ZippedStream header and block table from the underlying stream.
571+
/// Block headers and data are interleaved: each block header (12 bytes) is
572+
/// immediately followed by its compressed payload (len_cmp bytes).
573+
bool init() {
574+
try {
575+
_m_header.length_uncompressed = _m_stream->read_uint();
576+
_m_header.block_size = _m_stream->read_uint();
577+
_m_header.blocks_count = _m_stream->read_uint();
578+
579+
// Validate: a valid ZippedStream must have at least one block
580+
// and a non-zero block size.
581+
if (_m_header.blocks_count == 0 || _m_header.block_size == 0 ||
582+
_m_header.length_uncompressed == 0) {
583+
ZKLOGE("ReadZipped", "Invalid ZippedStream header: length=%u, block_size=%u, blocks_count=%u",
584+
_m_header.length_uncompressed, _m_header.block_size, _m_header.blocks_count);
585+
return false;
586+
}
587+
588+
// Validate: blocks_count must be consistent with the header.
589+
// In a valid ZippedStream, blocks_count == ceil(length / block_size).
590+
// Reject if it doesn't match — the data is not a ZippedStream.
591+
uint32_t expected_blocks =
592+
(_m_header.length_uncompressed + _m_header.block_size - 1) / _m_header.block_size;
593+
if (_m_header.blocks_count != expected_blocks) {
594+
ZKLOGE("ReadZipped", "Block count mismatch: expected %u, got %u", expected_blocks,
595+
_m_header.blocks_count);
596+
return false;
597+
}
598+
599+
_m_blocks.resize(_m_header.blocks_count);
600+
601+
// Scan through the interleaved block headers to record each
602+
// block's compressed data offset, then skip past its data.
603+
for (auto& blk : _m_blocks) {
604+
blk.len_src = _m_stream->read_uint();
605+
blk.len_cmp = _m_stream->read_uint();
606+
blk.size_blk = _m_stream->read_uint();
607+
blk.offset = _m_stream->tell(); // compressed data starts here
608+
_m_stream->seek(static_cast<ssize_t>(blk.len_cmp), Whence::CUR);
609+
}
610+
611+
return true;
612+
} catch (...) {
613+
return false;
614+
}
615+
}
616+
617+
private:
618+
std::unique_ptr<Read> _m_stream;
619+
620+
struct StreamHeader {
621+
uint32_t length_uncompressed;
622+
uint32_t block_size;
623+
uint32_t blocks_count;
624+
} _m_header {};
625+
626+
struct BlockInfo {
627+
uint32_t len_src;
628+
uint32_t len_cmp;
629+
uint32_t size_blk;
630+
size_t offset;
631+
};
632+
std::vector<BlockInfo> _m_blocks;
633+
634+
size_t _m_position = 0;
635+
uint32_t _m_current_block = 0;
636+
637+
// Cache
638+
std::vector<uint8_t> _m_cache;
639+
uint32_t _m_cache_idx = 0xFFFFFFFF;
640+
bool _m_cache_valid = false;
641+
642+
bool cache_block(uint32_t idx) {
643+
if (idx >= _m_blocks.size()) return false;
644+
645+
BlockInfo& blk = _m_blocks[idx];
646+
_m_stream->seek(blk.offset, Whence::BEG);
647+
std::vector<uint8_t> cmp_data(blk.len_cmp);
648+
if (_m_stream->read(cmp_data.data(), blk.len_cmp) != blk.len_cmp) return false;
649+
650+
_m_cache.resize(blk.len_src);
651+
652+
mz_ulong out_len = static_cast<mz_ulong>(blk.len_src);
653+
int res = mz_uncompress(_m_cache.data(), &out_len, cmp_data.data(), static_cast<mz_ulong>(blk.len_cmp));
654+
655+
if (res != MZ_OK) return false;
656+
657+
_m_cache_idx = idx;
658+
_m_cache_valid = true;
659+
return true;
660+
}
661+
};
662+
} // namespace detail
663+
664+
std::unique_ptr<Read> Read::from_zipped(std::unique_ptr<Read> stream) {
665+
auto reader = std::make_unique<detail::ReadZipped>(std::move(stream));
666+
if (!reader->init()) return nullptr;
667+
return reader;
668+
}
669+
#endif // _ZK_WITH_ZIPPED_VDF
480670
} // namespace zenkit

0 commit comments

Comments
 (0)