From 08a756f86123c261cfb48313ee2909dd1efb4ce4 Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Mon, 18 May 2026 08:37:17 -0400 Subject: [PATCH 1/5] compress.zlib: changed to pure V RFC-compliant code --- vlib/compress/deflate/deflate.v | 25 +-- vlib/compress/deflate/deflate_inflate.v | 3 + vlib/compress/deflate/deflate_test.v | 8 + vlib/compress/zlib/README.md | 12 +- vlib/compress/zlib/interop/README.md | 16 ++ vlib/compress/zlib/interop/zlib_interop.vsh | 185 ++++++++++++++++++ vlib/compress/zlib/interop/zlib_ref.c | 150 ++++++++++++++ vlib/compress/zlib/interop/zlib_ref.py | 19 ++ vlib/compress/zlib/zlib.v | 14 +- .../zlib/zlib_miniz_compat_names_collision.h | 13 -- ...zlib_miniz_compat_names_collision_test.c.v | 7 - vlib/compress/zlib/zlib_test.v | 67 ++++++- 12 files changed, 465 insertions(+), 54 deletions(-) create mode 100644 vlib/compress/zlib/interop/README.md create mode 100644 vlib/compress/zlib/interop/zlib_interop.vsh create mode 100644 vlib/compress/zlib/interop/zlib_ref.c create mode 100644 vlib/compress/zlib/interop/zlib_ref.py delete mode 100644 vlib/compress/zlib/zlib_miniz_compat_names_collision.h delete mode 100644 vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v diff --git a/vlib/compress/deflate/deflate.v b/vlib/compress/deflate/deflate.v index 4e4900f947f6ef..9622a9c1325f6c 100644 --- a/vlib/compress/deflate/deflate.v +++ b/vlib/compress/deflate/deflate.v @@ -1,6 +1,7 @@ module deflate import encoding.binary +import hash.adler32 import hash.crc32 // CompressFormat selects the output container around the RFC 1951 payload. @@ -26,9 +27,10 @@ pub fn compress(data []u8, format CompressParams) ![]u8 { } } +// compress_zlib compresses data into a zlib stream (RFC 1950). 
pub fn compress_zlib(data []u8) ![]u8 { payload := deflate_compress_fixed(data) - cksum := adler32(data) + cksum := adler32.sum(data) mut out := []u8{cap: 2 + payload.len + 4} out << u8(0x78) // CMF: CM=8 deflate, CINFO=7 (32K window) out << u8(0x9c) // FLG: default compression, FCHECK satisfies (CMF*256+FLG)%31==0 @@ -71,7 +73,9 @@ pub fn decompress(data []u8) ![]u8 { return inflate(data) } -fn decompress_zlib(data []u8) ![]u8 { +// decompress_zlib decompresses a zlib stream (RFC 1950) and returns the +// decompressed bytes in a new array. +pub fn decompress_zlib(data []u8) ![]u8 { if data.len < 6 { return error('invalid zlib stream: too short') } @@ -87,13 +91,15 @@ fn decompress_zlib(data []u8) ![]u8 { payload := data[2..data.len - 4] expected := binary.big_endian_u32_at(data, data.len - 4) decoded := inflate(payload)! - if adler32(decoded) != expected { + if adler32.sum(decoded) != expected { return error('invalid zlib stream: adler32 mismatch') } return decoded } -fn decompress_gzip(data []u8) ![]u8 { +// decompress_gzip decompresses a gzip stream (RFC 1952) and returns the +// decompressed bytes in a new array. 
+pub fn decompress_gzip(data []u8) ![]u8 { if data.len < 18 { return error('invalid gzip stream: too short') } @@ -143,17 +149,6 @@ fn decompress_gzip(data []u8) ![]u8 { return decoded } -fn adler32(data []u8) u32 { - mod_adler := u32(65521) - mut a := u32(1) - mut b := u32(0) - for byte_ in data { - a = (a + u32(byte_)) % mod_adler - b = (b + a) % mod_adler - } - return (b << 16) | a -} - fn bit_reverse(v u32, n int) u32 { mut r := u32(0) mut val := v diff --git a/vlib/compress/deflate/deflate_inflate.v b/vlib/compress/deflate/deflate_inflate.v index 9d159d9d89e856..6abf4f33fe7324 100644 --- a/vlib/compress/deflate/deflate_inflate.v +++ b/vlib/compress/deflate/deflate_inflate.v @@ -144,6 +144,9 @@ fn (mut r BitReader) huff_decode(t HuffTree) !u32 { return error('inflate: invalid Huffman code') } len_ := int(entry & 0x1f) + if len_ > r.nbits { + return error('inflate: unexpected end of stream') + } sym := entry >> 5 r.bits >>= u32(len_) r.nbits -= len_ diff --git a/vlib/compress/deflate/deflate_test.v b/vlib/compress/deflate/deflate_test.v index 06bcbedafec148..0b69332b98025d 100644 --- a/vlib/compress/deflate/deflate_test.v +++ b/vlib/compress/deflate/deflate_test.v @@ -61,3 +61,11 @@ fn test_corrupt_checksum_fails() { } assert false } + +fn test_truncated_zlib_payload_fails() { + decompress([u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]) or { + assert err.msg().contains('unexpected end of stream') + return + } + assert false +} diff --git a/vlib/compress/zlib/README.md b/vlib/compress/zlib/README.md index 9436ae3f4ade5b..7e6db1e045b9d7 100644 --- a/vlib/compress/zlib/README.md +++ b/vlib/compress/zlib/README.md @@ -1,7 +1,7 @@ ## Description -`compress.zlib` is a module that assists in the compression and -decompression of binary data using `zlib` compression +`compress.zlib` is a small wrapper around `compress.deflate` for working with zlib streams +(RFC 1950). 
## Examples @@ -9,9 +9,9 @@ decompression of binary data using `zlib` compression import compress.zlib fn main() { - uncompressed := 'Hello world!' - compressed := zlib.compress(uncompressed.bytes())! - decompressed := zlib.decompress(compressed)! - assert decompressed == uncompressed.bytes() + data := 'Hello world!'.bytes() + compressed := zlib.compress(data) or { panic(err) } + decompressed := zlib.decompress(compressed) or { panic(err) } + assert decompressed == data } ``` diff --git a/vlib/compress/zlib/interop/README.md b/vlib/compress/zlib/interop/README.md new file mode 100644 index 00000000000000..07e6177505abdc --- /dev/null +++ b/vlib/compress/zlib/interop/README.md @@ -0,0 +1,16 @@ +# zlib interop checker +This helper verifies interoperability between: +- V module: `compress.zlib` +- C helper linked with `libz` +- Python stdlib: `zlib` + It runs deterministic test vectors, compresses each vector with all three producers, then + cross-decompresses every produced stream with all three consumers. + A case passes only if every decompressed output is byte-identical to the original input. 
+## Run +```bash +./vnew run vlib/compress/zlib/interop/zlib_interop.vsh +``` +## Requirements +- `python3` with the stdlib `zlib` module +- a C compiler (`cc`, `gcc`, or `clang`) +- `libz` development headers and linker support (`-lz`) diff --git a/vlib/compress/zlib/interop/zlib_interop.vsh b/vlib/compress/zlib/interop/zlib_interop.vsh new file mode 100644 index 00000000000000..85ff5ed81243b1 --- /dev/null +++ b/vlib/compress/zlib/interop/zlib_interop.vsh @@ -0,0 +1,185 @@ +#!/usr/bin/env -S v + +import compress.zlib + +struct TestVector { + name string + data []u8 +} + +fn main() { + tmp_root := join_path(temp_dir(), 'v_zlib_interop_${getpid()}') + mkdir_all(tmp_root) or { panic(err) } + defer { + rmdir_all(tmp_root) or {} + } + + c_bin := compile_c_helper(tmp_root) or { + eprintln('SKIP: ${err.msg()}') + exit(2) + } + py_script := join_path(@DIR, 'zlib_ref.py') + ensure_python(py_script) or { + eprintln('SKIP: ${err.msg()}') + exit(2) + } + + vectors := make_test_vectors() + mut total_checks := 0 + for i, vec in vectors { + total_checks += run_case(tmp_root, c_bin, py_script, i, vec) or { + eprintln('FAIL: ${vec.name}: ${err.msg()}') + exit(1) + 0 + } + println('ok ${i + 1}/${vectors.len}: ${vec.name} (${vec.data.len} bytes)') + } + println('PASS: ${vectors.len} vectors, ${total_checks} cross-checks') +} + +fn compile_c_helper(tmp_root string) !string { + cc := choose_cc() + if cc == '' { + return error('no C compiler found (tried cc, gcc, clang)') + } + src := join_path(@DIR, 'zlib_ref.c') + bin := join_path(tmp_root, 'zlib_interop_ref') + must_succeed('${cc} -O2 ${shell_quote(src)} -lz -o ${shell_quote(bin)}', + 'C zlib helper build failed')! + return bin +} + +fn choose_cc() string { + for cc in ['cc', 'gcc', 'clang'] { + if execute('${cc} --version >/dev/null 2>&1').exit_code == 0 { + return cc + } + } + return '' +} + +fn ensure_python(py_script string) ! 
{ + must_succeed("python3 -c 'import zlib' >/dev/null 2>&1", + 'python3 with zlib module is not available')! + if !exists(py_script) { + return error('missing Python helper: ${py_script}') + } +} + +fn make_test_vectors() []TestVector { + mut vectors := []TestVector{} + vectors << TestVector{'empty', []u8{}} + vectors << TestVector{'ascii_text', 'The quick brown fox jumps over the lazy dog.\n'.repeat(64).bytes()} + vectors << TestVector{'repeated_byte', []u8{len: 10000, init: `A`}} + vectors << TestVector{'all_bytes_x4', all_bytes_repeated(4)} + vectors << TestVector{'lcg_64k', lcg_bytes(65536)} + return vectors +} + +fn run_case(tmp_root string, c_bin string, py_script string, case_idx int, vec TestVector) !int { + case_dir := join_path(tmp_root, 'case_${case_idx:02}_${vec.name}') + mkdir_all(case_dir)! + + v_z := zlib.compress(vec.data)! + c_z := c_compress(case_dir, c_bin, vec.data)! + py_z := py_compress(case_dir, py_script, vec.data)! + + mut checks := 0 + producers := { + 'v': v_z + 'c': c_z + 'py': py_z + } + for producer, compressed in producers { + v_plain := zlib.decompress(compressed)! + assert_equal_bytes('v.decompress(${producer}.compress)', vec.data, v_plain)! + checks++ + + c_plain := c_decompress(case_dir, c_bin, producer, compressed)! + assert_equal_bytes('c.decompress(${producer}.compress)', vec.data, c_plain)! + checks++ + + py_plain := py_decompress(case_dir, py_script, producer, compressed)! + assert_equal_bytes('python.decompress(${producer}.compress)', vec.data, py_plain)! + checks++ + } + return checks +} + +fn c_compress(case_dir string, c_bin string, plain []u8) ![]u8 { + in_path := join_path(case_dir, 'plain.in') + out_path := join_path(case_dir, 'c.zlib') + write_file_array(in_path, plain)! + must_succeed('${shell_quote(c_bin)} compress ${shell_quote(in_path)} ${shell_quote(out_path)}', + 'C zlib compression failed')! + return read_bytes(out_path)! 
+} + +fn c_decompress(case_dir string, c_bin string, producer string, compressed []u8) ![]u8 { + in_path := join_path(case_dir, '${producer}.for_c.zlib') + out_path := join_path(case_dir, '${producer}.from_c.out') + write_file_array(in_path, compressed)! + must_succeed('${shell_quote(c_bin)} decompress ${shell_quote(in_path)} ${shell_quote(out_path)}', + 'C zlib decompression failed')! + return read_bytes(out_path)! +} + +fn py_compress(case_dir string, py_script string, plain []u8) ![]u8 { + in_path := join_path(case_dir, 'plain_py.in') + out_path := join_path(case_dir, 'py.zlib') + write_file_array(in_path, plain)! + must_succeed('python3 ${shell_quote(py_script)} compress ${shell_quote(in_path)} ${shell_quote(out_path)}', + 'Python zlib compression failed')! + return read_bytes(out_path)! +} + +fn py_decompress(case_dir string, py_script string, producer string, compressed []u8) ![]u8 { + in_path := join_path(case_dir, '${producer}.for_py.zlib') + out_path := join_path(case_dir, '${producer}.from_py.out') + write_file_array(in_path, compressed)! + must_succeed('python3 ${shell_quote(py_script)} decompress ${shell_quote(in_path)} ${shell_quote(out_path)}', + 'Python zlib decompression failed')! + return read_bytes(out_path)! +} + +fn all_bytes_repeated(times int) []u8 { + mut out := []u8{cap: 256 * times} + for _ in 0 .. times { + for i in 0 .. 256 { + out << u8(i) + } + } + return out +} + +fn lcg_bytes(n int) []u8 { + mut out := []u8{len: n} + mut x := u32(0x12345678) + for i in 0 .. n { + x = x * u32(1664525) + u32(1013904223) + out[i] = u8((x >> 16) & u32(0xff)) + } + return out +} + +fn assert_equal_bytes(label string, expected []u8, got []u8) ! { + if expected.len != got.len { + return error('${label}: length mismatch expected=${expected.len} got=${got.len}') + } + for i in 0 .. expected.len { + if expected[i] != got[i] { + return error('${label}: byte mismatch at offset ${i}') + } + } +} + +fn must_succeed(command string, context string) ! 
{ + res := execute(command) + if res.exit_code != 0 { + return error('${context}\ncommand: ${command}\nexit_code: ${res.exit_code}\n${res.output}') + } +} + +fn shell_quote(s string) string { + return "'${s.replace("'", "'\\''")}'" +} diff --git a/vlib/compress/zlib/interop/zlib_ref.c b/vlib/compress/zlib/interop/zlib_ref.c new file mode 100644 index 00000000000000..4c644bd3bcdb4a --- /dev/null +++ b/vlib/compress/zlib/interop/zlib_ref.c @@ -0,0 +1,150 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <zlib.h> + +static int rf(const char* p, unsigned char** o, size_t* n) +{ + FILE* f = fopen(p, "rb"); + if (!f) return 1; + if (fseek(f, 0, SEEK_END) != 0) + { + fclose(f); + return 1; + } + long s = ftell(f); + if (s < 0) + { + fclose(f); + return 1; + } + if (fseek(f, 0, SEEK_SET) != 0) + { + fclose(f); + return 1; + } + *n = (size_t)s; + *o = *n ? (unsigned char*)malloc(*n) : NULL; + if (*n && !*o) + { + fclose(f); + return 1; + } + if (*n && fread(*o, 1, *n, f) != *n) + { + free(*o); + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +static int wf(const char* p, const unsigned char* b, size_t n) +{ + FILE* f = fopen(p, "wb"); + if (!f) return 1; + if (n && fwrite(b, 1, n, f) != n) + { + fclose(f); + return 1; + } + fclose(f); + return 0; +} + +int main(int argc, char** argv) +{ + static const unsigned char dummy = 0; + if (argc != 4) + { + fputs("usage: zlib_ref compress|decompress in out\n", stderr); + return 2; + } + unsigned char* in = NULL; + size_t in_n = 0; + if (rf(argv[2], &in, &in_n)) + { + fputs("read error\n", stderr); + return 1; + } + const unsigned char* in_ptr = in_n ? in : &dummy; + if (strcmp(argv[1], "compress") == 0) + { + uLongf out_n = compressBound((uLong)in_n); + unsigned char* out = (unsigned char*)malloc(out_n ? 
out_n : 1); + if (!out) + { + free(in); + return 1; + } + if (compress2(out, &out_n, in_ptr, (uLong)in_n, Z_DEFAULT_COMPRESSION) != Z_OK) + { + fputs("compress2 failed\n", stderr); + free(in); + free(out); + return 1; + } + if (wf(argv[3], out, (size_t)out_n)) + { + fputs("write error\n", stderr); + free(in); + free(out); + return 1; + } + free(out); + } + else if (strcmp(argv[1], "decompress") == 0) + { + uLongf out_n = in_n * 8 + 64; + if (out_n < 256) out_n = 256; + unsigned char* out = NULL; + int rc = Z_BUF_ERROR; + while (rc == Z_BUF_ERROR) + { + unsigned char* next = (unsigned char*)realloc(out, out_n); + if (!next) + { + free(in); + free(out); + return 1; + } + out = next; + uLongf cap = out_n; + rc = uncompress(out, &cap, in_ptr, (uLong)in_n); + if (rc == Z_OK) + { + out_n = cap; + break; + } + if (rc == Z_BUF_ERROR) + { + out_n *= 2; + if (out_n < 256) out_n = 256; + } + } + if (rc != Z_OK) + { + fputs("uncompress failed\n", stderr); + free(in); + free(out); + return 1; + } + if (wf(argv[3], out, (size_t)out_n)) + { + fputs("write error\n", stderr); + free(in); + free(out); + return 1; + } + free(out); + } + else + { + fputs("unknown mode\n", stderr); + free(in); + return 2; + } + free(in); + return 0; +} diff --git a/vlib/compress/zlib/interop/zlib_ref.py b/vlib/compress/zlib/interop/zlib_ref.py new file mode 100644 index 00000000000000..0235cfa54cea47 --- /dev/null +++ b/vlib/compress/zlib/interop/zlib_ref.py @@ -0,0 +1,19 @@ +import sys, zlib +def rf(path): + with open(path, "rb") as f: + return f.read() +def wf(path, data): + with open(path, "wb") as f: + f.write(data) +if len(sys.argv) != 4: + print("usage: zlib_ref.py compress|decompress in out", file=sys.stderr) + sys.exit(2) +mode, inp, outp = sys.argv[1], sys.argv[2], sys.argv[3] +data = rf(inp) +if mode == "compress": + wf(outp, zlib.compress(data)) +elif mode == "decompress": + wf(outp, zlib.decompress(data)) +else: + print("unknown mode", file=sys.stderr) + sys.exit(2) diff --git 
a/vlib/compress/zlib/zlib.v b/vlib/compress/zlib/zlib.v index dc1bada94659c3..68dd901bb2be4b 100644 --- a/vlib/compress/zlib/zlib.v +++ b/vlib/compress/zlib/zlib.v @@ -1,17 +1,13 @@ module zlib -import compress as compr +import compress.deflate -// compresses an array of bytes using zlib and returns the compressed bytes in a new array -// Example: b := 'abcdefgh'.repeat(1000).bytes(); cmpr := zlib.compress(b)!; assert cmpr.len < b.len; dc := zlib.decompress(cmpr)!; assert b == dc +// compress compresses data using the zlib container format. pub fn compress(data []u8) ![]u8 { - // flags = TDEFL_WRITE_ZLIB_HEADER (0x01000) - return compr.compress(data, 0x01000) + return deflate.compress_zlib(data) } -// decompresses an array of bytes using zlib and returns the decompressed bytes in a new array -// Example: b := 'abcdefgh'.repeat(1000).bytes(); cmpr := zlib.compress(b)!; assert cmpr.len < b.len; dc := zlib.decompress(cmpr)!; assert b == dc +// decompress decompresses zlib-compressed data. pub fn decompress(data []u8) ![]u8 { - // flags = TINFL_FLAG_PARSE_ZLIB_HEADER (0x1) - return compr.decompress(data, 0x1) + return deflate.decompress_zlib(data) } diff --git a/vlib/compress/zlib/zlib_miniz_compat_names_collision.h b/vlib/compress/zlib/zlib_miniz_compat_names_collision.h deleted file mode 100644 index 850471081fd26a..00000000000000 --- a/vlib/compress/zlib/zlib_miniz_compat_names_collision.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef ISSUE_18378_ZLIB_COMPAT_H -#define ISSUE_18378_ZLIB_COMPAT_H - -typedef void *voidpf; -typedef unsigned int uInt; - -typedef struct issue_18378_z_stream_s { - int dummy; -} z_stream; - -typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); - -#endif diff --git a/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v b/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v deleted file mode 100644 index 101ac73ae7b57c..00000000000000 --- a/vlib/compress/zlib/zlib_miniz_compat_names_collision_test.c.v +++ /dev/null 
@@ -1,7 +0,0 @@ -module zlib - -#include "@DIR/zlib_miniz_compat_names_collision.h" - -fn test_issue_18378_multiple_headers_do_not_conflict() { - assert true -} diff --git a/vlib/compress/zlib/zlib_test.v b/vlib/compress/zlib/zlib_test.v index 73338ae17c152b..af3cf67aa17255 100644 --- a/vlib/compress/zlib/zlib_test.v +++ b/vlib/compress/zlib/zlib_test.v @@ -1,8 +1,67 @@ module zlib -fn test_zlib() { - uncompressed := 'Hello world!' - compressed := compress(uncompressed.bytes())! +import encoding.hex + +fn must_decode_hex(s string) []u8 { + return hex.decode(s) or { panic(err) } +} + +fn assert_decompress_error(data []u8, reason string) ! { + decompress(data) or { + assert err.msg() == reason + return + } + return error('did not error') +} + +fn test_zlib_roundtrip_text() { + data := 'Hello world!'.bytes() + compressed := compress(data)! + decompressed := decompress(compressed)! + assert decompressed == data +} + +fn test_zlib_roundtrip_empty() { + data := []u8{} + compressed := compress(data)! + decompressed := decompress(compressed)! + assert decompressed == data +} + +fn test_zlib_roundtrip_binary() { + data := [u8(0), 1, 2, 3, 127, 128, 254, 255] + compressed := compress(data)! decompressed := decompress(compressed)! - assert decompressed == uncompressed.bytes() + assert decompressed == data +} + +fn test_zlib_roundtrip_large() { + data := 'abcdefgh'.repeat(1000).bytes() + compressed := compress(data)! + assert compressed.len < data.len + decompressed := decompress(compressed)! + assert decompressed == data +} + +fn test_zlib_decompress_known_python_vector() { + compressed := must_decode_hex('789ccb48cdc9c95728cf2fca49e102001e720467') + decompressed := decompress(compressed)! + assert decompressed == 'hello world\n'.bytes() +} + +fn test_zlib_invalid_too_short() { + assert_decompress_error([]u8{}, 'invalid zlib stream: too short')! 
+} + +fn test_zlib_invalid_header_checksum() { + assert_decompress_error([u8(0x78), 0x9d, 0x00, 0x00, 0x00, 0x01], + 'invalid zlib stream: bad header checksum')! +} + +fn test_zlib_invalid_truncated_payload() { + decompress([u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]) or { + assert err.msg().contains('unexpected end of stream') + return + } + assert false } From a41f4a83a0fac71d11e75b0be72026190a870435 Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Mon, 18 May 2026 09:21:18 -0400 Subject: [PATCH 2/5] fix for review --- vlib/compress/deflate/deflate.v | 12 ++++++++++-- vlib/compress/deflate/deflate_inflate.v | 21 ++++++++++++++++++-- vlib/compress/deflate/deflate_test.v | 26 +++++++++++++++++++++++++ vlib/compress/zlib/zlib_test.v | 9 +++++++++ 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/vlib/compress/deflate/deflate.v b/vlib/compress/deflate/deflate.v index 9622a9c1325f6c..0bd51642bc5836 100644 --- a/vlib/compress/deflate/deflate.v +++ b/vlib/compress/deflate/deflate.v @@ -90,7 +90,11 @@ pub fn decompress_zlib(data []u8) ![]u8 { } payload := data[2..data.len - 4] expected := binary.big_endian_u32_at(data, data.len - 4) - decoded := inflate(payload)! + res := inflate_with_consumed(payload)! + if res.consumed != payload.len { + return error('invalid zlib stream: trailing data before adler32') + } + decoded := res.decoded if adler32.sum(decoded) != expected { return error('invalid zlib stream: adler32 mismatch') } @@ -139,7 +143,11 @@ pub fn decompress_gzip(data []u8) ![]u8 { payload := data[pos..data.len - 8] expected_crc := binary.little_endian_u32_at(data, data.len - 8) expected_size := binary.little_endian_u32_at(data, data.len - 4) - decoded := inflate(payload)! + res := inflate_with_consumed(payload)! 
+ if res.consumed != payload.len { + return error('invalid gzip stream: trailing data before trailer') + } + decoded := res.decoded if crc32.sum(decoded) != expected_crc { return error('invalid gzip stream: crc32 mismatch') } diff --git a/vlib/compress/deflate/deflate_inflate.v b/vlib/compress/deflate/deflate_inflate.v index 6abf4f33fe7324..5f5d908d1c2bd8 100644 --- a/vlib/compress/deflate/deflate_inflate.v +++ b/vlib/compress/deflate/deflate_inflate.v @@ -154,7 +154,14 @@ fn (mut r BitReader) huff_decode(t HuffTree) !u32 { } // inflate decompresses raw RFC 1951 DEFLATE data. -fn inflate(data []u8) ![]u8 { +struct InflateResult { + decoded []u8 + consumed int +} + +// inflate_with_consumed decompresses raw RFC 1951 DEFLATE data and reports +// how many input bytes were consumed by the DEFLATE bitstream. +fn inflate_with_consumed(data []u8) !InflateResult { mut r := BitReader{ buf: data } @@ -229,7 +236,17 @@ fn inflate(data []u8) ![]u8 { break } } - return out + consumed := r.pos - (r.nbits >> 3) + return InflateResult{ + decoded: out + consumed: consumed + } +} + +// inflate decompresses raw RFC 1951 DEFLATE data. +fn inflate(data []u8) ![]u8 { + res := inflate_with_consumed(data)! + return res.decoded } @[direct_array_access] diff --git a/vlib/compress/deflate/deflate_test.v b/vlib/compress/deflate/deflate_test.v index 0b69332b98025d..6072f6f15c2dc9 100644 --- a/vlib/compress/deflate/deflate_test.v +++ b/vlib/compress/deflate/deflate_test.v @@ -69,3 +69,29 @@ fn test_truncated_zlib_payload_fails() { } assert false } + +fn test_zlib_inserted_bytes_before_adler_fails() { + enc := compress('zlib injected trailer bytes'.repeat(4).bytes())! + mut bad := []u8{cap: enc.len + 2} + bad << enc[..enc.len - 4] + bad << [u8(0xaa), 0x55] + bad << enc[enc.len - 4..] 
+ decompress(bad) or { + assert err.msg() == 'invalid zlib stream: trailing data before adler32' + return + } + assert false +} + +fn test_gzip_inserted_bytes_before_trailer_fails() { + enc := compress('gzip injected trailer bytes'.repeat(4).bytes(), format: .gzip)! + mut bad := []u8{cap: enc.len + 1} + bad << enc[..enc.len - 8] + bad << u8(0x42) + bad << enc[enc.len - 8..] + decompress(bad) or { + assert err.msg() == 'invalid gzip stream: trailing data before trailer' + return + } + assert false +} diff --git a/vlib/compress/zlib/zlib_test.v b/vlib/compress/zlib/zlib_test.v index af3cf67aa17255..3c33f6c992bc57 100644 --- a/vlib/compress/zlib/zlib_test.v +++ b/vlib/compress/zlib/zlib_test.v @@ -65,3 +65,12 @@ fn test_zlib_invalid_truncated_payload() { } assert false } + +fn test_zlib_invalid_inserted_bytes_before_adler() { + enc := compress('zlib edge-case regression'.repeat(5).bytes())! + mut bad := []u8{cap: enc.len + 1} + bad << enc[..enc.len - 4] + bad << u8(0x7f) + bad << enc[enc.len - 4..] + assert_decompress_error(bad, 'invalid zlib stream: trailing data before adler32')! 
+} From 79a073bd4b28dfb4954d526021b2b06d83edc497 Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Mon, 18 May 2026 20:27:28 -0400 Subject: [PATCH 3/5] fix embed_file --- vlib/v/embed_file/embed_file.v | 3 ++- vlib/v/gen/c/embed.v | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/vlib/v/embed_file/embed_file.v b/vlib/v/embed_file/embed_file.v index bfea8b681f2333..6d4dc4079fa3db 100644 --- a/vlib/v/embed_file/embed_file.v +++ b/vlib/v/embed_file/embed_file.v @@ -6,6 +6,7 @@ pub struct EmbedFileData { compression_type string mut: compressed &u8 = unsafe { nil } + compressed_len int uncompressed &u8 = unsafe { nil } free_compressed bool free_uncompressed bool @@ -59,7 +60,7 @@ pub fn (mut ed EmbedFileData) data() &u8 { decoder := g_embed_file_decoders.decoders[ed.compression_type] or { panic('EmbedFileData error: unknown compression of "${ed.path}": "${ed.compression_type}"') } - compressed := unsafe { ed.compressed.vbytes(ed.len) } + compressed := unsafe { ed.compressed.vbytes(ed.compressed_len) } decompressed := decoder.decompress(compressed) or { panic('EmbedFileData error: decompression of "${ed.path}" failed: ${err}') } diff --git a/vlib/v/gen/c/embed.v b/vlib/v/gen/c/embed.v index 137b22068b1fc8..e7c57bb49e510e 100644 --- a/vlib/v/gen/c/embed.v +++ b/vlib/v/gen/c/embed.v @@ -107,6 +107,7 @@ fn (mut g Gen) gen_embedded_metadata() { if emfile.is_compressed { g.embedded_data.writeln('\t\t\tres.compression_type = ${ctoslit(emfile.compression_type)};') g.embedded_data.writeln('\t\t\tres.compressed = v__embed_file__find_index_entry_by_path((voidptr)_v_embed_file_index, ${ctoslit(emfile.rpath)}, ${ctoslit(emfile.compression_type)})->data;') + g.embedded_data.writeln('\t\t\tres.compressed_len = ${emfile.bytes.len};') g.embedded_data.writeln('\t\t\tres.uncompressed = NULL;') } else { g.embedded_data.writeln('\t\t\tres.uncompressed = v__embed_file__find_index_entry_by_path((voidptr)_v_embed_file_index, ${ctoslit(emfile.rpath)}, 
${ctoslit(emfile.compression_type)})->data;') From d8f33683375e30d7b820a158d951b16a2043d461 Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Mon, 18 May 2026 20:52:45 -0400 Subject: [PATCH 4/5] fix c.must_have --- .../v/gen/c/testdata/embed_with_prod_zlib.c.must_have | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have b/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have index 9beff83d565313..b1831e35487119 100644 --- a/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have +++ b/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have @@ -1,13 +1,14 @@ #define _VPROD (1) // V embedded data: +static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[3]; static const unsigned char _v_embed_blob_0[142] = { -0x78,0x01,0x05,0x00,0x41,0x0a,0x84,0x20,0xf0,0xac,0xaf,0x98,0xc3,0xc2,0xea,0x21, + 0x78,0x01,0x05,0x00,0x41,0x0a,0x84,0x20,0xf0,0xac,0xaf,0x98,0xc3,0xc2,0xea,0x21, -const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[2] = { - {0, { .str=(byteptr)("embed.vv"), .len=8, .is_lit=1 }, { .str=(byteptr)("zlib"), .len=4, .is_lit=1 }, (byteptr)_v_embed_blob_0}, - {-1, { .str=(byteptr)(""), .len=0, .is_lit=1 }, { .str=(byteptr)(""), .len=0, .is_lit=1 }, NULL} -}; +static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[3] = { + {0, { .str=(byteptr)("embed.vv"), .len=8, .is_lit=1 }, { .str=(byteptr)("zlib"), .len=4, .is_lit=1 }, (byteptr)_v_embed_blob_0}, + {1, { .str=(byteptr)("embed.vv"), .len=8, .is_lit=1 }, { .str=(byteptr)("none"), .len=4, .is_lit=1 }, (byteptr)_v_embed_blob_1}, + {-1, { .str=(byteptr)(""), .len=0, .is_lit=1 }, { .str=(byteptr)(""), .len=0, .is_lit=1 }, NULL} typedef struct v__embed_file__EmbedFileData v__embed_file__EmbedFileData; typedef struct v__embed_file__EmbedFileIndexEntry v__embed_file__EmbedFileIndexEntry; From 66ab32b86ce0ba0222503cff33c3f82d44b5a45b Mon Sep 17 00:00:00 2001 From: JalonSolov Date: Tue, 19 May 2026 08:38:07 -0400 
Subject: [PATCH 5/5] fixed c.must_have as of latest v --- vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have b/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have index b1831e35487119..b69c642ae3e2b3 100644 --- a/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have +++ b/vlib/v/gen/c/testdata/embed_with_prod_zlib.c.must_have @@ -1,13 +1,12 @@ #define _VPROD (1) // V embedded data: -static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[3]; -static const unsigned char _v_embed_blob_0[142] = { - 0x78,0x01,0x05,0x00,0x41,0x0a,0x84,0x20,0xf0,0xac,0xaf,0x98,0xc3,0xc2,0xea,0x21, +static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[2]; +static const unsigned char _v_embed_blob_0[139] = { + 0x78,0x9c,0xd3,0xd7,0x57,0x28,0x2b,0x49,0x2d,0x2e,0x51,0x28,0x4b,0xcb,0x49,0x4c, -static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[3] = { +static const v__embed_file__EmbedFileIndexEntry _v_embed_file_index[2] = { {0, { .str=(byteptr)("embed.vv"), .len=8, .is_lit=1 }, { .str=(byteptr)("zlib"), .len=4, .is_lit=1 }, (byteptr)_v_embed_blob_0}, - {1, { .str=(byteptr)("embed.vv"), .len=8, .is_lit=1 }, { .str=(byteptr)("none"), .len=4, .is_lit=1 }, (byteptr)_v_embed_blob_1}, {-1, { .str=(byteptr)(""), .len=0, .is_lit=1 }, { .str=(byteptr)(""), .len=0, .is_lit=1 }, NULL} typedef struct v__embed_file__EmbedFileData v__embed_file__EmbedFileData;