Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 20 additions & 17 deletions vlib/compress/deflate/deflate.v
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module deflate

import encoding.binary
import hash.adler32
import hash.crc32

// CompressFormat selects the output container around the RFC 1951 payload.
Expand All @@ -26,9 +27,10 @@ pub fn compress(data []u8, format CompressParams) ![]u8 {
}
}

// compress_zlib compresses data into a zlib stream (RFC 1950).
pub fn compress_zlib(data []u8) ![]u8 {
payload := deflate_compress_fixed(data)
cksum := adler32(data)
cksum := adler32.sum(data)
mut out := []u8{cap: 2 + payload.len + 4}
out << u8(0x78) // CMF: CM=8 deflate, CINFO=7 (32K window)
out << u8(0x9c) // FLG: default compression, FCHECK satisfies (CMF*256+FLG)%31==0
Expand Down Expand Up @@ -71,7 +73,9 @@ pub fn decompress(data []u8) ![]u8 {
return inflate(data)
}

fn decompress_zlib(data []u8) ![]u8 {
// decompress_zlib decompresses a zlib stream (RFC 1950) and returns the
// decompressed bytes in a new array.
pub fn decompress_zlib(data []u8) ![]u8 {
if data.len < 6 {
return error('invalid zlib stream: too short')
}
Expand All @@ -86,14 +90,20 @@ fn decompress_zlib(data []u8) ![]u8 {
}
payload := data[2..data.len - 4]
expected := binary.big_endian_u32_at(data, data.len - 4)
decoded := inflate(payload)!
if adler32(decoded) != expected {
res := inflate_with_consumed(payload)!
if res.consumed != payload.len {
return error('invalid zlib stream: trailing data before adler32')
}
decoded := res.decoded
if adler32.sum(decoded) != expected {
return error('invalid zlib stream: adler32 mismatch')
}
return decoded
}

fn decompress_gzip(data []u8) ![]u8 {
// decompress_gzip decompresses a gzip stream (RFC 1952) and returns the
// decompressed bytes in a new array.
pub fn decompress_gzip(data []u8) ![]u8 {
if data.len < 18 {
return error('invalid gzip stream: too short')
}
Expand Down Expand Up @@ -133,7 +143,11 @@ fn decompress_gzip(data []u8) ![]u8 {
payload := data[pos..data.len - 8]
expected_crc := binary.little_endian_u32_at(data, data.len - 8)
expected_size := binary.little_endian_u32_at(data, data.len - 4)
decoded := inflate(payload)!
res := inflate_with_consumed(payload)!
if res.consumed != payload.len {
return error('invalid gzip stream: trailing data before trailer')
}
decoded := res.decoded
if crc32.sum(decoded) != expected_crc {
return error('invalid gzip stream: crc32 mismatch')
}
Expand All @@ -143,17 +157,6 @@ fn decompress_gzip(data []u8) ![]u8 {
return decoded
}

// adler32 computes the Adler-32 checksum of `data`.
// Two running sums are kept modulo 65521: `low` starts at 1 and accumulates
// the bytes, `high` starts at 0 and accumulates the successive values of `low`.
// The result packs `high` into the upper 16 bits and `low` into the lower 16.
fn adler32(data []u8) u32 {
	modulus := u32(65521)
	mut low := u32(1)
	mut high := u32(0)
	for i in 0 .. data.len {
		low = (low + u32(data[i])) % modulus
		high = (high + low) % modulus
	}
	return (high << 16) | low
}

fn bit_reverse(v u32, n int) u32 {
mut r := u32(0)
mut val := v
Expand Down
24 changes: 22 additions & 2 deletions vlib/compress/deflate/deflate_inflate.v
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,24 @@ fn (mut r BitReader) huff_decode(t HuffTree) !u32 {
return error('inflate: invalid Huffman code')
}
len_ := int(entry & 0x1f)
if len_ > r.nbits {
return error('inflate: unexpected end of stream')
}
sym := entry >> 5
r.bits >>= u32(len_)
r.nbits -= len_
return sym
}

// inflate decompresses raw RFC 1951 DEFLATE data.
fn inflate(data []u8) ![]u8 {
// InflateResult bundles the output of `inflate_with_consumed`.
struct InflateResult {
	// decoded holds the decompressed bytes.
	decoded []u8
	// consumed is the number of input bytes used by the DEFLATE bitstream.
	consumed int
}

// inflate_with_consumed decompresses raw RFC 1951 DEFLATE data and reports
// how many input bytes were consumed by the DEFLATE bitstream.
fn inflate_with_consumed(data []u8) !InflateResult {
mut r := BitReader{
buf: data
}
Expand Down Expand Up @@ -226,7 +236,17 @@ fn inflate(data []u8) ![]u8 {
break
}
}
return out
consumed := r.pos - (r.nbits >> 3)
return InflateResult{
decoded: out
consumed: consumed
}
}

// inflate decompresses raw RFC 1951 DEFLATE data and returns only the
// decoded bytes; the consumed-byte count reported by
// `inflate_with_consumed` is discarded. Errors are propagated unchanged.
fn inflate(data []u8) ![]u8 {
	result := inflate_with_consumed(data)!
	return result.decoded
}

@[direct_array_access]
Expand Down
34 changes: 34 additions & 0 deletions vlib/compress/deflate/deflate_test.v
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,37 @@ fn test_corrupt_checksum_fails() {
}
assert false
}

// A zlib stream whose DEFLATE payload ends early must be rejected with an
// 'unexpected end of stream' error rather than decoded.
fn test_truncated_zlib_payload_fails() {
	truncated := [u8(0x78), 0x9c, 0x03, 0x00, 0x00, 0x00, 0x01]
	if _ := decompress(truncated) {
		// Decoding must not succeed.
		assert false
	} else {
		assert err.msg().contains('unexpected end of stream')
	}
}

// Extra bytes spliced between the DEFLATE payload and the 4-byte Adler-32
// trailer of a valid zlib stream must be reported as trailing data.
fn test_zlib_inserted_bytes_before_adler_fails() {
	enc := compress('zlib injected trailer bytes'.repeat(4).bytes())!
	trailer_start := enc.len - 4
	mut bad := []u8{cap: enc.len + 2}
	bad << enc[..trailer_start]
	bad << [u8(0xaa), 0x55]
	bad << enc[trailer_start..]
	if _ := decompress(bad) {
		// Decoding must not succeed.
		assert false
	} else {
		assert err.msg() == 'invalid zlib stream: trailing data before adler32'
	}
}

// An extra byte spliced between the DEFLATE payload and the 8-byte gzip
// trailer of a valid gzip stream must be reported as trailing data.
fn test_gzip_inserted_bytes_before_trailer_fails() {
	enc := compress('gzip injected trailer bytes'.repeat(4).bytes(), format: .gzip)!
	trailer_start := enc.len - 8
	mut bad := []u8{cap: enc.len + 1}
	bad << enc[..trailer_start]
	bad << u8(0x42)
	bad << enc[trailer_start..]
	if _ := decompress(bad) {
		// Decoding must not succeed.
		assert false
	} else {
		assert err.msg() == 'invalid gzip stream: trailing data before trailer'
	}
}
12 changes: 6 additions & 6 deletions vlib/compress/zlib/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
## Description

`compress.zlib` is a module that assists in the compression and
decompression of binary data using `zlib` compression
`compress.zlib` is a small wrapper around `compress.deflate` for working with zlib streams
(RFC 1950).

## Examples

```v
import compress.zlib

fn main() {
uncompressed := 'Hello world!'
compressed := zlib.compress(uncompressed.bytes())!
decompressed := zlib.decompress(compressed)!
assert decompressed == uncompressed.bytes()
data := 'Hello world!'.bytes()
compressed := zlib.compress(data) or { panic(err) }
decompressed := zlib.decompress(compressed) or { panic(err) }
assert decompressed == data
}
```
16 changes: 16 additions & 0 deletions vlib/compress/zlib/interop/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# zlib interop checker
This helper verifies interoperability between:
- V module: `compress.zlib`
- C helper linked with `libz`
- Python stdlib: `zlib`
It runs deterministic test vectors, compresses each vector with all three producers, then
cross-decompresses every produced stream with all three consumers.
A case passes only if every decompressed output is byte-identical to the original input.
## Run
```bash
./vnew run vlib/compress/zlib/interop/zlib_interop.vsh
```
## Requirements
- `python3` with the stdlib `zlib` module
- a C compiler (`cc`, `gcc`, or `clang`)
- `libz` development headers and linker support (`-lz`)
185 changes: 185 additions & 0 deletions vlib/compress/zlib/interop/zlib_interop.vsh
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#!/usr/bin/env -S v

import compress.zlib

// TestVector is one named plaintext input for the interop check.
// NOTE: `make_test_vectors` initialises this struct positionally, so the
// field order (name, data) must not change.
struct TestVector {
	// name labels the vector in progress/failure output and in the per-case
	// directory name.
	name string
	// data is the uncompressed input handed to every producer.
	data []u8
}

// main runs the zlib interop check.
//
// It creates a per-process scratch directory, builds the C helper, verifies
// that the Python helper is usable, then runs every test vector through
// `run_case`.
//
// Exit status:
//   0 - all vectors passed
//   1 - a vector failed (the failure is printed to stderr)
//   2 - a prerequisite (C compiler/libz or python3/zlib) is missing (SKIP)
fn main() {
	tmp_root := join_path(temp_dir(), 'v_zlib_interop_${getpid()}')
	mkdir_all(tmp_root) or { panic(err) }
	// The deferred cleanup only covers a normal return from main. exit()
	// terminates the process without running deferred blocks, so every exit
	// path below removes the scratch directory explicitly first.
	defer {
		rmdir_all(tmp_root) or {}
	}

	c_bin := compile_c_helper(tmp_root) or {
		eprintln('SKIP: ${err.msg()}')
		rmdir_all(tmp_root) or {}
		exit(2)
	}
	py_script := join_path(@DIR, 'zlib_ref.py')
	ensure_python(py_script) or {
		eprintln('SKIP: ${err.msg()}')
		rmdir_all(tmp_root) or {}
		exit(2)
	}

	vectors := make_test_vectors()
	mut total_checks := 0
	for i, vec in vectors {
		total_checks += run_case(tmp_root, c_bin, py_script, i, vec) or {
			eprintln('FAIL: ${vec.name}: ${err.msg()}')
			rmdir_all(tmp_root) or {}
			exit(1)
			// Never reached; keeps the or-block typed as int.
			0
		}
		println('ok ${i + 1}/${vectors.len}: ${vec.name} (${vec.data.len} bytes)')
	}
	println('PASS: ${vectors.len} vectors, ${total_checks} cross-checks')
}

// compile_c_helper builds `zlib_ref.c` (located next to this script) into
// `tmp_root` with the first available C compiler, linking against libz.
// It returns the path of the produced binary, or an error when no compiler
// is found or the build command fails.
fn compile_c_helper(tmp_root string) !string {
	compiler := choose_cc()
	if compiler == '' {
		return error('no C compiler found (tried cc, gcc, clang)')
	}
	source_path := join_path(@DIR, 'zlib_ref.c')
	output_path := join_path(tmp_root, 'zlib_interop_ref')
	build_cmd := '${compiler} -O2 ${shell_quote(source_path)} -lz -o ${shell_quote(output_path)}'
	must_succeed(build_cmd, 'C zlib helper build failed')!
	return output_path
}

// choose_cc returns the first of `cc`, `gcc`, `clang` whose `--version`
// invocation exits with status 0, or an empty string when none does.
fn choose_cc() string {
	candidates := ['cc', 'gcc', 'clang']
	for i in 0 .. candidates.len {
		probe := execute('${candidates[i]} --version >/dev/null 2>&1')
		if probe.exit_code == 0 {
			return candidates[i]
		}
	}
	return ''
}

// ensure_python checks that `python3` can import its `zlib` module and that
// the helper script at `py_script` exists. It returns an error describing
// whichever check fails first.
fn ensure_python(py_script string) ! {
	probe_cmd := "python3 -c 'import zlib' >/dev/null 2>&1"
	must_succeed(probe_cmd, 'python3 with zlib module is not available')!
	if exists(py_script) {
		return
	}
	return error('missing Python helper: ${py_script}')
}

// make_test_vectors returns the fixed, deterministic list of inputs used by
// the interop check: empty input, repeated ASCII text, a run of one byte,
// every byte value repeated four times, and 64 KiB of LCG output.
fn make_test_vectors() []TestVector {
	pangram := 'The quick brown fox jumps over the lazy dog.\n'.repeat(64).bytes()
	return [
		TestVector{
			name: 'empty'
			data: []u8{}
		},
		TestVector{
			name: 'ascii_text'
			data: pangram
		},
		TestVector{
			name: 'repeated_byte'
			data: []u8{len: 10000, init: `A`}
		},
		TestVector{
			name: 'all_bytes_x4'
			data: all_bytes_repeated(4)
		},
		TestVector{
			name: 'lcg_64k'
			data: lcg_bytes(65536)
		},
	]
}

// run_case compresses `vec.data` with the V module, the C helper and the
// Python helper, then decompresses each resulting stream with all three
// consumers and checks that every output equals the original input.
// Scratch files are written to a per-case directory under `tmp_root`.
// It returns the number of successful cross-checks, or the first error.
fn run_case(tmp_root string, c_bin string, py_script string, case_idx int, vec TestVector) !int {
	work_dir := join_path(tmp_root, 'case_${case_idx:02}_${vec.name}')
	mkdir_all(work_dir)!

	from_v := zlib.compress(vec.data)!
	from_c := c_compress(work_dir, c_bin, vec.data)!
	from_py := py_compress(work_dir, py_script, vec.data)!

	streams := {
		'v':  from_v
		'c':  from_c
		'py': from_py
	}
	mut verified := 0
	for producer, stream in streams {
		via_v := zlib.decompress(stream)!
		assert_equal_bytes('v.decompress(${producer}.compress)', vec.data, via_v)!

		via_c := c_decompress(work_dir, c_bin, producer, stream)!
		assert_equal_bytes('c.decompress(${producer}.compress)', vec.data, via_c)!

		via_py := py_decompress(work_dir, py_script, producer, stream)!
		assert_equal_bytes('python.decompress(${producer}.compress)', vec.data, via_py)!

		// All three consumers agreed for this producer.
		verified += 3
	}
	return verified
}

// c_compress writes `plain` to `plain.in` in `case_dir`, runs the C helper
// in `compress` mode, and returns the bytes it wrote to `c.zlib`.
fn c_compress(case_dir string, c_bin string, plain []u8) ![]u8 {
	src_file := join_path(case_dir, 'plain.in')
	dst_file := join_path(case_dir, 'c.zlib')
	write_file_array(src_file, plain)!
	cmd := '${shell_quote(c_bin)} compress ${shell_quote(src_file)} ${shell_quote(dst_file)}'
	must_succeed(cmd, 'C zlib compression failed')!
	return read_bytes(dst_file)!
}

// c_decompress writes `compressed` to `<producer>.for_c.zlib` in `case_dir`,
// runs the C helper in `decompress` mode, and returns the bytes it wrote to
// `<producer>.from_c.out`.
fn c_decompress(case_dir string, c_bin string, producer string, compressed []u8) ![]u8 {
	src_file := join_path(case_dir, '${producer}.for_c.zlib')
	dst_file := join_path(case_dir, '${producer}.from_c.out')
	write_file_array(src_file, compressed)!
	cmd := '${shell_quote(c_bin)} decompress ${shell_quote(src_file)} ${shell_quote(dst_file)}'
	must_succeed(cmd, 'C zlib decompression failed')!
	return read_bytes(dst_file)!
}

// py_compress writes `plain` to `plain_py.in` in `case_dir`, runs the Python
// helper in `compress` mode, and returns the bytes it wrote to `py.zlib`.
fn py_compress(case_dir string, py_script string, plain []u8) ![]u8 {
	src_file := join_path(case_dir, 'plain_py.in')
	dst_file := join_path(case_dir, 'py.zlib')
	write_file_array(src_file, plain)!
	cmd := 'python3 ${shell_quote(py_script)} compress ${shell_quote(src_file)} ${shell_quote(dst_file)}'
	must_succeed(cmd, 'Python zlib compression failed')!
	return read_bytes(dst_file)!
}

// py_decompress writes `compressed` to `<producer>.for_py.zlib` in
// `case_dir`, runs the Python helper in `decompress` mode, and returns the
// bytes it wrote to `<producer>.from_py.out`.
fn py_decompress(case_dir string, py_script string, producer string, compressed []u8) ![]u8 {
	src_file := join_path(case_dir, '${producer}.for_py.zlib')
	dst_file := join_path(case_dir, '${producer}.from_py.out')
	write_file_array(src_file, compressed)!
	cmd := 'python3 ${shell_quote(py_script)} decompress ${shell_quote(src_file)} ${shell_quote(dst_file)}'
	must_succeed(cmd, 'Python zlib decompression failed')!
	return read_bytes(dst_file)!
}

// all_bytes_repeated returns the sequence 0x00..0xff repeated `times` times
// (256 * times bytes in total).
fn all_bytes_repeated(times int) []u8 {
	total := 256 * times
	mut bytes := []u8{cap: total}
	for n in 0 .. total {
		// The low 8 bits cycle through 0..255 once per 256 positions.
		bytes << u8(n & 0xff)
	}
	return bytes
}

// lcg_bytes returns `n` deterministic pseudo-random bytes produced by a
// 32-bit linear congruential generator (multiplier 1664525, increment
// 1013904223, seed 0x12345678). Each output byte is bits 16..23 of the
// generator state after one step.
fn lcg_bytes(n int) []u8 {
	multiplier := u32(1664525)
	increment := u32(1013904223)
	mut state := u32(0x12345678)
	mut stream := []u8{len: n}
	for idx in 0 .. n {
		state = state * multiplier + increment
		stream[idx] = u8((state >> 16) & u32(0xff))
	}
	return stream
}

// assert_equal_bytes returns an error prefixed with `label` when `got`
// differs from `expected`: a length mismatch is reported first, otherwise
// the offset of the first differing byte. It returns normally when the two
// arrays are identical.
fn assert_equal_bytes(label string, expected []u8, got []u8) ! {
	if got.len != expected.len {
		return error('${label}: length mismatch expected=${expected.len} got=${got.len}')
	}
	for offset, want in expected {
		if got[offset] != want {
			return error('${label}: byte mismatch at offset ${offset}')
		}
	}
}

// must_succeed runs `command` and returns an error when it exits with a
// non-zero status. The error text contains `context`, the command line, the
// exit code and the captured output.
fn must_succeed(command string, context string) ! {
	outcome := execute(command)
	if outcome.exit_code == 0 {
		return
	}
	return error('${context}\ncommand: ${command}\nexit_code: ${outcome.exit_code}\n${outcome.output}')
}

// shell_quote wraps `s` in single quotes for use as one shell word.
// Each embedded single quote is replaced by `'\''` (close the quoted
// string, emit an escaped quote, reopen the quoted string).
fn shell_quote(s string) string {
	escaped := s.replace("'", "'\\''")
	return "'" + escaped + "'"
}
Loading
Loading