Skip to content

Commit 000f6af

Browse files
author
Rituparna Khaund
committed
tests: add parquet format conversion unit tests
Add tests for out_s3_compress_parquet with all supported page-level codecs (snappy, zstd, gzip, none). Tests verify that the function returns success for valid JSON input, output contains valid Parquet magic bytes (PAR1), and snappy-compressed output is not larger than uncompressed output. Tests are gated behind FLB_HAVE_ARROW_PARQUET and only compile when Apache Arrow Parquet support is enabled at build time. Signed-off-by: Rituparna Khaund <ritukhau@amazon.co.uk>
1 parent 4b304a0 commit 000f6af

1 file changed

Lines changed: 109 additions & 0 deletions

File tree

tests/internal/aws_compress.c

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,107 @@ void test_b64_truncated_gzip_boundary()
334334
flb_aws_compress_truncate_b64_test_cases__gzip_decode(cases, 40);
335335
}
336336

337+
#ifdef FLB_HAVE_ARROW_PARQUET
338+
void test_parquet_format_snappy()
339+
{
340+
int ret;
341+
void *out_buf = NULL;
342+
size_t out_size = 0;
343+
char *json = "{\"key\":\"value\",\"num\":42}\n"
344+
"{\"key\":\"other\",\"num\":99}\n";
345+
size_t json_len = strlen(json);
346+
347+
ret = out_s3_compress_parquet(json, json_len, &out_buf, &out_size,
348+
FLB_PARQUET_COMPRESSION_SNAPPY);
349+
TEST_CHECK(ret == 0);
350+
TEST_CHECK(out_size > 8);
351+
TEST_CHECK(memcmp(out_buf, "PAR1", 4) == 0);
352+
TEST_CHECK(memcmp((char *)out_buf + out_size - 4, "PAR1", 4) == 0);
353+
flb_free(out_buf);
354+
}
355+
356+
void test_parquet_format_zstd()
357+
{
358+
int ret;
359+
void *out_buf = NULL;
360+
size_t out_size = 0;
361+
char *json = "{\"key\":\"value\",\"num\":42}\n"
362+
"{\"key\":\"other\",\"num\":99}\n";
363+
size_t json_len = strlen(json);
364+
365+
ret = out_s3_compress_parquet(json, json_len, &out_buf, &out_size,
366+
FLB_PARQUET_COMPRESSION_ZSTD);
367+
TEST_CHECK(ret == 0);
368+
TEST_CHECK(out_size > 8);
369+
TEST_CHECK(memcmp(out_buf, "PAR1", 4) == 0);
370+
TEST_CHECK(memcmp((char *)out_buf + out_size - 4, "PAR1", 4) == 0);
371+
flb_free(out_buf);
372+
}
373+
374+
void test_parquet_format_gzip()
375+
{
376+
int ret;
377+
void *out_buf = NULL;
378+
size_t out_size = 0;
379+
char *json = "{\"key\":\"value\",\"num\":42}\n"
380+
"{\"key\":\"other\",\"num\":99}\n";
381+
size_t json_len = strlen(json);
382+
383+
ret = out_s3_compress_parquet(json, json_len, &out_buf, &out_size,
384+
FLB_PARQUET_COMPRESSION_GZIP);
385+
TEST_CHECK(ret == 0);
386+
TEST_CHECK(out_size > 8);
387+
TEST_CHECK(memcmp(out_buf, "PAR1", 4) == 0);
388+
TEST_CHECK(memcmp((char *)out_buf + out_size - 4, "PAR1", 4) == 0);
389+
flb_free(out_buf);
390+
}
391+
392+
void test_parquet_format_uncompressed()
393+
{
394+
int ret;
395+
void *out_buf = NULL;
396+
size_t out_size = 0;
397+
char *json = "{\"key\":\"value\",\"num\":42}\n"
398+
"{\"key\":\"other\",\"num\":99}\n";
399+
size_t json_len = strlen(json);
400+
401+
ret = out_s3_compress_parquet(json, json_len, &out_buf, &out_size,
402+
FLB_PARQUET_COMPRESSION_NONE);
403+
TEST_CHECK(ret == 0);
404+
TEST_CHECK(out_size > 8);
405+
TEST_CHECK(memcmp(out_buf, "PAR1", 4) == 0);
406+
TEST_CHECK(memcmp((char *)out_buf + out_size - 4, "PAR1", 4) == 0);
407+
flb_free(out_buf);
408+
}
409+
410+
void test_parquet_compression_reduces_size()
411+
{
412+
int ret;
413+
void *buf_none = NULL;
414+
void *buf_snappy = NULL;
415+
size_t size_none = 0;
416+
size_t size_snappy = 0;
417+
char *json = "{\"msg\":\"hello hello hello hello hello hello\"}\n"
418+
"{\"msg\":\"hello hello hello hello hello hello\"}\n"
419+
"{\"msg\":\"hello hello hello hello hello hello\"}\n"
420+
"{\"msg\":\"hello hello hello hello hello hello\"}\n"
421+
"{\"msg\":\"hello hello hello hello hello hello\"}\n";
422+
size_t json_len = strlen(json);
423+
424+
ret = out_s3_compress_parquet(json, json_len, &buf_none, &size_none,
425+
FLB_PARQUET_COMPRESSION_NONE);
426+
TEST_CHECK(ret == 0);
427+
428+
ret = out_s3_compress_parquet(json, json_len, &buf_snappy, &size_snappy,
429+
FLB_PARQUET_COMPRESSION_SNAPPY);
430+
TEST_CHECK(ret == 0);
431+
TEST_CHECK(size_snappy <= size_none);
432+
433+
flb_free(buf_none);
434+
flb_free(buf_snappy);
435+
}
436+
#endif
437+
337438
TEST_LIST = {
338439
{ "test_compression_gzip", test_compression_gzip },
339440
{ "test_compression_zstd", test_compression_zstd },
@@ -352,6 +453,14 @@ TEST_LIST = {
352453
test_b64_truncated_gzip_truncation_multi_rounds },
353454
{ "test_b64_truncated_gzip_boundary",
354455
test_b64_truncated_gzip_boundary },
456+
#ifdef FLB_HAVE_ARROW_PARQUET
457+
{ "test_parquet_format_snappy", test_parquet_format_snappy },
458+
{ "test_parquet_format_zstd", test_parquet_format_zstd },
459+
{ "test_parquet_format_gzip", test_parquet_format_gzip },
460+
{ "test_parquet_format_uncompressed", test_parquet_format_uncompressed },
461+
{ "test_parquet_compression_reduces_size",
462+
test_parquet_compression_reduces_size },
463+
#endif
355464
{ 0 }
356465
};
357466

0 commit comments

Comments
 (0)