Skip to content

Commit c4c90eb

Browse files
committed
ALP: Gzip compress CSV test data and regenerate float parquet files
Compress the 4 expect CSV files with gzip (5.3 MB -> 1.4 MB) and update all readers (TestInteropAlpEncoding, GenerateAlpParquet, AlpCodecThroughput) to decompress via GZIPInputStream. Also regenerate the C++ and Java float parquet test files which had stale/invalid page headers, and remove the Hadoop CRC files that were causing checksum errors.
1 parent b93c3b1 commit c4c90eb

15 files changed

+30
-60027
lines changed

parquet-column/src/test/java/org/apache/parquet/column/values/alp/benchmark/AlpCodecThroughput.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.IOException;
2424
import java.io.InputStream;
2525
import java.io.InputStreamReader;
26+
import java.util.zip.GZIPInputStream;
2627
import java.nio.ByteBuffer;
2728
import java.nio.file.Files;
2829
import java.nio.file.Path;
@@ -53,8 +54,8 @@ public class AlpCodecThroughput {
5354
private static final int MEASURED = 30;
5455

5556
private static final String CSV_DIR = "parquet-hadoop/src/test/resources";
56-
private static final String DOUBLE_CSV = "alp_spotify1_expect.csv";
57-
private static final String FLOAT_CSV = "alp_float_spotify1_expect.csv";
57+
private static final String DOUBLE_CSV = "alp_spotify1_expect.csv.gz";
58+
private static final String FLOAT_CSV = "alp_float_spotify1_expect.csv.gz";
5859

5960
// Spotify column names matching C++ benchmark
6061
private static final String[] COLUMNS = {
@@ -140,7 +141,7 @@ private static Path findCsvDir() throws IOException {
140141
}
141142

142143
private static double[][] loadDoubleCsv(Path csvPath) throws IOException {
143-
try (InputStream is = new FileInputStream(csvPath.toFile())) {
144+
try (InputStream is = new GZIPInputStream(new FileInputStream(csvPath.toFile()))) {
144145
BufferedReader br = new BufferedReader(new InputStreamReader(is));
145146
String header = br.readLine();
146147
int numCols = header.split(",").length;
@@ -168,7 +169,7 @@ private static double[][] loadDoubleCsv(Path csvPath) throws IOException {
168169
}
169170

170171
private static float[][] loadFloatCsv(Path csvPath) throws IOException {
171-
try (InputStream is = new FileInputStream(csvPath.toFile())) {
172+
try (InputStream is = new GZIPInputStream(new FileInputStream(csvPath.toFile()))) {
172173
BufferedReader br = new BufferedReader(new InputStreamReader(is));
173174
String header = br.readLine();
174175
int numCols = header.split(",").length;

parquet-hadoop/src/test/java/org/apache/parquet/hadoop/GenerateAlpParquet.java

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import java.io.InputStream;
2424
import java.io.InputStreamReader;
2525
import java.nio.charset.StandardCharsets;
26+
import java.util.zip.GZIPInputStream;
2627
import java.nio.file.Files;
2728
import java.nio.file.Paths;
2829
import java.util.ArrayList;
@@ -38,7 +39,7 @@
3839
/**
3940
* Standalone utility to generate ALP-encoded parquet files from CSV test data.
4041
*
41-
* <p>Reads the existing expect CSV files (alp_spotify1_expect.csv, alp_arade_expect.csv)
42+
* <p>Reads the existing expect CSV files (alp_spotify1_expect.csv.gz, alp_arade_expect.csv.gz)
4243
* from test resources and writes ALP-encoded parquet files using the Java ALP encoder.
4344
*
4445
* <p>Usage: java GenerateAlpParquet [output_directory]
@@ -50,18 +51,18 @@ public static void main(String[] args) throws IOException {
5051
String outputDir = args.length > 0 ? args[0] : ".";
5152
Files.createDirectories(Paths.get(outputDir));
5253

53-
generateAlpParquet("/alp_arade_expect.csv", outputDir + "/alp_java_arade.parquet");
54+
generateAlpParquet("/alp_arade_expect.csv.gz", outputDir + "/alp_java_arade.parquet");
5455
System.out.println("Generated: " + outputDir + "/alp_java_arade.parquet");
5556

56-
generateAlpParquet("/alp_spotify1_expect.csv", outputDir + "/alp_java_spotify1.parquet");
57+
generateAlpParquet("/alp_spotify1_expect.csv.gz", outputDir + "/alp_java_spotify1.parquet");
5758
System.out.println("Generated: " + outputDir + "/alp_java_spotify1.parquet");
5859

5960
generateAlpParquetFloat(
60-
"/alp_float_arade_expect.csv", outputDir + "/alp_java_float_arade.parquet");
61+
"/alp_float_arade_expect.csv.gz", outputDir + "/alp_java_float_arade.parquet");
6162
System.out.println("Generated: " + outputDir + "/alp_java_float_arade.parquet");
6263

6364
generateAlpParquetFloat(
64-
"/alp_float_spotify1_expect.csv", outputDir + "/alp_java_float_spotify1.parquet");
65+
"/alp_float_spotify1_expect.csv.gz", outputDir + "/alp_java_float_spotify1.parquet");
6566
System.out.println("Generated: " + outputDir + "/alp_java_float_spotify1.parquet");
6667
}
6768

@@ -70,7 +71,8 @@ private static void generateAlpParquet(String csvResource, String outputPath) th
7071
String[] columnNames;
7172
List<double[]> rows = new ArrayList<>();
7273

73-
try (InputStream is = GenerateAlpParquet.class.getResourceAsStream(csvResource);
74+
try (InputStream raw = GenerateAlpParquet.class.getResourceAsStream(csvResource);
75+
InputStream is = new GZIPInputStream(raw);
7476
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
7577
// Parse header
7678
String header = br.readLine();
@@ -126,7 +128,8 @@ private static void generateAlpParquetFloat(String csvResource, String outputPat
126128
String[] columnNames;
127129
List<float[]> rows = new ArrayList<>();
128130

129-
try (InputStream is = GenerateAlpParquet.class.getResourceAsStream(csvResource);
131+
try (InputStream raw = GenerateAlpParquet.class.getResourceAsStream(csvResource);
132+
InputStream is = new GZIPInputStream(raw);
130133
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
131134
// Parse header
132135
String header = br.readLine();

parquet-hadoop/src/test/java/org/apache/parquet/hadoop/TestInteropAlpEncoding.java

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import java.io.IOException;
2626
import java.io.InputStream;
2727
import java.io.InputStreamReader;
28+
import java.util.zip.GZIPInputStream;
2829
import java.net.URISyntaxException;
2930
import java.nio.charset.StandardCharsets;
3031
import java.util.ArrayList;
@@ -66,7 +67,7 @@ public void testReadAlpAradeParquet() throws IOException {
6667
int expectedRows = 15000;
6768

6869
// Read expected values from CSV
69-
double[][] expected = readExpectedCsv("/alp_arade_expect.csv", columnNames.length, expectedRows);
70+
double[][] expected = readExpectedCsv("/alp_arade_expect.csv.gz", columnNames.length, expectedRows);
7071

7172
// Read parquet file using GroupReadSupport
7273
List<Group> rows = readParquetGroups(parquetPath);
@@ -109,7 +110,7 @@ public void testReadAlpSpotify1Parquet() throws IOException {
109110
int expectedRows = 15000;
110111

111112
// Read expected values from CSV
112-
double[][] expected = readExpectedCsv("/alp_spotify1_expect.csv", columnNames.length, expectedRows);
113+
double[][] expected = readExpectedCsv("/alp_spotify1_expect.csv.gz", columnNames.length, expectedRows);
113114

114115
// Read parquet file using GroupReadSupport
115116
List<Group> rows = readParquetGroups(parquetPath);
@@ -141,7 +142,7 @@ public void testReadAlpJavaAradeParquet() throws IOException {
141142
String[] columnNames = {"value1", "value2", "value3", "value4"};
142143
int expectedRows = 15000;
143144

144-
double[][] expected = readExpectedCsv("/alp_arade_expect.csv", columnNames.length, expectedRows);
145+
double[][] expected = readExpectedCsv("/alp_arade_expect.csv.gz", columnNames.length, expectedRows);
145146

146147
List<Group> rows = readParquetGroups(parquetPath);
147148
assertEquals("Row count should match", expectedRows, rows.size());
@@ -180,7 +181,7 @@ public void testReadAlpJavaSpotify1Parquet() throws IOException {
180181
};
181182
int expectedRows = 15000;
182183

183-
double[][] expected = readExpectedCsv("/alp_spotify1_expect.csv", columnNames.length, expectedRows);
184+
double[][] expected = readExpectedCsv("/alp_spotify1_expect.csv.gz", columnNames.length, expectedRows);
184185

185186
List<Group> rows = readParquetGroups(parquetPath);
186187
assertEquals("Row count should match", expectedRows, rows.size());
@@ -209,7 +210,7 @@ public void testReadAlpFloatAradeParquet() throws IOException {
209210
String[] columnNames = {"value1", "value2", "value3", "value4"};
210211
int expectedRows = 15000;
211212

212-
float[][] expected = readExpectedCsvFloat("/alp_float_arade_expect.csv", columnNames.length, expectedRows);
213+
float[][] expected = readExpectedCsvFloat("/alp_float_arade_expect.csv.gz", columnNames.length, expectedRows);
213214

214215
List<Group> rows = readParquetGroups(parquetPath);
215216
assertEquals("Row count should match", expectedRows, rows.size());
@@ -248,7 +249,7 @@ public void testReadAlpFloatSpotify1Parquet() throws IOException {
248249
};
249250
int expectedRows = 15000;
250251

251-
float[][] expected = readExpectedCsvFloat("/alp_float_spotify1_expect.csv", columnNames.length, expectedRows);
252+
float[][] expected = readExpectedCsvFloat("/alp_float_spotify1_expect.csv.gz", columnNames.length, expectedRows);
252253

253254
List<Group> rows = readParquetGroups(parquetPath);
254255
assertEquals("Row count should match", expectedRows, rows.size());
@@ -277,7 +278,7 @@ public void testReadAlpJavaFloatAradeParquet() throws IOException {
277278
String[] columnNames = {"value1", "value2", "value3", "value4"};
278279
int expectedRows = 15000;
279280

280-
float[][] expected = readExpectedCsvFloat("/alp_float_arade_expect.csv", columnNames.length, expectedRows);
281+
float[][] expected = readExpectedCsvFloat("/alp_float_arade_expect.csv.gz", columnNames.length, expectedRows);
281282

282283
List<Group> rows = readParquetGroups(parquetPath);
283284
assertEquals("Row count should match", expectedRows, rows.size());
@@ -316,7 +317,7 @@ public void testReadAlpJavaFloatSpotify1Parquet() throws IOException {
316317
};
317318
int expectedRows = 15000;
318319

319-
float[][] expected = readExpectedCsvFloat("/alp_float_spotify1_expect.csv", columnNames.length, expectedRows);
320+
float[][] expected = readExpectedCsvFloat("/alp_float_spotify1_expect.csv.gz", columnNames.length, expectedRows);
320321

321322
List<Group> rows = readParquetGroups(parquetPath);
322323
assertEquals("Row count should match", expectedRows, rows.size());
@@ -368,9 +369,10 @@ private void verifyAlpEncoding(Path path) throws IOException {
368369
*/
369370
private double[][] readExpectedCsv(String resourcePath, int numColumns, int expectedRows) throws IOException {
370371
double[][] columns = new double[numColumns][expectedRows];
371-
try (InputStream is = getClass().getResourceAsStream(resourcePath);
372+
try (InputStream raw = getClass().getResourceAsStream(resourcePath);
373+
InputStream is = new GZIPInputStream(raw);
372374
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
373-
assertNotNull("CSV resource not found: " + resourcePath, is);
375+
assertNotNull("CSV resource not found: " + resourcePath, raw);
374376

375377
// Skip header
376378
String header = br.readLine();
@@ -397,9 +399,10 @@ private double[][] readExpectedCsv(String resourcePath, int numColumns, int expe
397399
*/
398400
private float[][] readExpectedCsvFloat(String resourcePath, int numColumns, int expectedRows) throws IOException {
399401
float[][] columns = new float[numColumns][expectedRows];
400-
try (InputStream is = getClass().getResourceAsStream(resourcePath);
402+
try (InputStream raw = getClass().getResourceAsStream(resourcePath);
403+
InputStream is = new GZIPInputStream(raw);
401404
BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
402-
assertNotNull("CSV resource not found: " + resourcePath, is);
405+
assertNotNull("CSV resource not found: " + resourcePath, raw);
403406

404407
// Skip header
405408
String header = br.readLine();
-1.39 KB
Binary file not shown.
Binary file not shown.
Binary file not shown.
-2.38 KB
Binary file not shown.

0 commit comments

Comments (0)