1919package org .apache .parquet .column .values .alp .benchmark ;
2020
2121import java .io .BufferedReader ;
22+ import java .io .FileInputStream ;
2223import java .io .IOException ;
2324import java .io .InputStream ;
2425import java .io .InputStreamReader ;
2526import java .nio .ByteBuffer ;
27+ import java .nio .file .Files ;
28+ import java .nio .file .Path ;
29+ import java .nio .file .Paths ;
2630import java .util .ArrayList ;
2731import java .util .List ;
2832import org .apache .parquet .bytes .ByteBufferInputStream ;
@@ -48,6 +52,7 @@ public class AlpCodecThroughput {
4852 private static final int WARMUP = 10 ;
4953 private static final int MEASURED = 30 ;
5054
55+ private static final String CSV_DIR = "parquet-hadoop/src/test/resources" ;
5156 private static final String DOUBLE_CSV = "alp_spotify1_expect.csv" ;
5257 private static final String FLOAT_CSV = "alp_float_spotify1_expect.csv" ;
5358
@@ -64,17 +69,17 @@ public class AlpCodecThroughput {
6469
6570 @ BeforeClass
6671 public static void setup () throws IOException {
67- // Load double columns from Spotify CSV
68- double [][] rawDoubles = loadDoubleCsv (DOUBLE_CSV );
72+ Path csvDir = findCsvDir ();
73+
74+ double [][] rawDoubles = loadDoubleCsv (csvDir .resolve (DOUBLE_CSV ));
6975 doubleColumns = new double [rawDoubles .length ][];
7076 doubleCompressed = new byte [rawDoubles .length ][];
7177 for (int c = 0 ; c < rawDoubles .length ; c ++) {
7278 doubleColumns [c ] = tile (rawDoubles [c ], TARGET_VALUES );
7379 doubleCompressed [c ] = compressDoubles (doubleColumns [c ]);
7480 }
7581
76- // Load float columns from Spotify CSV
77- float [][] rawFloats = loadFloatCsv (FLOAT_CSV );
82+ float [][] rawFloats = loadFloatCsv (csvDir .resolve (FLOAT_CSV ));
7883 floatColumns = new float [rawFloats .length ][];
7984 floatCompressed = new byte [rawFloats .length ][];
8085 for (int c = 0 ; c < rawFloats .length ; c ++) {
@@ -115,11 +120,27 @@ public void measureThroughput() throws IOException {
115120
116121 // ========== CSV loading ==========
117122
118- private static double [][] loadDoubleCsv (String resource ) throws IOException {
119- try (InputStream is = AlpCodecThroughput .class .getClassLoader ().getResourceAsStream (resource )) {
120- if (is == null ) {
121- throw new IOException ("Resource not found: " + resource );
123+ /**
124+ * Find the CSV directory. Searches from the working directory upward for the
125+ * parquet-hadoop test resources directory, so the benchmark works whether run
126+ * from the project root or from parquet-column/.
127+ */
128+ private static Path findCsvDir () throws IOException {
129+ Path dir = Paths .get ("" ).toAbsolutePath ();
130+ for (int i = 0 ; i < 3 ; i ++) {
131+ Path candidate = dir .resolve (CSV_DIR );
132+ if (Files .isDirectory (candidate ) && Files .exists (candidate .resolve (DOUBLE_CSV ))) {
133+ return candidate ;
122134 }
135+ dir = dir .getParent ();
136+ if (dir == null ) break ;
137+ }
138+ throw new IOException ("Cannot find CSV directory '" + CSV_DIR
139+ + "'. Run from the parquet-java project root." );
140+ }
141+
142+ private static double [][] loadDoubleCsv (Path csvPath ) throws IOException {
143+ try (InputStream is = new FileInputStream (csvPath .toFile ())) {
123144 BufferedReader br = new BufferedReader (new InputStreamReader (is ));
124145 String header = br .readLine ();
125146 int numCols = header .split ("," ).length ;
@@ -135,7 +156,6 @@ private static double[][] loadDoubleCsv(String resource) throws IOException {
135156 rows .add (row );
136157 }
137158
138- // Transpose: rows -> columns
139159 double [][] columns = new double [numCols ][rows .size ()];
140160 for (int r = 0 ; r < rows .size (); r ++) {
141161 double [] row = rows .get (r );
@@ -147,11 +167,8 @@ private static double[][] loadDoubleCsv(String resource) throws IOException {
147167 }
148168 }
149169
150- private static float [][] loadFloatCsv (String resource ) throws IOException {
151- try (InputStream is = AlpCodecThroughput .class .getClassLoader ().getResourceAsStream (resource )) {
152- if (is == null ) {
153- throw new IOException ("Resource not found: " + resource );
154- }
170+ private static float [][] loadFloatCsv (Path csvPath ) throws IOException {
171+ try (InputStream is = new FileInputStream (csvPath .toFile ())) {
155172 BufferedReader br = new BufferedReader (new InputStreamReader (is ));
156173 String header = br .readLine ();
157174 int numCols = header .split ("," ).length ;
0 commit comments