fast-path

deniskuzZ · deniskuzZ · commit 49ec3e4e6a50 · 2026-06-06T20:30:57.000+03:00
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReader.java
@@ -104,6 +104,32 @@ public interface ParquetDataColumnReader {
    */
   byte[] readDecimal();
 
+  /**
+   * Reports whether this reader can supply DECIMAL_64 values as raw unscaled longs at the column's
+   * scale, with no per-row HiveDecimal/byte[] conversion -- i.e. an INT32/INT64-backed decimal whose
+   * file scale equals the requested Hive scale. This default implementation returns {@code false}.
+   * @return true when the long-backed reader may call {@link #readDecimal64()} /
+   *     {@link #readDecimal64(int)} instead of {@link #readDecimal()} / {@link #readDecimal(int)}
+   */
+  default boolean isFastDecimal64() {
+    return false;
+  }
+
+  /**
+   * @return the next value as a raw unscaled decimal64 long. Only valid when {@link #isFastDecimal64()} holds.
+   * {@link #isValid()} is set false when the value does not fit the Hive precision (caller -> NULL).
+   */
+  default long readDecimal64() {
+    throw new UnsupportedOperationException("readDecimal64() requires isFastDecimal64()");
+  }
+
+  /**
+   * @return the dictionary value at {@code id} as a raw unscaled decimal64 long. See {@link #readDecimal64()}.
+   */
+  default long readDecimal64(int id) {
+    throw new UnsupportedOperationException("readDecimal64(int) requires isFastDecimal64()");
+  }
+
   /**
    * @return the next Double from the page
    */
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
@@ -382,6 +382,19 @@ byte[] validatedScaledDecimal(int inpScale) {
       }
     }
 
+    // Bounds-check a raw unscaled decimal64 long (already at the Hive scale) against the Hive precision,
+    // taking the limit from HiveDecimalWritable rather than hand-rolling it. Used by the Decimal64
+    // identity fast path. Records the outcome in isValid and hands the input back untouched when it
+    // fits; otherwise yields 0.
+    long validatedDecimal64(long unscaledValue) {
+      final long limit = HiveDecimalWritable.getDecimal64AbsMax(hivePrecision);
+      final boolean fits = unscaledValue >= -limit && unscaledValue <= limit;
+      // Publish the verdict first: callers consult isValid() right after this call.
+      this.isValid = fits;
+      // An out-of-range value collapses to 0; the caller then marks the entry NULL.
+      return fits ? unscaledValue : 0;
+    }
+
     /**
      * Helper function to validate double data.  Sets the isValid to true if the data is valid
      * for the type it will be read in, otherwise false.
@@ -1622,6 +1635,23 @@ public byte[] readDecimal(int id) {
       hiveDecimalWritable.set(hiveDecimal);
       return super.validatedScaledDecimal(scale);
     }
+
+    @Override
+    public boolean isFastDecimal64() {
+      // Identity fast path: taken only when the file scale equals the Hive scale, so the stored unscaled
+      // int is already the Decimal64 value -- readDecimal64 skips rescale/rounding and only bounds-checks.
+      return scale == hiveScale;
+    }
+
+    @Override
+    public long readDecimal64() {
+      return validatedDecimal64(valuesReader.readInteger());
+    }
+
+    @Override
+    public long readDecimal64(int id) {
+      return validatedDecimal64(dict.decodeToInt(id));
+    }
   }
 
   /**
@@ -1784,6 +1814,23 @@ public byte[] readDecimal(int id) {
       hiveDecimalWritable.set(hiveDecimal);
       return super.validatedScaledDecimal(scale);
     }
+
+    @Override
+    public boolean isFastDecimal64() {
+      // Identity fast path: taken only when the file scale equals the Hive scale, so the stored unscaled
+      // long is already the Decimal64 value -- readDecimal64 skips rescale/rounding and only bounds-checks.
+      return scale == hiveScale;
+    }
+
+    @Override
+    public long readDecimal64() {
+      return validatedDecimal64(valuesReader.readLong());
+    }
+
+    @Override
+    public long readDecimal64(int id) {
+      return validatedDecimal64(dict.decodeToLong(id));
+    }
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedPrimitiveColumnReader.java
@@ -611,17 +611,30 @@ private void decodeDictionaryIds(
     case DECIMAL:
       if (column instanceof Decimal64ColumnVector dec64) {
         fillDecimal64PrecisionScale(dec64);
+        boolean fast = dictionary.isFastDecimal64();
         short valueScale = getEncodedDecimalScale();
         for (int i = rowId; i < rowId + num; ++i) {
           if (!column.isNull[i]) {
-            byte[] bytes = dictionary.readDecimal((int) dictionaryIds.vector[i]);
-            if (dictionary.isValid()) {
-              // set() enforces the column precision/scale and marks the entry NULL on overflow.
-              dec64.set(i, bytes, valueScale);
+            int id = (int) dictionaryIds.vector[i];
+            boolean stored;
+            if (fast) {
+              // Identity fast path: store the raw unscaled long directly.
+              long v = dictionary.readDecimal64(id);
+              stored = dictionary.isValid();
+              if (stored) {
+                dec64.vector[i] = v;
+              }
             } else {
-              setNullValue(column, i);
+              // set() enforces the column precision/scale and marks the entry NULL on overflow.
+              byte[] bytes = dictionary.readDecimal(id);
+              stored = dictionary.isValid();
+              if (stored) {
+                dec64.set(i, bytes, valueScale);
+                stored = !dec64.isNull[i];
+              }
             }
-            if (dec64.isNull[i]) {
+            if (!stored) {
+              setNullValue(column, i);
               dec64.vector[i] = 0;
             }
           }
@@ -716,22 +729,34 @@ private short[] getDecimalPrecisionScale() {
    */
   private void readDecimal64(int total, Decimal64ColumnVector c, int rowId) {
     fillDecimal64PrecisionScale(c);
+    boolean fast = dataColumn.isFastDecimal64();
     short valueScale = getEncodedDecimalScale();
     int left = total;
     while (left > 0) {
       readRepetitionAndDefinitionLevels();
       if (definitionLevel >= maxDefLevel) {
-        byte[] bytes = dataColumn.readDecimal();
-        if (dataColumn.isValid()) {
-          c.isNull[rowId] = false;
-          // set() enforces the column precision/scale and marks the entry NULL on overflow.
-          c.set(rowId, bytes, valueScale);
-          if (c.isNull[rowId]) {
-            c.vector[rowId] = 0;
-            c.isRepeating = false;
-          } else {
-            c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
+        boolean stored;
+        if (fast) {
+          // Identity fast path: store the raw unscaled long directly (no HiveDecimal/byte[] per row).
+          long v = dataColumn.readDecimal64();
+          stored = dataColumn.isValid();
+          if (stored) {
+            c.vector[rowId] = v;
+          }
+        } else {
+          // set() enforces the column precision/scale and marks the entry NULL if the value does not
+          // fit (e.g. schema-evolved data whose larger file scale can't be held at the column scale).
+          byte[] bytes = dataColumn.readDecimal();
+          stored = dataColumn.isValid();
+          if (stored) {
+            c.isNull[rowId] = false;
+            c.set(rowId, bytes, valueScale);
+            stored = !c.isNull[rowId];
           }
+        }
+        if (stored) {
+          c.isNull[rowId] = false;
+          c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]);
         } else {
           c.vector[rowId] = 0;
           setNullValue(c, rowId);
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java
@@ -134,6 +134,16 @@ public void testDecimal64ReadInt64() throws Exception {
     decimal64ReadInt64();
   }
 
+  @Test
+  public void testDecimal64ReadScaleEvolution() throws Exception {
+    decimal64ReadScaleEvolution();
+  }
+
+  @Test
+  public void testDecimal64ReadPrecisionNarrowing() throws Exception {
+    decimal64ReadPrecisionNarrowing();
+  }
+
   @Test
   public void verifyBatchOffsets() throws Exception {
     super.verifyBatchOffsets();
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/VectorizedColumnReaderTestBase.java