Skip to content

Commit a6ddfcb

Browse files
authored
Merge branch 'main' into fix_comet_sum_compatbility_level
2 parents 2ad497d + 7cd0aaf commit a6ddfcb

35 files changed

Lines changed: 161 additions & 939 deletions

.claude/skills/bug-triage/SKILL.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,7 @@ For each issue, review the title and body and determine:
6565
2. **Area labels** (zero or more): from the area table in the guide
6666
(`area:writer`, `area:shuffle`, `area:aggregation`, `area:scan`,
6767
`area:expressions`, `area:ffi`, `area:ci`) plus the pre-existing area
68-
indicators (`native_datafusion`, `native_iceberg_compat`, `spark 4`,
69-
`spark sql tests`).
68+
indicators (`spark 4`, `spark sql tests`).
7069
3. **Escalation note**: if the issue matches an escalation trigger from the
7170
guide (e.g., a `priority:high` crash that may also produce wrong results),
7271
note it in the summary.

.github/workflows/codeql.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,11 @@ jobs:
4949
persist-credentials: false
5050

5151
- name: Initialize CodeQL
52-
uses: github/codeql-action/init@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4
52+
uses: github/codeql-action/init@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4
5353
with:
5454
languages: actions
5555

5656
- name: Perform CodeQL Analysis
57-
uses: github/codeql-action/analyze@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4
57+
uses: github/codeql-action/analyze@9e0d7b8d25671d64c341c19c0152d693099fb5ba # v4
5858
with:
5959
category: "/language:actions"

.github/workflows/stale.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
issues: write
2828
pull-requests: write
2929
steps:
30-
- uses: actions/stale@b5d41d4e1d5dceea10e7104786b73624c18a190f # v10.2.0
30+
- uses: actions/stale@eb5cf3af3ac0a1aa4c9c45633dd1ae542a27a899 # v10.3.0
3131
with:
3232
stale-pr-message: "Thank you for your contribution. Unfortunately, this pull request is stale because it has been open 60 days with no activity. Please remove the stale label or comment or this will be closed in 7 days."
3333
days-before-pr-stale: 60

native/Cargo.lock

Lines changed: 44 additions & 28 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

native/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ regex = "1.12.3"
5656
thiserror = "2"
5757
object_store = { version = "0.13.1", features = ["gcp", "azure", "aws", "http"] }
5858
url = "2.2"
59-
aws-config = "1.8.16"
59+
aws-config = "1.8.17"
6060
aws-credential-types = "1.2.13"
6161
iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "1ad4bfd" }
6262
iceberg-storage-opendal = { git = "https://github.com/apache/iceberg-rust", rev = "1ad4bfd", features = ["opendal-all"] }

native/core/src/execution/columnar_to_row.rs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,10 +1052,10 @@ impl ColumnarToRowContext {
10521052
})
10531053
}
10541054
(DataType::Int32, DataType::Decimal128(precision, scale)) => {
1055-
// Parquet stores small-precision decimals as Int32 for efficiency.
1056-
// When COMET_USE_DECIMAL_128 is false, BatchReader produces these types.
1057-
// The Int32 value is already scaled (e.g., -1 means -0.01 for scale 2).
1058-
// We need to reinterpret (not cast) to Decimal128 preserving the value.
1055+
// Parquet stores small-precision decimals as Int32 for efficiency, and the
1056+
// reader may surface them as the physical Int32 type. The value is already
1057+
// scaled (e.g., -1 means -0.01 for scale 2). Reinterpret (not cast) to
1058+
// Decimal128 preserving the value.
10591059
let int_array = array.as_any().downcast_ref::<Int32Array>().ok_or_else(|| {
10601060
CometError::Internal("Failed to downcast to Int32Array".to_string())
10611061
})?;
@@ -2581,8 +2581,7 @@ mod tests {
25812581
#[test]
25822582
fn test_convert_int32_to_decimal128() {
25832583
// Test that Int32 arrays are correctly reinterpreted as Decimal128 (value preserved, not rescaled) when the schema expects Decimal128.
2584-
// This can happen when COMET_USE_DECIMAL_128 is false and the parquet reader produces
2585-
// Int32 for small-precision decimals.
2584+
// This can happen when the parquet reader surfaces small-precision decimals as Int32.
25862585

25872586
// Create an Int32 array representing decimals: [-1, -2, -3] which at scale 2 means
25882587
// [-0.01, -0.02, -0.03]
@@ -2619,8 +2618,7 @@ mod tests {
26192618
#[test]
26202619
fn test_convert_int64_to_decimal128() {
26212620
// Test that Int64 arrays are correctly converted to Decimal128 when the schema expects Decimal128.
2622-
// This can happen when COMET_USE_DECIMAL_128 is false and the parquet reader produces
2623-
// Int64 for medium-precision decimals.
2621+
// This can happen when the parquet reader surfaces medium-precision decimals as Int64.
26242622

26252623
// Create an Int64 array representing decimals
26262624
let int_array: ArrayRef = Arc::new(Int64Array::from(vec![-100i64, -200, -300]));

native/core/src/parquet/parquet_exec.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,7 @@ use datafusion_datasource::TableSchema;
3838
use std::collections::HashMap;
3939
use std::sync::Arc;
4040

41-
/// Initializes a DataSourceExec plan with a ParquetSource. This may be used by either the
42-
/// `native_datafusion` scan or the `native_iceberg_compat` scan.
41+
/// Initializes a DataSourceExec plan with a ParquetSource for Comet's native Parquet scan.
4342
///
4443
/// `required_schema`: Schema to be projected by the scan.
4544
///

native/core/src/parquet/parquet_support.rs

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,6 @@ pub struct SparkParquetOptions {
7474
pub allow_incompat: bool,
7575
/// Support casting unsigned ints to signed ints (used by Parquet SchemaAdapter)
7676
pub allow_cast_unsigned_ints: bool,
77-
/// Whether to always represent decimals using 128 bits. If false, the native reader may represent decimals using 32 or 64 bits, depending on the precision.
78-
pub use_decimal_128: bool,
7977
/// Whether to read dates/timestamps that were written in the legacy hybrid Julian + Gregorian calendar as-is. If false, an exception is thrown instead. If the Spark type is TimestampNTZ, this should be true.
8078
pub use_legacy_date_timestamp_or_ntz: bool,
8179
/// Whether schema field names are case sensitive
@@ -105,7 +103,6 @@ impl SparkParquetOptions {
105103
timezone: timezone.to_string(),
106104
allow_incompat,
107105
allow_cast_unsigned_ints: false,
108-
use_decimal_128: false,
109106
use_legacy_date_timestamp_or_ntz: false,
110107
case_sensitive: false,
111108
return_null_struct_if_all_fields_missing: true,
@@ -121,7 +118,6 @@ impl SparkParquetOptions {
121118
timezone: "".to_string(),
122119
allow_incompat,
123120
allow_cast_unsigned_ints: false,
124-
use_decimal_128: false,
125121
use_legacy_date_timestamp_or_ntz: false,
126122
case_sensitive: false,
127123
return_null_struct_if_all_fields_missing: true,

spark/src/main/java/org/apache/comet/vector/CometDecodedVector.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,13 +40,12 @@ public abstract class CometDecodedVector extends CometVector {
4040
private byte validityByteCache;
4141
protected boolean isUuid;
4242

43-
protected CometDecodedVector(ValueVector vector, Field valueField, boolean useDecimal128) {
44-
this(vector, valueField, useDecimal128, false);
43+
protected CometDecodedVector(ValueVector vector, Field valueField) {
44+
this(vector, valueField, false);
4545
}
4646

47-
protected CometDecodedVector(
48-
ValueVector vector, Field valueField, boolean useDecimal128, boolean isUuid) {
49-
super(Utils.fromArrowField(valueField), useDecimal128);
47+
protected CometDecodedVector(ValueVector vector, Field valueField, boolean isUuid) {
48+
super(Utils.fromArrowField(valueField));
5049
this.valueVector = vector;
5150
this.numNulls = valueVector.getNullCount();
5251
this.numValues = valueVector.getValueCount();

spark/src/main/java/org/apache/comet/vector/CometDelegateVector.java

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,11 @@ public class CometDelegateVector extends CometVector {
3333
protected CometVector delegate;
3434

3535
public CometDelegateVector(DataType dataType) {
36-
this(dataType, null, false);
36+
this(dataType, null);
3737
}
3838

39-
public CometDelegateVector(DataType dataType, boolean useDecimal128) {
40-
this(dataType, null, useDecimal128);
41-
}
42-
43-
public CometDelegateVector(DataType dataType, CometVector delegate, boolean useDecimal128) {
44-
super(dataType, useDecimal128);
39+
public CometDelegateVector(DataType dataType, CometVector delegate) {
40+
super(dataType);
4541
if (delegate instanceof CometDelegateVector) {
4642
throw new IllegalArgumentException("cannot have nested delegation");
4743
}

0 commit comments

Comments
 (0)