|
| 1 | +use datafusion::arrow::datatypes::{DataType, IntervalUnit}; |
| 2 | + |
| 3 | +/// Default data size estimate for variable-width columns when no statistics are available. |
| 4 | +/// |
| 5 | +/// Reference: Trino's PlanNodeStatsEstimate.java:40 |
| 6 | +/// https://github.com/trinodb/trino/blob/458/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsEstimate.java#L40 |
| 7 | +const DEFAULT_DATA_SIZE_PER_COLUMN: usize = 50; |
| 8 | + |
| 9 | +/// This function returns the amount of bytes each row is estimated to occupy. |
| 10 | +/// |
| 11 | +/// The estimation follows Trino's approach for calculating output size per row: |
| 12 | +/// - For fixed-width (primitive) types: uses the type's fixed byte width |
| 13 | +/// - For variable-width types: uses a default estimate plus offset overhead |
| 14 | +/// - Accounts for validity bitmap overhead (1 bit per value, rounded to 1 byte per row) |
| 15 | +/// |
| 16 | +/// DataFusion has `Statistics::calculate_total_byte_size()` which uses `DataType::primitive_width()`, |
| 17 | +/// but it returns `Precision::Absent` (unknown) when encountering any non-primitive type: |
| 18 | +/// https://github.com/apache/datafusion/blob/branch-52/datafusion/common/src/stats.rs#L326-L347 |
| 19 | +/// |
| 20 | +/// For distributed query planning, we need estimates even for variable-width types to make |
| 21 | +/// cost-based decisions about data shuffling and task count assignation. This implementation |
| 22 | +/// provides estimates for all types following Trino's cost model. |
| 23 | +/// |
| 24 | +/// Reference: Trino's PlanNodeStatsEstimate.getOutputSizeForSymbol() |
| 25 | +/// https://github.com/trinodb/trino/blob/458/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsEstimate.java#L89-L114 |
| 26 | +pub(super) fn default_bytes_for_datatype(data_type: &DataType) -> usize { |
| 27 | + // 1 byte for validity bitmap per row (Arrow uses 1 bit, but we round up for estimation). |
| 28 | + // Trino calls this the "is null" boolean array. |
| 29 | + // Reference: PlanNodeStatsEstimate.java:98-99 |
| 30 | + // https://github.com/trinodb/trino/blob/458/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsEstimate.java#L98-L99 |
| 31 | + const VALIDITY_OVERHEAD: usize = 1; |
| 32 | + |
| 33 | + // Handle non-primitive types. |
| 34 | + // NOTE: The cases below are Arrow-specific adaptations. Trino only distinguishes between |
| 35 | + // FixedWidthType and variable-width types, using Integer.BYTES (4) for offsets. |
| 36 | + // Reference: PlanNodeStatsEstimate.java:108-109 |
| 37 | + // https://github.com/trinodb/trino/blob/458/core/trino-main/src/main/java/io/trino/cost/PlanNodeStatsEstimate.java#L108-L109 |
| 38 | + match data_type { |
| 39 | + // Primitive types from data_type.primitive_width() |
| 40 | + DataType::Int8 => VALIDITY_OVERHEAD + 1, |
| 41 | + DataType::Int16 => VALIDITY_OVERHEAD + 2, |
| 42 | + DataType::Int32 => VALIDITY_OVERHEAD + 4, |
| 43 | + DataType::Int64 => VALIDITY_OVERHEAD + 8, |
| 44 | + DataType::UInt8 => VALIDITY_OVERHEAD + 1, |
| 45 | + DataType::UInt16 => VALIDITY_OVERHEAD + 2, |
| 46 | + DataType::UInt32 => VALIDITY_OVERHEAD + 4, |
| 47 | + DataType::UInt64 => VALIDITY_OVERHEAD + 8, |
| 48 | + DataType::Float16 => VALIDITY_OVERHEAD + 2, |
| 49 | + DataType::Float32 => VALIDITY_OVERHEAD + 4, |
| 50 | + DataType::Float64 => VALIDITY_OVERHEAD + 8, |
| 51 | + DataType::Timestamp(_, _) => VALIDITY_OVERHEAD + 8, |
| 52 | + DataType::Date32 => VALIDITY_OVERHEAD + 4, |
| 53 | + DataType::Date64 => VALIDITY_OVERHEAD + 8, |
| 54 | + DataType::Time32(_) => VALIDITY_OVERHEAD + 4, |
| 55 | + DataType::Time64(_) => VALIDITY_OVERHEAD + 8, |
| 56 | + DataType::Duration(_) => VALIDITY_OVERHEAD + 8, |
| 57 | + DataType::Interval(IntervalUnit::YearMonth) => VALIDITY_OVERHEAD + 4, |
| 58 | + DataType::Interval(IntervalUnit::DayTime) => VALIDITY_OVERHEAD + 8, |
| 59 | + DataType::Interval(IntervalUnit::MonthDayNano) => VALIDITY_OVERHEAD + 16, |
| 60 | + DataType::Decimal32(_, _) => VALIDITY_OVERHEAD + 4, |
| 61 | + DataType::Decimal64(_, _) => VALIDITY_OVERHEAD + 8, |
| 62 | + DataType::Decimal128(_, _) => VALIDITY_OVERHEAD + 16, |
| 63 | + DataType::Decimal256(_, _) => VALIDITY_OVERHEAD + 32, |
| 64 | + // Null type has no data (Arrow-specific) |
| 65 | + DataType::Null => 0, |
| 66 | + |
| 67 | + // Boolean is stored as bits (1/8 byte per value), but we round up (Arrow-specific) |
| 68 | + DataType::Boolean => VALIDITY_OVERHEAD + 1, |
| 69 | + |
| 70 | + // Fixed-size binary: just the fixed size + validity (Arrow-specific) |
| 71 | + DataType::FixedSizeBinary(size) => VALIDITY_OVERHEAD + (*size as usize), |
| 72 | + |
| 73 | + // Fixed-size list: fixed count * element size (Arrow-specific) |
| 74 | + DataType::FixedSizeList(field, size) => { |
| 75 | + VALIDITY_OVERHEAD + (*size as usize) * default_bytes_for_datatype(field.data_type()) |
| 76 | + } |
| 77 | + |
| 78 | + // Struct: sum of all child field sizes (Arrow-specific) |
| 79 | + // Trino would treat ROW types as variable-width |
| 80 | + DataType::Struct(fields) => fields |
| 81 | + .iter() |
| 82 | + .map(|f| default_bytes_for_datatype(f.data_type())) |
| 83 | + .sum(), |
| 84 | + |
| 85 | + // Dictionary-encoded: just the key indices, values are shared across rows (Arrow-specific) |
| 86 | + // Trino doesn't have dictionary encoding at the type level |
| 87 | + DataType::Dictionary(key_type, _value_type) => default_bytes_for_datatype(key_type), |
| 88 | + |
| 89 | + // Union: type_id (1 byte) + max child size (Arrow-specific) |
| 90 | + DataType::Union(fields, _) => { |
| 91 | + let max_child_size = fields |
| 92 | + .iter() |
| 93 | + .map(|(_, f)| default_bytes_for_datatype(f.data_type())) |
| 94 | + .max() |
| 95 | + .unwrap_or(0); |
| 96 | + 1 + max_child_size |
| 97 | + } |
| 98 | + |
| 99 | + // Run-end encoded: estimate as if it were the value type (Arrow-specific) |
| 100 | + // Actual compression depends on data distribution |
| 101 | + DataType::RunEndEncoded(_, values) => default_bytes_for_datatype(values.data_type()), |
| 102 | + |
| 103 | + // Variable-width string/binary types. |
| 104 | + // Offset size follows Trino's Integer.BYTES (4 bytes). |
| 105 | + // Reference: PlanNodeStatsEstimate.java:109 |
| 106 | + DataType::Utf8 | DataType::Binary => { |
| 107 | + VALIDITY_OVERHEAD + size_of::<i32>() + DEFAULT_DATA_SIZE_PER_COLUMN |
| 108 | + } |
| 109 | + // Large variants use i64 offsets (Arrow-specific, Trino doesn't have large variants) |
| 110 | + DataType::LargeUtf8 | DataType::LargeBinary => { |
| 111 | + VALIDITY_OVERHEAD + size_of::<i64>() + DEFAULT_DATA_SIZE_PER_COLUMN |
| 112 | + } |
| 113 | + // View types use 16-byte inline representation (Arrow-specific) |
| 114 | + // Reference: https://arrow.apache.org/docs/format/Columnar.html#variable-size-binary-view-layout |
| 115 | + DataType::Utf8View | DataType::BinaryView => VALIDITY_OVERHEAD + 16, |
| 116 | + |
| 117 | + // List types (Arrow-specific adaptation) |
| 118 | + // Spark assumes 1 element average for collections (SPARK-18853). Trino treats them |
| 119 | + // as flat variable-width with 50-byte default. We follow Spark's 1-element assumption |
| 120 | + // to avoid massive overestimation (e.g. Map<Int,String> was 605 bytes with 10 elements). |
| 121 | + DataType::List(field) => { |
| 122 | + VALIDITY_OVERHEAD + size_of::<i32>() + default_bytes_for_datatype(field.data_type()) |
| 123 | + } |
| 124 | + DataType::LargeList(field) => { |
| 125 | + VALIDITY_OVERHEAD + size_of::<i64>() + default_bytes_for_datatype(field.data_type()) |
| 126 | + } |
| 127 | + DataType::ListView(field) | DataType::LargeListView(field) => { |
| 128 | + VALIDITY_OVERHEAD + 8 + default_bytes_for_datatype(field.data_type()) |
| 129 | + } |
| 130 | + |
| 131 | + // Map type: stored as List<Struct<key, value>> (Arrow-specific) |
| 132 | + // Uses same 1-element assumption as List types (following Spark). |
| 133 | + DataType::Map(field, _) => { |
| 134 | + VALIDITY_OVERHEAD + size_of::<i32>() + default_bytes_for_datatype(field.data_type()) |
| 135 | + } // Fallback for any other types - use Trino's default |
| 136 | + } |
| 137 | +} |
0 commit comments