|
| 1 | +package com.databricks.jdbc.api.impl; |
| 2 | + |
| 3 | +import com.databricks.jdbc.exception.DatabricksSQLException; |
| 4 | +import com.databricks.jdbc.model.client.thrift.generated.TColumn; |
| 5 | +import com.databricks.jdbc.model.client.thrift.generated.TRowSet; |
| 6 | +import com.databricks.jdbc.model.telemetry.enums.DatabricksDriverErrorCode; |
| 7 | +import java.util.BitSet; |
| 8 | +import java.util.List; |
| 9 | + |
| 10 | +/** |
| 11 | + * Memory-efficient columnar view that provides row-based access without materializing all rows. |
| 12 | + * Instead of creating List<List<Object>>, this class provides direct access to columnar data on a |
| 13 | + * per-row, per-column basis, significantly reducing memory allocations. |
| 14 | + */ |
| 15 | +public class ColumnarRowView { |
| 16 | + private final List<TColumn> columns; |
| 17 | + private final int rowCount; |
| 18 | + private final ColumnAccessor[] columnAccessors; |
| 19 | + |
| 20 | + public ColumnarRowView(TRowSet rowSet) throws DatabricksSQLException { |
| 21 | + this.columns = rowSet != null ? rowSet.getColumns() : null; |
| 22 | + |
| 23 | + if (columns == null || columns.isEmpty()) { |
| 24 | + this.rowCount = 0; |
| 25 | + this.columnAccessors = new ColumnAccessor[0]; |
| 26 | + } else { |
| 27 | + this.rowCount = getRowCountFromFirstColumn(); |
| 28 | + this.columnAccessors = new ColumnAccessor[columns.size()]; |
| 29 | + for (int i = 0; i < columns.size(); i++) { |
| 30 | + this.columnAccessors[i] = createColumnAccessor(columns.get(i)); |
| 31 | + } |
| 32 | + } |
| 33 | + } |
| 34 | + |
| 35 | + /** Gets the number of rows in this view. */ |
| 36 | + public int getRowCount() { |
| 37 | + return rowCount; |
| 38 | + } |
| 39 | + |
| 40 | + /** Gets the number of columns in this view. */ |
| 41 | + public int getColumnCount() { |
| 42 | + return columns != null ? columns.size() : 0; |
| 43 | + } |
| 44 | + |
| 45 | + /** Gets the value at the specified row and column without materializing the entire row. */ |
| 46 | + public Object getValue(int rowIndex, int columnIndex) throws DatabricksSQLException { |
| 47 | + if (rowIndex < 0 || rowIndex >= rowCount) { |
| 48 | + throw new DatabricksSQLException( |
| 49 | + "Row index out of bounds: " + rowIndex, DatabricksDriverErrorCode.INVALID_STATE); |
| 50 | + } |
| 51 | + if (columnIndex < 0 || columnIndex >= columnAccessors.length) { |
| 52 | + throw new DatabricksSQLException( |
| 53 | + "Column index out of bounds: " + columnIndex, DatabricksDriverErrorCode.INVALID_STATE); |
| 54 | + } |
| 55 | + |
| 56 | + return columnAccessors[columnIndex].getValue(rowIndex); |
| 57 | + } |
| 58 | + |
| 59 | + /** |
| 60 | + * Creates a materialized row only when explicitly requested (for backward compatibility). This |
| 61 | + * should be avoided in performance-critical paths. |
| 62 | + */ |
| 63 | + public Object[] materializeRow(int rowIndex) throws DatabricksSQLException { |
| 64 | + if (rowIndex < 0 || rowIndex >= rowCount) { |
| 65 | + throw new DatabricksSQLException( |
| 66 | + "Row index out of bounds: " + rowIndex, DatabricksDriverErrorCode.INVALID_STATE); |
| 67 | + } |
| 68 | + |
| 69 | + Object[] row = new Object[columnAccessors.length]; |
| 70 | + for (int col = 0; col < columnAccessors.length; col++) { |
| 71 | + row[col] = columnAccessors[col].getValue(rowIndex); |
| 72 | + } |
| 73 | + return row; |
| 74 | + } |
| 75 | + |
| 76 | + private int getRowCountFromFirstColumn() throws DatabricksSQLException { |
| 77 | + if (columns.isEmpty()) { |
| 78 | + return 0; |
| 79 | + } |
| 80 | + TColumn firstColumn = columns.get(0); |
| 81 | + return getColumnSize(firstColumn); |
| 82 | + } |
| 83 | + |
| 84 | + private static int getColumnSize(TColumn column) throws DatabricksSQLException { |
| 85 | + if (column.isSetBinaryVal()) return column.getBinaryVal().getValuesSize(); |
| 86 | + if (column.isSetBoolVal()) return column.getBoolVal().getValuesSize(); |
| 87 | + if (column.isSetByteVal()) return column.getByteVal().getValuesSize(); |
| 88 | + if (column.isSetDoubleVal()) return column.getDoubleVal().getValuesSize(); |
| 89 | + if (column.isSetI16Val()) return column.getI16Val().getValuesSize(); |
| 90 | + if (column.isSetI32Val()) return column.getI32Val().getValuesSize(); |
| 91 | + if (column.isSetI64Val()) return column.getI64Val().getValuesSize(); |
| 92 | + if (column.isSetStringVal()) return column.getStringVal().getValuesSize(); |
| 93 | + |
| 94 | + throw new DatabricksSQLException( |
| 95 | + "Unsupported column type: " + column, DatabricksDriverErrorCode.UNSUPPORTED_OPERATION); |
| 96 | + } |
| 97 | + |
| 98 | + private static ColumnAccessor createColumnAccessor(TColumn column) throws DatabricksSQLException { |
| 99 | + if (column.isSetBinaryVal()) { |
| 100 | + return new TypedColumnAccessor<>( |
| 101 | + column.getBinaryVal().getValues(), column.getBinaryVal().getNulls()); |
| 102 | + } |
| 103 | + if (column.isSetBoolVal()) { |
| 104 | + return new TypedColumnAccessor<>( |
| 105 | + column.getBoolVal().getValues(), column.getBoolVal().getNulls()); |
| 106 | + } |
| 107 | + if (column.isSetByteVal()) { |
| 108 | + return new TypedColumnAccessor<>( |
| 109 | + column.getByteVal().getValues(), column.getByteVal().getNulls()); |
| 110 | + } |
| 111 | + if (column.isSetDoubleVal()) { |
| 112 | + return new TypedColumnAccessor<>( |
| 113 | + column.getDoubleVal().getValues(), column.getDoubleVal().getNulls()); |
| 114 | + } |
| 115 | + if (column.isSetI16Val()) { |
| 116 | + return new TypedColumnAccessor<>( |
| 117 | + column.getI16Val().getValues(), column.getI16Val().getNulls()); |
| 118 | + } |
| 119 | + if (column.isSetI32Val()) { |
| 120 | + return new TypedColumnAccessor<>( |
| 121 | + column.getI32Val().getValues(), column.getI32Val().getNulls()); |
| 122 | + } |
| 123 | + if (column.isSetI64Val()) { |
| 124 | + return new TypedColumnAccessor<>( |
| 125 | + column.getI64Val().getValues(), column.getI64Val().getNulls()); |
| 126 | + } |
| 127 | + if (column.isSetStringVal()) { |
| 128 | + return new TypedColumnAccessor<>( |
| 129 | + column.getStringVal().getValues(), column.getStringVal().getNulls()); |
| 130 | + } |
| 131 | + |
| 132 | + throw new DatabricksSQLException( |
| 133 | + "Unsupported column type: " + column, DatabricksDriverErrorCode.UNSUPPORTED_OPERATION); |
| 134 | + } |
| 135 | + |
| 136 | + /** Interface for accessing column values by index without materializing the entire column. */ |
| 137 | + private interface ColumnAccessor { |
| 138 | + Object getValue(int rowIndex); |
| 139 | + } |
| 140 | + |
| 141 | + /** Memory-efficient column accessor that handles nulls and provides direct index-based access. */ |
| 142 | + private static class TypedColumnAccessor<T> implements ColumnAccessor { |
| 143 | + private final List<T> values; |
| 144 | + private final BitSet nullBits; |
| 145 | + |
| 146 | + public TypedColumnAccessor(List<T> values, byte[] nulls) { |
| 147 | + this.values = values; |
| 148 | + this.nullBits = nulls != null ? BitSet.valueOf(nulls) : null; |
| 149 | + } |
| 150 | + |
| 151 | + @Override |
| 152 | + public Object getValue(int rowIndex) { |
| 153 | + if (nullBits != null && nullBits.get(rowIndex)) { |
| 154 | + return null; |
| 155 | + } |
| 156 | + return values.get(rowIndex); |
| 157 | + } |
| 158 | + } |
| 159 | +} |
0 commit comments