Skip to content

Commit dae91dc

Browse files
committed
commit
# Conflicts: # ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
1 parent f64adc6 commit dae91dc

8 files changed

Lines changed: 138 additions & 107 deletions

File tree

iceberg/iceberg-handler/src/test/queries/positive/iceberg_insert_overwrite_partition_transforms.q

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,13 @@
1919
--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/
2020

2121
set hive.explain.user=false;
22-
create external table ice_parquet_date_transform_year(
23-
bigintcol bigint,
24-
intcol integer,
25-
pcol date
26-
) partitioned by spec (year(pcol))
27-
stored by iceberg;
28-
29-
explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-05') values (1234567890123345, 2), (23456789012345678, 4);
30-
insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-05') values (1234567890123345, 2), (23456789012345678, 4);
31-
explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5);
32-
insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5);
33-
explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-12') values (3456789012345678, 4), (34567890123456789, 6);
34-
insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-12') values (3456789012345678, 4), (34567890123456789, 6);
35-
36-
select * from ice_parquet_date_transform_year;
37-
38-
explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-13') select bigintcol, intcol from ice_parquet_date_transform_year;
39-
insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-13') select bigintcol, intcol from ice_parquet_date_transform_year;
40-
explain insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-02') select 234675894076895090, intcol from ice_parquet_date_transform_year;
41-
insert overwrite table ice_parquet_date_transform_year partition (pcol = '1999-12-02') select 234675894076895090, intcol from ice_parquet_date_transform_year;
42-
43-
describe formatted ice_parquet_date_transform_year;
44-
select * from ice_parquet_date_transform_year;
45-
4622
create external table ice_parquet_date_transform_month(
4723
bigintcol bigint,
4824
pcol date,
4925
intcol integer
5026
) partitioned by spec (month(pcol))
5127
stored by iceberg;
5228

53-
explain insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-31') values (1234567890123345, 2), (23456789012345678, 4);
5429
insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-31') values (1234567890123345, 2), (23456789012345678, 4);
5530
explain insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5);
5631
insert overwrite table ice_parquet_date_transform_month partition (pcol = '1999-12-26') values (1234567890123345, 3), (23456789012345678, 5);

iceberg/iceberg-handler/src/test/results/positive/describe_iceberg_table.q.out

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,6 @@ POSTHOOK: query: DESCRIBE FORMATTED ice_t_transform
109109
POSTHOOK: type: DESCTABLE
110110
POSTHOOK: Input: default@ice_t_transform
111111
# col_name data_type comment
112-
year_field date
113-
month_field date
114-
day_field date
115-
hour_field timestamp
116-
truncate_field string
117-
bucket_field int
118-
identity_field int
119112

120113
# Partition Information
121114
# col_name data_type comment
@@ -182,13 +175,6 @@ POSTHOOK: type: DESCTABLE
182175
POSTHOOK: Input: default@ice_t_transform_prop
183176
# col_name data_type comment
184177
id int
185-
year_field date
186-
month_field date
187-
day_field date
188-
hour_field timestamp
189-
truncate_field string
190-
bucket_field int
191-
identity_field int
192178

193179
# Partition Information
194180
# col_name data_type comment
@@ -255,7 +241,6 @@ POSTHOOK: type: DESCTABLE
255241
POSTHOOK: Input: default@ice_t_identity_part
256242
# col_name data_type comment
257243
a int
258-
b string
259244

260245
# Partition Information
261246
# col_name data_type comment

ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java

Lines changed: 64 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,13 @@
2121
import java.io.IOException;
2222
import java.io.Serial;
2323
import java.io.Serializable;
24-
import java.util.ArrayList;
25-
import java.util.Arrays;
26-
import java.util.Collections;
27-
import java.util.HashMap;
28-
import java.util.HashSet;
29-
import java.util.LinkedHashMap;
30-
import java.util.List;
31-
import java.util.Map;
32-
import java.util.Objects;
33-
import java.util.Properties;
34-
import java.util.Set;
24+
import java.util.*;
3525
import java.util.stream.Collectors;
3626

3727

3828
import org.apache.commons.collections4.CollectionUtils;
3929
import org.apache.commons.lang3.StringUtils;
30+
import org.apache.commons.lang3.tuple.Pair;
4031
import org.apache.hadoop.conf.Configuration;
4132
import org.apache.hadoop.fs.FileStatus;
4233
import org.apache.hadoop.fs.FileSystem;
@@ -113,7 +104,10 @@ public class Table implements Serializable {
113104
/**
114105
* These fields are all cached fields. The information comes from tTable.
115106
*/
116-
private List<FieldSchema> cachedPartCols;
107+
private List<FieldSchema> tablePartCols;
108+
private List<FieldSchema> tableNonPartCols;
109+
private List<FieldSchema> tableAllCols;
110+
private Map<String, Pair<Integer, FieldSchema>> inputColumnIndexByName;
117111
private transient Deserializer deserializer;
118112
private Class<? extends OutputFormat> outputFormatClass;
119113
private Class<? extends InputFormat> inputFormatClass;
@@ -198,8 +192,8 @@ public Table makeCopy() {
198192

199193
newTab.setMetaTable(this.getMetaTable());
200194
newTab.setSnapshotRef(this.getSnapshotRef());
201-
if (this.cachedPartCols != null) {
202-
newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols);
195+
if (this.tablePartCols != null) {
196+
newTab.tablePartCols = new ArrayList<>(this.tablePartCols);
203197
}
204198
return newTab;
205199
}
@@ -616,15 +610,15 @@ private List<FieldSchema> getNativePartCols() {
616610
* where partition columns are not stored in the metastore.
617611
*/
618612
public List<FieldSchema> getPartCols() {
619-
if (cachedPartCols != null) {
620-
return cachedPartCols;
613+
if (tablePartCols != null) {
614+
return tablePartCols;
621615
}
622616
if (isTableTypeSet() && hasNonNativePartitionSupport()) {
623-
cachedPartCols = getStorageHandler().getPartitionKeys(this);
617+
tablePartCols = getStorageHandler().getPartitionKeys(this);
624618
} else {
625-
cachedPartCols = getNativePartCols();
619+
tablePartCols = getNativePartCols();
626620
}
627-
return cachedPartCols;
621+
return tablePartCols;
628622
}
629623

630624
private boolean isTableTypeSet() {
@@ -756,18 +750,49 @@ private boolean isField(String col) {
756750
return false;
757751
}
758752

759-
public List<FieldSchema> getCols() {
753+
private void fillColumnIndexByName() {
754+
inputColumnIndexByName = new HashMap<>();
755+
List<FieldSchema> fsList = new ArrayList<>(getColsInternal(false));
760756
if (!isNonNative()) {
761-
return getColsInternal(false);
757+
fsList.addAll(getNativePartCols());
758+
}
759+
for (int i = 0; i < fsList.size(); i++) {
760+
inputColumnIndexByName.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i)));
761+
}
762+
}
763+
764+
public int getColumnIndexByName(String colName) {
765+
if (inputColumnIndexByName == null) {
766+
fillColumnIndexByName();
767+
}
768+
return inputColumnIndexByName.get(colName.toLowerCase()).getLeft();
769+
}
770+
771+
public FieldSchema getFieldSchemaByName(String colName) {
772+
if (inputColumnIndexByName == null) {
773+
fillColumnIndexByName();
774+
}
775+
return inputColumnIndexByName.get(colName).getRight();
776+
}
777+
778+
public List<FieldSchema> getCols() {
779+
if (tableNonPartCols != null) {
780+
return tableNonPartCols;
762781
}
763-
List<FieldSchema> nonPartFields = new ArrayList<>();
764-
Set<String> partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet());
765-
for (FieldSchema field : getColsInternal(false)) {
766-
if (!partFieldsName.contains(field.getName())) {
767-
nonPartFields.add(field);
782+
tableNonPartCols = new ArrayList<>();
783+
if (!isNonNative()) {
784+
tableNonPartCols.addAll(getColsInternal(false));
785+
} else {
786+
List<FieldSchema> nonPartFields = new ArrayList<>();
787+
Set<String> partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet());
788+
for (FieldSchema field : getColsInternal(false)) {
789+
if (!partFieldsName.contains(field.getName())) {
790+
nonPartFields.add(field);
791+
}
768792
}
793+
tableNonPartCols = nonPartFields;
769794
}
770-
return nonPartFields;
795+
return tableNonPartCols;
771796
}
772797

773798
public List<FieldSchema> getColsForMetastore() {
@@ -800,9 +825,18 @@ private List<FieldSchema> getColsInternal(boolean forMs) {
800825
* @return List&lt;FieldSchema&gt;
801826
*/
802827
public List<FieldSchema> getAllCols() {
803-
ArrayList<FieldSchema> allCols = new ArrayList<>(getCols());
804-
allCols.addAll(getPartCols());
805-
return allCols;
828+
if (tableAllCols != null) {
829+
return tableAllCols;
830+
}
831+
if (inputColumnIndexByName == null) {
832+
fillColumnIndexByName();
833+
}
834+
TreeMap<Integer, FieldSchema> orderedMap = new TreeMap<>();
835+
for (Map.Entry<String, Pair<Integer, FieldSchema>> e : inputColumnIndexByName.entrySet()) {
836+
orderedMap.put(e.getValue().getLeft(), e.getValue().getRight());
837+
}
838+
tableAllCols = orderedMap.values().stream().toList();
839+
return tableAllCols;
806840
}
807841

808842
public void setPartCols(List<FieldSchema> partCols) {

ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import com.google.common.collect.Iterables;
2727
import com.google.common.collect.Lists;
2828
import com.google.common.collect.Multimap;
29+
import com.google.common.collect.Sets;
2930

3031
import java.util.Map.Entry;
3132
import java.util.Optional;
@@ -3013,14 +3014,16 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
30133014
ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
30143015

30153016
// 3.2 Add column info corresponding to partition columns
3016-
for (FieldSchema part_col : tabMetaData.getPartCols()) {
3017-
colName = part_col.getName();
3018-
colInfo = new ColumnInfo(colName,
3019-
TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()),
3020-
isNullable(colName, nnc, pkc), tableAlias, true);
3021-
rr.put(tableAlias, colName, colInfo);
3022-
cInfoLst.add(colInfo);
3023-
partitionColumns.add(colInfo);
3017+
if (!tabMetaData.hasNonNativePartitionSupport()) {
3018+
for (FieldSchema part_col : tabMetaData.getPartCols()) {
3019+
colName = part_col.getName();
3020+
colInfo = new ColumnInfo(colName,
3021+
TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()),
3022+
isNullable(colName, nnc, pkc), tableAlias, true);
3023+
rr.put(tableAlias, colName, colInfo);
3024+
cInfoLst.add(colInfo);
3025+
partitionColumns.add(colInfo);
3026+
}
30243027
}
30253028

30263029
final TableType tableType = obtainTableType(tabMetaData);

ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsAutoGatherContext.java

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -236,24 +236,19 @@ private Operator genSelOp(String command, boolean rewritten, Context origCtx)
236236
/**
237237
* @param operator : the select operator in the analyze statement
238238
* @param input : the operator right before FS in the insert overwrite statement
239-
* @throws HiveException
239+
* @throws HiveException
240240
*/
241241
private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? extends OperatorDesc> input)
242242
throws HiveException {
243243
RowSchema selRS = operator.getSchema();
244244
List<ColumnInfo> signature = new ArrayList<>();
245245
OpParseContext inputCtx = sa.opParseCtx.get(input);
246246
RowResolver inputRR = inputCtx.getRowResolver();
247-
List<ColumnInfo> columns = inputRR.getColumnInfos();
247+
List<ColumnInfo> inputColumns = inputRR.getColumnInfos();
248248
List<ExprNodeDesc> colList = new ArrayList<ExprNodeDesc>();
249249
List<String> columnNames = new ArrayList<String>();
250250
Map<String, ExprNodeDesc> columnExprMap =
251251
new HashMap<String, ExprNodeDesc>();
252-
// the column positions in the operator should be like this
253-
// <----non-partition columns---->|<--static partition columns-->|<--dynamic partition columns-->
254-
// ExprNodeColumnDesc | ExprNodeConstantDesc | ExprNodeColumnDesc
255-
// from input | generate itself | from input
256-
// |
257252

258253
// 1. deal with non-partition columns
259254
Map<String, Integer> columnNameToIndex = new HashMap<>();
@@ -262,7 +257,12 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? ex
262257
columnNameToIndex.putIfAbsent(selRSSig.get(i).getAlias(), i);
263258
}
264259
for (int i = 0; i < this.columns.size(); i++) {
265-
ColumnInfo col = columns.get(i);
260+
FieldSchema columnSchema = this.columns.get(i);
261+
Integer inputIdx = tbl.getColumnIndexByName(columnSchema.getName().toLowerCase());
262+
if (inputIdx == null || inputIdx >= inputColumns.size()) {
263+
continue;
264+
}
265+
ColumnInfo col = inputColumns.get(inputIdx);
266266
Integer selRSIdx = getSelRSColumnIndex(i, col, columnNameToIndex);
267267
if (selRSIdx == null) {
268268
continue;
@@ -294,7 +294,12 @@ private void replaceSelectOperatorProcess(SelectOperator operator, Operator<? ex
294294
// 3. dynamic partition columns
295295
else {
296296
dynamicPartBegin++;
297-
ColumnInfo col = columns.get(this.columns.size() + dynamicPartBegin);
297+
Integer inputIdx = tbl.getColumnIndexByName(partColName.toLowerCase());
298+
if (inputIdx == null || inputIdx >= inputColumns.size()) {
299+
throw new SemanticException("Unable to resolve dynamic partition column '" + partColName
300+
+ "' from input columns " + inputRR.getColumnInfos());
301+
}
302+
ColumnInfo col = inputColumns.get(inputIdx);
298303
exprNodeDesc = new ExprNodeColumnDesc(col);
299304
srcType = col.getType();
300305

ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ private void handlePartialPartitionSpec(Map<String, String> partSpec, ColumnStat
159159
}
160160

161161
// User might have only specified partial list of partition keys, in which case add other partition keys in partSpec
162-
List<String> partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys());
162+
List<String> partKeys = Utilities.getColumnNamesFromFieldSchema(tbl.getPartCols());
163163
for (String partKey : partKeys) {
164164
if (!partSpec.containsKey(partKey)) {
165165
partSpec.put(partKey, null);
@@ -174,14 +174,14 @@ private void handlePartialPartitionSpec(Map<String, String> partSpec, ColumnStat
174174
}
175175
}
176176

177-
private static CharSequence genPartitionClause(Table tbl, List<TransformSpec> partTransformSpec, int specId,
177+
private static CharSequence genPartitionClause(Table tbl, List<TransformSpec> partTransformSpec, int specId,
178178
Map<String, String> partSpec, HiveConf conf) {
179179
boolean predPresent = partSpec.values().stream().anyMatch(Objects::nonNull);
180-
180+
181181
StringBuilder whereClause = new StringBuilder(" where ").append(
182182
partSpec.entrySet().stream()
183183
.filter(part -> part.getValue() != null)
184-
.map(part -> unparseIdentifier(part.getKey(), conf) + " = "
184+
.map(part -> unparseIdentifier(part.getKey(), conf) + " = "
185185
+ genPartValueString(getColTypeOf(tbl, part.getKey()), part.getValue()))
186186
.collect(Collectors.joining(" and "))
187187
);
@@ -271,7 +271,7 @@ private String genRewrittenQuery(FieldSchemas columnSchemas, HiveConf conf,
271271
* included in the input table.
272272
*/
273273
protected static String genRewrittenQuery(Table tbl,
274-
HiveConf conf, List<TransformSpec> partTransformSpec, Map<String, String> partSpec,
274+
HiveConf conf, List<TransformSpec> partTransformSpec, Map<String, String> partSpec,
275275
boolean isPartitionStats) {
276276
return ColumnStatsSemanticAnalyzer.genRewrittenQuery(tbl, getStatsEligibleFieldSchemas(tbl), conf,
277277
partTransformSpec, -1, partSpec, isPartitionStats, true);
@@ -296,7 +296,7 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas,
296296
final TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(columnSchema.getType());
297297

298298
try {
299-
genComputeStats(rewrittenQueryBuilder, conf, i, columnName, typeInfo);
299+
genComputeStats(rewrittenQueryBuilder, conf, tbl.getColumnIndexByName(columnSchema.getName()), columnName, typeInfo);
300300
} catch (SemanticException e) {
301301
throw new RuntimeException(e);
302302
}
@@ -308,17 +308,26 @@ private static String genRewrittenQuery(Table tbl, FieldSchemas columnSchemas,
308308
}
309309

310310
if (isPartitionStats) {
311-
if (partTransformSpec == null) {
312-
for (FieldSchema fs : tbl.getPartCols()) {
313-
String identifier = unparseIdentifier(fs.getName(), conf);
314-
rewrittenQueryBuilder.append(", ").append(identifier);
315-
columnNamesBuilder.append(", ").append(identifier);
316-
317-
columnDummyValuesBuilder.append(", cast(null as ")
318-
.append(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()).toString())
319-
.append(")");
311+
for (FieldSchema fs : tbl.getPartCols()) {
312+
String identifier = unparseIdentifier(fs.getName(), conf);
313+
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(fs.getType().toString());
314+
rewrittenQueryBuilder.append(", ");
315+
if (tbl.hasNonNativePartitionSupport()) {
316+
try {
317+
genComputeStats(rewrittenQueryBuilder, conf, tbl.getColumnIndexByName(fs.getName()), identifier, typeInfo);
318+
} catch (SemanticException e) {
319+
throw new RuntimeException(e);
320+
}
321+
} else {
322+
rewrittenQueryBuilder.append(identifier);
320323
}
321-
} else {
324+
columnNamesBuilder.append(", ").append(identifier);
325+
326+
columnDummyValuesBuilder.append(", cast(null as ")
327+
.append(typeInfo)
328+
.append(")");
329+
}
330+
if (partTransformSpec != null) {
322331
rewrittenQueryBuilder.append(", ")
323332
.append(TransformSpec.toNamedStruct(partTransformSpec, conf));
324333
}

0 commit comments

Comments
 (0)