|
32 | 32 | import java.util.Objects; |
33 | 33 | import java.util.Properties; |
34 | 34 | import java.util.Set; |
| 35 | +import java.util.TreeMap; |
35 | 36 | import java.util.stream.Collectors; |
36 | 37 |
|
37 | 38 |
|
38 | 39 | import org.apache.commons.collections4.CollectionUtils; |
39 | 40 | import org.apache.commons.lang3.StringUtils; |
| 41 | +import org.apache.commons.lang3.tuple.Pair; |
40 | 42 | import org.apache.hadoop.conf.Configuration; |
41 | 43 | import org.apache.hadoop.fs.FileStatus; |
42 | 44 | import org.apache.hadoop.fs.FileSystem; |
@@ -113,7 +115,10 @@ public class Table implements Serializable { |
113 | 115 | /** |
114 | 116 | * These fields are all cached fields. The information comes from tTable. |
115 | 117 | */ |
116 | | - private List<FieldSchema> cachedPartCols; |
| 118 | + private List<FieldSchema> tablePartCols; |
| 119 | + private List<FieldSchema> tableNonPartCols; |
| 120 | + private List<FieldSchema> tableAllCols; |
| 121 | + private Map<String, Pair<Integer, FieldSchema>> inputColumnIndexByName; |
117 | 122 | private transient Deserializer deserializer; |
118 | 123 | private Class<? extends OutputFormat> outputFormatClass; |
119 | 124 | private Class<? extends InputFormat> inputFormatClass; |
@@ -198,8 +203,8 @@ public Table makeCopy() { |
198 | 203 |
|
199 | 204 | newTab.setMetaTable(this.getMetaTable()); |
200 | 205 | newTab.setSnapshotRef(this.getSnapshotRef()); |
201 | | - if (this.cachedPartCols != null) { |
202 | | - newTab.cachedPartCols = new ArrayList<>(this.cachedPartCols); |
| 206 | + if (this.tablePartCols != null) { |
| 207 | + newTab.tablePartCols = new ArrayList<>(this.tablePartCols); |
203 | 208 | } |
204 | 209 | return newTab; |
205 | 210 | } |
@@ -616,15 +621,15 @@ private List<FieldSchema> getNativePartCols() { |
616 | 621 | * where partition columns are not stored in the metastore. |
617 | 622 | */ |
618 | 623 | public List<FieldSchema> getPartCols() { |
619 | | - if (cachedPartCols != null) { |
620 | | - return cachedPartCols; |
| 624 | + if (tablePartCols != null) { |
| 625 | + return tablePartCols; |
621 | 626 | } |
622 | 627 | if (isTableTypeSet() && hasNonNativePartitionSupport()) { |
623 | | - cachedPartCols = getStorageHandler().getPartitionKeys(this); |
| 628 | + tablePartCols = getStorageHandler().getPartitionKeys(this); |
624 | 629 | } else { |
625 | | - cachedPartCols = getNativePartCols(); |
| 630 | + tablePartCols = getNativePartCols(); |
626 | 631 | } |
627 | | - return cachedPartCols; |
| 632 | + return tablePartCols; |
628 | 633 | } |
629 | 634 |
|
630 | 635 | private boolean isTableTypeSet() { |
@@ -756,18 +761,48 @@ private boolean isField(String col) { |
756 | 761 | return false; |
757 | 762 | } |
758 | 763 |
|
759 | | - public List<FieldSchema> getCols() { |
| 764 | + private void fillColumnIndexByName() { |
| 765 | + inputColumnIndexByName = new HashMap<>(); |
| 766 | + List<FieldSchema> fsList = new ArrayList<>(getColsInternal(false)); |
760 | 767 | if (!isNonNative()) { |
761 | | - return getColsInternal(false); |
| 768 | + fsList.addAll(getNativePartCols()); |
| 769 | + } |
| 770 | + for (int i = 0; i < fsList.size(); i++) { |
| 771 | + inputColumnIndexByName.put(fsList.get(i).getName(), Pair.of(i, fsList.get(i))); |
| 772 | + } |
| 773 | + } |
| 774 | + |
| 775 | + public int getColumnIndexByName(String colName) { |
| 776 | + if (inputColumnIndexByName == null) { |
| 777 | + fillColumnIndexByName(); |
762 | 778 | } |
763 | | - List<FieldSchema> nonPartFields = new ArrayList<>(); |
764 | | - Set<String> partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); |
765 | | - for (FieldSchema field : getColsInternal(false)) { |
766 | | - if (!partFieldsName.contains(field.getName())) { |
767 | | - nonPartFields.add(field); |
| 779 | + return inputColumnIndexByName.get(colName.toLowerCase()).getLeft(); |
| 780 | + } |
| 781 | + |
| 782 | + public FieldSchema getFieldSchemaByName(String colName) { |
| 783 | + if (inputColumnIndexByName == null) { |
| 784 | + fillColumnIndexByName(); |
| 785 | + } |
| 786 | + return inputColumnIndexByName.get(colName).getRight(); |
| 787 | + } |
| 788 | + |
| 789 | + public List<FieldSchema> getCols() { |
| 790 | + if (tableNonPartCols != null) { |
| 791 | + return tableNonPartCols; |
| 792 | + } |
| 793 | + if (!isNonNative()) { |
| 794 | + tableNonPartCols = getColsInternal(false); |
| 795 | + } else { |
| 796 | + List<FieldSchema> nonPartFields = new ArrayList<>(); |
| 797 | + Set<String> partFieldsName = getPartCols().stream().map(FieldSchema::getName).collect(Collectors.toSet()); |
| 798 | + for (FieldSchema field : getColsInternal(false)) { |
| 799 | + if (!partFieldsName.contains(field.getName())) { |
| 800 | + nonPartFields.add(field); |
| 801 | + } |
768 | 802 | } |
| 803 | + tableNonPartCols = nonPartFields; |
769 | 804 | } |
770 | | - return nonPartFields; |
| 805 | + return tableNonPartCols; |
771 | 806 | } |
772 | 807 |
|
773 | 808 | public List<FieldSchema> getColsForMetastore() { |
@@ -800,9 +835,11 @@ private List<FieldSchema> getColsInternal(boolean forMs) { |
800 | 835 | * @return List<FieldSchema> |
801 | 836 | */ |
802 | 837 | public List<FieldSchema> getAllCols() { |
803 | | - ArrayList<FieldSchema> allCols = new ArrayList<>(getCols()); |
804 | | - allCols.addAll(getPartCols()); |
805 | | - return allCols; |
| 838 | + List<FieldSchema> fsList = new ArrayList<>(getColsInternal(false)); |
| 839 | + if (!isNonNative()) { |
| 840 | + fsList.addAll(getNativePartCols()); |
| 841 | + } |
| 842 | + return fsList; |
806 | 843 | } |
807 | 844 |
|
808 | 845 | public void setPartCols(List<FieldSchema> partCols) { |
|
0 commit comments