|
18 | 18 |
|
19 | 19 | package org.apache.hadoop.hive.ql.stats; |
20 | 20 |
|
| 21 | +import static org.junit.Assert.assertFalse; |
| 22 | +import static org.junit.Assert.assertTrue; |
21 | 23 | import static org.junit.jupiter.api.Assertions.assertEquals; |
22 | 24 | import static org.junit.jupiter.api.Assertions.assertNotEquals; |
23 | 25 | import static org.junit.jupiter.api.Assertions.assertNotNull; |
|
41 | 43 | import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; |
42 | 44 | import org.apache.hadoop.hive.metastore.api.Timestamp; |
43 | 45 | import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; |
| 46 | +import org.apache.hadoop.hive.ql.exec.ColumnInfo; |
44 | 47 | import org.apache.hadoop.hive.ql.plan.ColStatistics; |
45 | 48 | import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; |
46 | 49 | import org.apache.hadoop.hive.ql.plan.Statistics; |
47 | 50 | import org.apache.hadoop.hive.serde.serdeConstants; |
| 51 | +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; |
48 | 52 | import org.junit.jupiter.api.Test; |
49 | 53 | import org.junit.jupiter.params.ParameterizedTest; |
50 | 54 | import org.junit.jupiter.params.provider.Arguments; |
@@ -565,4 +569,67 @@ void testGetColStatisticsTimestampType() { |
565 | 569 | assertEquals(1700000000L, range.maxValue.longValue(), "maxValue mismatch for TIMESTAMP"); |
566 | 570 | } |
567 | 571 |
|
| 572 | + @Test |
| 573 | + void testEstimateStatsForMissingColsHandlesEmptyList() { |
| 574 | + HiveConf conf = new HiveConf(); |
| 575 | + |
| 576 | + ColumnInfo columnInfoA = new ColumnInfo("a", TypeInfoFactory.intTypeInfo, "t", false); |
| 577 | + |
| 578 | + List<ColStatistics> allColumnStats = StatsUtils.estimateStatsForMissingCols( |
| 579 | + List.of("a"), Collections.emptyList(), conf, 0, List.of(columnInfoA)); |
| 580 | + |
| 581 | + assertEquals(1, allColumnStats.size()); |
| 582 | + } |
| 583 | + |
| 584 | + @Test |
| 585 | + void testEstimateStatsForMissingColsCombinesExistingStatsAndEstimations() { |
| 586 | + HiveConf conf = new HiveConf(); |
| 587 | + |
| 588 | + ColumnInfo colNeededButNotExists = new ColumnInfo("neededButNotExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 589 | + ColumnInfo colNeededAndExists = new ColumnInfo("neededAndExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 590 | + ColumnInfo colNotNeededButExists = new ColumnInfo("notNeededButExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 591 | + ColumnInfo colNotNeededNotExists = new ColumnInfo("notNeededNotExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 592 | + |
| 593 | + ColStatistics colStatNeededAndExists = new ColStatistics(); |
| 594 | + colStatNeededAndExists.setColumnName(colNeededAndExists.getInternalName()); |
| 595 | + ColStatistics colStatNotNeededButExists = new ColStatistics(); |
| 596 | + colStatNotNeededButExists.setColumnName(colNotNeededButExists.getInternalName()); |
| 597 | + |
| 598 | + List<ColStatistics> allColumnStats = StatsUtils.estimateStatsForMissingCols( |
| 599 | + List.of(colNeededAndExists.getInternalName(), colNeededButNotExists.getInternalName()), |
| 600 | + List.of(colStatNeededAndExists, colStatNotNeededButExists), |
| 601 | + conf, |
| 602 | + 0, |
| 603 | + List.of(colNeededButNotExists, colNeededAndExists, colNotNeededButExists, colNotNeededNotExists)); |
| 604 | + |
| 605 | + assertEquals(3, allColumnStats.size()); |
| 606 | + assertEquals(colStatNeededAndExists, allColumnStats.get(0)); |
| 607 | + assertFalse(allColumnStats.get(0).isEstimated()); |
| 608 | + assertEquals(colStatNotNeededButExists, allColumnStats.get(1)); |
| 609 | + assertFalse(allColumnStats.get(1).isEstimated()); |
| 610 | + assertEquals(colNeededButNotExists.getInternalName(), allColumnStats.get(2).getColumnName()); |
| 611 | + assertTrue(allColumnStats.get(2).isEstimated()); |
| 612 | + } |
| 613 | + |
| 614 | + @Test |
| 615 | + void testEstimateStatsForMissingColsReturnOnlyColumnsWithExistingStatsWhenNoNeededColumn() { |
| 616 | + HiveConf conf = new HiveConf(); |
| 617 | + |
| 618 | + ColumnInfo colNotNeededButExists = new ColumnInfo("notNeededButExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 619 | + ColumnInfo colNotNeededNotExists = new ColumnInfo("notNeededNotExists", TypeInfoFactory.intTypeInfo, "t", false); |
| 620 | + |
| 621 | + ColStatistics colStatNotNeededButExists = new ColStatistics(); |
| 622 | + colStatNotNeededButExists.setColumnName(colNotNeededButExists.getInternalName()); |
| 623 | + |
| 624 | + List<ColStatistics> allColumnStats = StatsUtils.estimateStatsForMissingCols( |
| 625 | + Collections.emptyList(), |
| 626 | + List.of(colStatNotNeededButExists), |
| 627 | + conf, |
| 628 | + 0, |
| 629 | + List.of(colNotNeededButExists, colNotNeededNotExists)); |
| 630 | + |
| 631 | + assertEquals(1, allColumnStats.size()); |
| 632 | + assertEquals(allColumnStats.getFirst(), colStatNotNeededButExists); |
| 633 | + } |
| 634 | + |
568 | 635 | } |
0 commit comments