Skip to content

Commit cce3edb

Browse files
authored
HIVE-29642: Remove racy test-only counters from PartitionManagementTask (#6520)
* HIVE-29642: Remove racy test-only counters from PartitionManagementTask * HIVE-29642: splitting the too-long comment line to address SQ feedback
1 parent 45bdea4 commit cce3edb

2 files changed

Lines changed: 68 additions & 54 deletions

File tree

standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
import org.slf4j.Logger;
3838
import org.slf4j.LoggerFactory;
3939

40-
import com.google.common.annotations.VisibleForTesting;
4140
import com.google.common.util.concurrent.ThreadFactoryBuilder;
4241

4342
/**
@@ -57,9 +56,6 @@ public class PartitionManagementTask implements MetastoreTaskThread {
5756
public static final String DISCOVER_PARTITIONS_TBLPROPERTY = "discover.partitions";
5857
public static final String PARTITION_RETENTION_PERIOD_TBLPROPERTY = "partition.retention.period";
5958
private static final Lock lock = new ReentrantLock();
60-
// these are just for testing
61-
private static int completedAttempts;
62-
private static int skippedAttempts;
6359

6460
private Configuration conf;
6561

@@ -87,7 +83,6 @@ private static boolean partitionDiscoveryEnabled(Map<String, String> params) {
8783
@Override
8884
public void run() {
8985
if (lock.tryLock()) {
90-
skippedAttempts = 0;
9186
String qualifiedTableName = null;
9287
IMetaStoreClient msc = null;
9388
try {
@@ -136,10 +131,8 @@ public void run() {
136131
}
137132
lock.unlock();
138133
}
139-
completedAttempts++;
140134
} else {
141-
skippedAttempts++;
142-
LOG.info("Lock is held by some other partition discovery task. Skipping this attempt..#{}", skippedAttempts);
135+
LOG.info("Lock is held by some other partition discovery task. Skipping this attempt.");
143136
}
144137
}
145138

@@ -200,13 +193,4 @@ public void run() {
200193
}
201194
}
202195

203-
@VisibleForTesting
204-
public static int getSkippedAttempts() {
205-
return skippedAttempts;
206-
}
207-
208-
@VisibleForTesting
209-
public static int getCompletedAttempts() {
210-
return completedAttempts;
211-
}
212196
}

standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java

Lines changed: 67 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@
5454
import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge;
5555
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
5656
import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
57+
import org.apache.logging.log4j.Level;
58+
import org.apache.logging.log4j.LogManager;
59+
import org.apache.logging.log4j.core.LoggerContext;
60+
import org.apache.logging.log4j.core.config.LoggerConfig;
61+
import org.apache.logging.log4j.core.test.appender.ListAppender;
5762
import org.apache.thrift.TException;
5863
import org.junit.After;
5964
import org.junit.Assert;
@@ -473,7 +478,7 @@ public void testPartitionDiscoveryTablePattern() throws TException, IOException
473478
}
474479

475480
@Test
476-
public void testPartitionDiscoveryTransactionalTable()
481+
public void testPartitionDiscoveryTransactionalTableConcurrent()
477482
throws TException, IOException, InterruptedException, ExecutionException {
478483
String dbName = "db6";
479484
String tableName = "tbl6";
@@ -503,47 +508,72 @@ public void testPartitionDiscoveryTransactionalTable()
503508
TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY);
504509
client.alter_table(dbName, tableName, table);
505510

506-
runPartitionManagementTask(conf);
507-
partitions = client.listPartitions(dbName, tableName, (short) -1);
508-
assertEquals(5, partitions.size());
509-
510-
// only one partition discovery task is running, there will be no skipped attempts
511-
assertEquals(0, PartitionManagementTask.getSkippedAttempts());
512-
513-
// delete a partition from fs, and submit 3 tasks at the same time each of them trying to acquire X lock on the
514-
// same table, only one of them will run other attempts will be skipped
515-
boolean deleted = fs.delete(newPart1.getParent(), true);
516-
assertTrue(deleted);
517-
assertEquals(4, fs.listStatus(tablePath).length);
518-
519-
// 3 tasks are submitted at the same time, only one will eventually lock the table and only one get to run at a time
520-
// This is to simulate, skipping partition discovery task attempt when previous attempt is still incomplete
521-
PartitionManagementTask partitionDiscoveryTask1 = new PartitionManagementTask();
522-
partitionDiscoveryTask1.setConf(conf);
523-
PartitionManagementTask partitionDiscoveryTask2 = new PartitionManagementTask();
524-
partitionDiscoveryTask2.setConf(conf);
525-
PartitionManagementTask partitionDiscoveryTask3 = new PartitionManagementTask();
526-
partitionDiscoveryTask3.setConf(conf);
527-
List<PartitionManagementTask> tasks = Lists
528-
.newArrayList(partitionDiscoveryTask1, partitionDiscoveryTask2, partitionDiscoveryTask3);
529-
ExecutorService executorService = Executors.newFixedThreadPool(3);
530-
int successBefore = PartitionManagementTask.getCompletedAttempts();
531-
int skippedBefore = PartitionManagementTask.getSkippedAttempts();
532-
List<Future<?>> futures = new ArrayList<>();
533-
for (PartitionManagementTask task : tasks) {
534-
futures.add(executorService.submit(task));
535-
}
536-
for (Future<?> future : futures) {
537-
future.get();
511+
final String appenderName = "testPartitionDiscoveryTransactionalTableConcurrentAppender";
512+
LoggerContext loggerContext = (LoggerContext) LogManager.getContext(false);
513+
LoggerConfig rootLoggerConfig = loggerContext.getConfiguration().getLoggerConfig("");
514+
ListAppender skipAppender = new ListAppender(appenderName);
515+
skipAppender.start();
516+
rootLoggerConfig.addAppender(skipAppender, Level.INFO, null);
517+
try {
518+
runPartitionManagementTask(conf);
519+
partitions = client.listPartitions(dbName, tableName, (short) -1);
520+
assertEquals(5, partitions.size());
521+
522+
// only one partition discovery task is running, there will be no skipped attempts
523+
assertEquals(0, countSkipMessages(skipAppender));
524+
assertEquals(1, countDiscoveryEntries(skipAppender));
525+
526+
// delete a partition from fs, and submit 3 tasks at the same time each of them trying to acquire X lock on the
527+
// same table, only one of them will run other attempts will be skipped
528+
boolean deleted = fs.delete(newPart1.getParent(), true);
529+
assertTrue(deleted);
530+
assertEquals(4, fs.listStatus(tablePath).length);
531+
532+
// 3 tasks are submitted at the same time, only one will eventually lock the table and only one
533+
// get to run at a time. This is to simulate, skipping partition discovery task attempt when
534+
// previous attempt is still incomplete
535+
PartitionManagementTask partitionDiscoveryTask1 = new PartitionManagementTask();
536+
partitionDiscoveryTask1.setConf(conf);
537+
PartitionManagementTask partitionDiscoveryTask2 = new PartitionManagementTask();
538+
partitionDiscoveryTask2.setConf(conf);
539+
PartitionManagementTask partitionDiscoveryTask3 = new PartitionManagementTask();
540+
partitionDiscoveryTask3.setConf(conf);
541+
List<PartitionManagementTask> tasks = Lists
542+
.newArrayList(partitionDiscoveryTask1, partitionDiscoveryTask2, partitionDiscoveryTask3);
543+
ExecutorService executorService = Executors.newFixedThreadPool(3);
544+
List<Future<?>> futures = new ArrayList<>();
545+
for (PartitionManagementTask task : tasks) {
546+
futures.add(executorService.submit(task));
547+
}
548+
for (Future<?> future : futures) {
549+
future.get();
550+
}
551+
long skips = countSkipMessages(skipAppender);
552+
long discoveries = countDiscoveryEntries(skipAppender);
553+
assertEquals(4, skips + discoveries);
554+
assertTrue("at least one more task should have entered the work path during the race", discoveries >= 2);
555+
} finally {
556+
rootLoggerConfig.removeAppender(appenderName);
557+
skipAppender.stop();
538558
}
539-
int successAfter = PartitionManagementTask.getCompletedAttempts();
540-
int skippedAfter = PartitionManagementTask.getSkippedAttempts();
541-
assertEquals(1, successAfter - successBefore);
542-
assertEquals(2, skippedAfter - skippedBefore);
543559
partitions = client.listPartitions(dbName, tableName, (short) -1);
544560
assertEquals(4, partitions.size());
545561
}
546562

563+
private static long countSkipMessages(ListAppender appender) {
564+
return appender.getEvents().stream()
565+
.map(e -> e.getMessage().getFormattedMessage())
566+
.filter(m -> m.equals("Lock is held by some other partition discovery task. Skipping this attempt."))
567+
.count();
568+
}
569+
570+
private static long countDiscoveryEntries(ListAppender appender) {
571+
return appender.getEvents().stream()
572+
.map(e -> e.getMessage().getFormattedMessage())
573+
.filter(m -> m.equals("Found 1 candidate tables for partition discovery"))
574+
.count();
575+
}
576+
547577
@Test
548578
public void testPartitionRetention() throws TException, IOException, InterruptedException {
549579
String dbName = "db7";

0 commit comments

Comments
 (0)