Revert "HBASE-24327 : Handle shutdown() if master cannot be contacted (#1684)" (#1691)

This reverts commit 34a74534fe.
This commit is contained in:
Viraj Jasani 2020-05-11 16:43:49 +05:30 committed by GitHub
parent ca81283fe5
commit 6729cafa1d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 13 additions and 46 deletions

View File

@@ -21,10 +21,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import java.io.IOException; import java.io.IOException;
import java.time.Duration;
import java.util.List; import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics; import org.apache.hadoop.hbase.ClusterMetrics;
@@ -34,8 +31,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.LocalHBaseCluster; import org.apache.hadoop.hbase.LocalHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.StartMiniClusterOption; import org.apache.hadoop.hbase.StartMiniClusterOption;
import org.apache.hadoop.hbase.client.AsyncConnection; import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
@@ -133,7 +129,6 @@ public class TestMasterShutdown {
try { try {
htu = new HBaseTestingUtility( htu = new HBaseTestingUtility(
createMasterShutdownBeforeStartingAnyRegionServerConfiguration()); createMasterShutdownBeforeStartingAnyRegionServerConfiguration());
htu.getConfiguration().setInt("hbase.client.retries.number", 3);
// configure a cluster with // configure a cluster with
final StartMiniClusterOption options = StartMiniClusterOption.builder() final StartMiniClusterOption options = StartMiniClusterOption.builder()
@@ -156,47 +151,19 @@ public class TestMasterShutdown {
hbaseCluster = new LocalHBaseCluster(htu.getConfiguration(), options.getNumMasters(), hbaseCluster = new LocalHBaseCluster(htu.getConfiguration(), options.getNumMasters(),
options.getNumRegionServers(), options.getMasterClass(), options.getRsClass()); options.getNumRegionServers(), options.getMasterClass(), options.getRsClass());
final MasterThread masterThread = hbaseCluster.getMasters().get(0); final MasterThread masterThread = hbaseCluster.getMasters().get(0);
masterThread.start(); masterThread.start();
final CompletableFuture<Void> shutdownFuture = CompletableFuture.runAsync(() -> { // Switching to master registry exacerbated a race in the master bootstrap that can result
// Switching to master registry exacerbated a race in the master bootstrap that can result // in a lost shutdown command (HBASE-8422, HBASE-23836). The race is essentially because
// in a lost shutdown command (HBASE-8422, HBASE-23836). The race is essentially because // the server manager in HMaster is not initialized by the time shutdown() RPC (below) is
// the server manager in HMaster is not initialized by the time shutdown() RPC (below) is // made to the master. The suspected reason as to why it was uncommon before HBASE-18095
// made to the master. The suspected reason as to why it was uncommon before HBASE-18095 // is because the connection creation with ZK registry is so slow that by then the server
// is because the connection creation with ZK registry is so slow that by then the server // manager is usually init'ed in time for the RPC to be made. For now, adding an explicit
// manager is usually init'ed in time for the RPC to be made. For now, adding an explicit // wait() in the test, waiting for the server manager to become available.
// wait() in the test, waiting for the server manager to become available. final long timeout = TimeUnit.MINUTES.toMillis(10);
final long timeout = TimeUnit.MINUTES.toMillis(10); assertNotEquals("Timeout waiting for server manager to become available.",
assertNotEquals("timeout waiting for server manager to become available.", -1, -1, Waiter.waitFor(htu.getConfiguration(), timeout,
htu.waitFor(timeout, () -> masterThread.getMaster().getServerManager() != null)); () -> masterThread.getMaster().getServerManager() != null));
htu.getConnection().getAdmin().shutdown();
// Master has come up far enough that we can terminate it without creating a zombie.
final long result = htu.waitFor(timeout, 1000, () -> {
final Configuration conf = createResponsiveZkConfig(htu.getConfiguration());
LOG.debug("Attempting to establish connection.");
final CompletableFuture<AsyncConnection> connFuture =
ConnectionFactory.createAsyncConnection(conf);
try (final AsyncConnection conn = connFuture.join()) {
LOG.info("Sending shutdown RPC.");
try {
conn.getAdmin().shutdown().join();
LOG.info("Shutdown RPC sent.");
return true;
} catch (CompletionException e) {
LOG.error("Failure sending shutdown RPC.");
}
} catch (IOException|CompletionException e) {
LOG.error("Failed to establish connection.");
} catch (Throwable e) {
LOG.error("Something unexpected happened.", e);
}
return false;
});
assertNotEquals("Failed to issue shutdown RPC after " + Duration.ofMillis(timeout),
-1, result);
});
shutdownFuture.join();
masterThread.join(); masterThread.join();
} finally { } finally {
if (hbaseCluster != null) { if (hbaseCluster != null) {