HBASE-20144 The shutdown of master will hang if there are no live region servers

This commit is contained in:
zhangduo 2018-03-07 20:32:35 +08:00
parent 8e0674a2eb
commit a03d09abd7
3 changed files with 75 additions and 29 deletions

View File

@ -957,6 +957,10 @@ public class ServerManager {
String statusStr = "Cluster shutdown requested of master=" + this.master.getServerName(); String statusStr = "Cluster shutdown requested of master=" + this.master.getServerName();
LOG.info(statusStr); LOG.info(statusStr);
this.clusterShutdown.set(true); this.clusterShutdown.set(true);
if (onlineServers.isEmpty()) {
// we do not synchronize here so this may cause a double stop, but not a big deal
master.stop("OnlineServer=0 right after cluster shutdown set");
}
} }
boolean isClusterShutdown() { boolean isClusterShutdown() {

View File

@ -24,20 +24,21 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.CompatibilityFactory;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.test.MetricsAssertHelper; import org.apache.hadoop.hbase.test.MetricsAssertHelper;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.ClassRule; import org.junit.ClassRule;
import org.junit.Ignore;
import org.junit.Rule; import org.junit.Rule;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
@ -45,10 +46,7 @@ import org.junit.rules.TestName;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static org.junit.Assert.fail; @Category({ MasterTests.class, MediumTests.class })
@Ignore // Disabled temporarily; reenable
@Category(MediumTests.class)
public class TestAssignmentManagerMetrics { public class TestAssignmentManagerMetrics {
@ClassRule @ClassRule
@ -61,7 +59,7 @@ public class TestAssignmentManagerMetrics {
private static MiniHBaseCluster cluster; private static MiniHBaseCluster cluster;
private static HMaster master; private static HMaster master;
private static HBaseTestingUtility TEST_UTIL; private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private static Configuration conf; private static Configuration conf;
private static final int msgInterval = 1000; private static final int msgInterval = 1000;
@ -71,7 +69,6 @@ public class TestAssignmentManagerMetrics {
@BeforeClass @BeforeClass
public static void startCluster() throws Exception { public static void startCluster() throws Exception {
LOG.info("Starting cluster"); LOG.info("Starting cluster");
TEST_UTIL = new HBaseTestingUtility();
conf = TEST_UTIL.getConfiguration(); conf = TEST_UTIL.getConfiguration();
// Disable sanity check for coprocessor // Disable sanity check for coprocessor
@ -98,20 +95,14 @@ public class TestAssignmentManagerMetrics {
@AfterClass @AfterClass
public static void after() throws Exception { public static void after() throws Exception {
if (TEST_UTIL != null) { TEST_UTIL.shutdownMiniCluster();
TEST_UTIL.shutdownMiniCluster();
}
} }
@Test @Test
public void testRITAssignmentManagerMetrics() throws Exception { public void testRITAssignmentManagerMetrics() throws Exception {
final TableName TABLENAME = TableName.valueOf(name.getMethodName()); final TableName TABLENAME = TableName.valueOf(name.getMethodName());
final byte[] FAMILY = Bytes.toBytes("family"); final byte[] FAMILY = Bytes.toBytes("family");
try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)){
Table table = null;
try {
table = TEST_UTIL.createTable(TABLENAME, FAMILY);
final byte[] row = Bytes.toBytes("row"); final byte[] row = Bytes.toBytes("row");
final byte[] qualifier = Bytes.toBytes("qualifier"); final byte[] qualifier = Bytes.toBytes("qualifier");
final byte[] value = Bytes.toBytes("value"); final byte[] value = Bytes.toBytes("value");
@ -132,21 +123,19 @@ public class TestAssignmentManagerMetrics {
amSource); amSource);
// alter table with a non-existing coprocessor // alter table with a non-existing coprocessor
HTableDescriptor htd = new HTableDescriptor(TABLENAME);
HColumnDescriptor hcd = new HColumnDescriptor(FAMILY);
htd.addFamily(hcd);
String spec = "hdfs:///foo.jar|com.foo.FooRegionObserver|1001|arg1=1,arg2=2"; String spec = "hdfs:///foo.jar|com.foo.FooRegionObserver|1001|arg1=1,arg2=2";
htd.addCoprocessorWithSpec(spec); TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
.addColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)).addCoprocessorWithSpec(spec)
.build();
try { try {
TEST_UTIL.getAdmin().modifyTable(TABLENAME, htd); TEST_UTIL.getAdmin().modifyTable(htd);
fail("Expected region failed to open"); fail("Expected region failed to open");
} catch (IOException e) { } catch (IOException e) {
// expected, the RS will crash and the assignment will spin forever waiting for a RS // expected, the RS will crash and the assignment will spin forever waiting for a RS
// to assign the region. the region will not go to FAILED_OPEN because in this case // to assign the region. the region will not go to FAILED_OPEN because in this case
// we have just one RS and it will do one retry. // we have just one RS and it will do one retry.
LOG.info("Expected error", e);
} }
// Sleep 3 seconds, wait for doMetrics chore catching up // Sleep 3 seconds, wait for doMetrics chore catching up
@ -154,11 +143,6 @@ public class TestAssignmentManagerMetrics {
metricsHelper.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource); metricsHelper.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
metricsHelper.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1, metricsHelper.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,
amSource); amSource);
} finally {
if (table != null) {
table.close();
}
} }
} }
} }

View File

@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
/**
 * Test case confirming that shutting down a cluster does not hang when there are no live region
 * servers left.
 * <p>
 * The test method only stops the region server at index 0 and waits for its thread to finish;
 * the cluster shutdown itself is issued from {@link #tearDown()}.
 */
@Category({ MasterTests.class, MediumTests.class })
public class TestShutdownWithNoRegionServer {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestShutdownWithNoRegionServer.class);

  private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();

  /**
   * Starts the mini cluster used by this test.
   */
  @BeforeClass
  public static void setUp() throws Exception {
    // NOTE(review): the argument is presumably the number of region servers to start, so that
    // stopping index 0 below leaves none alive -- confirm against HBaseTestingUtility.
    UTIL.startMiniCluster(1);
  }

  /**
   * Shuts the mini cluster down after the region server has been stopped by {@link #test()}.
   */
  @AfterClass
  public static void tearDown() throws Exception {
    UTIL.shutdownMiniCluster();
  }

  /**
   * Stops the region server at index 0 and blocks until its thread has terminated.
   */
  @Test
  public void test() throws InterruptedException {
    RegionServerThread stoppedServer = UTIL.getMiniHBaseCluster().stopRegionServer(0);
    // Wait for the region server thread to exit before the class-level teardown runs.
    stoppedServer.join();
  }
}