HBASE-10085: Some regions aren't re-assigned after the master restarts
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1548726 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e2def0ce52
commit
b95200e31b
|
@ -3081,7 +3081,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
regionStates.getRegionTransitionState(encodedName);
|
||||
if (regionState == null
|
||||
|| (regionState.getServerName() != null && !regionState.isOnServer(sn))
|
||||
|| !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening())) {
|
||||
|| !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening() || regionState
|
||||
.isOffline())) {
|
||||
LOG.info("Skip " + regionState + " since it is not opening/failed_close"
|
||||
+ " on the dead server any more: " + sn);
|
||||
it.remove();
|
||||
|
|
|
@ -513,8 +513,8 @@ public class RegionStates {
|
|||
// pending open on this server, was open on another one.
|
||||
// It could be in failed_close state too if tried several times
|
||||
// to open it while the server is not reachable.
|
||||
if (state.isPendingOpenOrOpening() || state.isFailedClose()) {
|
||||
LOG.info("Found opening region " + state + " to be reassigned by SSH for " + sn);
|
||||
if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
|
||||
LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
|
||||
rits.add(hri);
|
||||
} else {
|
||||
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
|
||||
|
|
|
@ -29,6 +29,8 @@ import java.util.List;
|
|||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
|
@ -39,9 +41,11 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
|||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.MediumTests;
|
||||
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||
import org.apache.hadoop.hbase.RegionTransition;
|
||||
import org.apache.hadoop.hbase.ServerLoad;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
|
@ -60,6 +64,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
|
|||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -70,6 +75,7 @@ import org.junit.experimental.categories.Category;
|
|||
*/
|
||||
@Category(MediumTests.class)
|
||||
public class TestAssignmentManagerOnCluster {
|
||||
private static final Log LOG = LogFactory.getLog(TestAssignmentManagerOnCluster.class);
|
||||
private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
|
||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||
private final static Configuration conf = TEST_UTIL.getConfiguration();
|
||||
|
@ -760,6 +766,66 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This tests that a RIT in offline state gets re-assigned after a master restart
|
||||
*/
|
||||
@Test(timeout = 60000)
|
||||
public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
|
||||
final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
|
||||
final HRegionInfo hri = createTableAndGetOneRegion(table);
|
||||
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
|
||||
ServerName serverName = regionStates.getRegionServerOfRegion(hri);
|
||||
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||
|
||||
ServerName dstName = null;
|
||||
for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
|
||||
if (!tmpServer.equals(serverName)) {
|
||||
dstName = tmpServer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// find a different server
|
||||
assertTrue(dstName != null);
|
||||
// shutdown HBase cluster
|
||||
TEST_UTIL.shutdownMiniHBaseCluster();
|
||||
// create a RIT node in offline state
|
||||
ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
|
||||
ZKAssign.createNodeOffline(zkw, hri, dstName);
|
||||
Stat stat = new Stat();
|
||||
byte[] data =
|
||||
ZKAssign.getDataNoWatch(TEST_UTIL.getZooKeeperWatcher(), hri.getEncodedName(), stat);
|
||||
assertTrue(data != null);
|
||||
RegionTransition rt = RegionTransition.parseFrom(data);
|
||||
assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
|
||||
|
||||
LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
|
||||
+ " and dst server=" + dstName);
|
||||
|
||||
// start HBase cluster
|
||||
TEST_UTIL.startMiniHBaseCluster(1, 4, MyMaster.class, null);
|
||||
|
||||
// wait for the region to be re-assigned.
|
||||
TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
|
||||
@Override
|
||||
public boolean evaluate() throws Exception {
|
||||
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
if (master != null && master.isInitialized()) {
|
||||
ServerManager serverManager = master.getServerManager();
|
||||
return !serverManager.areDeadServersInProgress();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
});
|
||||
|
||||
// verify the region is assigned
|
||||
master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
master.getAssignmentManager().waitForAssignment(hri);
|
||||
regionStates = master.getAssignmentManager().getRegionStates();
|
||||
RegionState newState = regionStates.getRegionState(hri);
|
||||
assertTrue(newState.isOpened());
|
||||
}
|
||||
|
||||
static class MyLoadBalancer extends StochasticLoadBalancer {
|
||||
// For this region, if specified, always assign to nowhere
|
||||
static volatile String controledRegion = null;
|
||||
|
|
Loading…
Reference in New Issue