HBASE-10085: Some regions aren't re-assigned after a master restart
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1548726 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e2def0ce52
commit
b95200e31b
|
@ -3081,7 +3081,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
regionStates.getRegionTransitionState(encodedName);
|
regionStates.getRegionTransitionState(encodedName);
|
||||||
if (regionState == null
|
if (regionState == null
|
||||||
|| (regionState.getServerName() != null && !regionState.isOnServer(sn))
|
|| (regionState.getServerName() != null && !regionState.isOnServer(sn))
|
||||||
|| !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening())) {
|
|| !(regionState.isFailedClose() || regionState.isPendingOpenOrOpening() || regionState
|
||||||
|
.isOffline())) {
|
||||||
LOG.info("Skip " + regionState + " since it is not opening/failed_close"
|
LOG.info("Skip " + regionState + " since it is not opening/failed_close"
|
||||||
+ " on the dead server any more: " + sn);
|
+ " on the dead server any more: " + sn);
|
||||||
it.remove();
|
it.remove();
|
||||||
|
|
|
@ -513,8 +513,8 @@ public class RegionStates {
|
||||||
// pending open on this server, was open on another one.
|
// pending open on this server, was open on another one.
|
||||||
// It could be in failed_close state too if tried several times
|
// It could be in failed_close state too if tried several times
|
||||||
// to open it while the server is not reachable.
|
// to open it while the server is not reachable.
|
||||||
if (state.isPendingOpenOrOpening() || state.isFailedClose()) {
|
if (state.isPendingOpenOrOpening() || state.isFailedClose() || state.isOffline()) {
|
||||||
LOG.info("Found opening region " + state + " to be reassigned by SSH for " + sn);
|
LOG.info("Found region in " + state + " to be reassigned by SSH for " + sn);
|
||||||
rits.add(hri);
|
rits.add(hri);
|
||||||
} else {
|
} else {
|
||||||
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
|
LOG.warn("THIS SHOULD NOT HAPPEN: unexpected " + state);
|
||||||
|
|
|
@ -29,6 +29,8 @@ import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -39,9 +41,11 @@ import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
import org.apache.hadoop.hbase.MediumTests;
|
import org.apache.hadoop.hbase.MediumTests;
|
||||||
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
|
import org.apache.hadoop.hbase.RegionTransition;
|
||||||
import org.apache.hadoop.hbase.ServerLoad;
|
import org.apache.hadoop.hbase.ServerLoad;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.TableName;
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.Waiter;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||||
import org.apache.hadoop.hbase.client.HTable;
|
import org.apache.hadoop.hbase.client.HTable;
|
||||||
|
@ -60,6 +64,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
import org.apache.zookeeper.data.Stat;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -70,6 +75,7 @@ import org.junit.experimental.categories.Category;
|
||||||
*/
|
*/
|
||||||
@Category(MediumTests.class)
|
@Category(MediumTests.class)
|
||||||
public class TestAssignmentManagerOnCluster {
|
public class TestAssignmentManagerOnCluster {
|
||||||
|
private static final Log LOG = LogFactory.getLog(TestAssignmentManagerOnCluster.class);
|
||||||
private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
|
private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
|
||||||
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||||
private final static Configuration conf = TEST_UTIL.getConfiguration();
|
private final static Configuration conf = TEST_UTIL.getConfiguration();
|
||||||
|
@ -760,6 +766,66 @@ public class TestAssignmentManagerOnCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests that a RIT in offline state will get re-assigned after a master restart
|
||||||
|
*/
|
||||||
|
@Test(timeout = 60000)
|
||||||
|
public void testOfflineRegionReAssginedAfterMasterRestart() throws Exception {
|
||||||
|
final TableName table = TableName.valueOf("testOfflineRegionReAssginedAfterMasterRestart");
|
||||||
|
final HRegionInfo hri = createTableAndGetOneRegion(table);
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
|
||||||
|
ServerName serverName = regionStates.getRegionServerOfRegion(hri);
|
||||||
|
TEST_UTIL.assertRegionOnServer(hri, serverName, 200);
|
||||||
|
|
||||||
|
ServerName dstName = null;
|
||||||
|
for (ServerName tmpServer : master.serverManager.getOnlineServers().keySet()) {
|
||||||
|
if (!tmpServer.equals(serverName)) {
|
||||||
|
dstName = tmpServer;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// find a different server
|
||||||
|
assertTrue(dstName != null);
|
||||||
|
// shutdown HBase cluster
|
||||||
|
TEST_UTIL.shutdownMiniHBaseCluster();
|
||||||
|
// create a RIT node in offline state
|
||||||
|
ZooKeeperWatcher zkw = TEST_UTIL.getZooKeeperWatcher();
|
||||||
|
ZKAssign.createNodeOffline(zkw, hri, dstName);
|
||||||
|
Stat stat = new Stat();
|
||||||
|
byte[] data =
|
||||||
|
ZKAssign.getDataNoWatch(TEST_UTIL.getZooKeeperWatcher(), hri.getEncodedName(), stat);
|
||||||
|
assertTrue(data != null);
|
||||||
|
RegionTransition rt = RegionTransition.parseFrom(data);
|
||||||
|
assertTrue(rt.getEventType() == EventType.M_ZK_REGION_OFFLINE);
|
||||||
|
|
||||||
|
LOG.info(hri.getEncodedName() + " region is in offline state with source server=" + serverName
|
||||||
|
+ " and dst server=" + dstName);
|
||||||
|
|
||||||
|
// start HBase cluster
|
||||||
|
TEST_UTIL.startMiniHBaseCluster(1, 4, MyMaster.class, null);
|
||||||
|
|
||||||
|
// wait for the region to be re-assigned.
|
||||||
|
TEST_UTIL.waitFor(30000, 200, new Waiter.Predicate<Exception>() {
|
||||||
|
@Override
|
||||||
|
public boolean evaluate() throws Exception {
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
if (master != null && master.isInitialized()) {
|
||||||
|
ServerManager serverManager = master.getServerManager();
|
||||||
|
return !serverManager.areDeadServersInProgress();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// verify the region is assigned
|
||||||
|
master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
master.getAssignmentManager().waitForAssignment(hri);
|
||||||
|
regionStates = master.getAssignmentManager().getRegionStates();
|
||||||
|
RegionState newState = regionStates.getRegionState(hri);
|
||||||
|
assertTrue(newState.isOpened());
|
||||||
|
}
|
||||||
|
|
||||||
static class MyLoadBalancer extends StochasticLoadBalancer {
|
static class MyLoadBalancer extends StochasticLoadBalancer {
|
||||||
// For this region, if specified, always assign to nowhere
|
// For this region, if specified, always assign to nowhere
|
||||||
static volatile String controledRegion = null;
|
static volatile String controledRegion = null;
|
||||||
|
|
Loading…
Reference in New Issue