HBASE-11525 Region server holding in region states is out of sync with meta

This commit is contained in:
Jimmy Xiang 2014-07-16 09:13:27 -07:00
parent f8153a1456
commit 7487278baf
3 changed files with 199 additions and 16 deletions

View File

@ -63,7 +63,6 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.TableStateManager;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.client.Result;
@ -300,7 +299,8 @@ public class AssignmentManager extends ZooKeeperListener {
int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
maxThreads, 60L, TimeUnit.SECONDS, Threads.newDaemonThreadFactory("AM."));
this.regionStates = new RegionStates(server, serverManager, regionStateStore);
this.regionStates = new RegionStates(
server, tableStateManager, serverManager, regionStateStore);
this.bulkAssignWaitTillAllAssigned =
conf.getBoolean("hbase.bulk.assignment.waittillallassigned", false);
@ -1154,6 +1154,7 @@ public class AssignmentManager extends ZooKeeperListener {
* This is handled in a separate code path because it breaks the normal rules.
* @param rt
*/
@SuppressWarnings("deprecation")
private void handleHBCK(RegionTransition rt) {
String encodedName = HRegionInfo.encodeRegionName(rt.getRegionName());
LOG.info("Handling HBCK triggered transition=" + rt.getEventType() +
@ -1949,6 +1950,7 @@ public class AssignmentManager extends ZooKeeperListener {
return state;
}
@SuppressWarnings("deprecation")
private boolean wasRegionOnDeadServerByMeta(
final HRegionInfo region, final ServerName sn) {
try {

View File

@ -39,9 +39,11 @@ import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableStateManager;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
@ -118,6 +120,7 @@ public class RegionStates {
private final HashMap<ServerName, Long> processedServers;
private long lastProcessedServerCleanTime;
private final TableStateManager tableStateManager;
private final RegionStateStore regionStateStore;
private final ServerManager serverManager;
private final Server server;
@ -126,7 +129,7 @@ public class RegionStates {
static final String LOG_SPLIT_TIME = "hbase.master.maximum.logsplit.keeptime";
static final long DEFAULT_LOG_SPLIT_TIME = 7200000L; // 2 hours
RegionStates(final Server master,
RegionStates(final Server master, final TableStateManager tableStateManager,
final ServerManager serverManager, final RegionStateStore regionStateStore) {
regionStates = new HashMap<String, RegionState>();
regionsInTransition = new HashMap<String, RegionState>();
@ -136,6 +139,7 @@ public class RegionStates {
lastAssignments = new HashMap<String, ServerName>();
processedServers = new HashMap<ServerName, Long>();
deadServers = new HashMap<String, Long>();
this.tableStateManager = tableStateManager;
this.regionStateStore = regionStateStore;
this.serverManager = serverManager;
this.server = master;
@ -405,7 +409,7 @@ public class RegionStates {
LOG.info("Onlined " + hri.getShortNameToLog() + " on " + serverName);
addToServerHoldings(serverName, hri);
addToReplicaMapping(hri);
if (oldServerName != null) {
if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
removeFromServerHoldings(oldServerName, hri);
}
@ -528,7 +532,12 @@ public class RegionStates {
synchronized (this) {
regionsInTransition.remove(hri.getEncodedName());
ServerName oldServerName = regionAssignments.remove(hri);
if (oldServerName != null && serverHoldings.containsKey(oldServerName)) {
if (oldServerName != null && serverHoldings.containsKey(oldServerName)
&& (newState == State.MERGED || newState == State.SPLIT
|| tableStateManager.isTableState(hri.getTable(),
ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING))) {
// Offline the region only if it's merged/split, or the table is disabled/disabling.
// Otherwise, offline it from this server only when it is online on a different server.
LOG.info("Offlined " + hri.getShortNameToLog() + " from " + oldServerName);
removeFromServerHoldings(oldServerName, hri);
removeFromReplicaMapping(hri);
@ -554,16 +563,14 @@ public class RegionStates {
// Offline open regions, no need to offline if SPLIT/MERGED/OFFLINE
if (isRegionOnline(region)) {
regionsToOffline.add(region);
} else {
if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
LOG.debug("Offline splitting/merging region " + getRegionState(region));
try {
// Delete the ZNode if exists
ZKAssign.deleteNodeFailSilent(watcher, region);
regionsToOffline.add(region);
} catch (KeeperException ke) {
server.abort("Unexpected ZK exception deleting node " + region, ke);
}
} else if (isRegionInState(region, State.SPLITTING, State.MERGING)) {
LOG.debug("Offline splitting/merging region " + getRegionState(region));
try {
// Delete the ZNode if exists
ZKAssign.deleteNodeFailSilent(watcher, region);
regionsToOffline.add(region);
} catch (KeeperException ke) {
server.abort("Unexpected ZK exception deleting node " + region, ke);
}
}
}
@ -921,6 +928,7 @@ public class RegionStates {
* @param regionName
* @return HRegionInfo for the region
*/
@SuppressWarnings("deprecation")
protected HRegionInfo getRegionInfo(final byte [] regionName) {
String encodedName = HRegionInfo.encodeRegionName(regionName);
RegionState regionState = getRegionState(encodedName);

View File

@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.MiniHBaseCluster.MiniHBaseClusterRegionServer;
import org.apache.hadoop.hbase.ServerLoad;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
@ -92,7 +93,7 @@ public class TestAssignmentManagerOnCluster {
// Reduce the maximum attempts to speed up the test
conf.setInt("hbase.assignment.maximum.attempts", 3);
TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, null);
TEST_UTIL.startMiniCluster(1, 4, null, MyMaster.class, MyRegionServer.class);
admin = TEST_UTIL.getHBaseAdmin();
}
@ -795,6 +796,7 @@ public class TestAssignmentManagerOnCluster {
master.enableSSH(true);
}
TEST_UTIL.deleteTable(Bytes.toBytes(table));
cluster.startRegionServer();
}
}
@ -839,6 +841,162 @@ public class TestAssignmentManagerOnCluster {
}
}
/**
 * Test offlined region is assigned by SSH.
 *
 * <p>Scenario exercised by this test:
 * <ol>
 * <li>Create a table, add an extra region to meta and assign it.</li>
 * <li>Make sure the region is hosted by a server other than the one carrying meta,
 * then flag that server as aborted through {@code MyRegionServer.abortedServer}
 * without actually stopping it.</li>
 * <li>Unassign the region; it is expected to end up offline in the region states.</li>
 * <li>Kill the hosting server and wait until the master has finished processing
 * the dead server.</li>
 * <li>Verify the region gets assigned again without any manual assignment.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation fails
 */
@Test (timeout=60000)
public void testAssignOfflinedRegionBySSH() throws Exception {
  String table = "testAssignOfflinedRegionBySSH";
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  MyMaster master = null;
  try {
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
    desc.addFamily(new HColumnDescriptor(FAMILY));
    admin.createTable(desc);

    HRegionInfo hri = new HRegionInfo(
      desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
    // The meta table handle is only needed to register the region;
    // close it right away so the test does not leak it.
    HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
    try {
      MetaTableAccessor.addRegionToMeta(meta, hri);
    } finally {
      meta.close();
    }

    // Assign the region
    master = (MyMaster)cluster.getMaster();
    master.assignRegion(hri);

    AssignmentManager am = master.getAssignmentManager();
    RegionStates regionStates = am.getRegionStates();
    ServerName metaServer = regionStates.getRegionServerOfRegion(
      HRegionInfo.FIRST_META_REGIONINFO);
    ServerName oldServerName = null;
    // Loop until the region sits on a server that does not also carry meta.
    while (true) {
      assertTrue(am.waitForAssignment(hri));
      RegionState state = regionStates.getRegionState(hri);
      oldServerName = state.getServerName();
      if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
        // Mark the hosting server aborted, but don't actually kill it.
        // It doesn't have meta on it.
        MyRegionServer.abortedServer = oldServerName;
        break;
      }
      // The region landed on the meta server: move it to another server and re-check.
      int i = cluster.getServerWithMeta();
      HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
      oldServerName = rs.getServerName();
      master.move(hri.getEncodedNameAsBytes(),
        Bytes.toBytes(oldServerName.getServerName()));
    }

    // Make sure the region is assigned on the dead server
    assertTrue(regionStates.isRegionOnline(hri));
    assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));

    // Try to unassign the dead region before SSH
    am.unassign(hri, false);
    // The region should be moved to offline since the server is dead
    RegionState state = regionStates.getRegionState(hri);
    assertTrue(state.isOffline());

    // Kill the hosting server, which doesn't have meta on it.
    cluster.killRegionServer(oldServerName);
    cluster.waitForRegionServerToStop(oldServerName, -1);

    // Poll until the master considers the server dead and has no dead servers
    // still being processed; the overall wait is bounded by the test timeout.
    ServerManager serverManager = master.getServerManager();
    while (!serverManager.isServerDead(oldServerName)
        || serverManager.getDeadServers().areDeadServersInProgress()) {
      Thread.sleep(100);
    }

    // Let's check if it's assigned after it's out of transition.
    // no need to assign it manually, SSH should do it
    am.waitOnRegionToClearRegionsInTransition(hri);
    assertTrue(am.waitForAssignment(hri));

    ServerName serverName = master.getAssignmentManager().
      getRegionStates().getRegionServerOfRegion(hri);
    TEST_UTIL.assertRegionOnlyOnServer(hri, serverName, 200);
  } finally {
    // Reset the shared static flag so later tests are not affected,
    // drop the table and bring the cluster back to its original server count.
    MyRegionServer.abortedServer = null;
    TEST_UTIL.deleteTable(Bytes.toBytes(table));
    cluster.startRegionServer();
  }
}
/**
 * Test disabled region is ignored by SSH.
 *
 * <p>Scenario exercised by this test:
 * <ol>
 * <li>Create a table, add an extra region to meta and assign it.</li>
 * <li>Make sure the region is hosted by a server other than the one carrying meta,
 * then flag that server as aborted through {@code MyRegionServer.abortedServer}
 * without actually stopping it.</li>
 * <li>Unassign the region; it is expected to end up offline in the region states.</li>
 * <li>Disable the table, then kill the hosting server and wait until the master has
 * finished processing the dead server.</li>
 * <li>Verify that, once no region is in transition, the region is still offline.</li>
 * </ol>
 *
 * @throws Exception if any cluster operation fails
 */
@Test (timeout=60000)
public void testAssignDisabledRegionBySSH() throws Exception {
  String table = "testAssignDisabledRegionBySSH";
  MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
  MyMaster master = null;
  try {
    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(table));
    desc.addFamily(new HColumnDescriptor(FAMILY));
    admin.createTable(desc);

    HRegionInfo hri = new HRegionInfo(
      desc.getTableName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
    // The meta table handle is only needed to register the region;
    // close it right away so the test does not leak it.
    HTable meta = new HTable(conf, TableName.META_TABLE_NAME);
    try {
      MetaTableAccessor.addRegionToMeta(meta, hri);
    } finally {
      meta.close();
    }

    // Assign the region
    master = (MyMaster)cluster.getMaster();
    master.assignRegion(hri);

    AssignmentManager am = master.getAssignmentManager();
    RegionStates regionStates = am.getRegionStates();
    ServerName metaServer = regionStates.getRegionServerOfRegion(
      HRegionInfo.FIRST_META_REGIONINFO);
    ServerName oldServerName = null;
    // Loop until the region sits on a server that does not also carry meta.
    while (true) {
      assertTrue(am.waitForAssignment(hri));
      RegionState state = regionStates.getRegionState(hri);
      oldServerName = state.getServerName();
      if (!ServerName.isSameHostnameAndPort(oldServerName, metaServer)) {
        // Mark the hosting server aborted, but don't actually kill it.
        // It doesn't have meta on it.
        MyRegionServer.abortedServer = oldServerName;
        break;
      }
      // The region landed on the meta server: move it to another server and re-check.
      int i = cluster.getServerWithMeta();
      HRegionServer rs = cluster.getRegionServer(i == 0 ? 1 : 0);
      oldServerName = rs.getServerName();
      master.move(hri.getEncodedNameAsBytes(),
        Bytes.toBytes(oldServerName.getServerName()));
    }

    // Make sure the region is assigned on the dead server
    assertTrue(regionStates.isRegionOnline(hri));
    assertEquals(oldServerName, regionStates.getRegionServerOfRegion(hri));

    // Try to unassign the dead region before SSH
    am.unassign(hri, false);
    // The region should be moved to offline since the server is dead
    RegionState state = regionStates.getRegionState(hri);
    assertTrue(state.isOffline());

    // Disable the table now.
    master.disableTable(hri.getTable());

    // Kill the hosting server, which doesn't have meta on it.
    cluster.killRegionServer(oldServerName);
    cluster.waitForRegionServerToStop(oldServerName, -1);

    // Poll until the master considers the server dead and has no dead servers
    // still being processed; the overall wait is bounded by the test timeout.
    ServerManager serverManager = master.getServerManager();
    while (!serverManager.isServerDead(oldServerName)
        || serverManager.getDeadServers().areDeadServersInProgress()) {
      Thread.sleep(100);
    }

    // Wait till no more RIT, the region should be offline.
    am.waitUntilNoRegionsInTransition(60000);
    assertTrue(regionStates.isRegionOffline(hri));
  } finally {
    // Reset the shared static flag so later tests are not affected,
    // drop the table and bring the cluster back to its original server count.
    MyRegionServer.abortedServer = null;
    TEST_UTIL.deleteTable(Bytes.toBytes(table));
    cluster.startRegionServer();
  }
}
static class MyLoadBalancer extends StochasticLoadBalancer {
// For this region, if specified, always assign to nowhere
static volatile String controledRegion = null;
@ -875,6 +1033,21 @@ public class TestAssignmentManagerOnCluster {
}
}
/**
 * Region server used by this test class that can be made to report itself as
 * aborted without actually being stopped.
 */
public static class MyRegionServer extends MiniHBaseClusterRegionServer {
  /**
   * Name of the server that should claim to be aborted, or {@code null} when no
   * server is being faked. Tests set and reset this field directly.
   */
  static volatile ServerName abortedServer = null;

  /**
   * Creates the region server by delegating to the parent constructor.
   *
   * @param conf configuration passed through to the parent class
   * @param cp coordinated state manager passed through to the parent class
   * @throws IOException propagated from the parent constructor
   * @throws KeeperException propagated from the parent constructor
   * @throws InterruptedException propagated from the parent constructor
   */
  public MyRegionServer(Configuration conf, CoordinatedStateManager cp)
      throws IOException, KeeperException, InterruptedException {
    super(conf, cp);
  }

  /**
   * @return {@code true} if this server's name equals {@link #abortedServer};
   *         otherwise whatever the parent implementation reports
   */
  @Override
  public boolean isAborted() {
    // Check the test-controlled marker first; only fall back to the real state
    // when this server is not the one being faked.
    if (getServerName().equals(abortedServer)) {
      return true;
    }
    return super.isAborted();
  }
}
public static class MyRegionObserver extends BaseRegionObserver {
// If enabled, fail all preClose calls
static AtomicBoolean preCloseEnabled = new AtomicBoolean(false);