HBASE-8545 Meta stuck in transition when it is assigned to a just restarted dead region sever
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1484875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
634f63ef3c
commit
aa12c8ac72
|
@ -798,7 +798,6 @@ public class ZKAssign {
|
|||
// Node no longer exists. Return -1. It means unsuccessful transition.
|
||||
return -1;
|
||||
}
|
||||
RegionTransition rt = getRegionTransition(existingBytes);
|
||||
|
||||
// Verify it is the expected version
|
||||
if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
|
||||
|
@ -808,7 +807,9 @@ public class ZKAssign {
|
|||
"the node existed but was version " + stat.getVersion() +
|
||||
" not the expected version " + expectedVersion));
|
||||
return -1;
|
||||
} else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
|
||||
}
|
||||
|
||||
if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
|
||||
&& endState.equals(EventType.RS_ZK_REGION_OPENING)
|
||||
&& expectedVersion == -1 && stat.getVersion() != 0) {
|
||||
// the below check ensures that double assignment doesnot happen.
|
||||
|
@ -822,6 +823,18 @@ public class ZKAssign {
|
|||
return -1;
|
||||
}
|
||||
|
||||
RegionTransition rt = getRegionTransition(existingBytes);
|
||||
|
||||
// Verify the server transition happens on is not changed
|
||||
if (!rt.getServerName().equals(serverName)) {
|
||||
LOG.warn(zkw.prefix("Attempt to transition the " +
|
||||
"unassigned node for " + encoded +
|
||||
" from " + beginState + " to " + endState + " failed, " +
|
||||
"the server that tried to transition was " + serverName +
|
||||
" not the expected " + rt.getServerName()));
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Verify it is in expected state
|
||||
EventType et = rt.getEventType();
|
||||
if (!et.equals(beginState)) {
|
||||
|
|
|
@ -534,7 +534,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
EventType et = rt.getEventType();
|
||||
// Get ServerName. Could not be null.
|
||||
final ServerName sn = rt.getServerName();
|
||||
String encodedRegionName = regionInfo.getEncodedName();
|
||||
final String encodedRegionName = regionInfo.getEncodedName();
|
||||
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
|
||||
|
||||
|
||||
|
@ -592,6 +592,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
public void process() throws IOException {
|
||||
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
|
||||
try {
|
||||
RegionPlan plan = new RegionPlan(regionInfo, null, sn);
|
||||
addPlan(encodedRegionName, plan);
|
||||
assign(rs, false, false);
|
||||
} finally {
|
||||
lock.unlock();
|
||||
|
|
|
@ -399,7 +399,7 @@ public class TestAssignmentManager {
|
|||
assertNotSame(-1, versionid);
|
||||
// This uglyness below is what the openregionhandler on RS side does.
|
||||
versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
|
||||
SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
|
||||
SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
|
||||
EventType.RS_ZK_REGION_OPENING, versionid);
|
||||
assertNotSame(-1, versionid);
|
||||
// Move znode from OPENING to OPENED as RS does on successful open.
|
||||
|
|
|
@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.HConstants;
|
|||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||
import org.apache.hadoop.hbase.MediumTests;
|
||||
import org.apache.hadoop.hbase.ServerLoad;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
|
@ -43,6 +45,7 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
|
|||
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||
import org.apache.hadoop.hbase.executor.EventType;
|
||||
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
@ -110,6 +113,64 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This tests region assignment on a simulated restarted server
|
||||
*/
|
||||
@Test
|
||||
public void testAssignRegionOnRestartedServer() throws Exception {
|
||||
String table = "testAssignRegionOnRestartedServer";
|
||||
ServerName deadServer = null;
|
||||
HMaster master = null;
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||
admin.createTable(desc);
|
||||
|
||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
HRegionInfo hri = new HRegionInfo(
|
||||
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||
MetaEditor.addRegionToMeta(meta, hri);
|
||||
|
||||
master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||
Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
|
||||
assertFalse("There should be some servers online", onlineServers.isEmpty());
|
||||
|
||||
// Use the first server as the destination server
|
||||
ServerName destServer = onlineServers.iterator().next();
|
||||
|
||||
// Created faked dead server
|
||||
deadServer = new ServerName(destServer.getHostname(),
|
||||
destServer.getPort(), destServer.getStartcode() - 100L);
|
||||
master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
|
||||
|
||||
AssignmentManager am = master.getAssignmentManager();
|
||||
RegionPlan plan = new RegionPlan(hri, null, deadServer);
|
||||
am.addPlan(hri.getEncodedName(), plan);
|
||||
master.assignRegion(hri);
|
||||
|
||||
int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
|
||||
destServer, EventType.M_ZK_REGION_OFFLINE,
|
||||
EventType.RS_ZK_REGION_OPENING, 0);
|
||||
assertEquals("TansitionNode should fail", -1, version);
|
||||
|
||||
// Give region 2 seconds to assign, which may not be enough.
|
||||
// However, if HBASE-8545 is broken, this test will be flaky.
|
||||
// Otherwise, this test should never be flaky.
|
||||
Thread.sleep(2000);
|
||||
|
||||
assertTrue("Region should still be in transition",
|
||||
am.getRegionStates().isRegionInTransition(hri));
|
||||
assertEquals("Assign node should still be in version 0", 0,
|
||||
ZKAssign.getVersion(master.getZooKeeper(), hri));
|
||||
} finally {
|
||||
if (deadServer != null) {
|
||||
master.serverManager.expireServer(deadServer);
|
||||
}
|
||||
|
||||
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This tests offlining a region
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue