HBASE-8545 Meta stuck in transition when it is assigned to a just restarted dead region server
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1484875 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
634f63ef3c
commit
aa12c8ac72
|
@ -798,7 +798,6 @@ public class ZKAssign {
|
||||||
// Node no longer exists. Return -1. It means unsuccessful transition.
|
// Node no longer exists. Return -1. It means unsuccessful transition.
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
RegionTransition rt = getRegionTransition(existingBytes);
|
|
||||||
|
|
||||||
// Verify it is the expected version
|
// Verify it is the expected version
|
||||||
if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
|
if (expectedVersion != -1 && stat.getVersion() != expectedVersion) {
|
||||||
|
@ -808,7 +807,9 @@ public class ZKAssign {
|
||||||
"the node existed but was version " + stat.getVersion() +
|
"the node existed but was version " + stat.getVersion() +
|
||||||
" not the expected version " + expectedVersion));
|
" not the expected version " + expectedVersion));
|
||||||
return -1;
|
return -1;
|
||||||
} else if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
|
}
|
||||||
|
|
||||||
|
if (beginState.equals(EventType.M_ZK_REGION_OFFLINE)
|
||||||
&& endState.equals(EventType.RS_ZK_REGION_OPENING)
|
&& endState.equals(EventType.RS_ZK_REGION_OPENING)
|
||||||
&& expectedVersion == -1 && stat.getVersion() != 0) {
|
&& expectedVersion == -1 && stat.getVersion() != 0) {
|
||||||
// the check below ensures that double assignment does not happen.
|
// the check below ensures that double assignment does not happen.
|
||||||
|
@ -822,6 +823,18 @@ public class ZKAssign {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RegionTransition rt = getRegionTransition(existingBytes);
|
||||||
|
|
||||||
|
// Verify that the server on which the transition happens has not changed
|
||||||
|
if (!rt.getServerName().equals(serverName)) {
|
||||||
|
LOG.warn(zkw.prefix("Attempt to transition the " +
|
||||||
|
"unassigned node for " + encoded +
|
||||||
|
" from " + beginState + " to " + endState + " failed, " +
|
||||||
|
"the server that tried to transition was " + serverName +
|
||||||
|
" not the expected " + rt.getServerName()));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Verify it is in expected state
|
// Verify it is in expected state
|
||||||
EventType et = rt.getEventType();
|
EventType et = rt.getEventType();
|
||||||
if (!et.equals(beginState)) {
|
if (!et.equals(beginState)) {
|
||||||
|
|
|
@ -534,7 +534,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
EventType et = rt.getEventType();
|
EventType et = rt.getEventType();
|
||||||
// Get ServerName. It cannot be null.
|
// Get ServerName. It cannot be null.
|
||||||
final ServerName sn = rt.getServerName();
|
final ServerName sn = rt.getServerName();
|
||||||
String encodedRegionName = regionInfo.getEncodedName();
|
final String encodedRegionName = regionInfo.getEncodedName();
|
||||||
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
|
LOG.info("Processing region " + regionInfo.getRegionNameAsString() + " in state " + et);
|
||||||
|
|
||||||
|
|
||||||
|
@ -592,6 +592,8 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
public void process() throws IOException {
|
public void process() throws IOException {
|
||||||
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
|
ReentrantLock lock = locker.acquireLock(regionInfo.getEncodedName());
|
||||||
try {
|
try {
|
||||||
|
RegionPlan plan = new RegionPlan(regionInfo, null, sn);
|
||||||
|
addPlan(encodedRegionName, plan);
|
||||||
assign(rs, false, false);
|
assign(rs, false, false);
|
||||||
} finally {
|
} finally {
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
|
|
|
@ -399,7 +399,7 @@ public class TestAssignmentManager {
|
||||||
assertNotSame(-1, versionid);
|
assertNotSame(-1, versionid);
|
||||||
// This ugliness below is what the openregionhandler on RS side does.
|
// This ugliness below is what the openregionhandler on RS side does.
|
||||||
versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
|
versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO,
|
||||||
SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE,
|
SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE,
|
||||||
EventType.RS_ZK_REGION_OPENING, versionid);
|
EventType.RS_ZK_REGION_OPENING, versionid);
|
||||||
assertNotSame(-1, versionid);
|
assertNotSame(-1, versionid);
|
||||||
// Move znode from OPENING to OPENED as RS does on successful open.
|
// Move znode from OPENING to OPENED as RS does on successful open.
|
||||||
|
|
|
@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -34,6 +35,7 @@ import org.apache.hadoop.hbase.HConstants;
|
||||||
import org.apache.hadoop.hbase.HRegionInfo;
|
import org.apache.hadoop.hbase.HRegionInfo;
|
||||||
import org.apache.hadoop.hbase.HTableDescriptor;
|
import org.apache.hadoop.hbase.HTableDescriptor;
|
||||||
import org.apache.hadoop.hbase.MediumTests;
|
import org.apache.hadoop.hbase.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.ServerLoad;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
import org.apache.hadoop.hbase.catalog.MetaEditor;
|
||||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||||
|
@ -43,6 +45,7 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
|
||||||
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
||||||
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
|
||||||
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
import org.apache.hadoop.hbase.coprocessor.RegionObserver;
|
||||||
|
import org.apache.hadoop.hbase.executor.EventType;
|
||||||
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
import org.apache.hadoop.hbase.master.balancer.StochasticLoadBalancer;
|
||||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
@ -110,6 +113,64 @@ public class TestAssignmentManagerOnCluster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests region assignment on a simulated restarted server
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testAssignRegionOnRestartedServer() throws Exception {
|
||||||
|
String table = "testAssignRegionOnRestartedServer";
|
||||||
|
ServerName deadServer = null;
|
||||||
|
HMaster master = null;
|
||||||
|
try {
|
||||||
|
HTableDescriptor desc = new HTableDescriptor(table);
|
||||||
|
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||||
|
admin.createTable(desc);
|
||||||
|
|
||||||
|
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
|
HRegionInfo hri = new HRegionInfo(
|
||||||
|
desc.getName(), Bytes.toBytes("A"), Bytes.toBytes("Z"));
|
||||||
|
MetaEditor.addRegionToMeta(meta, hri);
|
||||||
|
|
||||||
|
master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
Set<ServerName> onlineServers = master.serverManager.getOnlineServers().keySet();
|
||||||
|
assertFalse("There should be some servers online", onlineServers.isEmpty());
|
||||||
|
|
||||||
|
// Use the first server as the destination server
|
||||||
|
ServerName destServer = onlineServers.iterator().next();
|
||||||
|
|
||||||
|
// Create a fake dead server
|
||||||
|
deadServer = new ServerName(destServer.getHostname(),
|
||||||
|
destServer.getPort(), destServer.getStartcode() - 100L);
|
||||||
|
master.serverManager.recordNewServer(deadServer, ServerLoad.EMPTY_SERVERLOAD);
|
||||||
|
|
||||||
|
AssignmentManager am = master.getAssignmentManager();
|
||||||
|
RegionPlan plan = new RegionPlan(hri, null, deadServer);
|
||||||
|
am.addPlan(hri.getEncodedName(), plan);
|
||||||
|
master.assignRegion(hri);
|
||||||
|
|
||||||
|
int version = ZKAssign.transitionNode(master.getZooKeeper(), hri,
|
||||||
|
destServer, EventType.M_ZK_REGION_OFFLINE,
|
||||||
|
EventType.RS_ZK_REGION_OPENING, 0);
|
||||||
|
assertEquals("TansitionNode should fail", -1, version);
|
||||||
|
|
||||||
|
// Give region 2 seconds to assign, which may not be enough.
|
||||||
|
// However, if HBASE-8545 is broken, this test will be flaky.
|
||||||
|
// Otherwise, this test should never be flaky.
|
||||||
|
Thread.sleep(2000);
|
||||||
|
|
||||||
|
assertTrue("Region should still be in transition",
|
||||||
|
am.getRegionStates().isRegionInTransition(hri));
|
||||||
|
assertEquals("Assign node should still be in version 0", 0,
|
||||||
|
ZKAssign.getVersion(master.getZooKeeper(), hri));
|
||||||
|
} finally {
|
||||||
|
if (deadServer != null) {
|
||||||
|
master.serverManager.expireServer(deadServer);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_UTIL.deleteTable(Bytes.toBytes(table));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This tests offlining a region
|
* This tests offlining a region
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue