HBASE-12196 SSH should retry in case failed to assign regions
This commit is contained in:
parent
cab0819327
commit
f2fc311b19
|
@ -273,6 +273,12 @@ public class ServerShutdownHandler extends EventHandler {
|
|||
} catch (InterruptedException ie) {
|
||||
LOG.error("Caught " + ie + " during round-robin assignment");
|
||||
throw (InterruptedIOException)new InterruptedIOException().initCause(ie);
|
||||
} catch (IOException ioe) {
|
||||
LOG.info("Caught " + ioe + " during region assignment, will retry");
|
||||
// Only do HLog splitting if shouldSplitHlog and in DLR mode
|
||||
serverManager.processDeadServer(serverName,
|
||||
this.shouldSplitHlog && distributedLogReplay);
|
||||
return;
|
||||
}
|
||||
|
||||
if (this.shouldSplitHlog && distributedLogReplay) {
|
||||
|
|
|
@ -28,8 +28,10 @@ import static org.junit.Assert.fail;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
|
@ -79,6 +81,7 @@ import org.junit.experimental.categories.Category;
|
|||
/**
|
||||
* This tests AssignmentManager with a testing cluster.
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
@Category({MasterTests.class, MediumTests.class})
|
||||
public class TestAssignmentManagerOnCluster {
|
||||
private final static byte[] FAMILY = Bytes.toBytes("FAMILY");
|
||||
|
@ -830,6 +833,58 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test SSH waiting for extra region server for assignment
|
||||
*/
|
||||
@Test (timeout=300000)
|
||||
public void testSSHWaitForServerToAssignRegion() throws Exception {
|
||||
TableName table = TableName.valueOf("testSSHWaitForServerToAssignRegion");
|
||||
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
|
||||
boolean startAServer = false;
|
||||
try {
|
||||
HTableDescriptor desc = new HTableDescriptor(table);
|
||||
desc.addFamily(new HColumnDescriptor(FAMILY));
|
||||
admin.createTable(desc);
|
||||
|
||||
HMaster master = cluster.getMaster();
|
||||
final ServerManager serverManager = master.getServerManager();
|
||||
MyLoadBalancer.countRegionServers = Integer.valueOf(
|
||||
serverManager.countOfRegionServers());
|
||||
HRegionServer rs = TEST_UTIL.getRSForFirstRegionInTable(table);
|
||||
assertNotNull("First region should be assigned", rs);
|
||||
final ServerName serverName = rs.getServerName();
|
||||
// Wait till SSH tried to assign regions a several times
|
||||
int counter = MyLoadBalancer.counter.get() + 5;
|
||||
cluster.killRegionServer(serverName);
|
||||
startAServer = true;
|
||||
cluster.waitForRegionServerToStop(serverName, -1);
|
||||
while (counter > MyLoadBalancer.counter.get()) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
cluster.startRegionServer();
|
||||
startAServer = false;
|
||||
// Wait till the dead server is processed by SSH
|
||||
TEST_UTIL.waitFor(120000, 1000, new Waiter.Predicate<Exception>() {
|
||||
@Override
|
||||
public boolean evaluate() throws Exception {
|
||||
return serverManager.isServerDead(serverName)
|
||||
&& !serverManager.areDeadServersInProgress();
|
||||
}
|
||||
});
|
||||
TEST_UTIL.waitUntilNoRegionsInTransition(300000);
|
||||
|
||||
rs = TEST_UTIL.getRSForFirstRegionInTable(table);
|
||||
assertTrue("First region should be re-assigned to a different server",
|
||||
rs != null && !serverName.equals(rs.getServerName()));
|
||||
} finally {
|
||||
MyLoadBalancer.countRegionServers = null;
|
||||
TEST_UTIL.deleteTable(table);
|
||||
if (startAServer) {
|
||||
cluster.startRegionServer();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Test force unassign/assign a region of a disabled table
|
||||
*/
|
||||
|
@ -1121,6 +1176,9 @@ public class TestAssignmentManagerOnCluster {
|
|||
// For this region, if specified, always assign to nowhere
|
||||
static volatile String controledRegion = null;
|
||||
|
||||
static volatile Integer countRegionServers = null;
|
||||
static AtomicInteger counter = new AtomicInteger(0);
|
||||
|
||||
@Override
|
||||
public ServerName randomAssignment(HRegionInfo regionInfo,
|
||||
List<ServerName> servers) {
|
||||
|
@ -1129,6 +1187,21 @@ public class TestAssignmentManagerOnCluster {
|
|||
}
|
||||
return super.randomAssignment(regionInfo, servers);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<ServerName, List<HRegionInfo>> roundRobinAssignment(
|
||||
List<HRegionInfo> regions, List<ServerName> servers) {
|
||||
if (countRegionServers != null && services != null) {
|
||||
int regionServers = services.getServerManager().countOfRegionServers();
|
||||
if (regionServers < countRegionServers.intValue()) {
|
||||
// Let's wait till more region servers join in.
|
||||
// Before that, fail region assignments.
|
||||
counter.incrementAndGet();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
return super.roundRobinAssignment(regions, servers);
|
||||
}
|
||||
}
|
||||
|
||||
public static class MyMaster extends HMaster {
|
||||
|
|
Loading…
Reference in New Issue