HBASE-14802 Replaying server crash recovery procedure after a failover causes incorrect handling of deadservers; Reapply of slightly different patch... see issue
This commit is contained in:
parent
bb6581345f
commit
dd5f454b03
|
@ -58,6 +58,11 @@ public class DeadServer {
|
||||||
*/
|
*/
|
||||||
private int numProcessing = 0;
|
private int numProcessing = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether a dead server is being processed currently.
|
||||||
|
*/
|
||||||
|
private boolean processing = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A dead server that comes back alive has a different start code. The new start code should be
|
* A dead server that comes back alive has a different start code. The new start code should be
|
||||||
* greater than the old one, but we don't take this into account in this method.
|
* greater than the old one, but we don't take this into account in this method.
|
||||||
|
@ -94,9 +99,7 @@ public class DeadServer {
|
||||||
*
|
*
|
||||||
* @return true if any RS are being processed as dead
|
* @return true if any RS are being processed as dead
|
||||||
*/
|
*/
|
||||||
public synchronized boolean areDeadServersInProgress() {
|
public synchronized boolean areDeadServersInProgress() { return processing; }
|
||||||
return numProcessing != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized Set<ServerName> copyServerNames() {
|
public synchronized Set<ServerName> copyServerNames() {
|
||||||
Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
|
Set<ServerName> clone = new HashSet<ServerName>(deadServers.size());
|
||||||
|
@ -109,15 +112,34 @@ public class DeadServer {
|
||||||
* @param sn the server name
|
* @param sn the server name
|
||||||
*/
|
*/
|
||||||
public synchronized void add(ServerName sn) {
|
public synchronized void add(ServerName sn) {
|
||||||
this.numProcessing++;
|
processing = true;
|
||||||
if (!deadServers.containsKey(sn)){
|
if (!deadServers.containsKey(sn)){
|
||||||
deadServers.put(sn, EnvironmentEdgeManager.currentTime());
|
deadServers.put(sn, EnvironmentEdgeManager.currentTime());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Notify that we started processing this dead server.
|
||||||
|
* @param sn ServerName for the dead server.
|
||||||
|
*/
|
||||||
|
public synchronized void notifyServer(ServerName sn) {
|
||||||
|
if (LOG.isDebugEnabled()) { LOG.debug("Started processing " + sn); }
|
||||||
|
processing = true;
|
||||||
|
numProcessing++;
|
||||||
|
}
|
||||||
|
|
||||||
public synchronized void finish(ServerName sn) {
|
public synchronized void finish(ServerName sn) {
|
||||||
if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing);
|
numProcessing--;
|
||||||
this.numProcessing--;
|
if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + numProcessing);
|
||||||
|
|
||||||
|
assert numProcessing >= 0: "Number of dead servers in processing should always be non-negative";
|
||||||
|
|
||||||
|
if (numProcessing < 0) {
|
||||||
|
LOG.error("Number of dead servers in processing = " + numProcessing
|
||||||
|
+ ". Something went wrong, this should always be non-negative.");
|
||||||
|
numProcessing = 0;
|
||||||
|
}
|
||||||
|
if (numProcessing == 0) { processing = false; }
|
||||||
}
|
}
|
||||||
|
|
||||||
public synchronized int size() {
|
public synchronized int size() {
|
||||||
|
|
|
@ -109,6 +109,11 @@ implements ServerProcedureInterface {
|
||||||
*/
|
*/
|
||||||
private ServerName serverName;
|
private ServerName serverName;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether DeadServer knows that we are processing it.
|
||||||
|
*/
|
||||||
|
private boolean notifiedDeadServer = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regions that were on the crashed server.
|
* Regions that were on the crashed server.
|
||||||
*/
|
*/
|
||||||
|
@ -183,6 +188,13 @@ implements ServerProcedureInterface {
|
||||||
if (!services.getAssignmentManager().isFailoverCleanupDone()) {
|
if (!services.getAssignmentManager().isFailoverCleanupDone()) {
|
||||||
throwProcedureYieldException("Waiting on master failover to complete");
|
throwProcedureYieldException("Waiting on master failover to complete");
|
||||||
}
|
}
|
||||||
|
// HBASE-14802
|
||||||
|
// If we have not yet notified that we are processing a dead server, we should do now.
|
||||||
|
if (!notifiedDeadServer) {
|
||||||
|
services.getServerManager().getDeadServers().notifyServer(serverName);
|
||||||
|
notifiedDeadServer = true;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case SERVER_CRASH_START:
|
case SERVER_CRASH_START:
|
||||||
|
|
|
@ -17,13 +17,19 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase.master;
|
package org.apache.hadoop.hbase.master;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
|
||||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
|
||||||
import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
|
import org.apache.hadoop.hbase.util.ManualEnvironmentEdge;
|
||||||
import org.apache.hadoop.hbase.util.Pair;
|
import org.apache.hadoop.hbase.util.Pair;
|
||||||
|
import org.junit.AfterClass;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.experimental.categories.Category;
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
@ -35,24 +41,39 @@ import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
@Category({MasterTests.class, MediumTests.class})
|
@Category({MasterTests.class, MediumTests.class})
|
||||||
public class TestDeadServer {
|
public class TestDeadServer {
|
||||||
|
private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
|
||||||
|
|
||||||
final ServerName hostname123 = ServerName.valueOf("127.0.0.1", 123, 3L);
|
final ServerName hostname123 = ServerName.valueOf("127.0.0.1", 123, 3L);
|
||||||
final ServerName hostname123_2 = ServerName.valueOf("127.0.0.1", 123, 4L);
|
final ServerName hostname123_2 = ServerName.valueOf("127.0.0.1", 123, 4L);
|
||||||
final ServerName hostname1234 = ServerName.valueOf("127.0.0.2", 1234, 4L);
|
final ServerName hostname1234 = ServerName.valueOf("127.0.0.2", 1234, 4L);
|
||||||
final ServerName hostname12345 = ServerName.valueOf("127.0.0.2", 12345, 4L);
|
final ServerName hostname12345 = ServerName.valueOf("127.0.0.2", 12345, 4L);
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setupBeforeClass() throws Exception {
|
||||||
|
TEST_UTIL.startMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownAfterClass() throws Exception {
|
||||||
|
TEST_UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
@Test public void testIsDead() {
|
@Test public void testIsDead() {
|
||||||
DeadServer ds = new DeadServer();
|
DeadServer ds = new DeadServer();
|
||||||
ds.add(hostname123);
|
ds.add(hostname123);
|
||||||
|
ds.notifyServer(hostname123);
|
||||||
assertTrue(ds.areDeadServersInProgress());
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
ds.finish(hostname123);
|
ds.finish(hostname123);
|
||||||
assertFalse(ds.areDeadServersInProgress());
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
|
|
||||||
ds.add(hostname1234);
|
ds.add(hostname1234);
|
||||||
|
ds.notifyServer(hostname1234);
|
||||||
assertTrue(ds.areDeadServersInProgress());
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
ds.finish(hostname1234);
|
ds.finish(hostname1234);
|
||||||
assertFalse(ds.areDeadServersInProgress());
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
|
|
||||||
ds.add(hostname12345);
|
ds.add(hostname12345);
|
||||||
|
ds.notifyServer(hostname12345);
|
||||||
assertTrue(ds.areDeadServersInProgress());
|
assertTrue(ds.areDeadServersInProgress());
|
||||||
ds.finish(hostname12345);
|
ds.finish(hostname12345);
|
||||||
assertFalse(ds.areDeadServersInProgress());
|
assertFalse(ds.areDeadServersInProgress());
|
||||||
|
@ -75,6 +96,16 @@ public class TestDeadServer {
|
||||||
assertFalse(ds.cleanPreviousInstance(deadServerHostComingAlive));
|
assertFalse(ds.cleanPreviousInstance(deadServerHostComingAlive));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 15000)
|
||||||
|
public void testCrashProcedureReplay() {
|
||||||
|
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
|
||||||
|
ProcedureExecutor pExecutor = master.getMasterProcedureExecutor();
|
||||||
|
ServerCrashProcedure proc = new ServerCrashProcedure(hostname123, false, false);
|
||||||
|
|
||||||
|
ProcedureTestingUtility.submitAndWait(pExecutor, proc);
|
||||||
|
|
||||||
|
assertFalse(master.getServerManager().getDeadServers().areDeadServersInProgress());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSortExtract(){
|
public void testSortExtract(){
|
||||||
|
|
Loading…
Reference in New Issue