HBASE-13635 Regions stuck in transition because master is incorrectly assumed dead

Summary:
Requests to tell master that meta is moved can be blocked behind other requests that are trying to mutate meta.
This causes a deadlock, and the master is incorrectly assumed dead even though it stays up the whole time.

This patch adds prioritization onto reporting meta moves. It should allow meta to make progress.

Test Plan: unit tests.

Differential Revision: https://reviews.facebook.net/D38109
This commit is contained in:
Elliott Clark 2015-05-06 11:27:33 -07:00
parent 174632111c
commit c3f83a9eff
3 changed files with 76 additions and 0 deletions

View File

@ -187,6 +187,9 @@ public class RWQueueRpcExecutor extends RpcExecutor {
if (methodName.equalsIgnoreCase("mutate")) {
return true;
}
if (methodName.equalsIgnoreCase("ReportRegionStateTransition")) {
return true;
}
return false;
}

View File

@ -25,9 +25,14 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.ipc.PriorityFunction;
import org.apache.hadoop.hbase.ipc.QosPriority;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.ReportRegionStateTransitionRequest;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos.RegionStateTransition;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CloseRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.FlushRegionRequest;
@ -227,6 +232,22 @@ class AnnotationReadingPriorityFunction implements PriorityFunction {
return HConstants.SYSTEMTABLE_QOS;
}
}
// If meta is moving, all other region state transition reports will be blocked behind it.
// Reports that involve a system table therefore must not wait in the same queue as the rest.
if (methodName.equalsIgnoreCase("ReportRegionStateTransition")) { // Regions are moving
ReportRegionStateTransitionRequest tRequest = (ReportRegionStateTransitionRequest) param;
for (RegionStateTransition transition : tRequest.getTransitionList()) {
if (transition.getRegionInfoList() != null) {
for (HBaseProtos.RegionInfo info : transition.getRegionInfoList()) {
TableName tn = ProtobufUtil.toTableName(info.getTableName());
if (tn.isSystemTable()) {
return HConstants.SYSTEMTABLE_QOS;
}
}
}
}
}
return HConstants.NORMAL_QOS;
}

View File

@ -22,10 +22,17 @@ import static org.mockito.Mockito.when;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.RegionServerStatusProtos;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiRequest;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.mockito.Mockito;
@ -53,6 +60,51 @@ public class TestQosFunction {
checkMethod("OpenRegion", HConstants.ADMIN_QOS, qosFunction);
// Check multi works.
checkMethod("Multi", HConstants.NORMAL_QOS, qosFunction, MultiRequest.getDefaultInstance());
}
/**
 * Exercises the priority assigned to ReportRegionStateTransition requests: a request that
 * carries a transition for the first meta region is checked against SYSTEMTABLE_QOS, while a
 * request that only carries a user-table region is checked against NORMAL_QOS.
 */
@Test
public void testRegionInTransition() {
  // The priority function only needs a configuration from the (mocked) RPC services.
  Configuration configuration = HBaseConfiguration.create();
  RSRpcServices mockedServices = Mockito.mock(RSRpcServices.class);
  when(mockedServices.getConfiguration()).thenReturn(configuration);
  AnnotationReadingPriorityFunction priorityFunction =
      new AnnotationReadingPriorityFunction(mockedServices, RSRpcServices.class);

  // Region descriptors: the first meta region, and a region of an ordinary namespaced table.
  HBaseProtos.RegionInfo metaRegion = HRegionInfo.convert(HRegionInfo.FIRST_META_REGIONINFO);
  HRegionInfo userRegionInfo =
      new HRegionInfo(TableName.valueOf("test:table"), Bytes.toBytes("a"), Bytes.toBytes("b"),
          false);
  HBaseProtos.RegionInfo userRegion = HRegionInfo.convert(userRegionInfo);

  // One CLOSED transition per region.
  RegionServerStatusProtos.RegionStateTransition.TransitionCode closed =
      RegionServerStatusProtos.RegionStateTransition.TransitionCode.CLOSED;
  RegionServerStatusProtos.RegionStateTransition metaClosed =
      RegionServerStatusProtos.RegionStateTransition.newBuilder()
          .addRegionInfo(metaRegion)
          .setTransitionCode(closed)
          .build();
  RegionServerStatusProtos.RegionStateTransition userClosed =
      RegionServerStatusProtos.RegionStateTransition.newBuilder()
          .addRegionInfo(userRegion)
          .setTransitionCode(closed)
          .build();

  // Both requests report from the same server, so the server name proto is built once.
  HBaseProtos.ServerName reportingServer =
      ProtobufUtil.toServerName(ServerName.valueOf("locahost:60020", 100));

  // The meta transition is deliberately listed after a normal one in the mixed request.
  RegionServerStatusProtos.ReportRegionStateTransitionRequest requestWithMeta =
      RegionServerStatusProtos.ReportRegionStateTransitionRequest.newBuilder()
          .setServer(reportingServer)
          .addTransition(userClosed)
          .addTransition(metaClosed)
          .build();
  RegionServerStatusProtos.ReportRegionStateTransitionRequest requestWithoutMeta =
      RegionServerStatusProtos.ReportRegionStateTransitionRequest.newBuilder()
          .setServer(reportingServer)
          .addTransition(userClosed)
          .build();

  // Check ReportRegionStateTransition priorities for both kinds of request.
  final String rpcName = "ReportRegionStateTransition";
  checkMethod(rpcName, HConstants.SYSTEMTABLE_QOS, priorityFunction, requestWithMeta);
  checkMethod(rpcName, HConstants.NORMAL_QOS, priorityFunction, requestWithoutMeta);
}
private void checkMethod(final String methodName, final int expected,