HBASE-22128 Moving the namespace region while the master crashes causes a deadlock
Signed-off-by: Allan Yang <allan163@apache.org>
This commit is contained in:
parent
11e84f9c68
commit
83668c78e3
|
@ -191,4 +191,22 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
||||||
ProtobufUtil.toServerName(state.getDestinationServer()) : null;
|
ProtobufUtil.toServerName(state.getDestinationServer()) : null;
|
||||||
this.plan = new RegionPlan(regionInfo, sourceServer, destinationServer);
|
this.plan = new RegionPlan(regionInfo, sourceServer, destinationServer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected boolean waitInitialized(MasterProcedureEnv env) {
|
||||||
|
|
||||||
|
if (TableName.isMetaTableName(getTableName())) {
|
||||||
|
// only offline state master will try init meta procedure
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (getTableName().equals(TableName.NAMESPACE_TABLE_NAME)) {
|
||||||
|
// after unassign procedure finished, namespace region will be offline
|
||||||
|
// if master crashed at the same time and reboot
|
||||||
|
// it will be stuck as master init is block by waiting namespace table online
|
||||||
|
// but move region procedure can not go on, break the deadlock by not wait master initialized
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return super.waitInitialized(env);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,178 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master.assignment;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.MoveRegionState.MOVE_REGION_ASSIGN;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.CountDownLatch;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HBaseIOException;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.HRegionLocation;
|
||||||
|
import org.apache.hadoop.hbase.MiniHBaseCluster;
|
||||||
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.ClusterConnection;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
|
import org.apache.hadoop.hbase.master.RegionPlan;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
|
||||||
|
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
|
||||||
|
|
||||||
|
@Category({LargeTests.class})
|
||||||
|
public class TestMoveSystemTableWithStopMaster {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(TestMoveSystemTableWithStopMaster.class);
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestMoveSystemTableWithStopMaster.class);
|
||||||
|
|
||||||
|
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
UTIL.startMiniCluster(1, 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDown() throws Exception {
|
||||||
|
UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMoveMetaRegoinWithStopMaster() throws Exception {
|
||||||
|
ClusterConnection conn = (ClusterConnection) UTIL.getConnection();
|
||||||
|
MiniHBaseCluster miniHBaseCluster = UTIL.getHBaseCluster();
|
||||||
|
|
||||||
|
List<HRegionLocation> namespaceRegionLocations = conn.locateRegions(TableName.META_TABLE_NAME);
|
||||||
|
|
||||||
|
RegionInfo regionInfo = namespaceRegionLocations.get(0).getRegion();
|
||||||
|
ServerName source = namespaceRegionLocations.get(0).getServerName();
|
||||||
|
ServerName dstServerName = UTIL.getOtherRegionServer(
|
||||||
|
miniHBaseCluster.getRegionServer(source)).getServerName();
|
||||||
|
|
||||||
|
RegionPlan rp = new RegionPlan(regionInfo, source, dstServerName);
|
||||||
|
|
||||||
|
HMaster master = UTIL.getHBaseCluster().getMaster();
|
||||||
|
|
||||||
|
CountDownLatch moveRegionAssignLatch = new CountDownLatch(1);
|
||||||
|
CountDownLatch masterAbortLatch = new CountDownLatch(1);
|
||||||
|
|
||||||
|
MoveRegionProcedureHoldBeforeAssign proc = new MoveRegionProcedureHoldBeforeAssign(
|
||||||
|
master.getMasterProcedureExecutor().getEnvironment(), rp, true);
|
||||||
|
|
||||||
|
proc.moveRegionAssignLatch = moveRegionAssignLatch;
|
||||||
|
proc.masterStoppedLatch = masterAbortLatch;
|
||||||
|
|
||||||
|
ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
|
||||||
|
|
||||||
|
moveRegionAssignLatch.await();
|
||||||
|
master.abort("for test");
|
||||||
|
// may not closed, and rs still conn to old master
|
||||||
|
master.getEventLoopGroupConfig().group().shutdownGracefully();
|
||||||
|
miniHBaseCluster.waitForMasterToStop(master.getServerName(), 30000);
|
||||||
|
masterAbortLatch.countDown();
|
||||||
|
|
||||||
|
UTIL.getMiniHBaseCluster().startMaster();
|
||||||
|
|
||||||
|
// master should be initialized in 30 seconds
|
||||||
|
Assert.assertTrue(miniHBaseCluster.waitForActiveAndReadyMaster(60000));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMoveNamespaceRegoinWithStopMaster() throws Exception {
|
||||||
|
ClusterConnection conn = (ClusterConnection) UTIL.getConnection();
|
||||||
|
MiniHBaseCluster miniHBaseCluster = UTIL.getHBaseCluster();
|
||||||
|
|
||||||
|
List<HRegionLocation> namespaceRegionLocations = conn.locateRegions(
|
||||||
|
TableName.NAMESPACE_TABLE_NAME);
|
||||||
|
|
||||||
|
RegionInfo regionInfo = namespaceRegionLocations.get(0).getRegion();
|
||||||
|
ServerName source = namespaceRegionLocations.get(0).getServerName();
|
||||||
|
ServerName dstServerName = UTIL.getOtherRegionServer(
|
||||||
|
miniHBaseCluster.getRegionServer(source)).getServerName();
|
||||||
|
|
||||||
|
RegionPlan rp = new RegionPlan(regionInfo, source, dstServerName);
|
||||||
|
|
||||||
|
HMaster master = UTIL.getHBaseCluster().getMaster();
|
||||||
|
|
||||||
|
CountDownLatch moveRegionAssignLatch = new CountDownLatch(1);
|
||||||
|
CountDownLatch masterAbortLatch = new CountDownLatch(1);
|
||||||
|
|
||||||
|
MoveRegionProcedureHoldBeforeAssign proc = new MoveRegionProcedureHoldBeforeAssign(
|
||||||
|
master.getMasterProcedureExecutor().getEnvironment(), rp, true);
|
||||||
|
|
||||||
|
proc.moveRegionAssignLatch = moveRegionAssignLatch;
|
||||||
|
proc.masterStoppedLatch = masterAbortLatch;
|
||||||
|
|
||||||
|
ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
|
||||||
|
|
||||||
|
moveRegionAssignLatch.await();
|
||||||
|
master.abort("for test");
|
||||||
|
miniHBaseCluster.waitForMasterToStop(master.getServerName(), 30000);
|
||||||
|
masterAbortLatch.countDown();
|
||||||
|
|
||||||
|
UTIL.getMiniHBaseCluster().startMaster();
|
||||||
|
|
||||||
|
// master should be initialized in 60 seconds
|
||||||
|
Assert.assertTrue(miniHBaseCluster.waitForActiveAndReadyMaster(60000));
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class MoveRegionProcedureHoldBeforeAssign extends MoveRegionProcedure {
|
||||||
|
public MoveRegionProcedureHoldBeforeAssign() {
|
||||||
|
// Required by the Procedure framework to create the procedure on replay
|
||||||
|
super();
|
||||||
|
}
|
||||||
|
|
||||||
|
CountDownLatch masterStoppedLatch;
|
||||||
|
|
||||||
|
CountDownLatch moveRegionAssignLatch;
|
||||||
|
|
||||||
|
public MoveRegionProcedureHoldBeforeAssign(MasterProcedureEnv env,
|
||||||
|
RegionPlan plan, boolean check) throws HBaseIOException {
|
||||||
|
super(env, plan, check);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Flow executeFromState(final MasterProcedureEnv env,
|
||||||
|
final MasterProcedureProtos.MoveRegionState state) throws InterruptedException {
|
||||||
|
if (state == MOVE_REGION_ASSIGN) {
|
||||||
|
if (moveRegionAssignLatch != null) {
|
||||||
|
moveRegionAssignLatch.countDown();
|
||||||
|
masterStoppedLatch.await();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return super.executeFromState(env, state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue