HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time
This commit is contained in:
parent
3a75505cf2
commit
5efa5f6de4
|
@ -322,7 +322,8 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
|
|||
* @see #holdLock(Object)
|
||||
* @return true if the procedure has the lock, false otherwise.
|
||||
*/
|
||||
protected final boolean hasLock() {
|
||||
@VisibleForTesting
|
||||
public final boolean hasLock() {
|
||||
return locked;
|
||||
}
|
||||
|
||||
|
|
|
@ -70,6 +70,18 @@ public class ReopenTableRegionsProcedure
|
|||
return TableOperationType.REGION_EDIT;
|
||||
}
|
||||
|
||||
private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
|
||||
if (loc.getSeqNum() < 0) {
|
||||
return false;
|
||||
}
|
||||
RegionStateNode regionNode =
|
||||
env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
|
||||
// If the region node is null, then at least in the next round we can remove this region to make
|
||||
// progress. And the second condition is a normal one, if there are no TRSP with it then we can
|
||||
// schedule one to make progress.
|
||||
return regionNode == null || !regionNode.isInTransition();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
||||
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
|
||||
|
@ -85,8 +97,12 @@ public class ReopenTableRegionsProcedure
|
|||
return Flow.HAS_MORE_STATE;
|
||||
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
|
||||
for (HRegionLocation loc : regions) {
|
||||
RegionStateNode regionNode = env.getAssignmentManager().getRegionStates()
|
||||
.getOrCreateRegionStateNode(loc.getRegion());
|
||||
RegionStateNode regionNode =
|
||||
env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
|
||||
// this is possible, maybe the region has already been merged or split, see HBASE-20921
|
||||
if (regionNode == null) {
|
||||
continue;
|
||||
}
|
||||
TransitRegionStateProcedure proc;
|
||||
regionNode.lock();
|
||||
try {
|
||||
|
@ -108,13 +124,13 @@ public class ReopenTableRegionsProcedure
|
|||
if (regions.isEmpty()) {
|
||||
return Flow.NO_MORE_STATE;
|
||||
}
|
||||
if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) {
|
||||
if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
|
||||
attempt = 0;
|
||||
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
|
||||
return Flow.HAS_MORE_STATE;
|
||||
}
|
||||
// All the regions need to reopen are in OPENING state which means we can not schedule any
|
||||
// MRPs.
|
||||
// We cannot schedule a TRSP for any region that needs to reopen, so wait a while and retry
|
||||
// again.
|
||||
long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++);
|
||||
LOG.info(
|
||||
"There are still {} region(s) which need to be reopened for table {} are in " +
|
||||
|
@ -138,6 +154,7 @@ public class ReopenTableRegionsProcedure
|
|||
env.getProcedureScheduler().addFront(this);
|
||||
return false; // 'false' means that this procedure handled the timeout
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
||||
throws IOException, InterruptedException {
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hbase.master.procedure;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.ServerManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
|
||||
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
||||
/**
|
||||
* Testcase for HBASE-21330.
|
||||
*/
|
||||
@Category({ MasterTests.class, MediumTests.class })
|
||||
public class TestReopenTableRegionsProcedureInfiniteLoop {
|
||||
|
||||
@ClassRule
|
||||
public static final HBaseClassTestRule CLASS_RULE =
|
||||
HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureInfiniteLoop.class);
|
||||
|
||||
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||
|
||||
private static TableName TABLE_NAME = TableName.valueOf("InfiniteLoop");
|
||||
|
||||
private static byte[] CF = Bytes.toBytes("cf");
|
||||
|
||||
@BeforeClass
|
||||
public static void setUp() throws Exception {
|
||||
UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
|
||||
UTIL.startMiniCluster(1);
|
||||
UTIL.createTable(TABLE_NAME, CF);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void tearDown() throws Exception {
|
||||
UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testInfiniteLoop() throws IOException {
|
||||
HMaster master = UTIL.getMiniHBaseCluster().getMaster();
|
||||
AssignmentManager am = master.getAssignmentManager();
|
||||
ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
|
||||
RegionInfo regionInfo = UTIL.getAdmin().getRegions(TABLE_NAME).get(0);
|
||||
RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
|
||||
long procId;
|
||||
ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
|
||||
regionNode.lock();
|
||||
try {
|
||||
procId = exec.submitProcedure(proc);
|
||||
UTIL.waitFor(30000, () -> proc.hasLock());
|
||||
TransitRegionStateProcedure trsp =
|
||||
TransitRegionStateProcedure.reopen(exec.getEnvironment(), regionInfo);
|
||||
regionNode.setProcedure(trsp);
|
||||
exec.submitProcedure(trsp);
|
||||
} finally {
|
||||
regionNode.unlock();
|
||||
}
|
||||
UTIL.waitFor(60000, () -> exec.isFinished(procId));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue