HBASE-21330 ReopenTableRegionsProcedure will enter an infinite loop if we schedule a TRSP at the same time
This commit is contained in:
parent
3563f48900
commit
d7d3beb6bc
|
@ -322,7 +322,8 @@ public abstract class Procedure<TEnvironment> implements Comparable<Procedure<TE
|
||||||
* @see #holdLock(Object)
|
* @see #holdLock(Object)
|
||||||
* @return true if the procedure has the lock, false otherwise.
|
* @return true if the procedure has the lock, false otherwise.
|
||||||
*/
|
*/
|
||||||
protected final boolean hasLock() {
|
@VisibleForTesting
|
||||||
|
public final boolean hasLock() {
|
||||||
return locked;
|
return locked;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -70,6 +70,18 @@ public class ReopenTableRegionsProcedure
|
||||||
return TableOperationType.REGION_EDIT;
|
return TableOperationType.REGION_EDIT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
|
||||||
|
if (loc.getSeqNum() < 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
RegionStateNode regionNode =
|
||||||
|
env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
|
||||||
|
// If the region node is null, then at least in the next round we can remove this region to make
|
||||||
|
// progress. And the second condition is a normal one, if there are no TRSP with it then we can
|
||||||
|
// schedule one to make progress.
|
||||||
|
return regionNode == null || !regionNode.isInTransition();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
||||||
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
|
throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
|
||||||
|
@ -85,8 +97,12 @@ public class ReopenTableRegionsProcedure
|
||||||
return Flow.HAS_MORE_STATE;
|
return Flow.HAS_MORE_STATE;
|
||||||
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
|
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
|
||||||
for (HRegionLocation loc : regions) {
|
for (HRegionLocation loc : regions) {
|
||||||
RegionStateNode regionNode = env.getAssignmentManager().getRegionStates()
|
RegionStateNode regionNode =
|
||||||
.getOrCreateRegionStateNode(loc.getRegion());
|
env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
|
||||||
|
// this possible, maybe the region has already been merged or split, see HBASE-20921
|
||||||
|
if (regionNode == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
TransitRegionStateProcedure proc;
|
TransitRegionStateProcedure proc;
|
||||||
regionNode.lock();
|
regionNode.lock();
|
||||||
try {
|
try {
|
||||||
|
@ -108,13 +124,13 @@ public class ReopenTableRegionsProcedure
|
||||||
if (regions.isEmpty()) {
|
if (regions.isEmpty()) {
|
||||||
return Flow.NO_MORE_STATE;
|
return Flow.NO_MORE_STATE;
|
||||||
}
|
}
|
||||||
if (regions.stream().anyMatch(l -> l.getSeqNum() >= 0)) {
|
if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
|
||||||
attempt = 0;
|
attempt = 0;
|
||||||
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
|
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
|
||||||
return Flow.HAS_MORE_STATE;
|
return Flow.HAS_MORE_STATE;
|
||||||
}
|
}
|
||||||
// All the regions need to reopen are in OPENING state which means we can not schedule any
|
// We can not schedule TRSP for all the regions need to reopen, wait for a while and retry
|
||||||
// MRPs.
|
// again.
|
||||||
long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++);
|
long backoff = ProcedureUtil.getBackoffTimeMs(this.attempt++);
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"There are still {} region(s) which need to be reopened for table {} are in " +
|
"There are still {} region(s) which need to be reopened for table {} are in " +
|
||||||
|
@ -138,6 +154,7 @@ public class ReopenTableRegionsProcedure
|
||||||
env.getProcedureScheduler().addFront(this);
|
env.getProcedureScheduler().addFront(this);
|
||||||
return false; // 'false' means that this procedure handled the timeout
|
return false; // 'false' means that this procedure handled the timeout
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
|
|
|
@ -0,0 +1,90 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hbase.master.procedure;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||||
|
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||||
|
import org.apache.hadoop.hbase.TableName;
|
||||||
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
|
import org.apache.hadoop.hbase.master.HMaster;
|
||||||
|
import org.apache.hadoop.hbase.master.ServerManager;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
|
||||||
|
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
|
||||||
|
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MasterTests;
|
||||||
|
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||||
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.ClassRule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.experimental.categories.Category;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Testcase for HBASE-21330.
|
||||||
|
*/
|
||||||
|
@Category({ MasterTests.class, MediumTests.class })
|
||||||
|
public class TestReopenTableRegionsProcedureInfiniteLoop {
|
||||||
|
|
||||||
|
@ClassRule
|
||||||
|
public static final HBaseClassTestRule CLASS_RULE =
|
||||||
|
HBaseClassTestRule.forClass(TestReopenTableRegionsProcedureInfiniteLoop.class);
|
||||||
|
|
||||||
|
private static final HBaseTestingUtility UTIL = new HBaseTestingUtility();
|
||||||
|
|
||||||
|
private static TableName TABLE_NAME = TableName.valueOf("InfiniteLoop");
|
||||||
|
|
||||||
|
private static byte[] CF = Bytes.toBytes("cf");
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUp() throws Exception {
|
||||||
|
UTIL.getConfiguration().setInt(ServerManager.WAIT_ON_REGIONSERVERS_MINTOSTART, 1);
|
||||||
|
UTIL.startMiniCluster(1);
|
||||||
|
UTIL.createTable(TABLE_NAME, CF);
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDown() throws Exception {
|
||||||
|
UTIL.shutdownMiniCluster();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInfiniteLoop() throws IOException {
|
||||||
|
HMaster master = UTIL.getMiniHBaseCluster().getMaster();
|
||||||
|
AssignmentManager am = master.getAssignmentManager();
|
||||||
|
ProcedureExecutor<MasterProcedureEnv> exec = master.getMasterProcedureExecutor();
|
||||||
|
RegionInfo regionInfo = UTIL.getAdmin().getRegions(TABLE_NAME).get(0);
|
||||||
|
RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
|
||||||
|
long procId;
|
||||||
|
ReopenTableRegionsProcedure proc = new ReopenTableRegionsProcedure(TABLE_NAME);
|
||||||
|
regionNode.lock();
|
||||||
|
try {
|
||||||
|
procId = exec.submitProcedure(proc);
|
||||||
|
UTIL.waitFor(30000, () -> proc.hasLock());
|
||||||
|
TransitRegionStateProcedure trsp =
|
||||||
|
TransitRegionStateProcedure.reopen(exec.getEnvironment(), regionInfo);
|
||||||
|
regionNode.setProcedure(trsp);
|
||||||
|
exec.submitProcedure(trsp);
|
||||||
|
} finally {
|
||||||
|
regionNode.unlock();
|
||||||
|
}
|
||||||
|
UTIL.waitFor(60000, () -> exec.isFinished(procId));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue