HBASE-21144 AssignmentManager.waitForAssignment is not stable
This commit is contained in:
parent
83131b1ac4
commit
f504c4d797
|
@ -36,7 +36,6 @@ import org.apache.hadoop.hbase.DoNotRetryIOException;
|
|||
import org.apache.hadoop.hbase.HBaseIOException;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.PleaseHoldException;
|
||||
import org.apache.hadoop.hbase.RegionException;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.UnknownRegionException;
|
||||
|
@ -52,7 +51,6 @@ import org.apache.hadoop.hbase.favored.FavoredNodesPromoter;
|
|||
import org.apache.hadoop.hbase.master.LoadBalancer;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.master.MetricsAssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.NoSuchProcedureException;
|
||||
import org.apache.hadoop.hbase.master.RegionPlan;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
|
@ -612,46 +610,6 @@ public class AssignmentManager implements ServerListener {
|
|||
return ProcedureSyncWait.submitProcedure(master.getMasterProcedureExecutor(), proc);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public boolean waitForAssignment(final RegionInfo regionInfo) throws IOException {
|
||||
return waitForAssignment(regionInfo, Long.MAX_VALUE);
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
// TODO: Remove this?
|
||||
public boolean waitForAssignment(final RegionInfo regionInfo, final long timeout)
|
||||
throws IOException {
|
||||
RegionStateNode node = null;
|
||||
// This method can be called before the regionInfo has made it into the regionStateMap
|
||||
// so wait around here a while.
|
||||
long startTime = System.currentTimeMillis();
|
||||
// Something badly wrong if takes ten seconds to register a region.
|
||||
long endTime = startTime + 10000;
|
||||
while ((node = regionStates.getRegionStateNode(regionInfo)) == null && isRunning() &&
|
||||
System.currentTimeMillis() < endTime) {
|
||||
// Presume it not yet added but will be added soon. Let it spew a lot so we can tell if
|
||||
// we are waiting here alot.
|
||||
LOG.debug("Waiting on " + regionInfo + " to be added to regionStateMap");
|
||||
Threads.sleep(10);
|
||||
}
|
||||
if (node == null) {
|
||||
if (!isRunning()) {
|
||||
return false;
|
||||
}
|
||||
throw new RegionException(
|
||||
regionInfo.getRegionNameAsString() + " never registered with Assigment.");
|
||||
}
|
||||
|
||||
TransitRegionStateProcedure proc = node.getProcedure();
|
||||
if (proc == null) {
|
||||
throw new NoSuchProcedureException(node.toString());
|
||||
}
|
||||
|
||||
ProcedureSyncWait.waitForProcedureToCompleteIOE(master.getMasterProcedureExecutor(), proc,
|
||||
timeout);
|
||||
return true;
|
||||
}
|
||||
|
||||
// ============================================================================================
|
||||
// RegionTransition procedures helpers
|
||||
// ============================================================================================
|
||||
|
@ -1904,4 +1862,9 @@ public class AssignmentManager implements ServerListener {
|
|||
private void killRegionServer(final ServerStateNode serverNode) {
|
||||
master.getServerManager().expireServer(serverNode.getServerName());
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
MasterServices getMaster() {
|
||||
return master;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -101,6 +101,7 @@ import org.apache.hadoop.hbase.master.HMaster;
|
|||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.ServerManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStateStore;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||
import org.apache.hadoop.hbase.regionserver.BloomType;
|
||||
|
@ -3358,17 +3359,15 @@ public class HBaseTestingUtility extends HBaseZKTestingUtility {
|
|||
}
|
||||
|
||||
/**
|
||||
* Uses directly the assignment manager to assign the region.
|
||||
* and waits until the specified region has completed assignment.
|
||||
* @throws IOException
|
||||
* @throw InterruptedException
|
||||
* Uses directly the assignment manager to assign the region. and waits until the specified region
|
||||
* has completed assignment.
|
||||
* @return true if the region is assigned false otherwise.
|
||||
*/
|
||||
public boolean assignRegion(final RegionInfo regionInfo)
|
||||
throws IOException, InterruptedException {
|
||||
final AssignmentManager am = getHBaseCluster().getMaster().getAssignmentManager();
|
||||
am.assign(regionInfo);
|
||||
return am.waitForAssignment(regionInfo);
|
||||
return AssignmentTestingUtil.waitForAssignment(am, regionInfo);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.apache.hadoop.hbase.HBaseClassTestRule;
|
|||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.NoSuchProcedureException;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.ServerManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
|
@ -92,22 +91,12 @@ public class TestAsyncRegionAdminApi extends TestAsyncAdminBase {
|
|||
// Expected
|
||||
assertThat(e.getCause(), instanceOf(DoNotRetryRegionException.class));
|
||||
}
|
||||
try {
|
||||
am.waitForAssignment(hri);
|
||||
fail("Expected NoSuchProcedureException");
|
||||
} catch (NoSuchProcedureException e) {
|
||||
// Expected
|
||||
}
|
||||
assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
|
||||
assertTrue(regionStates.getRegionState(hri).isOpened());
|
||||
|
||||
// unassign region
|
||||
admin.unassign(hri.getRegionName(), true).get();
|
||||
try {
|
||||
am.waitForAssignment(hri);
|
||||
fail("Expected NoSuchProcedureException");
|
||||
} catch (NoSuchProcedureException e) {
|
||||
// Expected
|
||||
}
|
||||
assertFalse(am.getRegionStates().getRegionStateNode(hri).isInTransition());
|
||||
assertTrue(regionStates.getRegionState(hri).isClosed());
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client;
|
|||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors;
|
||||
import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck;
|
||||
import static org.junit.Assert.assertNotEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
|
@ -46,8 +47,8 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.TableNotFoundException;
|
||||
import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.master.NoSuchProcedureException;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
|
||||
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
|
||||
import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
|
||||
import org.apache.hadoop.hbase.testclassification.LargeTests;
|
||||
|
@ -103,18 +104,14 @@ public class TestMetaWithReplicas {
|
|||
getMetaRegionLocation(TEST_UTIL.getZooKeeperWatcher());
|
||||
LOG.info("HBASE:META DEPLOY: on " + hbaseMetaServerName);
|
||||
sns.add(hbaseMetaServerName);
|
||||
for (int replicaId = 1; replicaId < 3; replicaId ++) {
|
||||
RegionInfo h =
|
||||
RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO,
|
||||
replicaId);
|
||||
try {
|
||||
am.waitForAssignment(h);
|
||||
for (int replicaId = 1; replicaId < 3; replicaId++) {
|
||||
RegionInfo h = RegionReplicaUtil
|
||||
.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
|
||||
AssignmentTestingUtil.waitForAssignment(am, h);
|
||||
ServerName sn = am.getRegionStates().getRegionServerOfRegion(h);
|
||||
assertNotNull(sn);
|
||||
LOG.info("HBASE:META DEPLOY: " + h.getRegionNameAsString() + " on " + sn);
|
||||
sns.add(sn);
|
||||
} catch (NoSuchProcedureException e) {
|
||||
LOG.info("Presume the procedure has been cleaned up so just proceed: " + e.toString());
|
||||
}
|
||||
}
|
||||
// Fun. All meta region replicas have ended up on the one server. This will cause this test
|
||||
// to fail ... sometimes.
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
package org.apache.hadoop.hbase.client;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.hadoop.hbase.HBaseClassTestRule;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
|
@ -28,7 +27,7 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.StartMiniClusterOption;
|
||||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.NoSuchProcedureException;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegionServer;
|
||||
import org.apache.hadoop.hbase.testclassification.MediumTests;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
|
@ -42,7 +41,6 @@ import org.junit.Rule;
|
|||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
import org.junit.rules.TestName;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -95,7 +93,7 @@ public class TestSeparateClientZKCluster {
|
|||
FileUtils.deleteDirectory(clientZkDir);
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
public void testBasicOperation() throws Exception {
|
||||
TableName tn = TableName.valueOf(name.getMethodName());
|
||||
// create table
|
||||
|
@ -122,7 +120,7 @@ public class TestSeparateClientZKCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
public void testMasterSwitch() throws Exception {
|
||||
// get an admin instance and issue some request first
|
||||
Connection conn = TEST_UTIL.getConnection();
|
||||
|
@ -146,7 +144,7 @@ public class TestSeparateClientZKCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
public void testMetaRegionMove() throws Exception {
|
||||
TableName tn = TableName.valueOf(name.getMethodName());
|
||||
// create table
|
||||
|
@ -202,7 +200,7 @@ public class TestSeparateClientZKCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 120000)
|
||||
@Test
|
||||
public void testMetaMoveDuringClientZkClusterRestart() throws Exception {
|
||||
TableName tn = TableName.valueOf(name.getMethodName());
|
||||
// create table
|
||||
|
@ -233,12 +231,8 @@ public class TestSeparateClientZKCluster {
|
|||
Thread.sleep(200);
|
||||
}
|
||||
// wait for meta region online
|
||||
try {
|
||||
cluster.getMaster().getAssignmentManager()
|
||||
.waitForAssignment(RegionInfoBuilder.FIRST_META_REGIONINFO);
|
||||
} catch (NoSuchProcedureException e) {
|
||||
// we don't need to take any further action
|
||||
}
|
||||
AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(),
|
||||
RegionInfoBuilder.FIRST_META_REGIONINFO);
|
||||
// wait some long time to make sure we will retry sync data to client ZK until data set
|
||||
Thread.sleep(10000);
|
||||
clientZkCluster.startup(clientZkDir);
|
||||
|
@ -253,7 +247,7 @@ public class TestSeparateClientZKCluster {
|
|||
}
|
||||
}
|
||||
|
||||
@Test(timeout = 60000)
|
||||
@Test
|
||||
public void testAsyncTable() throws Exception {
|
||||
TableName tn = TableName.valueOf(name.getMethodName());
|
||||
ColumnFamilyDescriptorBuilder cfDescBuilder = ColumnFamilyDescriptorBuilder.newBuilder(family);
|
||||
|
|
|
@ -18,14 +18,18 @@
|
|||
package org.apache.hadoop.hbase.master.assignment;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.Waiter;
|
||||
import org.apache.hadoop.hbase.Waiter.ExplainingPredicate;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
import org.apache.hadoop.hbase.master.procedure.ProcedureSyncWait;
|
||||
import org.apache.hadoop.hbase.util.Threads;
|
||||
import org.apache.yetus.audience.InterfaceAudience;
|
||||
import org.apache.yetus.audience.InterfaceStability;
|
||||
|
@ -34,7 +38,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
@InterfaceAudience.Private
|
||||
@InterfaceStability.Evolving
|
||||
public abstract class AssignmentTestingUtil {
|
||||
public final class AssignmentTestingUtil {
|
||||
private static final Logger LOG = LoggerFactory.getLogger(AssignmentTestingUtil.class);
|
||||
|
||||
private AssignmentTestingUtil() {}
|
||||
|
@ -122,4 +126,30 @@ public abstract class AssignmentTestingUtil {
|
|||
private static HMaster getMaster(final HBaseTestingUtility util) {
|
||||
return util.getMiniHBaseCluster().getMaster();
|
||||
}
|
||||
|
||||
public static boolean waitForAssignment(AssignmentManager am, RegionInfo regionInfo)
|
||||
throws IOException {
|
||||
// This method can be called before the regionInfo has made it into the regionStateMap
|
||||
// so wait around here a while.
|
||||
Waiter.waitFor(am.getConfiguration(), 10000,
|
||||
() -> am.getRegionStates().getRegionStateNode(regionInfo) != null);
|
||||
RegionStateNode regionNode = am.getRegionStates().getRegionStateNode(regionInfo);
|
||||
// Wait until the region has already been open, or we have a TRSP along with it.
|
||||
Waiter.waitFor(am.getConfiguration(), 30000,
|
||||
() -> regionNode.isInState(State.OPEN) || regionNode.isInTransition());
|
||||
TransitRegionStateProcedure proc = regionNode.getProcedure();
|
||||
regionNode.lock();
|
||||
try {
|
||||
if (regionNode.isInState(State.OPEN)) {
|
||||
return true;
|
||||
}
|
||||
proc = regionNode.getProcedure();
|
||||
} finally {
|
||||
regionNode.unlock();
|
||||
}
|
||||
assertNotNull(proc);
|
||||
ProcedureSyncWait.waitForProcedureToCompleteIOE(am.getMaster().getMasterProcedureExecutor(),
|
||||
proc, 5L * 60 * 1000);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,10 +75,10 @@ import org.apache.hadoop.hbase.coprocessor.ObserverContext;
|
|||
import org.apache.hadoop.hbase.master.HMaster;
|
||||
import org.apache.hadoop.hbase.master.LoadBalancer;
|
||||
import org.apache.hadoop.hbase.master.MasterRpcServices;
|
||||
import org.apache.hadoop.hbase.master.NoSuchProcedureException;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
|
||||
|
@ -167,11 +167,7 @@ public class TestSplitTransactionOnCluster {
|
|||
throws IOException, InterruptedException {
|
||||
assertEquals(1, regions.size());
|
||||
RegionInfo hri = regions.get(0).getRegionInfo();
|
||||
try {
|
||||
cluster.getMaster().getAssignmentManager().waitForAssignment(hri, 600000);
|
||||
} catch (NoSuchProcedureException e) {
|
||||
LOG.info("Presume the procedure has been cleaned up so just proceed: " + e.toString());
|
||||
}
|
||||
AssignmentTestingUtil.waitForAssignment(cluster.getMaster().getAssignmentManager(), hri);
|
||||
return hri;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue