HBASE-26459 HMaster should move non-meta region only if meta is ONLINE (#3875)
Signed-off-by: Viraj Jasani <vjasani@apache.org>
This commit is contained in:
parent
7192423ae7
commit
c472329460
|
@ -136,6 +136,12 @@ public final class HConstants {
|
|||
/** Default value for the max percent of regions in transition */
|
||||
public static final double DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT = 1.0;
|
||||
|
||||
/** Time in milliseconds to wait meta region assignment, when moving non-meta regions. */
|
||||
public static final String HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT =
|
||||
"hbase.master.waiting.meta.assignment.timeout";
|
||||
|
||||
public static final long HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT = 10000;
|
||||
|
||||
/** Config for the max balancing time */
|
||||
public static final String HBASE_BALANCER_MAX_BALANCING = "hbase.balancer.max.balancing";
|
||||
|
||||
|
|
|
@ -393,6 +393,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
// Cached clusterId on stand by masters to serve clusterID requests from clients.
|
||||
private final CachedClusterId cachedClusterId;
|
||||
|
||||
// Waiting time of non-meta region's moving for meta regions assignment.
|
||||
private final long timeoutWaitMetaRegionAssignment;
|
||||
|
||||
public static class RedirectServlet extends HttpServlet {
|
||||
private static final long serialVersionUID = 2894774810058302473L;
|
||||
private final int regionServerInfoPort;
|
||||
|
@ -498,6 +501,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
this.maxBalancingTime = getMaxBalancingTime();
|
||||
this.maxRitPercent = conf.getDouble(HConstants.HBASE_MASTER_BALANCER_MAX_RIT_PERCENT,
|
||||
HConstants.DEFAULT_HBASE_MASTER_BALANCER_MAX_RIT_PERCENT);
|
||||
this.timeoutWaitMetaRegionAssignment =
|
||||
conf.getLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT,
|
||||
HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
|
||||
|
||||
// Do we publish the status?
|
||||
|
||||
|
@ -1845,12 +1851,20 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
|
|||
// closed
|
||||
serverManager.sendRegionWarmup(rp.getDestination(), hri);
|
||||
|
||||
// Here wait until all the meta regions are not in transition.
|
||||
if (!hri.isMetaRegion() && assignmentManager.getRegionStates().isMetaRegionInTransition()) {
|
||||
Thread.sleep(timeoutWaitMetaRegionAssignment);
|
||||
if (assignmentManager.getRegionStates().isMetaRegionInTransition()) {
|
||||
throw new HBaseIOException("Fail-fast of the region move, " +
|
||||
" because hbase:meta region is still in transition. Failed region move info:" + rp);
|
||||
}
|
||||
}
|
||||
LOG.info(getClientIdAuditPrefix() + " move " + rp + ", running balancer");
|
||||
this.assignmentManager.balance(rp);
|
||||
if (this.cpHost != null) {
|
||||
this.cpHost.postMove(hri, rp.getSource(), rp.getDestination());
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
} catch (IOException | InterruptedException ioe) {
|
||||
if (ioe instanceof HBaseIOException) {
|
||||
throw (HBaseIOException)ioe;
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import java.util.List;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.hbase.HBaseIOException;
|
||||
import org.apache.hadoop.hbase.HBaseTestingUtility;
|
||||
import org.apache.hadoop.hbase.HColumnDescriptor;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
|
@ -46,7 +47,9 @@ import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
|
|||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Pair;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
import org.junit.experimental.categories.Category;
|
||||
|
@ -66,6 +69,9 @@ public class TestMaster {
|
|||
public static void beforeAllTests() throws Exception {
|
||||
// we will retry operations when PleaseHoldException is thrown
|
||||
TEST_UTIL.getConfiguration().setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 3);
|
||||
// Here just set 1 ms for testing.
|
||||
TEST_UTIL.getConfiguration().
|
||||
setLong(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT, 1);
|
||||
// Set hbase.min.version.move.system.tables as version 0 so that
|
||||
// testMoveRegionWhenNotInitialized never fails even if hbase-default has valid default
|
||||
// value present for production use-case.
|
||||
|
@ -188,5 +194,60 @@ public class TestMaster {
|
|||
TEST_UTIL.deleteTable(tableName);
|
||||
}
|
||||
}
|
||||
|
||||
@Test (timeout = 300000)
|
||||
public void testMoveRegionWhenMetaRegionInTransition()
|
||||
throws IOException, InterruptedException, KeeperException {
|
||||
TableName tableName = TableName.valueOf("testMoveRegionWhenMetaRegionInTransition");
|
||||
HMaster master = TEST_UTIL.getMiniHBaseCluster().getMaster();
|
||||
HTableDescriptor htd = new HTableDescriptor(tableName);
|
||||
HColumnDescriptor hcd = new HColumnDescriptor("value");
|
||||
RegionStates regionStates = master.getAssignmentManager().getRegionStates();
|
||||
htd.addFamily(hcd);
|
||||
|
||||
admin.createTable(htd, null);
|
||||
try {
|
||||
HRegionInfo hri = admin.getTableRegions(tableName).get(0);
|
||||
|
||||
HRegionInfo metaRegion = admin.getTableRegions(TableName.META_TABLE_NAME).get(0);
|
||||
|
||||
ServerName rs0 = TEST_UTIL.getHBaseCluster().getRegionServer(0).getServerName();
|
||||
ServerName rs1 = TEST_UTIL.getHBaseCluster().getRegionServer(1).getServerName();
|
||||
|
||||
admin.move(hri.getEncodedNameAsBytes(), rs0.getServerName().getBytes());
|
||||
while (regionStates.isRegionInTransition(hri)) {
|
||||
// Make sure the region is not in transition
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
// Meta region should be in transition
|
||||
master.assignmentManager.unassign(metaRegion);
|
||||
// Then move the region to a new region server.
|
||||
try{
|
||||
master.move(hri.getEncodedNameAsBytes(), rs1.getServerName().getBytes());
|
||||
Assert.fail("Admin move should not be successful here.");
|
||||
} catch (HBaseIOException e) {
|
||||
assertTrue(e.getMessage().contains("Fail-fast"));
|
||||
}
|
||||
// Wait for the movement.
|
||||
Thread.sleep(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
|
||||
// The region should be still on rs0.
|
||||
TEST_UTIL.assertRegionOnServer(hri, rs0, 5000);
|
||||
|
||||
// Wait until the meta region is reassigned.
|
||||
admin.assign(metaRegion.getEncodedNameAsBytes());
|
||||
while (regionStates.isMetaRegionInTransition()) {
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
|
||||
// Try to move region to rs1 once again.
|
||||
admin.move(hri.getEncodedNameAsBytes(), rs1.getServerName().getBytes());
|
||||
|
||||
Thread.sleep(HConstants.HBASE_MASTER_WAITING_META_ASSIGNMENT_TIMEOUT_DEFAULT);
|
||||
// It should be moved to rs1 this time.
|
||||
TEST_UTIL.assertRegionOnServer(hri, rs1, 5000);
|
||||
} finally {
|
||||
TEST_UTIL.deleteTable(tableName);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue