HBASE-20202 [AMv2] Don't move region if its a split parent or offlined
M hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java M hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java Allow passing cause to Constructor. M hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto Add prepare step to move procedure. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java Add check that regions to merge are actually online to the Constructor so we can fail fast if they are offline M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java Add prepare step. Check regions and context and skip move if not right. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java Add check parent region is online to constructor. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java Add generic check region is online utility function for use by subclasses. M hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java Add test that we fail if we try to move an offlined region.
This commit is contained in:
parent
e0bdc87b27
commit
79d47dd57a
|
@ -37,4 +37,7 @@ public class DoNotRetryRegionException extends DoNotRetryIOException {
|
|||
super(s);
|
||||
}
|
||||
|
||||
public DoNotRetryRegionException(Throwable cause) {
|
||||
super(cause);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,4 +42,8 @@ public class MergeRegionException extends DoNotRetryRegionException {
|
|||
public MergeRegionException(String s) {
|
||||
super(s);
|
||||
}
|
||||
|
||||
public MergeRegionException(Throwable cause) {
|
||||
super(cause);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -338,6 +338,7 @@ message UnassignRegionStateData {
|
|||
}
|
||||
|
||||
enum MoveRegionState {
|
||||
MOVE_REGION_PREPARE = 0;
|
||||
MOVE_REGION_UNASSIGN = 1;
|
||||
MOVE_REGION_ASSIGN = 2;
|
||||
}
|
||||
|
|
|
@ -2399,7 +2399,7 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
checkTableExists(tableName);
|
||||
TableState ts = getTableStateManager().getTableState(tableName);
|
||||
if (!ts.isDisabled()) {
|
||||
throw new TableNotDisabledException("Not DISABLE tableState=" + ts);
|
||||
throw new TableNotDisabledException("Not DISABLED; " + ts);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.ServerName;
|
|||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.UnknownRegionException;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.MasterSwitchType;
|
||||
import org.apache.hadoop.hbase.client.Mutation;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
|
@ -108,8 +109,8 @@ public class MergeTableRegionsProcedure
|
|||
super(env);
|
||||
|
||||
// Check daughter regions and make sure that we have valid daughter regions
|
||||
// before doing the real work.
|
||||
checkRegionsToMerge(regionsToMerge, forcible);
|
||||
// before doing the real work. This check calls the super method #checkOnline also.
|
||||
checkRegionsToMerge(env, regionsToMerge, forcible);
|
||||
|
||||
// WARN: make sure there is no parent region of the two merging regions in
|
||||
// hbase:meta If exists, fixing up daughters would cause daughter regions(we
|
||||
|
@ -122,7 +123,7 @@ public class MergeTableRegionsProcedure
|
|||
this.forcible = forcible;
|
||||
}
|
||||
|
||||
private static void checkRegionsToMerge(final RegionInfo[] regionsToMerge,
|
||||
private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo[] regionsToMerge,
|
||||
final boolean forcible) throws MergeRegionException {
|
||||
// For now, we only merge 2 regions.
|
||||
// It could be extended to more than 2 regions in the future.
|
||||
|
@ -131,10 +132,13 @@ public class MergeTableRegionsProcedure
|
|||
Arrays.toString(regionsToMerge));
|
||||
}
|
||||
|
||||
checkRegionsToMerge(regionsToMerge[0], regionsToMerge[1], forcible);
|
||||
checkRegionsToMerge(env, regionsToMerge[0], regionsToMerge[1], forcible);
|
||||
}
|
||||
|
||||
private static void checkRegionsToMerge(final RegionInfo regionToMergeA,
|
||||
/**
|
||||
* One time checks.
|
||||
*/
|
||||
private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo regionToMergeA,
|
||||
final RegionInfo regionToMergeB, final boolean forcible) throws MergeRegionException {
|
||||
if (!regionToMergeA.getTable().equals(regionToMergeB.getTable())) {
|
||||
throw new MergeRegionException("Can't merge regions from two different tables: " +
|
||||
|
@ -146,6 +150,13 @@ public class MergeTableRegionsProcedure
|
|||
throw new MergeRegionException("Can't merge non-default replicas");
|
||||
}
|
||||
|
||||
try {
|
||||
checkOnline(env, regionToMergeA);
|
||||
checkOnline(env, regionToMergeB);
|
||||
} catch (DoNotRetryRegionException dnrre) {
|
||||
throw new MergeRegionException(dnrre);
|
||||
}
|
||||
|
||||
if (!RegionInfo.areAdjacent(regionToMergeA, regionToMergeB)) {
|
||||
String msg = "Unable to merge non-adjacent regions " + regionToMergeA.getShortNameToLog() +
|
||||
", " + regionToMergeB.getShortNameToLog() + " where forcible = " + forcible;
|
||||
|
@ -156,6 +167,7 @@ public class MergeTableRegionsProcedure
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
private static RegionInfo createMergedRegionInfo(final RegionInfo[] regionsToMerge) {
|
||||
return createMergedRegionInfo(regionsToMerge[0], regionsToMerge[1]);
|
||||
}
|
||||
|
@ -457,7 +469,6 @@ public class MergeTableRegionsProcedure
|
|||
private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException {
|
||||
// Note: the following logic assumes that we only have 2 regions to merge. In the future,
|
||||
// if we want to extend to more than 2 regions, the code needs to be modified a little bit.
|
||||
//
|
||||
CatalogJanitor catalogJanitor = env.getMasterServices().getCatalogJanitor();
|
||||
boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(regionsToMerge[0]);
|
||||
if (regionAHasMergeQualifier
|
||||
|
@ -492,7 +503,6 @@ public class MergeTableRegionsProcedure
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
|
||||
// along with the failure, so we can see why regions are not mergeable at this time.
|
||||
IOException mergeableCheckIOE = null;
|
||||
|
|
|
@ -64,6 +64,7 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
|||
super(env, plan.getRegionInfo());
|
||||
this.plan = plan;
|
||||
preflightChecks(env, true);
|
||||
checkOnline(env, plan.getRegionInfo());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -73,6 +74,16 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
|
|||
LOG.trace(this + " execute state=" + state);
|
||||
}
|
||||
switch (state) {
|
||||
case MOVE_REGION_PREPARE:
|
||||
// Check context again and that region is online; do it here after we have lock on region.
|
||||
try {
|
||||
preflightChecks(env, true);
|
||||
checkOnline(env, this.plan.getRegionInfo());
|
||||
} catch (HBaseIOException e) {
|
||||
LOG.warn(this.toString() + " FAILED because " + e.toString());
|
||||
return Flow.NO_MORE_STATE;
|
||||
}
|
||||
break;
|
||||
case MOVE_REGION_UNASSIGN:
|
||||
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(),
|
||||
plan.getDestination(), true));
|
||||
|
|
|
@ -107,6 +107,9 @@ public class SplitTableRegionProcedure
|
|||
final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
|
||||
super(env, regionToSplit);
|
||||
preflightChecks(env, true);
|
||||
// When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
|
||||
// we fail-fast on construction. There it skips the split with just a warning.
|
||||
checkOnline(env, regionToSplit);
|
||||
this.bestSplitRow = splitRow;
|
||||
checkSplittable(env, regionToSplit, bestSplitRow);
|
||||
final TableName table = regionToSplit.getTable();
|
||||
|
|
|
@ -26,11 +26,16 @@ import org.apache.hadoop.hbase.TableName;
|
|||
import org.apache.hadoop.hbase.TableNotDisabledException;
|
||||
import org.apache.hadoop.hbase.TableNotEnabledException;
|
||||
import org.apache.hadoop.hbase.TableNotFoundException;
|
||||
import org.apache.hadoop.hbase.UnknownRegionException;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionOfflineException;
|
||||
import org.apache.hadoop.hbase.client.TableState;
|
||||
import org.apache.hadoop.hbase.master.MasterFileSystem;
|
||||
import org.apache.hadoop.hbase.master.MasterServices;
|
||||
import org.apache.hadoop.hbase.master.RegionState;
|
||||
import org.apache.hadoop.hbase.master.TableStateManager;
|
||||
import org.apache.hadoop.hbase.master.assignment.RegionStates;
|
||||
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
|
||||
import org.apache.hadoop.hbase.security.User;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
|
@ -173,4 +178,30 @@ public abstract class AbstractStateMachineTableProcedure<TState>
|
|||
throw new HBaseIOException(ioe);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check region is online.
|
||||
*/
|
||||
protected static void checkOnline(MasterProcedureEnv env, final RegionInfo ri)
|
||||
throws DoNotRetryRegionException {
|
||||
RegionStates regionStates = env.getAssignmentManager().getRegionStates();
|
||||
RegionState rs = regionStates.getRegionState(ri);
|
||||
if (rs == null) {
|
||||
throw new UnknownRegionException("No RegionState found for " + ri.getEncodedName());
|
||||
}
|
||||
if (!rs.isOpened()) {
|
||||
throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN");
|
||||
}
|
||||
if (ri.isSplitParent()) {
|
||||
throw new DoNotRetryRegionException(ri.getEncodedName() +
|
||||
" is not online (splitParent=true)");
|
||||
}
|
||||
if (ri.isSplit()) {
|
||||
throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
|
||||
}
|
||||
if (ri.isOffline()) {
|
||||
// RegionOfflineException is not instance of DNRIOE so wrap it.
|
||||
throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.Waiter;
|
|||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.Connection;
|
||||
import org.apache.hadoop.hbase.client.ConnectionFactory;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionLocator;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
|
@ -388,7 +389,12 @@ public class TestNamespaceAuditor {
|
|||
Collections.sort(hris);
|
||||
// verify that we cannot split
|
||||
HRegionInfo hriToSplit2 = hris.get(1);
|
||||
try {
|
||||
ADMIN.split(tableTwo, Bytes.toBytes("6"));
|
||||
fail();
|
||||
} catch (DoNotRetryRegionException e) {
|
||||
// Expected
|
||||
}
|
||||
Thread.sleep(2000);
|
||||
assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size());
|
||||
}
|
||||
|
|
|
@ -26,7 +26,6 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import org.apache.commons.lang3.RandomUtils;
|
||||
|
@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.TableName;
|
|||
import org.apache.hadoop.hbase.UnknownRegionException;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
|
||||
|
@ -326,11 +326,9 @@ public class TestRegionMergeTransactionOnCluster {
|
|||
admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false)
|
||||
.get(syncWaitTimeout, TimeUnit.MILLISECONDS);
|
||||
fail("Offline regions should not be able to merge");
|
||||
} catch (ExecutionException ie) {
|
||||
} catch (DoNotRetryRegionException ie) {
|
||||
System.out.println(ie);
|
||||
assertTrue("Exception should mention regions not online",
|
||||
StringUtils.stringifyException(ie.getCause()).contains("regions that are not online")
|
||||
&& ie.getCause() instanceof MergeRegionException);
|
||||
assertTrue(ie instanceof MergeRegionException);
|
||||
}
|
||||
|
||||
try {
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.regionserver;
|
||||
|
||||
import static junit.framework.TestCase.fail;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -29,6 +30,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
|
|||
import org.apache.hadoop.hbase.TableName;
|
||||
import org.apache.hadoop.hbase.TableNotEnabledException;
|
||||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.Table;
|
||||
|
@ -115,6 +117,18 @@ public class TestRegionMove {
|
|||
+ ", but found none", !regionsOnRS1ForTable.isEmpty());
|
||||
final RegionInfo regionToMove = regionsOnRS1ForTable.get(0);
|
||||
|
||||
// Offline the region and then try to move it. Should fail.
|
||||
admin.unassign(regionToMove.getRegionName(), true);
|
||||
try {
|
||||
admin.move(regionToMove.getEncodedNameAsBytes(),
|
||||
Bytes.toBytes(rs2.getServerName().toString()));
|
||||
fail();
|
||||
} catch (DoNotRetryRegionException e) {
|
||||
// We got expected exception
|
||||
}
|
||||
// Reassign for next stage of test.
|
||||
admin.assign(regionToMove.getRegionName());
|
||||
|
||||
// Disable the table
|
||||
admin.disableTable(tableName);
|
||||
|
||||
|
|
|
@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
|||
import org.apache.hadoop.hbase.client.Admin;
|
||||
import org.apache.hadoop.hbase.client.Consistency;
|
||||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
|
||||
import org.apache.hadoop.hbase.client.Get;
|
||||
import org.apache.hadoop.hbase.client.Mutation;
|
||||
import org.apache.hadoop.hbase.client.Put;
|
||||
|
@ -320,9 +321,14 @@ public class TestSplitTransactionOnCluster {
|
|||
|
||||
// Now try splitting.... should fail. And each should successfully
|
||||
// rollback.
|
||||
// We don't roll back here anymore. Instead we fail-fast on construction of the
|
||||
// split transaction. Catch the exception instead.
|
||||
try {
|
||||
this.admin.splitRegion(hri.getRegionName());
|
||||
this.admin.splitRegion(hri.getRegionName());
|
||||
this.admin.splitRegion(hri.getRegionName());
|
||||
fail();
|
||||
} catch (DoNotRetryRegionException e) {
|
||||
// Expected
|
||||
}
|
||||
// Wait around a while and assert count of regions remains constant.
|
||||
for (int i = 0; i < 10; i++) {
|
||||
Thread.sleep(100);
|
||||
|
|
Loading…
Reference in New Issue