HBASE-20202 [AMv2] Don't move region if its a split parent or offlined

M hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java
M hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java
 Allow passing cause to Constructor.

M hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto
 Add prepare step to move procedure.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java
 Add check that regions to merge are actually online to the Constructor
so we can fail fast if they are offline

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
 Add prepare step. Check regions and context and skip move if not right.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java
 Add check parent region is online to constructor.

M hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java
 Add generic check region is online utility function for use by subclasses.

M hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java
 Add test that we fail if we try to move an offlined region.
This commit is contained in:
Michael Stack 2018-03-15 13:33:27 -07:00
parent e0bdc87b27
commit 79d47dd57a
12 changed files with 104 additions and 17 deletions

View File

@ -37,4 +37,7 @@ public class DoNotRetryRegionException extends DoNotRetryIOException {
super(s); super(s);
} }
public DoNotRetryRegionException(Throwable cause) {
super(cause);
}
} }

View File

@ -42,4 +42,8 @@ public class MergeRegionException extends DoNotRetryRegionException {
public MergeRegionException(String s) { public MergeRegionException(String s) {
super(s); super(s);
} }
public MergeRegionException(Throwable cause) {
super(cause);
}
} }

View File

@ -338,6 +338,7 @@ message UnassignRegionStateData {
} }
enum MoveRegionState { enum MoveRegionState {
MOVE_REGION_PREPARE = 0;
MOVE_REGION_UNASSIGN = 1; MOVE_REGION_UNASSIGN = 1;
MOVE_REGION_ASSIGN = 2; MOVE_REGION_ASSIGN = 2;
} }

View File

@ -2399,7 +2399,7 @@ public class HMaster extends HRegionServer implements MasterServices {
checkTableExists(tableName); checkTableExists(tableName);
TableState ts = getTableStateManager().getTableState(tableName); TableState ts = getTableStateManager().getTableState(tableName);
if (!ts.isDisabled()) { if (!ts.isDisabled()) {
throw new TableNotDisabledException("Not DISABLE tableState=" + ts); throw new TableNotDisabledException("Not DISABLED; " + ts);
} }
} }

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.MasterSwitchType;
import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
@ -108,8 +109,8 @@ public class MergeTableRegionsProcedure
super(env); super(env);
// Check daughter regions and make sure that we have valid daughter regions // Check daughter regions and make sure that we have valid daughter regions
// before doing the real work. // before doing the real work. This check calls the super method #checkOnline also.
checkRegionsToMerge(regionsToMerge, forcible); checkRegionsToMerge(env, regionsToMerge, forcible);
// WARN: make sure there is no parent region of the two merging regions in // WARN: make sure there is no parent region of the two merging regions in
// hbase:meta If exists, fixing up daughters would cause daughter regions(we // hbase:meta If exists, fixing up daughters would cause daughter regions(we
@ -122,7 +123,7 @@ public class MergeTableRegionsProcedure
this.forcible = forcible; this.forcible = forcible;
} }
private static void checkRegionsToMerge(final RegionInfo[] regionsToMerge, private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo[] regionsToMerge,
final boolean forcible) throws MergeRegionException { final boolean forcible) throws MergeRegionException {
// For now, we only merge 2 regions. // For now, we only merge 2 regions.
// It could be extended to more than 2 regions in the future. // It could be extended to more than 2 regions in the future.
@ -131,10 +132,13 @@ public class MergeTableRegionsProcedure
Arrays.toString(regionsToMerge)); Arrays.toString(regionsToMerge));
} }
checkRegionsToMerge(regionsToMerge[0], regionsToMerge[1], forcible); checkRegionsToMerge(env, regionsToMerge[0], regionsToMerge[1], forcible);
} }
private static void checkRegionsToMerge(final RegionInfo regionToMergeA, /**
* One time checks.
*/
private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo regionToMergeA,
final RegionInfo regionToMergeB, final boolean forcible) throws MergeRegionException { final RegionInfo regionToMergeB, final boolean forcible) throws MergeRegionException {
if (!regionToMergeA.getTable().equals(regionToMergeB.getTable())) { if (!regionToMergeA.getTable().equals(regionToMergeB.getTable())) {
throw new MergeRegionException("Can't merge regions from two different tables: " + throw new MergeRegionException("Can't merge regions from two different tables: " +
@ -146,6 +150,13 @@ public class MergeTableRegionsProcedure
throw new MergeRegionException("Can't merge non-default replicas"); throw new MergeRegionException("Can't merge non-default replicas");
} }
try {
checkOnline(env, regionToMergeA);
checkOnline(env, regionToMergeB);
} catch (DoNotRetryRegionException dnrre) {
throw new MergeRegionException(dnrre);
}
if (!RegionInfo.areAdjacent(regionToMergeA, regionToMergeB)) { if (!RegionInfo.areAdjacent(regionToMergeA, regionToMergeB)) {
String msg = "Unable to merge non-adjacent regions " + regionToMergeA.getShortNameToLog() + String msg = "Unable to merge non-adjacent regions " + regionToMergeA.getShortNameToLog() +
", " + regionToMergeB.getShortNameToLog() + " where forcible = " + forcible; ", " + regionToMergeB.getShortNameToLog() + " where forcible = " + forcible;
@ -156,6 +167,7 @@ public class MergeTableRegionsProcedure
} }
} }
private static RegionInfo createMergedRegionInfo(final RegionInfo[] regionsToMerge) { private static RegionInfo createMergedRegionInfo(final RegionInfo[] regionsToMerge) {
return createMergedRegionInfo(regionsToMerge[0], regionsToMerge[1]); return createMergedRegionInfo(regionsToMerge[0], regionsToMerge[1]);
} }
@ -457,7 +469,6 @@ public class MergeTableRegionsProcedure
private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException { private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException {
// Note: the following logic assumes that we only have 2 regions to merge. In the future, // Note: the following logic assumes that we only have 2 regions to merge. In the future,
// if we want to extend to more than 2 regions, the code needs to be modified a little bit. // if we want to extend to more than 2 regions, the code needs to be modified a little bit.
//
CatalogJanitor catalogJanitor = env.getMasterServices().getCatalogJanitor(); CatalogJanitor catalogJanitor = env.getMasterServices().getCatalogJanitor();
boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(regionsToMerge[0]); boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(regionsToMerge[0]);
if (regionAHasMergeQualifier if (regionAHasMergeQualifier
@ -492,7 +503,6 @@ public class MergeTableRegionsProcedure
return false; return false;
} }
// Ask the remote regionserver if regions are mergeable. If we get an IOE, report it // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it
// along with the failure, so we can see why regions are not mergeable at this time. // along with the failure, so we can see why regions are not mergeable at this time.
IOException mergeableCheckIOE = null; IOException mergeableCheckIOE = null;

View File

@ -64,6 +64,7 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
super(env, plan.getRegionInfo()); super(env, plan.getRegionInfo());
this.plan = plan; this.plan = plan;
preflightChecks(env, true); preflightChecks(env, true);
checkOnline(env, plan.getRegionInfo());
} }
@Override @Override
@ -73,6 +74,16 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure<Mov
LOG.trace(this + " execute state=" + state); LOG.trace(this + " execute state=" + state);
} }
switch (state) { switch (state) {
case MOVE_REGION_PREPARE:
// Check context again and that region is online; do it here after we have lock on region.
try {
preflightChecks(env, true);
checkOnline(env, this.plan.getRegionInfo());
} catch (HBaseIOException e) {
LOG.warn(this.toString() + " FAILED because " + e.toString());
return Flow.NO_MORE_STATE;
}
break;
case MOVE_REGION_UNASSIGN: case MOVE_REGION_UNASSIGN:
addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(), addChildProcedure(new UnassignProcedure(plan.getRegionInfo(), plan.getSource(),
plan.getDestination(), true)); plan.getDestination(), true));

View File

@ -107,6 +107,9 @@ public class SplitTableRegionProcedure
final RegionInfo regionToSplit, final byte[] splitRow) throws IOException { final RegionInfo regionToSplit, final byte[] splitRow) throws IOException {
super(env, regionToSplit); super(env, regionToSplit);
preflightChecks(env, true); preflightChecks(env, true);
// When procedure goes to run in its prepare step, it also does these checkOnline checks. Here
// we fail-fast on construction. There it skips the split with just a warning.
checkOnline(env, regionToSplit);
this.bestSplitRow = splitRow; this.bestSplitRow = splitRow;
checkSplittable(env, regionToSplit, bestSplitRow); checkSplittable(env, regionToSplit, bestSplitRow);
final TableName table = regionToSplit.getTable(); final TableName table = regionToSplit.getTable();

View File

@ -26,11 +26,16 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotDisabledException; import org.apache.hadoop.hbase.TableNotDisabledException;
import org.apache.hadoop.hbase.TableNotEnabledException; import org.apache.hadoop.hbase.TableNotEnabledException;
import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionOfflineException;
import org.apache.hadoop.hbase.client.TableState; import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.TableStateManager; import org.apache.hadoop.hbase.master.TableStateManager;
import org.apache.hadoop.hbase.master.assignment.RegionStates;
import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
@ -173,4 +178,30 @@ public abstract class AbstractStateMachineTableProcedure<TState>
throw new HBaseIOException(ioe); throw new HBaseIOException(ioe);
} }
} }
/**
* Check region is online.
*/
protected static void checkOnline(MasterProcedureEnv env, final RegionInfo ri)
throws DoNotRetryRegionException {
RegionStates regionStates = env.getAssignmentManager().getRegionStates();
RegionState rs = regionStates.getRegionState(ri);
if (rs == null) {
throw new UnknownRegionException("No RegionState found for " + ri.getEncodedName());
}
if (!rs.isOpened()) {
throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN");
}
if (ri.isSplitParent()) {
throw new DoNotRetryRegionException(ri.getEncodedName() +
" is not online (splitParent=true)");
}
if (ri.isSplit()) {
throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true");
}
if (ri.isOffline()) {
// RegionOfflineException is not instance of DNRIOE so wrap it.
throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName()));
}
}
} }

View File

@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.Waiter;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
@ -388,7 +389,12 @@ public class TestNamespaceAuditor {
Collections.sort(hris); Collections.sort(hris);
// verify that we cannot split // verify that we cannot split
HRegionInfo hriToSplit2 = hris.get(1); HRegionInfo hriToSplit2 = hris.get(1);
ADMIN.split(tableTwo, Bytes.toBytes("6")); try {
ADMIN.split(tableTwo, Bytes.toBytes("6"));
fail();
} catch (DoNotRetryRegionException e) {
// Expected
}
Thread.sleep(2000); Thread.sleep(2000);
assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size()); assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size());
} }

View File

@ -26,7 +26,6 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.RandomUtils;
@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.UnknownRegionException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.RegionReplicaUtil;
@ -326,11 +326,9 @@ public class TestRegionMergeTransactionOnCluster {
admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false) admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false)
.get(syncWaitTimeout, TimeUnit.MILLISECONDS); .get(syncWaitTimeout, TimeUnit.MILLISECONDS);
fail("Offline regions should not be able to merge"); fail("Offline regions should not be able to merge");
} catch (ExecutionException ie) { } catch (DoNotRetryRegionException ie) {
System.out.println(ie); System.out.println(ie);
assertTrue("Exception should mention regions not online", assertTrue(ie instanceof MergeRegionException);
StringUtils.stringifyException(ie.getCause()).contains("regions that are not online")
&& ie.getCause() instanceof MergeRegionException);
} }
try { try {

View File

@ -17,6 +17,7 @@
*/ */
package org.apache.hadoop.hbase.regionserver; package org.apache.hadoop.hbase.regionserver;
import static junit.framework.TestCase.fail;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
@ -29,6 +30,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotEnabledException; import org.apache.hadoop.hbase.TableNotEnabledException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.client.Table;
@ -115,6 +117,18 @@ public class TestRegionMove {
+ ", but found none", !regionsOnRS1ForTable.isEmpty()); + ", but found none", !regionsOnRS1ForTable.isEmpty());
final RegionInfo regionToMove = regionsOnRS1ForTable.get(0); final RegionInfo regionToMove = regionsOnRS1ForTable.get(0);
// Offline the region and then try to move it. Should fail.
admin.unassign(regionToMove.getRegionName(), true);
try {
admin.move(regionToMove.getEncodedNameAsBytes(),
Bytes.toBytes(rs2.getServerName().toString()));
fail();
} catch (DoNotRetryRegionException e) {
// We got expected exception
}
// Reassign for next stage of test.
admin.assign(regionToMove.getRegionName());
// Disable the table // Disable the table
admin.disableTable(tableName); admin.disableTable(tableName);

View File

@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Consistency; import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.DoNotRetryRegionException;
import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Put;
@ -320,9 +321,14 @@ public class TestSplitTransactionOnCluster {
// Now try splitting.... should fail. And each should successfully // Now try splitting.... should fail. And each should successfully
// rollback. // rollback.
this.admin.splitRegion(hri.getRegionName()); // We don't roll back here anymore. Instead we fail-fast on construction of the
this.admin.splitRegion(hri.getRegionName()); // split transaction. Catch the exception instead.
this.admin.splitRegion(hri.getRegionName()); try {
this.admin.splitRegion(hri.getRegionName());
fail();
} catch (DoNotRetryRegionException e) {
// Expected
}
// Wait around a while and assert count of regions remains constant. // Wait around a while and assert count of regions remains constant.
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
Thread.sleep(100); Thread.sleep(100);