From 79d47dd57a9b3fbcbab203d3da797140427f45a5 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 15 Mar 2018 13:33:27 -0700 Subject: [PATCH] HBASE-20202 [AMv2] Don't move region if its a split parent or offlined M hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java M hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java Allow passing cause to Constructor. M hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto Add prepare step to move procedure. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java Add check that regions to merge are actually online to the Constructor so we can fail fast if they are offline M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java Add prepare step. Check regions and context and skip move if not right. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/SplitTableRegionProcedure.java Add check parent region is online to constructor. M hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AbstractStateMachineTableProcedure.java Add generic check region is online utility function for use by subclasses. M hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java Add test that we fail if we try to move an offlined region. --- .../client/DoNotRetryRegionException.java | 3 ++ .../exceptions/MergeRegionException.java | 4 +++ .../src/main/protobuf/MasterProcedure.proto | 1 + .../apache/hadoop/hbase/master/HMaster.java | 2 +- .../MergeTableRegionsProcedure.java | 24 +++++++++----- .../assignment/MoveRegionProcedure.java | 11 +++++++ .../assignment/SplitTableRegionProcedure.java | 3 ++ .../AbstractStateMachineTableProcedure.java | 31 +++++++++++++++++++ .../hbase/namespace/TestNamespaceAuditor.java | 8 ++++- .../TestRegionMergeTransactionOnCluster.java | 8 ++--- .../hbase/regionserver/TestRegionMove.java | 14 +++++++++ .../TestSplitTransactionOnCluster.java | 12 +++++-- 12 files changed, 104 insertions(+), 17 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java index 06a0b3da02d..61ad5cd48a2 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/DoNotRetryRegionException.java @@ -37,4 +37,7 @@ public class DoNotRetryRegionException extends DoNotRetryIOException { super(s); } + public DoNotRetryRegionException(Throwable cause) { + super(cause); + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java index e690084499b..5399f07cb56 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/exceptions/MergeRegionException.java @@ -42,4 +42,8 @@ public class MergeRegionException extends DoNotRetryRegionException { public MergeRegionException(String s) { super(s); } + + public MergeRegionException(Throwable cause) { + super(cause); + } } diff --git a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto index 1134bd639e6..9666c258466 100644 --- a/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol-shaded/src/main/protobuf/MasterProcedure.proto @@ -338,6 +338,7 @@ message UnassignRegionStateData { } enum MoveRegionState { + MOVE_REGION_PREPARE = 0; MOVE_REGION_UNASSIGN = 1; MOVE_REGION_ASSIGN = 2; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0ce6681a4a5..671a5737125 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -2399,7 +2399,7 @@ public class HMaster extends HRegionServer implements MasterServices { checkTableExists(tableName); TableState ts = getTableStateManager().getTableState(tableName); if (!ts.isDisabled()) { - throw new TableNotDisabledException("Not DISABLE tableState=" + ts); + throw new TableNotDisabledException("Not DISABLED; " + ts); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java index b94c0d872ce..57e71f897a6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MergeTableRegionsProcedure.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.MasterSwitchType; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.RegionInfo; @@ -108,8 +109,8 @@ public class MergeTableRegionsProcedure super(env); // Check daughter regions and make sure that we have valid daughter regions - // before doing the real work. - checkRegionsToMerge(regionsToMerge, forcible); + // before doing the real work. This check calls the super method #checkOnline also. + checkRegionsToMerge(env, regionsToMerge, forcible); // WARN: make sure there is no parent region of the two merging regions in // hbase:meta If exists, fixing up daughters would cause daughter regions(we @@ -122,7 +123,7 @@ public class MergeTableRegionsProcedure this.forcible = forcible; } - private static void checkRegionsToMerge(final RegionInfo[] regionsToMerge, + private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo[] regionsToMerge, final boolean forcible) throws MergeRegionException { // For now, we only merge 2 regions. // It could be extended to more than 2 regions in the future. @@ -131,10 +132,13 @@ public class MergeTableRegionsProcedure Arrays.toString(regionsToMerge)); } - checkRegionsToMerge(regionsToMerge[0], regionsToMerge[1], forcible); + checkRegionsToMerge(env, regionsToMerge[0], regionsToMerge[1], forcible); } - private static void checkRegionsToMerge(final RegionInfo regionToMergeA, + /** + * One time checks. + */ + private static void checkRegionsToMerge(MasterProcedureEnv env, final RegionInfo regionToMergeA, final RegionInfo regionToMergeB, final boolean forcible) throws MergeRegionException { if (!regionToMergeA.getTable().equals(regionToMergeB.getTable())) { throw new MergeRegionException("Can't merge regions from two different tables: " + @@ -146,6 +150,13 @@ public class MergeTableRegionsProcedure throw new MergeRegionException("Can't merge non-default replicas"); } + try { + checkOnline(env, regionToMergeA); + checkOnline(env, regionToMergeB); + } catch (DoNotRetryRegionException dnrre) { + throw new MergeRegionException(dnrre); + } + if (!RegionInfo.areAdjacent(regionToMergeA, regionToMergeB)) { String msg = "Unable to merge non-adjacent regions " + regionToMergeA.getShortNameToLog() + ", " + regionToMergeB.getShortNameToLog() + " where forcible = " + forcible; @@ -156,6 +167,7 @@ public class MergeTableRegionsProcedure } } + private static RegionInfo createMergedRegionInfo(final RegionInfo[] regionsToMerge) { return createMergedRegionInfo(regionsToMerge[0], regionsToMerge[1]); } @@ -457,7 +469,6 @@ public class MergeTableRegionsProcedure private boolean prepareMergeRegion(final MasterProcedureEnv env) throws IOException { // Note: the following logic assumes that we only have 2 regions to merge. In the future, // if we want to extend to more than 2 regions, the code needs to be modified a little bit. - // CatalogJanitor catalogJanitor = env.getMasterServices().getCatalogJanitor(); boolean regionAHasMergeQualifier = !catalogJanitor.cleanMergeQualifier(regionsToMerge[0]); if (regionAHasMergeQualifier @@ -492,7 +503,6 @@ public class MergeTableRegionsProcedure return false; } - // Ask the remote regionserver if regions are mergeable. If we get an IOE, report it // along with the failure, so we can see why regions are not mergeable at this time. IOException mergeableCheckIOE = null; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java index 065987f013d..c52af3dee89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java @@ -64,6 +64,7 @@ public class MoveRegionProcedure extends AbstractStateMachineRegionProcedure throw new HBaseIOException(ioe); } } + + /** + * Check region is online. + */ + protected static void checkOnline(MasterProcedureEnv env, final RegionInfo ri) + throws DoNotRetryRegionException { + RegionStates regionStates = env.getAssignmentManager().getRegionStates(); + RegionState rs = regionStates.getRegionState(ri); + if (rs == null) { + throw new UnknownRegionException("No RegionState found for " + ri.getEncodedName()); + } + if (!rs.isOpened()) { + throw new DoNotRetryRegionException(ri.getEncodedName() + " is not OPEN"); + } + if (ri.isSplitParent()) { + throw new DoNotRetryRegionException(ri.getEncodedName() + + " is not online (splitParent=true)"); + } + if (ri.isSplit()) { + throw new DoNotRetryRegionException(ri.getEncodedName() + " has split=true"); + } + if (ri.isOffline()) { + // RegionOfflineException is not instance of DNRIOE so wrap it. + throw new DoNotRetryRegionException(new RegionOfflineException(ri.getEncodedName())); + } + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java index d41291e5b4a..89687eb0f2c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/namespace/TestNamespaceAuditor.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hbase.Waiter; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.Table; @@ -388,7 +389,12 @@ public class TestNamespaceAuditor { Collections.sort(hris); // verify that we cannot split HRegionInfo hriToSplit2 = hris.get(1); - ADMIN.split(tableTwo, Bytes.toBytes("6")); + try { + ADMIN.split(tableTwo, Bytes.toBytes("6")); + fail(); + } catch (DoNotRetryRegionException e) { + // Expected + } Thread.sleep(2000); assertEquals(initialRegions, ADMIN.getTableRegions(tableTwo).size()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java index 078413dfe79..52ed7ba2f46 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMergeTransactionOnCluster.java @@ -26,7 +26,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Objects; -import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.lang3.RandomUtils; @@ -43,6 +42,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownRegionException; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionReplicaUtil; @@ -326,11 +326,9 @@ public class TestRegionMergeTransactionOnCluster { admin.mergeRegionsAsync(a.getEncodedNameAsBytes(), b.getEncodedNameAsBytes(), false) .get(syncWaitTimeout, TimeUnit.MILLISECONDS); fail("Offline regions should not be able to merge"); - } catch (ExecutionException ie) { + } catch (DoNotRetryRegionException ie) { System.out.println(ie); - assertTrue("Exception should mention regions not online", - StringUtils.stringifyException(ie.getCause()).contains("regions that are not online") - && ie.getCause() instanceof MergeRegionException); + assertTrue(ie instanceof MergeRegionException); } try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java index 8940beaac4a..86c1e617ef9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRegionMove.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hbase.regionserver; +import static junit.framework.TestCase.fail; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -29,6 +30,7 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableNotEnabledException; import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Table; @@ -115,6 +117,18 @@ public class TestRegionMove { + ", but found none", !regionsOnRS1ForTable.isEmpty()); final RegionInfo regionToMove = regionsOnRS1ForTable.get(0); + // Offline the region and then try to move it. Should fail. + admin.unassign(regionToMove.getRegionName(), true); + try { + admin.move(regionToMove.getEncodedNameAsBytes(), + Bytes.toBytes(rs2.getServerName().toString())); + fail(); + } catch (DoNotRetryRegionException e) { + // We got expected exception + } + // Reassign for next stage of test. + admin.assign(regionToMove.getRegionName()); + // Disable the table admin.disableTable(tableName); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index b2a88b0bde7..95e011268bb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Consistency; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.DoNotRetryRegionException; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Mutation; import org.apache.hadoop.hbase.client.Put; @@ -320,9 +321,14 @@ public class TestSplitTransactionOnCluster { // Now try splitting.... should fail. And each should successfully // rollback. - this.admin.splitRegion(hri.getRegionName()); - this.admin.splitRegion(hri.getRegionName()); - this.admin.splitRegion(hri.getRegionName()); + // We don't roll back here anymore. Instead we fail-fast on construction of the + // split transaction. Catch the exception instead. + try { + this.admin.splitRegion(hri.getRegionName()); + fail(); + } catch (DoNotRetryRegionException e) { + // Expected + } // Wait around a while and assert count of regions remains constant. for (int i = 0; i < 10; i++) { Thread.sleep(100);