diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index a018a13dcba..76b6302d52e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -1288,6 +1288,7 @@ public class HBaseFsck { * the offline ipc call exposed on the master (<0.90.5, <0.92.0) a master * restart or failover may be required. */ + @SuppressWarnings("deprecation") private void closeRegion(HbckInfo hi) throws IOException, InterruptedException { if (hi.metaEntry == null && hi.hdfsEntry == null) { undeployRegions(hi); @@ -1901,22 +1902,22 @@ public class HBaseFsck { LOG.debug("Contained region dir before close"); debugLsr(hi.getHdfsRegionDir()); try { + LOG.info("Closing region: " + hi); closeRegion(hi); } catch (IOException ioe) { - // TODO exercise this - LOG.warn("Was unable to close region " + hi.getRegionNameAsString() - + ". Just continuing... "); + LOG.warn("Was unable to close region " + hi + + ". Just continuing... ", ioe); } catch (InterruptedException e) { - // TODO exercise this - LOG.warn("Was unable to close region " + hi.getRegionNameAsString() - + ". Just continuing... "); + LOG.warn("Was unable to close region " + hi + + ". Just continuing... ", e); } try { LOG.info("Offlining region: " + hi); offline(hi.getRegionName()); } catch (IOException ioe) { - LOG.warn("Unable to offline region from master: " + hi, ioe); + LOG.warn("Unable to offline region from master: " + hi + + ". Just continuing... ", ioe); } } @@ -1965,14 +1966,21 @@ public class HBaseFsck { try { LOG.info("Closing region: " + regionToSideline); closeRegion(regionToSideline); - } catch (InterruptedException ie) { - LOG.warn("Was unable to close region " + regionToSideline.getRegionNameAsString() - + ". 
Interrupted."); - throw new IOException(ie); + } catch (IOException ioe) { + LOG.warn("Was unable to close region " + regionToSideline + + ". Just continuing... ", ioe); + } catch (InterruptedException e) { + LOG.warn("Was unable to close region " + regionToSideline + + ". Just continuing... ", e); } - LOG.info("Offlining region: " + regionToSideline); - offline(regionToSideline.getRegionName()); + try { + LOG.info("Offlining region: " + regionToSideline); + offline(regionToSideline.getRegionName()); + } catch (IOException ioe) { + LOG.warn("Unable to offline region from master: " + regionToSideline + + ". Just continuing... ", ioe); + } LOG.info("Before sideline big overlapped region: " + regionToSideline.toString()); Path sidelineRegionDir = sidelineRegionDir(fs, TO_BE_LOADED, regionToSideline); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 6c1f37ae686..fdcceded8ae 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertNoErrors; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; @@ -62,9 +63,11 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.executor.EventHandler.EventType; +import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegionServer; 
+import org.apache.hadoop.hbase.util.HBaseFsck.HbckInfo; import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE; import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; @@ -74,6 +77,8 @@ import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; +import com.google.common.collect.Multimap; + /** * This tests HBaseFsck's ability to detect reasons for inconsistent tables. */ @@ -483,8 +488,7 @@ public class TestHBaseFsck { // differentiate on ts/regionId! We actually need to recheck // deployments! HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); - ServerName hsi; - while ( (hsi = findDeployedHSI(getDeployedHRIs(admin), hriDupe)) == null) { + while (findDeployedHSI(getDeployedHRIs(admin), hriDupe) == null) { Thread.sleep(250); } @@ -547,7 +551,6 @@ public class TestHBaseFsck { } } - /** * This creates and fixes a bad table where a region is completely contained * by another region. @@ -585,6 +588,98 @@ public class TestHBaseFsck { } } + /** + * This creates and fixes a bad table that has an overlap group of + * 3 regions. Sets HBaseFsck.maxMerge to 2 to trigger sidelining of an overlapped + * region. Messes with the meta data so that closeRegion/offlineRegion + * throw exceptions.
+ */ + @Test + public void testSidelineOverlapRegion() throws Exception { + String table = "testSidelineOverlapRegion"; + try { + setupTable(table); + assertEquals(ROWKEYS.length, countRows()); + + // Mess it up by creating an overlap + MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + HMaster master = cluster.getMaster(); + HRegionInfo hriOverlap1 = createRegion(conf, tbl.getTableDescriptor(), + Bytes.toBytes("A"), Bytes.toBytes("AB")); + master.assignRegion(hriOverlap1); + master.getAssignmentManager().waitForAssignment(hriOverlap1); + HRegionInfo hriOverlap2 = createRegion(conf, tbl.getTableDescriptor(), + Bytes.toBytes("AB"), Bytes.toBytes("B")); + master.assignRegion(hriOverlap2); + master.getAssignmentManager().waitForAssignment(hriOverlap2); + + HBaseFsck hbck = doFsck(conf, false); + assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.DUPE_STARTKEYS, + ERROR_CODE.DUPE_STARTKEYS, ERROR_CODE.OVERLAP_IN_REGION_CHAIN}); + assertEquals(3, hbck.getOverlapGroups(table).size()); + assertEquals(ROWKEYS.length, countRows()); + + // mess around the overlapped regions, to trigger NotServingRegionException + Multimap overlapGroups = hbck.getOverlapGroups(table); + ServerName serverName = null; + byte[] regionName = null; + for (HbckInfo hbi: overlapGroups.values()) { + if ("A".equals(Bytes.toString(hbi.getStartKey())) + && "B".equals(Bytes.toString(hbi.getEndKey()))) { + regionName = hbi.getRegionName(); + + // get an RS not serving the region to force bad assignment info into META.
+ int k = cluster.getServerWith(regionName); + for (int i = 0; i < 3; i++) { + if (i != k) { + HRegionServer rs = cluster.getRegionServer(i); + serverName = rs.getServerName(); + break; + } + } + + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + HBaseFsckRepair.closeRegionSilentlyAndWait(admin, + cluster.getRegionServer(k).getServerName(), hbi.getHdfsHRI()); + admin.offline(regionName); + break; + } + } + + assertNotNull(regionName); + assertNotNull(serverName); + HTable meta = new HTable(conf, HConstants.META_TABLE_NAME); + Put put = new Put(regionName); + put.add(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER, + Bytes.toBytes(serverName.getHostAndPort())); + meta.put(put); + + // fix the problem. + HBaseFsck fsck = new HBaseFsck(conf); + fsck.connect(); + fsck.setDisplayFullReport(); // i.e. -details + fsck.setTimeLag(0); + fsck.setFixAssignments(true); + fsck.setFixMeta(true); + fsck.setFixHdfsHoles(true); + fsck.setFixHdfsOverlaps(true); + fsck.setFixHdfsOrphans(true); + fsck.setFixVersionFile(true); + fsck.setSidelineBigOverlaps(true); + fsck.setMaxMerge(2); + fsck.onlineHbck(); + + // verify that overlaps are fixed, and there are fewer rows + // since one region is sidelined. + HBaseFsck hbck2 = doFsck(conf,false); + assertNoErrors(hbck2); + assertEquals(0, hbck2.getOverlapGroups(table).size()); + assertTrue(ROWKEYS.length > countRows()); + } finally { + deleteTable(table); + } + } + /** * This creates and fixes a bad table where a region is completely contained * by another region, and there is a hole (sort of like a bad split)