From a07ef888e1fed98d035f81b095582ecd28420fcd Mon Sep 17 00:00:00 2001 From: Enis Soztutar Date: Thu, 3 Oct 2013 20:11:58 +0000 Subject: [PATCH] HBASE-9698 HBCK does not handle tables with no regions left git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1528989 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hbase/util/HBaseFsck.java | 70 +++++++++++++---- .../hadoop/hbase/util/TestHBaseFsck.java | 77 +++++++++++++++++-- .../util/hbck/TestOfflineMetaRebuildHole.java | 4 +- .../hbck/TestOfflineMetaRebuildOverlap.java | 7 +- 4 files changed, 135 insertions(+), 23 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 5c0de0a018e..b8873c8f976 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -807,6 +807,8 @@ public class HBaseFsck extends Configured implements Tool { } } + Path hbaseRoot = FSUtils.getRootDir(getConf()); + FileSystem fs = hbaseRoot.getFileSystem(getConf()); // serialized table info gathering. for (HbckInfo hbi: hbckInfos) { @@ -828,12 +830,10 @@ public class HBaseFsck extends Configured implements Tool { if (modTInfo == null) { // only executed once per table. 
modTInfo = new TableInfo(tableName); - Path hbaseRoot = FSUtils.getRootDir(getConf()); tablesInfo.put(tableName, modTInfo); try { HTableDescriptor htd = - FSTableDescriptors.getTableDescriptorFromFs(hbaseRoot.getFileSystem(getConf()), - hbaseRoot, tableName); + FSTableDescriptors.getTableDescriptorFromFs(fs, hbaseRoot, tableName); modTInfo.htds.add(htd); } catch (IOException ioe) { if (!orphanTableDirs.containsKey(tableName)) { @@ -851,6 +851,8 @@ public class HBaseFsck extends Configured implements Tool { } } + loadTableInfosForTablesWithNoRegion(); + return tablesInfo; } @@ -1849,6 +1851,8 @@ public class HBaseFsck extends Configured implements Tool { tablesInfo.put(tableName, modTInfo); } + loadTableInfosForTablesWithNoRegion(); + for (TableInfo tInfo : tablesInfo.values()) { TableIntegrityErrorHandler handler = tInfo.new IntegrityFixSuggester(tInfo, errors); if (!tInfo.checkRegionChain(handler)) { @@ -1858,6 +1862,21 @@ public class HBaseFsck extends Configured implements Tool { return tablesInfo; } + /** Loads table infos for tables that may not have been included, since there are no + * regions reported for the table, but the table dir is there in hdfs + */ + private void loadTableInfosForTablesWithNoRegion() throws IOException { + Map allTables = new FSTableDescriptors(getConf()).getAll(); + for (HTableDescriptor htd : allTables.values()) { + TableName tableName = htd.getTableName(); + if (isTableIncluded(tableName) && !tablesInfo.containsKey(tableName)) { + TableInfo tableInfo = new TableInfo(tableName); + tableInfo.htds.add(htd); + tablesInfo.put(htd.getTableName(), tableInfo); + } + } + } + /** * Merge hdfs data by moving from contained HbckInfo into targetRegionDir. * @return number of file move fixes done to merge regions. @@ -2085,6 +2104,7 @@ public class HBaseFsck extends Configured implements Tool { * missing from META, HBase doesn't acknowledge the existance of the * table. 
*/ + @Override public void handleRegionStartKeyNotEmpty(HbckInfo next) throws IOException { errors.reportError(ERROR_CODE.FIRST_REGION_STARTKEY_NOT_EMPTY, "First region should start with an empty key. Creating a new " + @@ -2102,6 +2122,7 @@ public class HBaseFsck extends Configured implements Tool { fixes++; } + @Override public void handleRegionEndKeyNotEmpty(byte[] curEndKey) throws IOException { errors.reportError(ERROR_CODE.LAST_REGION_ENDKEY_NOT_EMPTY, "Last region should end with an empty key. Creating a new " @@ -2121,6 +2142,7 @@ public class HBaseFsck extends Configured implements Tool { * There is a hole in the hdfs regions that violates the table integrity * rules. Create a new empty region that patches the hole. */ + @Override public void handleHoleInRegionChain(byte[] holeStartKey, byte[] holeStopKey) throws IOException { errors.reportError( ERROR_CODE.HOLE_IN_REGION_CHAIN, @@ -2305,6 +2327,12 @@ public class HBaseFsck extends Configured implements Tool { byte[] prevKey = null; byte[] problemKey = null; + + if (splits.size() == 0) { + // no region for this table + handler.handleHoleInRegionChain(HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW); + } + for (byte[] key : splits) { Collection ranges = regions.get(key); if (prevKey == null && !Bytes.equals(key, HConstants.EMPTY_BYTE_ARRAY)) { @@ -2462,7 +2490,7 @@ public class HBaseFsck extends Configured implements Tool { * @return tables that have not been modified recently * @throws IOException if an error is encountered */ - HTableDescriptor[] getTables(AtomicInteger numSkipped) { + HTableDescriptor[] getTables(AtomicInteger numSkipped) { List tableNames = new ArrayList(); long now = System.currentTimeMillis(); @@ -2482,18 +2510,17 @@ public class HBaseFsck extends Configured implements Tool { return getHTableDescriptors(tableNames); } - HTableDescriptor[] getHTableDescriptors(List tableNames) { + HTableDescriptor[] getHTableDescriptors(List tableNames) { HTableDescriptor[] htd = new 
HTableDescriptor[0]; - try { - LOG.info("getHTableDescriptors == tableNames => " + tableNames); - htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames); - } catch (IOException e) { - LOG.debug("Exception getting table descriptors", e); - } - return htd; + try { + LOG.info("getHTableDescriptors == tableNames => " + tableNames); + htd = new HBaseAdmin(getConf()).getTableDescriptorsByTableName(tableNames); + } catch (IOException e) { + LOG.debug("Exception getting table descriptors", e); + } + return htd; } - /** * Gets the entry in regionInfo corresponding to the the given encoded * region name. If the region has not been seen yet, a new entry is added @@ -2575,11 +2602,13 @@ public class HBaseFsck extends Configured implements Tool { // comparator to sort KeyValues with latest modtime final Comparator comp = new Comparator() { + @Override public int compare(Cell k1, Cell k2) { return (int)(k1.getTimestamp() - k2.getTimestamp()); } }; + @Override public boolean processRow(Result result) throws IOException { try { @@ -2654,6 +2683,7 @@ public class HBaseFsck extends Configured implements Tool { this.splitB = splitB; } + @Override public boolean equals(Object o) { boolean superEq = super.equals(o); if (!superEq) { @@ -2701,6 +2731,7 @@ public class HBaseFsck extends Configured implements Tool { HRegionInfo hri; ServerName hsa; + @Override public String toString() { return hsa.toString() + ";" + hri.getRegionNameAsString(); } @@ -2729,6 +2760,7 @@ public class HBaseFsck extends Configured implements Tool { this.deployedOn.add(server); } + @Override public synchronized String toString() { StringBuilder sb = new StringBuilder(); sb.append("{ meta => "); @@ -2960,12 +2992,14 @@ public class HBaseFsck extends Configured implements Tool { // for use by unit tests to verify which errors were discovered private ArrayList errorList = new ArrayList(); + @Override public void clear() { errorTables.clear(); errorList.clear(); errorCount = 0; } + @Override 
public synchronized void reportError(ERROR_CODE errorCode, String message) { if (errorCode == ERROR_CODE.WRONG_USAGE) { System.err.println(message); @@ -2980,11 +3014,13 @@ public class HBaseFsck extends Configured implements Tool { showProgress = 0; } + @Override public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table) { errorTables.add(table); reportError(errorCode, message); } + @Override public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info) { errorTables.add(table); @@ -2992,6 +3028,7 @@ public class HBaseFsck extends Configured implements Tool { reportError(errorCode, reference + " " + message); } + @Override public synchronized void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2) { errorTables.add(table); @@ -3000,6 +3037,7 @@ public class HBaseFsck extends Configured implements Tool { reportError(errorCode, reference + " " + message); } + @Override public synchronized void reportError(String message) { reportError(ERROR_CODE.UNKNOWN, message); } @@ -3009,6 +3047,7 @@ public class HBaseFsck extends Configured implements Tool { * where the actual error would have been reported previously. * @param message */ + @Override public synchronized void report(String message) { if (! 
summary) { System.out.println("ERROR: " + message); @@ -3016,6 +3055,7 @@ public class HBaseFsck extends Configured implements Tool { showProgress = 0; } + @Override public synchronized int summarize() { System.out.println(Integer.toString(errorCount) + " inconsistencies detected."); @@ -3028,10 +3068,12 @@ public class HBaseFsck extends Configured implements Tool { } } + @Override public ArrayList getErrorList() { return errorList; } + @Override public synchronized void print(String message) { if (!summary) { System.out.println(message); @@ -3048,6 +3090,7 @@ public class HBaseFsck extends Configured implements Tool { errorCount = 0; } + @Override public synchronized void detail(String message) { if (details) { System.out.println(message); @@ -3055,6 +3098,7 @@ public class HBaseFsck extends Configured implements Tool { showProgress = 0; } + @Override public synchronized void progress() { if (showProgress++ == 10) { if (!summary) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 7d28712f2fe..531d68d081e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -50,7 +50,6 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; @@ -60,6 +59,7 @@ import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.catalog.MetaEditor; import 
org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Durability; @@ -1066,7 +1066,7 @@ public class TestHBaseFsck { // make sure data in regions, if in hlog only there is no data loss TEST_UTIL.getHBaseAdmin().flush(table.getName()); - // Mess it up by leaving a giant hole in meta + // Mess it up by deleting hdfs dirs deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes(""), Bytes.toBytes("A"), false, false, true); // don't rm meta deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"), @@ -1076,6 +1076,9 @@ public class TestHBaseFsck { deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("C"), Bytes.toBytes(""), false, false, true); // don't rm meta + // also remove the table directory in hdfs + deleteTableDir(table); + HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] {ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS, ERROR_CODE.NOT_IN_HDFS, @@ -1084,7 +1087,7 @@ public class TestHBaseFsck { assertEquals(0, hbck.getOverlapGroups(table).size()); // fix hole - doFsck(conf, true); // in 0.92+, meta entries auto create regiondirs + doFsck(conf, true); // detect dangling regions and remove those // check that hole fixed assertNoErrors(doFsck(conf,false)); @@ -1092,6 +1095,15 @@ public class TestHBaseFsck { TEST_UTIL.getHBaseAdmin().tableExists(table)); } + public void deleteTableDir(TableName table) throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + FileSystem fs = rootDir.getFileSystem(conf); + Path p = FSUtils.getTableDir(rootDir, table); + HBaseFsck.debugLsr(conf, p); + boolean success = fs.delete(p, true); + LOG.info("Deleted " + p + " sucessfully? " + success); + } + /** * when the hbase.version file missing, It is fix the fault. */ @@ -1749,9 +1761,11 @@ public class TestHBaseFsck { // inject a fault in the hfcc created. 
final FileSystem fs = FileSystem.get(conf); HBaseFsck hbck = new HBaseFsck(conf, exec) { + @Override public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { boolean attemptedFirstHFile = false; + @Override protected void checkHFile(Path p) throws IOException { if (!attemptedFirstHFile) { attemptedFirstHFile = true; @@ -1778,9 +1792,11 @@ public class TestHBaseFsck { // inject a fault in the hfcc created. final FileSystem fs = FileSystem.get(conf); HBaseFsck hbck = new HBaseFsck(conf, exec) { + @Override public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { boolean attemptedFirstFamDir = false; + @Override protected void checkColFamDir(Path p) throws IOException { if (!attemptedFirstFamDir) { attemptedFirstFamDir = true; @@ -1805,9 +1821,11 @@ public class TestHBaseFsck { // inject a fault in the hfcc created. 
final FileSystem fs = FileSystem.get(conf); HBaseFsck hbck = new HBaseFsck(conf, exec) { + @Override public HFileCorruptionChecker createHFileCorruptionChecker(boolean sidelineCorruptHFiles) throws IOException { return new HFileCorruptionChecker(conf, executor, sidelineCorruptHFiles) { boolean attemptedFirstRegionDir = false; + @Override protected void checkRegionDir(Path p) throws IOException { if (!attemptedFirstRegionDir) { attemptedFirstRegionDir = true; @@ -1927,61 +1945,75 @@ public class TestHBaseFsck { static class MockErrorReporter implements ErrorReporter { static int calledCount = 0; + @Override public void clear() { calledCount++; } + @Override public void report(String message) { calledCount++; } + @Override public void reportError(String message) { calledCount++; } + @Override public void reportError(ERROR_CODE errorCode, String message) { calledCount++; } + @Override public void reportError(ERROR_CODE errorCode, String message, TableInfo table) { calledCount++; } + @Override public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info) { calledCount++; } + @Override public void reportError(ERROR_CODE errorCode, String message, TableInfo table, HbckInfo info1, HbckInfo info2) { calledCount++; } + @Override public int summarize() { return ++calledCount; } + @Override public void detail(String details) { calledCount++; } + @Override public ArrayList getErrorList() { calledCount++; return new ArrayList(); } + @Override public void progress() { calledCount++; } + @Override public void print(String message) { calledCount++; } + @Override public void resetErrors() { calledCount++; } + @Override public boolean tableHasErrors(TableInfo table) { calledCount++; return false; @@ -2014,6 +2046,7 @@ public class TestHBaseFsck { final CountDownLatch latch = new CountDownLatch(1); new Thread() { + @Override public void run() { TableLock readLock = tableLockManager.writeLock(TableName.valueOf("foo"), "testCheckTableLocks"); @@ -2071,7 
+2104,7 @@ public class TestHBaseFsck { hbck = doFsck(conf, false); assertNoErrors(hbck); } - + private void deleteMetaRegion(Configuration conf, boolean unassign, boolean hdfs, boolean regionInfoOnly) throws IOException, InterruptedException { HConnection connection = HConnectionManager.getConnection(conf); @@ -2106,7 +2139,41 @@ public class TestHBaseFsck { HBaseFsck.debugLsr(conf, p); } } - + + @Test + public void testTableWithNoRegions() throws Exception { + // We might end up with a table that has no regions left + // see also testNoHdfsTable() + TableName table = + TableName.valueOf(name.getMethodName()); + try { + // create table with one region + HTableDescriptor desc = new HTableDescriptor(table); + HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toString(FAM)); + desc.addFamily(hcd); // If a table has no CF's it doesn't get checked + TEST_UTIL.getHBaseAdmin().createTable(desc); + tbl = new HTable(TEST_UTIL.getConfiguration(), table, executorService); + + // Mess it up by deleting the only region's hdfs dir (don't rm meta) + deleteRegion(conf, tbl.getTableDescriptor(), HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW, false, + false, true); + + HBaseFsck hbck = doFsck(conf, false); + assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NOT_IN_HDFS }); + + doFsck(conf, true); + + // fix hole + doFsck(conf, true); + + // check that hole fixed + assertNoErrors(doFsck(conf, false)); + } finally { + deleteTable(table); + } + + } + @org.junit.Rule public TestName name = new TestName(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java index c2e8b739ac8..9cf8516ecce 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildHole.java @@ -21,7 +21,6 @@ import static 
org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.assertErrors; import static org.apache.hadoop.hbase.util.hbck.HbckTestingUtil.doFsck; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; import java.util.Arrays; @@ -97,7 +96,8 @@ public class TestOfflineMetaRebuildHole extends OfflineMetaRebuildTestCore { assertErrors(doFsck(conf, false), new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - ERROR_CODE.NOT_IN_META_OR_DEPLOYED}); + ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.HOLE_IN_REGION_CHAIN}); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java index 9cb30fd9ec6..34098ba1ade 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/TestOfflineMetaRebuildOverlap.java @@ -79,7 +79,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore { // bring up the minicluster TEST_UTIL.startMiniZKCluster(); // tables seem enabled by default TEST_UTIL.restartHBaseCluster(3); - + ZooKeeperWatcher zkw = HBaseTestingUtility.getZooKeeperWatcher(TEST_UTIL); LOG.info("Waiting for no more RIT"); @@ -93,7 +93,7 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore { .getMaster().getAssignmentManager().getRegionStates().getRegionsInTransition()); Thread.sleep(1000); } - + // Meta still messed up. 
assertEquals(1, scanMeta()); HTableDescriptor[] htbls = TEST_UTIL.getHBaseAdmin().listTables(); @@ -107,7 +107,8 @@ public class TestOfflineMetaRebuildOverlap extends OfflineMetaRebuildTestCore { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED, - ERROR_CODE.NOT_IN_META_OR_DEPLOYED}); + ERROR_CODE.NOT_IN_META_OR_DEPLOYED, + ERROR_CODE.HOLE_IN_REGION_CHAIN}); } }