From 43bfefc3706f05821d719a9e81beba7c8dde51bb Mon Sep 17 00:00:00 2001 From: Jonathan Hsieh Date: Sun, 9 Sep 2012 16:21:15 +0000 Subject: [PATCH] HBASE-5631 hbck should handle case where .tableinfo file is missing (Jie Huang) git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1382529 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/hbase/util/HBaseFsck.java | 137 +++++++++++++++++- .../hadoop/hbase/util/TestHBaseFsck.java | 33 ++++- .../hbase/util/hbck/HbckTestingUtil.java | 5 +- 3 files changed, 163 insertions(+), 12 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index e3c51465342..727f64cbd87 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -26,6 +26,7 @@ import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -54,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; @@ -182,6 +184,7 @@ public class HBaseFsck { private boolean fixHdfsHoles = false; // fix fs holes? 
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky) private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo) + private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo) private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs private boolean fixSplitParents = false; // fix lingering split parents @@ -231,6 +234,8 @@ public class HBaseFsck { * When initially looking at HDFS, we attempt to find any orphaned data. */ private List orphanHdfsDirs = Collections.synchronizedList(new ArrayList()); + + private Map> orphanTableDirs = new HashMap>(); /** * Constructor @@ -333,7 +338,8 @@ public class HBaseFsck { */ public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { // Initial pass to fix orphans. - if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) { + if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() + || shouldFixHdfsOverlaps() || shouldFixTableOrphans()) { LOG.info("Loading regioninfos HDFS"); // if nothing is happening this should always complete in two iterations. int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3); @@ -385,7 +391,7 @@ public class HBaseFsck { if (!checkMetaOnly) { reportTablesInFlux(); } - + // get regions according to what is online on each RegionServer loadDeployedRegions(); @@ -399,6 +405,9 @@ public class HBaseFsck { // Get disabled tables from ZooKeeper loadDisabledTables(); + // fix the orphan tables + fixOrphanTables(); + // Check and fix consistency checkAndFixConsistency(); @@ -704,7 +713,7 @@ public class HBaseFsck { if (modTInfo == null) { // only executed once per table. 
modTInfo = new TableInfo(tableName); - Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR)); + Path hbaseRoot = FSUtils.getRootDir(conf); tablesInfo.put(tableName, modTInfo); try { HTableDescriptor htd = @@ -712,9 +721,14 @@ public class HBaseFsck { hbaseRoot, tableName); modTInfo.htds.add(htd); } catch (IOException ioe) { - LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); - errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, - "Unable to read .tableinfo from " + hbaseRoot); + if (!orphanTableDirs.containsKey(tableName)) { + LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); + //should only report once for each table + errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, + "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName); + Set columns = new HashSet(); + orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi)); + } } } modTInfo.addRegionInfo(hbi); @@ -722,6 +736,103 @@ public class HBaseFsck { return tablesInfo; } + + /** + * To get the column family list according to the column family dirs + * @param columns + * @param hbi + * @return + * @throws IOException + */ + private Set getColumnFamilyList(Set columns, HbckInfo hbi) throws IOException { + Path regionDir = hbi.getHdfsRegionDir(); + FileSystem fs = regionDir.getFileSystem(conf); + FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs)); + for (FileStatus subdir : subDirs) { + String columnfamily = subdir.getPath().getName(); + columns.add(columnfamily); + } + return columns; + } + + /** + * To fabricate a .tableinfo file with following contents
+ * 1. the correct tablename
+ * 2. the correct colfamily list
+ * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}
+ * @param tableName + * @throws IOException + */ + private boolean fabricateTableInfo(String tableName, Set columns) throws IOException { + if (columns ==null || columns.isEmpty()) return false; + HTableDescriptor htd = new HTableDescriptor(tableName); + for (String columnfamily : columns) { + htd.addFamily(new HColumnDescriptor(columnfamily)); + } + FSTableDescriptors.createTableDescriptor(htd, conf, true); + return true; + } + + /** + * To fix orphan table by creating a .tableinfo file under tableDir
+ * 1. if TableInfo is cached, to recover the .tableinfo accordingly
+ * 2. else create a default .tableinfo file with following items
+ *  2.1 the correct tablename
+ *  2.2 the correct colfamily list
+ *  2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}
+ * @throws IOException + */ + public void fixOrphanTables() throws IOException { + if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) { + + Path hbaseRoot = FSUtils.getRootDir(conf); + List tmpList = new ArrayList(); + tmpList.addAll(orphanTableDirs.keySet()); + HTableDescriptor[] htds = getHTableDescriptors(tmpList); + Iterator iter = orphanTableDirs.entrySet().iterator(); + int j = 0; + int numFailedCase = 0; + while (iter.hasNext()) { + Entry> entry = (Entry>) iter.next(); + String tableName = entry.getKey(); + LOG.info("Trying to fix orphan table error: " + tableName); + if (j < htds.length) { + if (tableName.equals(Bytes.toString(htds[j].getName()))) { + HTableDescriptor htd = htds[j]; + LOG.info("fixing orphan table: " + tableName + " from cache"); + FSTableDescriptors.createTableDescriptor( + hbaseRoot.getFileSystem(conf), hbaseRoot, htd, true); + j++; + iter.remove(); + } + } else { + if (fabricateTableInfo(tableName, entry.getValue())) { + LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file"); + LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName); + iter.remove(); + } else { + LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information"); + numFailedCase++; + } + } + fixes++; + } + + if (orphanTableDirs.isEmpty()) { + // all orphanTableDirs are luckily recovered + // re-run doFsck after recovering the .tableinfo file + setShouldRerun(); + LOG.warn("Strongly recommend to re-run manually hbck after all orphanTableDirs being fixed"); + } else if (numFailedCase > 0) { + LOG.error("Failed to fix " + numFailedCase + + " OrphanTables with default .tableinfo files"); + } + + } + //cleanup the list + orphanTableDirs.clear(); + + } /** * This borrows code from MasterFileSystem.bootstrap() @@ -3017,7 +3128,15 @@ public class HBaseFsck { boolean shouldFixHdfsHoles() { return fixHdfsHoles; } - + + public void setFixTableOrphans(boolean 
shouldFix) { + fixTableOrphans = shouldFix; + } + + boolean shouldFixTableOrphans() { + return fixTableOrphans; + } + public void setFixHdfsOverlaps(boolean shouldFix) { fixHdfsOverlaps = shouldFix; } @@ -3159,6 +3278,7 @@ public class HBaseFsck { System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs."); System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs"); + System.err.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)"); System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs."); System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs."); System.err.println(" -maxMerge When fixing region overlaps, allow at most regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)"); @@ -3263,6 +3383,8 @@ public class HBaseFsck { setFixHdfsHoles(true); } else if (cmd.equals("-fixHdfsOrphans")) { setFixHdfsOrphans(true); + } else if (cmd.equals("-fixTableOrphans")) { + setFixTableOrphans(true); } else if (cmd.equals("-fixHdfsOverlaps")) { setFixHdfsOverlaps(true); } else if (cmd.equals("-fixVersionFile")) { @@ -3389,6 +3511,7 @@ public class HBaseFsck { setFixHdfsHoles(false); setFixHdfsOverlaps(false); setFixVersionFile(false); + setFixTableOrphans(false); errors.resetErrors(); code = onlineHbck(); setRetCode(code); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index b61f0828c6c..db797f09ad2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -412,28 +412,55 @@ public class TestHBaseFsck { deleteTable(table); } } - + @Test - public void testHbckMissingTableinfo() throws Exception { + public void 
testHbckFixOrphanTable() throws Exception { String table = "tableInfo"; FileSystem fs = null; Path tableinfo = null; try { setupTable(table); + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table ); fs = hbaseTableDir.getFileSystem(conf); FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); tableinfo = status.getPath(); fs.rename(tableinfo, new Path("/.tableinfo")); + //to report error if .tableinfo is missing. HBaseFsck hbck = doFsck(conf, false); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE }); + + // fix OrphanTable with default .tableinfo + hbck = doFsck(conf, true); + assertNoErrors(hbck); + status = null; + status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); + assertNotNull(status); + + HTableDescriptor htd = admin.getTableDescriptor(table.getBytes()); + htd.setValue("NOT_DEFAULT", "true"); + admin.disableTable(table); + admin.modifyTable(table.getBytes(), htd); + admin.enableTable(table); + fs.delete(status.getPath(), true); + + // fix OrphanTable with cache + htd = admin.getTableDescriptor(table.getBytes()); + hbck = doFsck(conf, true); + assertNoErrors(hbck); + status = null; + status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); + assertNotNull(status); + htd = admin.getTableDescriptor(table.getBytes()); + assertEquals(htd.getValue("NOT_DEFAULT"), "true"); } finally { fs.rename(new Path("/.tableinfo"), tableinfo); deleteTable(table); } } - + /** * This create and fixes a bad table with regions that have a duplicate * start key diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java index b25c4cb6034..299ae6e64d6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java @@ 
-37,12 +37,12 @@ public class HbckTestingUtil { public static HBaseFsck doFsck( Configuration conf, boolean fix, String table) throws Exception { - return doFsck(conf, fix, fix, fix, fix,fix, fix, table); + return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, table); } public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, - boolean fixHdfsOrphans, boolean fixVersionFile, + boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile, String table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf); fsck.connect(); @@ -53,6 +53,7 @@ public class HbckTestingUtil { fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOrphans(fixHdfsOrphans); + fsck.setFixTableOrphans(fixTableOrphans); fsck.setFixVersionFile(fixVersionFile); if (table != null) { fsck.includeTable(table);