HBASE-5631 hbck should handle case where .tableinfo file is missing (Jie Huang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1382529 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Hsieh 2012-09-09 16:21:15 +00:00
parent f2140d640a
commit 43bfefc370
3 changed files with 163 additions and 12 deletions

View File

@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -54,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hbase.Abortable; import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.HRegionLocation;
@ -182,6 +184,7 @@ public class HBaseFsck {
private boolean fixHdfsHoles = false; // fix fs holes? private boolean fixHdfsHoles = false; // fix fs holes?
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky) private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo) private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
private boolean fixTableOrphans = false; // fix table orphans (table dirs missing .tableinfo)
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
private boolean fixSplitParents = false; // fix lingering split parents private boolean fixSplitParents = false; // fix lingering split parents
@ -232,6 +235,8 @@ public class HBaseFsck {
*/ */
private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>()); private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
/**
 * Tables whose .tableinfo could not be read, keyed by table name. Each value is the
 * set of column family names collected from a region directory of that table.
 */
private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
/** /**
* Constructor * Constructor
* *
@ -333,7 +338,8 @@ public class HBaseFsck {
*/ */
public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException { public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
// Initial pass to fix orphans. // Initial pass to fix orphans.
if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) { if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
|| shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
LOG.info("Loading regioninfos HDFS"); LOG.info("Loading regioninfos HDFS");
// if nothing is happening this should always complete in two iterations. // if nothing is happening this should always complete in two iterations.
int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3); int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@ -399,6 +405,9 @@ public class HBaseFsck {
// Get disabled tables from ZooKeeper // Get disabled tables from ZooKeeper
loadDisabledTables(); loadDisabledTables();
// fix the orphan tables
fixOrphanTables();
// Check and fix consistency // Check and fix consistency
checkAndFixConsistency(); checkAndFixConsistency();
@ -704,7 +713,7 @@ public class HBaseFsck {
if (modTInfo == null) { if (modTInfo == null) {
// only executed once per table. // only executed once per table.
modTInfo = new TableInfo(tableName); modTInfo = new TableInfo(tableName);
Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR)); Path hbaseRoot = FSUtils.getRootDir(conf);
tablesInfo.put(tableName, modTInfo); tablesInfo.put(tableName, modTInfo);
try { try {
HTableDescriptor htd = HTableDescriptor htd =
@ -712,9 +721,14 @@ public class HBaseFsck {
hbaseRoot, tableName); hbaseRoot, tableName);
modTInfo.htds.add(htd); modTInfo.htds.add(htd);
} catch (IOException ioe) { } catch (IOException ioe) {
LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe); if (!orphanTableDirs.containsKey(tableName)) {
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE, LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
"Unable to read .tableinfo from " + hbaseRoot); //should only report once for each table
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
"Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
Set<String> columns = new HashSet<String>();
orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
}
} }
} }
modTInfo.addRegionInfo(hbi); modTInfo.addRegionInfo(hbi);
@ -723,6 +737,103 @@ public class HBaseFsck {
return tablesInfo; return tablesInfo;
} }
/**
 * Collects column family names for a region by listing the entries of its HDFS
 * region directory that are accepted by {@link FSUtils.FamilyDirFilter}.
 *
 * @param columns set that receives the names; it is modified in place
 * @param hbi hbck info whose HDFS region directory is listed
 * @return the same {@code columns} instance, after the names have been added
 * @throws IOException if the file system cannot be obtained or the listing fails
 */
private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
  final Path regionPath = hbi.getHdfsRegionDir();
  final FileSystem regionFs = regionPath.getFileSystem(conf);
  final FileStatus[] familyDirs =
      regionFs.listStatus(regionPath, new FSUtils.FamilyDirFilter(regionFs));
  for (int i = 0; i < familyDirs.length; i++) {
    // The last path component of each accepted entry is used as the family name.
    columns.add(familyDirs[i].getPath().getName());
  }
  return columns;
}
/**
 * Fabricates a .tableinfo file for a table, containing:<br>
 * 1. the given table name <br>
 * 2. one column family per entry of {@code columns}<br>
 * 3. the default properties of both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
 *
 * @param tableName name used for the fabricated descriptor
 * @param columns column family names to add; may be null
 * @return false, without creating anything, when {@code columns} is null or empty;
 *         true once the descriptor creation call has returned
 * @throws IOException if creating the table descriptor fails
 */
private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
  final boolean hasFamilies = columns != null && !columns.isEmpty();
  if (hasFamilies) {
    HTableDescriptor descriptor = new HTableDescriptor(tableName);
    for (String family : columns) {
      descriptor.addFamily(new HColumnDescriptor(family));
    }
    // 'true' is passed as the third argument exactly as before
    // (presumably "force creation" -- confirm against FSTableDescriptors).
    FSTableDescriptors.createTableDescriptor(descriptor, conf, true);
  }
  return hasFamilies;
}
/**
 * To fix orphan tables by creating a .tableinfo file under tableDir <br>
 * 1. if a table descriptor is returned by {@code getHTableDescriptors} for the table,
 * recover the .tableinfo from it <br>
 * 2. else create a default .tableinfo file with following items<br>
 * &nbsp;2.1 the correct tablename <br>
 * &nbsp;2.2 the correct colfamily list<br>
 * &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
 * <p>
 * Descriptors are matched to orphan tables by table name, so the result does not depend on
 * how many descriptors are returned or in which order. {@code fixes} is incremented only
 * for tables whose .tableinfo was actually written. The orphan table list is always
 * cleared before returning, whether or not fixing was enabled.
 *
 * @throws IOException if writing a table descriptor fails
 */
public void fixOrphanTables() throws IOException {
  if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
    Path hbaseRoot = FSUtils.getRootDir(conf);
    List<String> orphanNames = new ArrayList<String>(orphanTableDirs.keySet());

    // Index the returned descriptors by name. Pairing them with the orphan tables by
    // array position is unsafe: the lookup may return fewer descriptors than requested,
    // or return them in a different order than the map iterates.
    Map<String, HTableDescriptor> cachedByName = new HashMap<String, HTableDescriptor>();
    for (HTableDescriptor cached : getHTableDescriptors(orphanNames)) {
      cachedByName.put(Bytes.toString(cached.getName()), cached);
    }

    int numFailedCase = 0;
    Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
    while (iter.hasNext()) {
      Entry<String, Set<String>> entry = iter.next();
      String tableName = entry.getKey();
      LOG.info("Trying to fix orphan table error: " + tableName);

      HTableDescriptor htd = cachedByName.get(tableName);
      if (htd != null) {
        LOG.info("fixing orphan table: " + tableName + " from cache");
        FSTableDescriptors.createTableDescriptor(
            hbaseRoot.getFileSystem(conf), hbaseRoot, htd, true);
        iter.remove();
        fixes++;
      } else if (fabricateTableInfo(tableName, entry.getValue())) {
        LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
        LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
        iter.remove();
        fixes++;
      } else {
        // No descriptor available and no column families known: nothing can be written.
        LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
        numFailedCase++;
      }
    }

    if (orphanTableDirs.isEmpty()) {
      // all orphanTableDirs are luckily recovered
      // re-run doFsck after recovering the .tableinfo file
      setShouldRerun();
      LOG.warn("Strongly recommend to re-run manually hfsck after all orphanTableDirs being fixed");
    } else if (numFailedCase > 0) {
      LOG.error("Failed to fix " + numFailedCase
          + " OrphanTables with default .tableinfo files");
    }
  }
  //cleanup the list
  orphanTableDirs.clear();
}
/** /**
* This borrows code from MasterFileSystem.bootstrap() * This borrows code from MasterFileSystem.bootstrap()
* *
@ -3018,6 +3129,14 @@ public class HBaseFsck {
return fixHdfsHoles; return fixHdfsHoles;
} }
/**
 * Enables or disables fixing of orphan tables (table dirs with no .tableinfo file).
 *
 * @param shouldFix true to enable the fix, false to disable it
 */
public void setFixTableOrphans(boolean shouldFix) {
  this.fixTableOrphans = shouldFix;
}
/**
 * @return true if fixing of orphan tables (table dirs with no .tableinfo file) is enabled
 */
boolean shouldFixTableOrphans() {
  return this.fixTableOrphans;
}
public void setFixHdfsOverlaps(boolean shouldFix) { public void setFixHdfsOverlaps(boolean shouldFix) {
fixHdfsOverlaps = shouldFix; fixHdfsOverlaps = shouldFix;
} }
@ -3159,6 +3278,7 @@ public class HBaseFsck {
System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good."); System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs."); System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs"); System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
System.err.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs."); System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs."); System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)"); System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
@ -3263,6 +3383,8 @@ public class HBaseFsck {
setFixHdfsHoles(true); setFixHdfsHoles(true);
} else if (cmd.equals("-fixHdfsOrphans")) { } else if (cmd.equals("-fixHdfsOrphans")) {
setFixHdfsOrphans(true); setFixHdfsOrphans(true);
} else if (cmd.equals("-fixTableOrphans")) {
setFixTableOrphans(true);
} else if (cmd.equals("-fixHdfsOverlaps")) { } else if (cmd.equals("-fixHdfsOverlaps")) {
setFixHdfsOverlaps(true); setFixHdfsOverlaps(true);
} else if (cmd.equals("-fixVersionFile")) { } else if (cmd.equals("-fixVersionFile")) {
@ -3389,6 +3511,7 @@ public class HBaseFsck {
setFixHdfsHoles(false); setFixHdfsHoles(false);
setFixHdfsOverlaps(false); setFixHdfsOverlaps(false);
setFixVersionFile(false); setFixVersionFile(false);
setFixTableOrphans(false);
errors.resetErrors(); errors.resetErrors();
code = onlineHbck(); code = onlineHbck();
setRetCode(code); setRetCode(code);

View File

@ -414,20 +414,47 @@ public class TestHBaseFsck {
} }
@Test @Test
public void testHbckMissingTableinfo() throws Exception { public void testHbckFixOrphanTable() throws Exception {
String table = "tableInfo"; String table = "tableInfo";
FileSystem fs = null; FileSystem fs = null;
Path tableinfo = null; Path tableinfo = null;
try { try {
setupTable(table); setupTable(table);
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table ); Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table );
fs = hbaseTableDir.getFileSystem(conf); fs = hbaseTableDir.getFileSystem(conf);
FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir); FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
tableinfo = status.getPath(); tableinfo = status.getPath();
fs.rename(tableinfo, new Path("/.tableinfo")); fs.rename(tableinfo, new Path("/.tableinfo"));
//to report error if .tableinfo is missing.
HBaseFsck hbck = doFsck(conf, false); HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE }); assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
// fix OrphanTable with default .tableinfo
hbck = doFsck(conf, true);
assertNoErrors(hbck);
status = null;
status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
assertNotNull(status);
HTableDescriptor htd = admin.getTableDescriptor(table.getBytes());
htd.setValue("NOT_DEFAULT", "true");
admin.disableTable(table);
admin.modifyTable(table.getBytes(), htd);
admin.enableTable(table);
fs.delete(status.getPath(), true);
// fix OrphanTable with cache
htd = admin.getTableDescriptor(table.getBytes());
hbck = doFsck(conf, true);
assertNoErrors(hbck);
status = null;
status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
assertNotNull(status);
htd = admin.getTableDescriptor(table.getBytes());
assertEquals(htd.getValue("NOT_DEFAULT"), "true");
} finally { } finally {
fs.rename(new Path("/.tableinfo"), tableinfo); fs.rename(new Path("/.tableinfo"), tableinfo);
deleteTable(table); deleteTable(table);

View File

@ -37,12 +37,12 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck( public static HBaseFsck doFsck(
Configuration conf, boolean fix, String table) throws Exception { Configuration conf, boolean fix, String table) throws Exception {
return doFsck(conf, fix, fix, fix, fix,fix, fix, table); return doFsck(conf, fix, fix, fix, fix,fix, fix, fix, table);
} }
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments, public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps, boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans, boolean fixVersionFile, boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
String table) throws Exception { String table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect(); fsck.connect();
@ -53,6 +53,7 @@ public class HbckTestingUtil {
fsck.setFixHdfsHoles(fixHdfsHoles); fsck.setFixHdfsHoles(fixHdfsHoles);
fsck.setFixHdfsOverlaps(fixHdfsOverlaps); fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
fsck.setFixHdfsOrphans(fixHdfsOrphans); fsck.setFixHdfsOrphans(fixHdfsOrphans);
fsck.setFixTableOrphans(fixTableOrphans);
fsck.setFixVersionFile(fixVersionFile); fsck.setFixVersionFile(fixVersionFile);
if (table != null) { if (table != null) {
fsck.includeTable(table); fsck.includeTable(table);