HBASE-5631 hbck should handle case where .tableinfo file is missing (Jie Huang)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1382529 13f79535-47bb-0310-9956-ffa450edef68
Jonathan Hsieh 2012-09-09 16:21:15 +00:00
parent f2140d640a
commit 43bfefc370
3 changed files with 163 additions and 12 deletions
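
The new repair can also be driven programmatically. Below is a minimal sketch that uses only methods visible in this patch (connect(), setFixTableOrphans(), onlineHbck()); the main() scaffold and class name are illustrative, not part of the commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;

public class FixOrphanTableInfo {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = new HBaseFsck(conf);
    fsck.connect();                  // attach to the running cluster
    fsck.setFixTableOrphans(true);   // enable the new .tableinfo repair
    int code = fsck.onlineHbck();    // online consistency check plus fixes
    System.exit(code);
  }
}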

HBaseFsck.java

@@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -54,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
@@ -182,6 +184,7 @@ public class HBaseFsck {
private boolean fixHdfsHoles = false; // fix fs holes?
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
private boolean fixSplitParents = false; // fix lingering split parents
@@ -231,6 +234,8 @@ public class HBaseFsck {
* When initially looking at HDFS, we attempt to find any orphaned data.
*/
private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());
private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
/**
* Constructor
@@ -333,7 +338,8 @@
*/
public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
// Initial pass to fix orphans.
-if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
+if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
+|| shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
LOG.info("Loading regioninfos HDFS");
// if nothing is happening this should always complete in two iterations.
int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@@ -385,7 +391,7 @@
if (!checkMetaOnly) {
reportTablesInFlux();
}
// get regions according to what is online on each RegionServer
loadDeployedRegions();
@@ -399,6 +405,9 @@
// Get disabled tables from ZooKeeper
loadDisabledTables();
// fix the orphan tables
fixOrphanTables();
// Check and fix consistency
checkAndFixConsistency();
@@ -704,7 +713,7 @@
if (modTInfo == null) {
// only executed once per table.
modTInfo = new TableInfo(tableName);
-Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
+Path hbaseRoot = FSUtils.getRootDir(conf);
tablesInfo.put(tableName, modTInfo);
try {
HTableDescriptor htd =
@@ -712,9 +721,14 @@
hbaseRoot, tableName);
modTInfo.htds.add(htd);
} catch (IOException ioe) {
LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
"Unable to read .tableinfo from " + hbaseRoot);
if (!orphanTableDirs.containsKey(tableName)) {
LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
//should only report once for each table
errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
"Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
Set<String> columns = new HashSet<String>();
orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
}
}
}
modTInfo.addRegionInfo(hbi);
@@ -722,6 +736,103 @@
return tablesInfo;
}
/**
* Builds the set of column family names from the column family dirs under the region dir.
* @param columns set to collect the column family names into
* @param hbi the region whose HDFS dir is scanned for family dirs
* @return the populated set of column family names
* @throws IOException
*/
private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
Path regionDir = hbi.getHdfsRegionDir();
FileSystem fs = regionDir.getFileSystem(conf);
FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
for (FileStatus subdir : subDirs) {
String columnfamily = subdir.getPath().getName();
columns.add(columnfamily);
}
return columns;
}
/**
* Fabricates a .tableinfo file with the following contents:<br>
* 1. the correct table name<br>
* 2. the correct column family list<br>
* 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
* @param tableName name of the orphan table
* @param columns column families found on disk for the table
* @return true if the .tableinfo was fabricated, false if no column family information was available
* @throws IOException
*/
private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
if (columns == null || columns.isEmpty()) return false;
HTableDescriptor htd = new HTableDescriptor(tableName);
for (String columnfamily : columns) {
htd.addFamily(new HColumnDescriptor(columnfamily));
}
FSTableDescriptors.createTableDescriptor(htd, conf, true);
return true;
}
/**
* Fixes orphan tables by creating a .tableinfo file under each orphan tableDir:<br>
* 1. if the table descriptor is cached, recover the .tableinfo from it<br>
* 2. else create a default .tableinfo file with the following items:<br>
* &nbsp;2.1 the correct table name<br>
* &nbsp;2.2 the correct column family list<br>
* &nbsp;2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
* @throws IOException
*/
public void fixOrphanTables() throws IOException {
if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
Path hbaseRoot = FSUtils.getRootDir(conf);
List<String> tmpList = new ArrayList<String>();
tmpList.addAll(orphanTableDirs.keySet());
HTableDescriptor[] htds = getHTableDescriptors(tmpList);
Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
int j = 0;
int numFailedCase = 0;
while (iter.hasNext()) {
Entry<String, Set<String>> entry = iter.next();
String tableName = entry.getKey();
LOG.info("Trying to fix orphan table error: " + tableName);
if (j < htds.length) {
if (tableName.equals(Bytes.toString(htds[j].getName()))) {
HTableDescriptor htd = htds[j];
LOG.info("fixing orphan table: " + tableName + " from cache");
FSTableDescriptors.createTableDescriptor(
hbaseRoot.getFileSystem(conf), hbaseRoot, htd, true);
j++;
iter.remove();
}
} else {
if (fabricateTableInfo(tableName, entry.getValue())) {
LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
LOG.warn("Strongly recommend to modify the HTableDescriptor if necessary for: " + tableName);
iter.remove();
} else {
LOG.error("Unable to create default .tableinfo for " + tableName + " while missing column family information");
numFailedCase++;
}
}
fixes++;
}
if (orphanTableDirs.isEmpty()) {
// all orphan table dirs were recovered
// re-run doFsck after the .tableinfo files have been restored
setShouldRerun();
LOG.warn("Strongly recommend manually re-running hbck after all orphan table dirs have been fixed");
} else if (numFailedCase > 0) {
LOG.error("Failed to fix " + numFailedCase
+ " OrphanTables with default .tableinfo files");
}
}
// clean up the list
orphanTableDirs.clear();
}
/**
* This borrows code from MasterFileSystem.bootstrap()
@@ -3017,7 +3128,15 @@ public class HBaseFsck {
boolean shouldFixHdfsHoles() {
return fixHdfsHoles;
}
public void setFixTableOrphans(boolean shouldFix) {
fixTableOrphans = shouldFix;
}
boolean shouldFixTableOrphans() {
return fixTableOrphans;
}
public void setFixHdfsOverlaps(boolean shouldFix) {
fixHdfsOverlaps = shouldFix;
}
@@ -3159,6 +3278,7 @@
System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
System.err.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
System.err.println(" -fixHdfsOverlaps Try to fix region overlaps in hdfs.");
System.err.println(" -fixVersionFile Try to fix missing hbase.version file in hdfs.");
System.err.println(" -maxMerge <n> When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE +" by default)");
@@ -3263,6 +3383,8 @@
setFixHdfsHoles(true);
} else if (cmd.equals("-fixHdfsOrphans")) {
setFixHdfsOrphans(true);
} else if (cmd.equals("-fixTableOrphans")) {
setFixTableOrphans(true);
} else if (cmd.equals("-fixHdfsOverlaps")) {
setFixHdfsOverlaps(true);
} else if (cmd.equals("-fixVersionFile")) {
@@ -3389,6 +3511,7 @@
setFixHdfsHoles(false);
setFixHdfsOverlaps(false);
setFixVersionFile(false);
setFixTableOrphans(false);
errors.resetErrors();
code = onlineHbck();
setRetCode(code);
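
For reference, the flag added to the usage text and argument parser above is passed to the hbck tool on the command line; a hypothetical invocation (the launcher itself is untouched by this patch) would be:

  $ bin/hbase hbck -fixTableOrphans

Such a run takes the two repair paths in fixOrphanTables(): tables whose descriptor is still available through getHTableDescriptors() get their .tableinfo rewritten from that copy, and the remainder get a default descriptor fabricated from the column family directories found on disk.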

TestHBaseFsck.java

@@ -412,28 +412,55 @@ public class TestHBaseFsck {
deleteTable(table);
}
}
@Test
-public void testHbckMissingTableinfo() throws Exception {
+public void testHbckFixOrphanTable() throws Exception {
String table = "tableInfo";
FileSystem fs = null;
Path tableinfo = null;
try {
setupTable(table);
HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table);
fs = hbaseTableDir.getFileSystem(conf);
FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
tableinfo = status.getPath();
fs.rename(tableinfo, new Path("/.tableinfo"));
// hbck should report an error if .tableinfo is missing
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
// fix OrphanTable with default .tableinfo
hbck = doFsck(conf, true);
assertNoErrors(hbck);
status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
assertNotNull(status);
HTableDescriptor htd = admin.getTableDescriptor(table.getBytes());
htd.setValue("NOT_DEFAULT", "true");
admin.disableTable(table);
admin.modifyTable(table.getBytes(), htd);
admin.enableTable(table);
fs.delete(status.getPath(), true);
// fix OrphanTable with cache
htd = admin.getTableDescriptor(table.getBytes());
hbck = doFsck(conf, true);
assertNoErrors(hbck);
status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
assertNotNull(status);
htd = admin.getTableDescriptor(table.getBytes());
assertEquals("true", htd.getValue("NOT_DEFAULT"));
} finally {
fs.rename(new Path("/.tableinfo"), tableinfo);
deleteTable(table);
}
}
/**
* This creates and fixes a bad table with regions that have a duplicate
* start key

HbckTestingUtil.java

@@ -37,12 +37,12 @@ public class HbckTestingUtil {
public static HBaseFsck doFsck(
Configuration conf, boolean fix, String table) throws Exception {
-return doFsck(conf, fix, fix, fix, fix,fix, fix, table);
+return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, table);
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
-boolean fixHdfsOrphans, boolean fixVersionFile,
+boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
String table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
@@ -53,6 +53,7 @@ public class HbckTestingUtil {
fsck.setFixHdfsHoles(fixHdfsHoles);
fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
fsck.setFixHdfsOrphans(fixHdfsOrphans);
fsck.setFixTableOrphans(fixTableOrphans);
fsck.setFixVersionFile(fixVersionFile);
if (table != null) {
fsck.includeTable(table);
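
As a usage note, a test can exercise only the new repair through the widened overload; a minimal sketch against the doFsck signature above (the test name and table name are illustrative, and the same static imports as TestHBaseFsck are assumed):

@Test
public void testFixTableOrphansOnly() throws Exception {
  // Enable only the orphan-.tableinfo repair; all other fix switches stay off.
  // Boolean order follows the signature above: fixAssignments, fixMeta,
  // fixHdfsHoles, fixHdfsOverlaps, fixHdfsOrphans, fixTableOrphans, fixVersionFile.
  HBaseFsck fsck = HbckTestingUtil.doFsck(conf,
      false, false, false, false, false, true, false, "tableInfo");
  assertNoErrors(fsck);
}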