HBASE-5631 hbck should handle case where .tableinfo file is missing (Jie Huang)
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1382529 13f79535-47bb-0310-9956-ffa450edef68
commit 43bfefc370
parent f2140d640a
HBaseFsck.java

@@ -26,6 +26,7 @@ import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
@@ -54,6 +55,7 @@ import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.ClusterStatus;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
@@ -182,6 +184,7 @@ public class HBaseFsck {
   private boolean fixHdfsHoles = false; // fix fs holes?
   private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
   private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
+  private boolean fixTableOrphans = false; // fix fs holes (missing .tableinfo)
   private boolean fixVersionFile = false; // fix missing hbase.version file in hdfs
   private boolean fixSplitParents = false; // fix lingering split parents

@@ -232,6 +235,8 @@ public class HBaseFsck {
    */
  private List<HbckInfo> orphanHdfsDirs = Collections.synchronizedList(new ArrayList<HbckInfo>());

+  private Map<String, Set<String>> orphanTableDirs = new HashMap<String, Set<String>>();
+
  /**
   * Constructor
   *
@@ -333,7 +338,8 @@ public class HBaseFsck {
   */
  public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
    // Initial pass to fix orphans.
-    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles() || shouldFixHdfsOverlaps()) {
+    if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
+        || shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
      LOG.info("Loading regioninfos HDFS");
      // if nothing is happening this should always complete in two iterations.
      int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@@ -399,6 +405,9 @@ public class HBaseFsck {
    // Get disabled tables from ZooKeeper
    loadDisabledTables();

+    // fix the orphan tables
+    fixOrphanTables();
+
    // Check and fix consistency
    checkAndFixConsistency();

@@ -704,7 +713,7 @@ public class HBaseFsck {
      if (modTInfo == null) {
        // only executed once per table.
        modTInfo = new TableInfo(tableName);
-        Path hbaseRoot = new Path(conf.get(HConstants.HBASE_DIR));
+        Path hbaseRoot = FSUtils.getRootDir(conf);
        tablesInfo.put(tableName, modTInfo);
        try {
          HTableDescriptor htd =
@@ -712,9 +721,14 @@ public class HBaseFsck {
              hbaseRoot, tableName);
          modTInfo.htds.add(htd);
        } catch (IOException ioe) {
-          LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
-          errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
-              "Unable to read .tableinfo from " + hbaseRoot);
+          if (!orphanTableDirs.containsKey(tableName)) {
+            LOG.warn("Unable to read .tableinfo from " + hbaseRoot, ioe);
+            // should only report once for each table
+            errors.reportError(ERROR_CODE.NO_TABLEINFO_FILE,
+                "Unable to read .tableinfo from " + hbaseRoot + "/" + tableName);
+            Set<String> columns = new HashSet<String>();
+            orphanTableDirs.put(tableName, getColumnFamilyList(columns, hbi));
+          }
        }
      }
      modTInfo.addRegionInfo(hbi);
@@ -723,6 +737,103 @@ public class HBaseFsck {
    return tablesInfo;
  }

+  /**
+   * To get the column family list according to the column family dirs
+   * @param columns
+   * @param hbi
+   * @return a set of column family names
+   * @throws IOException
+   */
+  private Set<String> getColumnFamilyList(Set<String> columns, HbckInfo hbi) throws IOException {
+    Path regionDir = hbi.getHdfsRegionDir();
+    FileSystem fs = regionDir.getFileSystem(conf);
+    FileStatus[] subDirs = fs.listStatus(regionDir, new FSUtils.FamilyDirFilter(fs));
+    for (FileStatus subdir : subDirs) {
+      String columnFamily = subdir.getPath().getName();
+      columns.add(columnFamily);
+    }
+    return columns;
+  }
+
+  /**
+   * To fabricate a .tableinfo file with the following contents:<br>
+   * 1. the correct table name<br>
+   * 2. the correct column family list<br>
+   * 3. the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
+   * @param tableName
+   * @throws IOException
+   */
+  private boolean fabricateTableInfo(String tableName, Set<String> columns) throws IOException {
+    if (columns == null || columns.isEmpty()) return false;
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    for (String columnFamily : columns) {
+      htd.addFamily(new HColumnDescriptor(columnFamily));
+    }
+    FSTableDescriptors.createTableDescriptor(htd, conf, true);
+    return true;
+  }
+
+  /**
+   * To fix an orphan table by creating a .tableinfo file under tableDir:<br>
+   * 1. if the TableInfo is cached, recover the .tableinfo from it<br>
+   * 2. else create a default .tableinfo file with the following items:<br>
+   * 2.1 the correct table name<br>
+   * 2.2 the correct column family list<br>
+   * 2.3 the default properties for both {@link HTableDescriptor} and {@link HColumnDescriptor}<br>
+   * @throws IOException
+   */
+  public void fixOrphanTables() throws IOException {
+    if (shouldFixTableOrphans() && !orphanTableDirs.isEmpty()) {
+
+      Path hbaseRoot = FSUtils.getRootDir(conf);
+      List<String> tmpList = new ArrayList<String>();
+      tmpList.addAll(orphanTableDirs.keySet());
+      HTableDescriptor[] htds = getHTableDescriptors(tmpList);
+      Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
+      int j = 0;
+      int numFailedCase = 0;
+      while (iter.hasNext()) {
+        Entry<String, Set<String>> entry = iter.next();
+        String tableName = entry.getKey();
+        LOG.info("Trying to fix orphan table error: " + tableName);
+        if (j < htds.length) {
+          if (tableName.equals(Bytes.toString(htds[j].getName()))) {
+            HTableDescriptor htd = htds[j];
+            LOG.info("fixing orphan table: " + tableName + " from cache");
+            FSTableDescriptors.createTableDescriptor(
+                hbaseRoot.getFileSystem(conf), hbaseRoot, htd, true);
+            j++;
+            iter.remove();
+          }
+        } else {
+          if (fabricateTableInfo(tableName, entry.getValue())) {
+            LOG.warn("fixing orphan table: " + tableName + " with a default .tableinfo file");
+            LOG.warn("Strongly recommend modifying the HTableDescriptor if necessary for: " + tableName);
+            iter.remove();
+          } else {
+            LOG.error("Unable to create default .tableinfo for " + tableName + ": no column family information available");
+            numFailedCase++;
+          }
+        }
+        fixes++;
+      }
+
+      if (orphanTableDirs.isEmpty()) {
+        // all orphanTableDirs were recovered;
+        // re-run doFsck after recovering the .tableinfo files
+        setShouldRerun();
+        LOG.warn("Strongly recommend re-running hbck manually after all orphanTableDirs have been fixed");
+      } else if (numFailedCase > 0) {
+        LOG.error("Failed to fix " + numFailedCase
+            + " orphan tables with default .tableinfo files");
+      }
+
+    }
+    // cleanup the list
+    orphanTableDirs.clear();
+
+  }
+
  /**
   * This borrows code from MasterFileSystem.bootstrap()
   *
@@ -3018,6 +3129,14 @@ public class HBaseFsck {
    return fixHdfsHoles;
  }

+  public void setFixTableOrphans(boolean shouldFix) {
+    fixTableOrphans = shouldFix;
+  }
+
+  boolean shouldFixTableOrphans() {
+    return fixTableOrphans;
+  }
+
  public void setFixHdfsOverlaps(boolean shouldFix) {
    fixHdfsOverlaps = shouldFix;
  }
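The setter/getter pair above completes the API surface for the new fix. As a sketch only (not part of the patch), the repair can presumably be driven programmatically the same way HbckTestingUtil.doFsck() does in the test changes below; this assumes a reachable cluster and the method names and visibilities shown in this diff:

// Illustrative only: drive the new repair path via the HBaseFsck API.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;

public class FixTableOrphansExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = new HBaseFsck(conf);
    fsck.connect();
    fsck.setFixTableOrphans(true); // the switch added by this patch
    int code = fsck.onlineHbck();  // fixOrphanTables() runs during this pass
    System.exit(code);
  }
}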
@@ -3159,6 +3278,7 @@ public class HBaseFsck {
    System.err.println("   -fixMeta  Try to fix meta problems.  This assumes HDFS region info is good.");
    System.err.println("   -fixHdfsHoles  Try to fix region holes in hdfs.");
    System.err.println("   -fixHdfsOrphans  Try to fix region dirs with no .regioninfo file in hdfs");
+    System.err.println("   -fixTableOrphans  Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
    System.err.println("   -fixHdfsOverlaps  Try to fix region overlaps in hdfs.");
    System.err.println("   -fixVersionFile  Try to fix missing hbase.version file in hdfs.");
    System.err.println("   -maxMerge <n>  When fixing region overlaps, allow at most <n> regions to merge. (n=" + DEFAULT_MAX_MERGE + " by default)");
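Per the usage text above, the repair is requested from the shell like the other fix flags; an illustrative invocation (the wrapper script name may vary by install):

  $ bin/hbase hbck -fixTableOrphans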
@@ -3263,6 +3383,8 @@ public class HBaseFsck {
        setFixHdfsHoles(true);
      } else if (cmd.equals("-fixHdfsOrphans")) {
        setFixHdfsOrphans(true);
+      } else if (cmd.equals("-fixTableOrphans")) {
+        setFixTableOrphans(true);
      } else if (cmd.equals("-fixHdfsOverlaps")) {
        setFixHdfsOverlaps(true);
      } else if (cmd.equals("-fixVersionFile")) {
@@ -3389,6 +3511,7 @@ public class HBaseFsck {
        setFixHdfsHoles(false);
        setFixHdfsOverlaps(false);
        setFixVersionFile(false);
+        setFixTableOrphans(false);
        errors.resetErrors();
        code = onlineHbck();
        setRetCode(code);
TestHBaseFsck.java

@@ -414,20 +414,47 @@ public class TestHBaseFsck {
  }

  @Test
-  public void testHbckMissingTableinfo() throws Exception {
+  public void testHbckFixOrphanTable() throws Exception {
    String table = "tableInfo";
    FileSystem fs = null;
    Path tableinfo = null;
    try {
      setupTable(table);
+      HBaseAdmin admin = TEST_UTIL.getHBaseAdmin();
+
      Path hbaseTableDir = new Path(conf.get(HConstants.HBASE_DIR) + "/" + table);
      fs = hbaseTableDir.getFileSystem(conf);
      FileStatus status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
      tableinfo = status.getPath();
      fs.rename(tableinfo, new Path("/.tableinfo"));
+
+      // should report an error because .tableinfo is missing
      HBaseFsck hbck = doFsck(conf, false);
      assertErrors(hbck, new ERROR_CODE[] { ERROR_CODE.NO_TABLEINFO_FILE });
+
+      // fix the orphan table with a default .tableinfo
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = null;
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+
+      HTableDescriptor htd = admin.getTableDescriptor(table.getBytes());
+      htd.setValue("NOT_DEFAULT", "true");
+      admin.disableTable(table);
+      admin.modifyTable(table.getBytes(), htd);
+      admin.enableTable(table);
+      fs.delete(status.getPath(), true);
+
+      // fix the orphan table from the cached descriptor
+      htd = admin.getTableDescriptor(table.getBytes());
+      hbck = doFsck(conf, true);
+      assertNoErrors(hbck);
+      status = null;
+      status = FSTableDescriptors.getTableInfoPath(fs, hbaseTableDir);
+      assertNotNull(status);
+      htd = admin.getTableDescriptor(table.getBytes());
+      assertEquals(htd.getValue("NOT_DEFAULT"), "true");
    } finally {
      fs.rename(new Path("/.tableinfo"), tableinfo);
      deleteTable(table);
HbckTestingUtil.java

@@ -37,12 +37,12 @@ public class HbckTestingUtil {

  public static HBaseFsck doFsck(
      Configuration conf, boolean fix, String table) throws Exception {
-    return doFsck(conf, fix, fix, fix, fix, fix, fix, table);
+    return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, table);
  }

  public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
      boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
-      boolean fixHdfsOrphans, boolean fixVersionFile,
+      boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
      String table) throws Exception {
    HBaseFsck fsck = new HBaseFsck(conf);
    fsck.connect();
@@ -53,6 +53,7 @@ public class HbckTestingUtil {
    fsck.setFixHdfsHoles(fixHdfsHoles);
    fsck.setFixHdfsOverlaps(fixHdfsOverlaps);
    fsck.setFixHdfsOrphans(fixHdfsOrphans);
+    fsck.setFixTableOrphans(fixTableOrphans);
    fsck.setFixVersionFile(fixVersionFile);
    if (table != null) {
      fsck.includeTable(table);
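Since the helper gained a parameter, any caller of the long overload must be updated to pass the new flag; a hypothetical call site, with the argument order as in the new signature above:

// Hypothetical usage of the updated overload; conf is an
// org.apache.hadoop.conf.Configuration for the test cluster.
HBaseFsck fsck = HbckTestingUtil.doFsck(conf,
    true,   // fixAssignments
    true,   // fixMeta
    true,   // fixHdfsHoles
    true,   // fixHdfsOverlaps
    true,   // fixHdfsOrphans
    true,   // fixTableOrphans (new in this patch)
    true,   // fixVersionFile
    null);  // table: null means do not restrict to a single table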