HBASE-7190 Add an option to hbck to check only meta and assignment

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1413762 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jxiang 2012-11-26 18:10:14 +00:00
parent c0b6769be8
commit 7233553030
2 changed files with 188 additions and 11 deletions

View File

@ -85,7 +85,6 @@ import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogUtil;
import org.apache.hadoop.hbase.util.HBaseFsck.ErrorReporter.ERROR_CODE;
import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
@ -182,6 +181,7 @@ public class HBaseFsck {
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
private boolean fixAssignments = false; // fix assignment errors?
private boolean fixMeta = false; // fix meta errors?
private boolean checkHdfs = true; // load and check fs consistency?
private boolean fixHdfsHoles = false; // fix fs holes?
private boolean fixHdfsOverlaps = false; // fix fs overlaps (risky)
private boolean fixHdfsOrphans = false; // fix fs holes (missing .regioninfo)
@ -339,8 +339,8 @@ public class HBaseFsck {
*/
public void offlineHdfsIntegrityRepair() throws IOException, InterruptedException {
// Initial pass to fix orphans.
if (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
|| shouldFixHdfsOverlaps() || shouldFixTableOrphans()) {
if (shouldCheckHdfs() && (shouldFixHdfsOrphans() || shouldFixHdfsHoles()
|| shouldFixHdfsOverlaps() || shouldFixTableOrphans())) {
LOG.info("Loading regioninfos HDFS");
// if nothing is happening this should always complete in two iterations.
int maxIterations = conf.getInt("hbase.hbck.integrityrepair.iterations.max", 3);
@ -397,8 +397,10 @@ public class HBaseFsck {
loadDeployedRegions();
// load regiondirs and regioninfos from HDFS
loadHdfsRegionDirs();
loadHdfsRegionInfos();
if (shouldCheckHdfs()) {
loadHdfsRegionDirs();
loadHdfsRegionInfos();
}
// Empty cells in .META.?
reportEmptyMetaCells();
@ -790,7 +792,7 @@ public class HBaseFsck {
List<String> tmpList = new ArrayList<String>();
tmpList.addAll(orphanTableDirs.keySet());
HTableDescriptor[] htds = getHTableDescriptors(tmpList);
Iterator iter = orphanTableDirs.entrySet().iterator();
Iterator<Entry<String, Set<String>>> iter = orphanTableDirs.entrySet().iterator();
int j = 0;
int numFailedCase = 0;
while (iter.hasNext()) {
@ -1485,8 +1487,12 @@ public class HBaseFsck {
errors.print(msg);
undeployRegions(hbi);
setShouldRerun();
HBaseFsckRepair.fixUnassigned(admin, hbi.getHdfsHRI());
HBaseFsckRepair.waitUntilAssigned(admin, hbi.getHdfsHRI());
HRegionInfo hri = hbi.getHdfsHRI();
if (hri == null) {
hri = hbi.metaEntry;
}
HBaseFsckRepair.fixUnassigned(admin, hri);
HBaseFsckRepair.waitUntilAssigned(admin, hri);
}
}
@ -1498,7 +1504,8 @@ public class HBaseFsck {
String descriptiveName = hbi.toString();
boolean inMeta = hbi.metaEntry != null;
boolean inHdfs = hbi.getHdfsRegionDir()!= null;
// In case not checking HDFS, assume the region is on HDFS
boolean inHdfs = !shouldCheckHdfs() || hbi.getHdfsRegionDir() != null;
boolean hasMetaAssignment = inMeta && hbi.metaEntry.regionServer != null;
boolean isDeployed = !hbi.deployedOn.isEmpty();
boolean isMultiplyDeployed = hbi.deployedOn.size() > 1;
@ -1508,7 +1515,7 @@ public class HBaseFsck {
boolean splitParent =
(hbi.metaEntry == null)? false: hbi.metaEntry.isSplit() && hbi.metaEntry.isOffline();
boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
boolean recentlyModified = hbi.getHdfsRegionDir() != null &&
boolean recentlyModified = inHdfs &&
hbi.getModTime() + timelag > System.currentTimeMillis();
// ========== First the healthy cases =============
@ -3122,6 +3129,14 @@ public class HBaseFsck {
return fixMeta;
}
/** Enables or disables loading/checking of region info from HDFS (the -noHdfsChecking flag sets this to false). */
public void setCheckHdfs(boolean checking) {
this.checkHdfs = checking;
}
/** Returns true unless HDFS checking was disabled via -noHdfsChecking. */
boolean shouldCheckHdfs() {
return this.checkHdfs;
}
public void setFixHdfsHoles(boolean shouldFix) {
fixHdfsHoles = shouldFix;
}
@ -3277,6 +3292,8 @@ public class HBaseFsck {
System.err.println(" -fix Try to fix region assignments. This is for backwards compatiblity");
System.err.println(" -fixAssignments Try to fix region assignments. Replaces the old -fix");
System.err.println(" -fixMeta Try to fix meta problems. This assumes HDFS region info is good.");
System.err.println(" -noHdfsChecking Don't load/check region info from HDFS."
+ " Assumes META region info is good. Won't check/fix any HDFS issue, e.g. hole, orphan, or overlap");
System.err.println(" -fixHdfsHoles Try to fix region holes in hdfs.");
System.err.println(" -fixHdfsOrphans Try to fix region dirs with no .regioninfo file in hdfs");
System.err.println(" -fixTableOrphans Try to fix table dirs with no .tableinfo file in hdfs (online mode only)");
@ -3380,6 +3397,8 @@ public class HBaseFsck {
setFixAssignments(true);
} else if (cmd.equals("-fixMeta")) {
setFixMeta(true);
} else if (cmd.equals("-noHdfsChecking")) {
setCheckHdfs(false);
} else if (cmd.equals("-fixHdfsHoles")) {
setFixHdfsHoles(true);
} else if (cmd.equals("-fixHdfsOrphans")) {
@ -3411,6 +3430,7 @@ public class HBaseFsck {
setFixVersionFile(true);
setSidelineBigOverlaps(true);
setFixSplitParents(false);
setCheckHdfs(true);
} else if (cmd.equals("-repairHoles")) {
// this will make all missing hdfs regions available but may lose data
setFixHdfsHoles(true);
@ -3420,6 +3440,7 @@ public class HBaseFsck {
setFixHdfsOverlaps(false);
setSidelineBigOverlaps(false);
setFixSplitParents(false);
setCheckHdfs(true);
} else if (cmd.equals("-maxOverlapsToSideline")) {
if (i == args.length - 1) {
System.err.println("-maxOverlapsToSideline needs a numeric value argument.");
@ -3531,7 +3552,7 @@ public class HBaseFsck {
* ls -r for debugging purposes
*/
public static void debugLsr(Configuration conf, Path p) throws IOException {
if (!LOG.isDebugEnabled()) {
if (!LOG.isDebugEnabled() || p == null) {
return;
}
FileSystem fs = p.getFileSystem(conf);

View File

@ -1316,6 +1316,162 @@ public class TestHBaseFsck {
}
}
/**
 * Verifies that hbck run with -noHdfsChecking (setCheckHdfs(false)) still detects
 * assignment errors, and that -fixAssignments repairs them without consulting HDFS.
 */
@Test
public void testFixAssignmentsAndNoHdfsChecking() throws Exception {
String table = "testFixAssignmentsAndNoHdfsChecking";
try {
setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// Mess it up by closing a region: unassign=true, the other flags (meta/hdfs/root)
// are left false so only the deployment is broken.
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
Bytes.toBytes("B"), true, false, false, false);
// Baseline: a normal (HDFS-checking) hbck run reports exactly these two errors.
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] {
ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
// With HDFS checking disabled, hbck must report the same assignment errors.
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.onlineHbck();
assertErrors(fsck, new ERROR_CODE[] {
ERROR_CODE.NOT_DEPLOYED, ERROR_CODE.HOLE_IN_REGION_CHAIN});
// Now run again with -fixAssignments: the fix should still work even though
// HDFS was never consulted.
fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.setFixAssignments(true);
fsck.onlineHbck();
assertTrue(fsck.shouldRerun()); // a repair was performed, so a re-check is requested
fsck.onlineHbck();
assertNoErrors(fsck);
// No data lost: all original rows are still readable.
assertEquals(ROWKEYS.length, countRows());
} finally {
deleteTable(table);
}
}
/**
 * Verifies that -noHdfsChecking can still detect a region that is deployed but
 * missing from meta (NOT_IN_META), but that -fixMeta cannot repair it in this
 * mode: patching meta requires reading the region info from HDFS, which is
 * exactly what -noHdfsChecking skips.
 */
@Test
public void testFixMetaNotWorkingWithNoHdfsChecking() throws Exception {
String table = "testFixMetaNotWorkingWithNoHdfsChecking";
try {
setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// Mess it up by deleting a region from the metadata (metaRow=true); the region
// stays deployed and its HDFS data is untouched.
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
Bytes.toBytes("B"), false, true, false, false);
// Baseline: a normal hbck run reports exactly these two errors.
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] {
ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
// With HDFS checking disabled, detection still works: same errors reported.
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.onlineHbck();
assertErrors(fsck, new ERROR_CODE[] {
ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
// But repair does NOT work: -fixMeta needs the HDFS .regioninfo to rebuild
// the meta row, so with noHdfsChecking nothing is fixed and no rerun is requested.
fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.setFixAssignments(true);
fsck.setFixMeta(true);
fsck.onlineHbck();
assertFalse(fsck.shouldRerun());
assertErrors(fsck, new ERROR_CODE[] {
ERROR_CODE.NOT_IN_META, ERROR_CODE.HOLE_IN_REGION_CHAIN});
} finally {
deleteTable(table);
}
}
/**
 * Verifies two limitations of -noHdfsChecking: it cannot detect an orphan HDFS
 * region (ORPHAN_HDFS_REGION requires scanning HDFS), and the HDFS repair flags
 * (-fixHdfsHoles / -fixHdfsOverlaps / -fixHdfsOrphans) have no effect when HDFS
 * checking is disabled.
 */
@Test
public void testFixHdfsHolesNotWorkingWithNoHdfsChecking() throws Exception {
String table = "testFixHdfsHolesNotWorkingWithNoHdfsChecking";
try {
setupTable(table);
assertEquals(ROWKEYS.length, countRows());
// Mess it up by creating an overlap in the metadata: first remove region [A,B)
// (unassign + delete from meta + delete from HDFS), ...
TEST_UTIL.getHBaseAdmin().disableTable(table);
deleteRegion(conf, tbl.getTableDescriptor(), Bytes.toBytes("A"),
Bytes.toBytes("B"), true, true, false, true);
TEST_UTIL.getHBaseAdmin().enableTable(table);
// ... then create and assign an overlapping region [A2,B).
HRegionInfo hriOverlap = createRegion(conf, tbl.getTableDescriptor(),
Bytes.toBytes("A2"), Bytes.toBytes("B"));
TEST_UTIL.getHBaseCluster().getMaster().assignRegion(hriOverlap);
TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager()
.waitForAssignment(hriOverlap);
// NOTE(review): regionStates appears to be a field/variable initialized
// elsewhere in this test class — not visible in this hunk; confirm.
ServerName server = regionStates.getRegionServerOfRegion(hriOverlap);
TEST_UTIL.assertRegionOnServer(hriOverlap, server, REGION_ONLINE_TIMEOUT);
// Baseline: a normal hbck run sees the orphan HDFS region plus the chain problems.
HBaseFsck hbck = doFsck(conf, false);
assertErrors(hbck, new ERROR_CODE[] {
ERROR_CODE.ORPHAN_HDFS_REGION, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN});
// With HDFS checking disabled, ORPHAN_HDFS_REGION (an HDFS-only finding)
// is not reported; only the region-chain hole remains visible.
HBaseFsck fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.onlineHbck();
assertErrors(fsck, new ERROR_CODE[] {
ERROR_CODE.HOLE_IN_REGION_CHAIN});
// The HDFS repair flags are inert under noHdfsChecking: nothing is fixed,
// no rerun is requested, and the same error remains.
fsck = new HBaseFsck(conf);
fsck.connect();
fsck.setDisplayFullReport(); // i.e. -details
fsck.setTimeLag(0);
fsck.setCheckHdfs(false);
fsck.setFixHdfsHoles(true);
fsck.setFixHdfsOverlaps(true);
fsck.setFixHdfsOrphans(true);
fsck.onlineHbck();
assertFalse(fsck.shouldRerun());
assertErrors(fsck, new ERROR_CODE[] {
ERROR_CODE.HOLE_IN_REGION_CHAIN});
} finally {
// Re-enable before cleanup in case the test failed while the table was disabled.
if (TEST_UTIL.getHBaseAdmin().isTableDisabled(table)) {
TEST_UTIL.getHBaseAdmin().enableTable(table);
}
deleteTable(table);
}
}
/**
* We don't have an easy way to verify that a flush completed, so we loop until we find a
* legitimate hfile and return it.