HBASE-3407 hbck should pause between fixing and re-checking state

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1056023 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2011-01-06 19:32:32 +00:00
parent e89bfb07f3
commit 857d41472f
3 changed files with 43 additions and 13 deletions

View File

@ -820,6 +820,7 @@ Release 0.90.0 - Unreleased
a Close event; messes up state a Close event; messes up state
HBASE-3423 hbase-env.sh over-rides HBASE_OPTS incorrectly (Ted Dunning via HBASE-3423 hbase-env.sh over-rides HBASE_OPTS incorrectly (Ted Dunning via
Andrew Purtell) Andrew Purtell)
HBASE-3407 hbck should pause between fixing and re-checking state
IMPROVEMENTS IMPROVEMENTS

View File

@ -68,6 +68,7 @@ import com.google.common.collect.Lists;
*/ */
public class HBaseFsck { public class HBaseFsck {
public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute public static final long DEFAULT_TIME_LAG = 60000; // default value of 1 minute
public static final long DEFAULT_SLEEP_BEFORE_RERUN = 10000;
private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName()); private static final Log LOG = LogFactory.getLog(HBaseFsck.class.getName());
private Configuration conf; private Configuration conf;
@ -898,8 +899,8 @@ public class HBaseFsck {
* Fix inconsistencies found by fsck. This should try to fix errors (if any) * Fix inconsistencies found by fsck. This should try to fix errors (if any)
* found by fsck utility. * found by fsck utility.
*/ */
void setFixErrors() { void setFixErrors(boolean shouldFix) {
fix = true; fix = shouldFix;
} }
boolean shouldFix() { boolean shouldFix() {
@ -923,6 +924,8 @@ public class HBaseFsck {
" have not experienced any metadata updates in the last " + " have not experienced any metadata updates in the last " +
" {{timeInSeconds} seconds."); " {{timeInSeconds} seconds.");
System.err.println(" -fix Try to fix some of the errors."); System.err.println(" -fix Try to fix some of the errors.");
System.err.println(" -sleepBeforeRerun {timeInSeconds} Sleep this many seconds" +
" before checking if the fix worked if run with -fix");
System.err.println(" -summary Print only summary of the tables and status."); System.err.println(" -summary Print only summary of the tables and status.");
Runtime.getRuntime().exit(-2); Runtime.getRuntime().exit(-2);
@ -939,6 +942,7 @@ public class HBaseFsck {
Configuration conf = HBaseConfiguration.create(); Configuration conf = HBaseConfiguration.create();
conf.set("fs.defaultFS", conf.get("hbase.rootdir")); conf.set("fs.defaultFS", conf.get("hbase.rootdir"));
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
long sleepBeforeRerun = DEFAULT_SLEEP_BEFORE_RERUN;
// Process command-line args. // Process command-line args.
for (int i = 0; i < args.length; i++) { for (int i = 0; i < args.length; i++) {
@ -958,8 +962,20 @@ public class HBaseFsck {
printUsageAndExit(); printUsageAndExit();
} }
i++; i++;
} else if (cmd.equals("-sleepBeforeRerun")) {
  // The usage text documents this option's value in seconds, while the
  // stored value is later passed to Thread.sleep(), which takes
  // milliseconds. Convert here so the flag behaves as documented.
  if (i == args.length - 1) {
    System.err.println("HBaseFsck: -sleepBeforeRerun needs a value.");
    printUsageAndExit();
  }
  try {
    long sleepSeconds = Long.parseLong(args[i+1]);
    if (sleepSeconds < 0) {
      // Thread.sleep() rejects negative durations; fail during argument
      // parsing rather than after the fix pass has already run.
      System.err.println("-sleepBeforeRerun needs a non-negative value.");
      printUsageAndExit();
    }
    // sleepBeforeRerun is kept in milliseconds, like its default value.
    sleepBeforeRerun = sleepSeconds * 1000L;
  } catch (NumberFormatException e) {
    System.err.println("-sleepBeforeRerun needs a numeric value.");
    printUsageAndExit();
  }
  i++;
} else if (cmd.equals("-fix")) { } else if (cmd.equals("-fix")) {
fsck.setFixErrors(); fsck.setFixErrors(true);
} else if (cmd.equals("-summary")) { } else if (cmd.equals("-summary")) {
fsck.setSummary(); fsck.setSummary();
} else { } else {
@ -976,6 +992,14 @@ public class HBaseFsck {
// We run it only once more because otherwise we can easily fall into // We run it only once more because otherwise we can easily fall into
// an infinite loop. // an infinite loop.
if (fsck.shouldRerun()) { if (fsck.shouldRerun()) {
try {
LOG.info("Sleeping " + sleepBeforeRerun + "ms before re-checking after fix...");
Thread.sleep(sleepBeforeRerun);
} catch (InterruptedException ie) {
Runtime.getRuntime().exit(code);
}
// Just report
fsck.setFixErrors(false);
code = fsck.doWork(); code = fsck.doWork();
} }

View File

@ -50,19 +50,24 @@ public class TestHBaseFsck {
TEST_UTIL.startMiniCluster(3); TEST_UTIL.startMiniCluster(3);
} }
@Test private int doFsck(boolean fix) throws Exception {
public void testHBaseFsck() throws Exception {
HBaseFsck fsck = new HBaseFsck(conf); HBaseFsck fsck = new HBaseFsck(conf);
fsck.displayFullReport(); fsck.displayFullReport();
fsck.setTimeLag(0); fsck.setTimeLag(0);
fsck.setFixErrors(fix);
// Most basic check ever, 0 tables // Most basic check ever, 0 tables
int result = fsck.doWork(); return fsck.doWork();
}
@Test
public void testHBaseFsck() throws Exception {
int result = doFsck(false);
assertEquals(0, result); assertEquals(0, result);
TEST_UTIL.createTable(TABLE, FAM); TEST_UTIL.createTable(TABLE, FAM);
// We created 1 table, should be fine // We created 1 table, should be fine
result = fsck.doWork(); result = doFsck(false);
assertEquals(0, result); assertEquals(0, result);
// Now let's mess it up and change the assignment in .META. to // Now let's mess it up and change the assignment in .META. to
@ -92,14 +97,14 @@ public class TestHBaseFsck {
} }
} }
// We set this here, but it's really not fixing anything... // Try to fix the data
fsck.setFixErrors(); result = doFsck(true);
result = fsck.doWork();
// Fixed or not, it still reports inconsistencies
assertEquals(-1, result); assertEquals(-1, result);
Thread.sleep(15000); Thread.sleep(15000);
// Disabled, won't work because the region stays unassigned, see HBASE-3217 result = doFsck(false);
// new HTable(conf, TABLE).getScanner(new Scan()); // Should have fixed
assertEquals(0, result);
new HTable(conf, TABLE).getScanner(new Scan());
} }
} }