diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 7e71994e04a..bbbf9aed58e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -231,9 +231,9 @@ public class HBaseFsck extends Configured implements Closeable { * Options ***********/ private static boolean details = false; // do we display the full report - private static boolean useLock = true; // do we use the hbck exclusivity lock - private static boolean switchBalancer = true; // do we turn the balancer off while running private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older + private static boolean forceExclusive = false; // only this hbck can modify HBase + private static boolean disableBalancer = false; // disable load balancer to keep regions stable private boolean fixAssignments = false; // fix assignment errors? private boolean fixMeta = false; // fix meta errors? private boolean checkHdfs = true; // load and check fs consistency? @@ -445,7 +445,7 @@ public class HBaseFsck extends Configured implements Closeable { } private void unlockHbck() { - if (hbckLockCleanup.compareAndSet(true, false)) { + if (isExclusive() && hbckLockCleanup.compareAndSet(true, false)) { RetryCounter retryCounter = lockFileRetryCounterFactory.create(); do { try { @@ -478,13 +478,13 @@ public class HBaseFsck extends Configured implements Closeable { */ public void connect() throws IOException { - if (useLock) { - // Check if another instance of balancer is running + if (isExclusive()) { + // Grab the lock hbckOutFd = checkAndMarkRunningHbck(); if (hbckOutFd == null) { setRetCode(-1); - LOG.error("Another instance of hbck is running, exiting this instance.[If you are sure" + - " no other instance is running, delete the lock file " + + LOG.error("Another instance of hbck is fixing HBase, exiting this instance. " + + "[If you are sure no other instance is running, delete the lock file " + HBCK_LOCK_PATH + " and rerun the tool]"); throw new IOException("Duplicate hbck - Abort"); } @@ -688,9 +688,8 @@ public class HBaseFsck extends Configured implements Closeable { errors.print("Version: " + status.getHBaseVersion()); offlineHdfsIntegrityRepair(); - boolean oldBalancer = true; - // turn the balancer off - if (switchBalancer) { + boolean oldBalancer = false; + if (shouldDisableBalancer()) { oldBalancer = admin.setBalancerRunning(false, true); } @@ -698,7 +697,10 @@ public class HBaseFsck extends Configured implements Closeable { onlineConsistencyRepair(); } finally { - if (switchBalancer) { + // Only restore the balancer if it was true when we started repairing and + // we actually disabled it. Otherwise, we might clobber another run of + // hbck that has just restored it. + if (shouldDisableBalancer() && oldBalancer) { admin.setBalancerRunning(oldBalancer, false); } } @@ -4150,12 +4152,34 @@ public class HBaseFsck extends Configured implements Closeable { details = true; } - public static void setNoLock() { - useLock = false; + /** + * Set exclusive mode. + */ + public static void setForceExclusive() { + forceExclusive = true; } - public static void setNoBalacerSwitch() { - switchBalancer = false; + /** + * Only one instance of hbck can modify HBase at a time. + */ + public boolean isExclusive() { + return fixAny || forceExclusive; + } + + /** + * Disable the load balancer. + */ + public static void setDisableBalancer() { + disableBalancer = true; + } + + /** + * The balancer should be disabled if we are modifying HBase. + * It can be disabled if you want to prevent region movement from causing + * false positives. + */ + public boolean shouldDisableBalancer() { + return fixAny || disableBalancer; } /** @@ -4410,8 +4434,8 @@ public class HBaseFsck extends Configured implements Closeable { out.println(" -metaonly Only check the state of the hbase:meta table."); out.println(" -sidelineDir HDFS path to backup existing meta."); out.println(" -boundaries Verify that regions boundaries are the same between META and store files."); - out.println(" -noLock Turn off using the hdfs lock file."); - out.println(" -noBalancerSwitch Don't switch the balancer off."); + out.println(" -exclusive Abort if another hbck is exclusive or fixing."); + out.println(" -disableBalancer Disable the load balancer."); out.println(""); out.println(" Metadata Repair options: (expert features, use with caution!)"); @@ -4501,10 +4525,10 @@ public class HBaseFsck extends Configured implements Closeable { return printUsageAndExit(); } else if (cmd.equals("-details")) { setDisplayFullReport(); - } else if (cmd.equals("-noLock")) { - setNoLock(); - } else if (cmd.equals("-noBalancerSwitch")) { - setNoBalacerSwitch(); + } else if (cmd.equals("-exclusive")) { + setForceExclusive(); + } else if (cmd.equals("-disableBalancer")) { + setDisableBalancer(); } else if (cmd.equals("-timelag")) { if (i == args.length - 1) { errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value."); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java index 597f6652420..3562a69a515 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestHBaseFsck.java @@ -611,7 +611,7 @@ public class TestHBaseFsck { // To avoid flakiness of the test, set low max wait time. c.setInt("hbase.hbck.lockfile.maxwaittime", 3); try{ - return doFsck(c, false); + return doFsck(c, true); // Exclusive hbck only when fixing } catch(Exception e){ if (e.getMessage().contains("Duplicate hbck")) { fail = false; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java index 3691cd73a44..dca0831294c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/hbck/HbckTestingUtil.java @@ -50,7 +50,6 @@ public class HbckTestingUtil { TableName table) throws Exception { HBaseFsck fsck = new HBaseFsck(conf, exec); try { - fsck.connect(); HBaseFsck.setDisplayFullReport(); // i.e. -details fsck.setTimeLag(0); fsck.setFixAssignments(fixAssignments); @@ -66,6 +65,9 @@ public class HbckTestingUtil { if (table != null) { fsck.includeTable(table); } + + // Parse command line flags before connecting, to grab the lock. + fsck.connect(); fsck.onlineHbck(); } finally { fsck.close();