HBASE-12070 Add an option to hbck to fix ZK inconsistencies (Stephen Yuan Jiang)

This commit is contained in:
Enis Soztutar 2015-02-11 17:53:11 -08:00
parent f886fbed79
commit 4969368bf1
5 changed files with 215 additions and 23 deletions

View File

@ -120,18 +120,52 @@ public class ZKTableStateClientSideReader {
*/
public static Set<TableName> getDisabledOrDisablingTables(ZooKeeperWatcher zkw)
    throws KeeperException, InterruptedException {
  // Delegate to the shared state filter, which lists the table znodes itself.
  // No local result set or separate child listing is needed here; keeping them
  // would only cost an extra zookeeper round-trip whose result is never used.
  return getTablesInStates(
    zkw,
    ZooKeeperProtos.Table.State.DISABLED,
    ZooKeeperProtos.Table.State.DISABLING);
}
/**
 * Collects every table whose state in zookeeper is currently ENABLING.
 * @param zkw ZooKeeperWatcher instance to use
 * @return Set of enabling tables, empty Set if none
 * @throws KeeperException propagated from the underlying zookeeper lookup
 * @throws InterruptedException propagated from the underlying zookeeper lookup
 */
public static Set<TableName> getEnablingTables(ZooKeeperWatcher zkw)
    throws KeeperException, InterruptedException {
  // Single-state query against the shared state filter.
  final Set<TableName> enabling =
    getTablesInStates(zkw, ZooKeeperProtos.Table.State.ENABLING);
  return enabling;
}
/**
 * Gets the set of tables whose state in zookeeper is one of the given states.
 * @param zkw ZooKeeperWatcher instance to use
 * @param states the states a table may be in to be included in the result
 * @return Set of tables in one of the states, empty Set if none
 * @throws KeeperException propagated from the zookeeper reads
 * @throws InterruptedException propagated from the zookeeper reads
 */
private static Set<TableName> getTablesInStates(
    ZooKeeperWatcher zkw,
    ZooKeeperProtos.Table.State... states)
    throws KeeperException, InterruptedException {
  Set<TableName> tableNameSet = new HashSet<TableName>();
  List<String> children = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
  if (children == null) {
    // NOTE(review): listChildrenNoWatch is understood to return null when the
    // table znode does not exist - confirm against ZKUtil. Honour the documented
    // "empty Set if none" contract instead of failing on the loop below.
    return tableNameSet;
  }
  for (String child : children) {
    TableName tableName = TableName.valueOf(child);
    ZooKeeperProtos.Table.State tableState = getTableState(zkw, tableName);
    // A table is added at most once; stop at the first matching state.
    for (ZooKeeperProtos.Table.State state : states) {
      if (tableState == state) {
        tableNameSet.add(tableName);
        break;
      }
    }
  }
  return tableNameSet;
}
static boolean isTableState(final ZooKeeperProtos.Table.State expectedState,

View File

@ -54,7 +54,6 @@ import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -71,6 +70,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.CoordinatedStateException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
@ -110,6 +110,7 @@ import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
@ -124,6 +125,7 @@ import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
import org.apache.hadoop.hbase.wal.WALSplitter;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
@ -238,6 +240,7 @@ public class HBaseFsck extends Configured implements Closeable {
private boolean fixReferenceFiles = false; // fix lingering reference store file
private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
private boolean fixTableLocks = false; // fix table locks which are expired
private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
private boolean fixAny = false; // Set to true if any of the fix is required.
// limit checking/fixes to listed tables, if empty attempt to check/fix all
@ -292,6 +295,11 @@ public class HBaseFsck extends Configured implements Closeable {
private Map<TableName, Set<String>> orphanTableDirs =
new HashMap<TableName, Set<String>>();
/**
* List of orphaned table ZNodes
*/
private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
/**
* Constructor
*
@ -622,6 +630,9 @@ public class HBaseFsck extends Configured implements Closeable {
checkAndFixTableLocks();
// Check (and fix if requested) orphaned table ZNodes
checkAndFixOrphanedTableZNodes();
// Remove the hbck lock
unlockHbck();
@ -3011,11 +3022,69 @@ public class HBaseFsck extends Configured implements Closeable {
}
private void checkAndFixTableLocks() throws IOException {
TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
checker.checkTableLocks();
ZooKeeperWatcher zkw = createZooKeeperWatcher();
if (this.fixTableLocks) {
checker.fixExpiredTableLocks();
try {
TableLockChecker checker = new TableLockChecker(zkw, errors);
checker.checkTableLocks();
if (this.fixTableLocks) {
checker.fixExpiredTableLocks();
}
} finally {
zkw.close();
}
}
/**
 * Looks for tables that zookeeper lists as ENABLING but that have no entry in
 * {@code tablesInfo}, reports each one as an orphaned table ZNode and, if the fix was
 * requested, sets the state of every recorded orphan to DISABLED.
 * @throws IOException
 * @throws KeeperException
 * @throws InterruptedException
 */
private void checkAndFixOrphanedTableZNodes()
    throws IOException, KeeperException, InterruptedException {
  ZooKeeperWatcher zkw = createZooKeeperWatcher();
  try {
    for (TableName candidate : ZKTableStateClientSideReader.getEnablingTables(zkw)) {
      // A table known to hbck is merely in transition; only unknown ones are orphans.
      if (tablesInfo.get(candidate) == null) {
        String msg =
          "Table " + candidate + " not found in hbase:meta. Orphaned table ZNode found.";
        LOG.warn(msg);
        orphanedTableZNodes.add(candidate);
        errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
      }
    }

    if (this.fixTableZNodes && !orphanedTableZNodes.isEmpty()) {
      ZKTableStateManager stateManager = new ZKTableStateManager(zkw);
      for (TableName orphan : orphanedTableZNodes) {
        try {
          // Mark the table DISABLED rather than deleting the ZNode, so that the history
          // can still be traced if this turns out to be a mistake. Removing the orphaned
          // ZNode via checkAndRemoveTableState() would be the alternative.
          stateManager.setTableState(orphan, ZooKeeperProtos.Table.State.DISABLED);
        } catch (CoordinatedStateException e) {
          // Not expected here; log it and carry on with the remaining orphans.
          LOG.error(
            "Got a CoordinatedStateException while fixing the ENABLING table znode " + orphan,
            e);
        }
      }
    }
  } finally {
    zkw.close();
  }
}
@ -3533,7 +3602,7 @@ public class HBaseFsck extends Configured implements Closeable {
FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR
WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
}
void clear();
void report(String message);
@ -3900,6 +3969,15 @@ public class HBaseFsck extends Configured implements Closeable {
fixAny |= shouldFix;
}
/**
 * Turns the orphaned table ZNode fix on or off.
 * When the fix is on, the table state in an orphaned table ZNode is set to disabled.
 * Switching it on also raises the general "some fix was requested" flag.
 * @param shouldFix whether orphaned table ZNodes should be fixed
 */
public void setFixTableZNodes(boolean shouldFix) {
  this.fixTableZNodes = shouldFix;
  if (shouldFix) {
    this.fixAny = true;
  }
}
/**
* Check if we should rerun fsck again. This checks if we've tried to
* fix something and we should rerun fsck tool again.
@ -4150,13 +4228,18 @@ public class HBaseFsck extends Configured implements Closeable {
out.println("");
out.println(" Metadata Repair shortcuts");
out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
"-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
"-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
out.println("");
out.println(" Table lock options");
out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
out.println("");
out.println(" Table Znode options");
out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists");
out.flush();
errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
@ -4290,6 +4373,7 @@ public class HBaseFsck extends Configured implements Closeable {
setCheckHdfs(true);
setFixReferenceFiles(true);
setFixTableLocks(true);
setFixTableZNodes(true);
} else if (cmd.equals("-repairHoles")) {
// this will make all missing hdfs regions available but may lose data
setFixHdfsHoles(true);
@ -4338,6 +4422,8 @@ public class HBaseFsck extends Configured implements Closeable {
setRegionBoundariesCheck();
} else if (cmd.equals("-fixTableLocks")) {
setFixTableLocks(true);
} else if (cmd.equals("-fixOrphanedTableZnodes")) {
setFixTableZNodes(true);
} else if (cmd.startsWith("-")) {
errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
return printUsageAndExit();

View File

@ -146,7 +146,23 @@ public class ZKTableStateManager implements TableStateManager {
throws CoordinatedStateException {
synchronized (this.cache) {
if (isTableState(tableName, states)) {
return false;
// If the table is in the one of the states from the states list, the cache
// might be out-of-date, try to find it out from the master source (zookeeper server).
//
// Note: this adds extra zookeeper server calls and might have performance impact.
// However, this is not the happy path so we should not reach here often. Therefore,
// the performance impact should be minimal to none.
try {
ZooKeeperProtos.Table.State curstate = getTableState(watcher, tableName);
if (isTableInState(Arrays.asList(states), curstate)) {
return false;
}
} catch (KeeperException e) {
throw new CoordinatedStateException(e);
} catch (InterruptedException e) {
throw new CoordinatedStateException(e);
}
}
try {
setTableStateInZK(tableName, newState);

View File

@ -66,6 +66,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.RegionLocations;
@ -100,6 +101,7 @@ import org.apache.hadoop.hbase.master.TableLockManager;
import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
@ -1610,7 +1612,8 @@ public class TestHBaseFsck {
// TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
// for some time until children references are deleted. HBCK erroneously sees this as
// overlapping regions
HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
HBaseFsck hbck = doFsck(
conf, true, true, false, false, false, true, true, true, false, false, false, null);
assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
// assert that the split hbase:meta entry is still there.
@ -1678,7 +1681,8 @@ public class TestHBaseFsck {
ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
// now fix it. The fix should not revert the region split, but add daughters to META
hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
hbck = doFsck(
conf, true, true, false, false, false, false, false, false, false, false, false, null);
assertErrors(hbck,
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.HOLE_IN_REGION_CHAIN });
@ -2332,7 +2336,7 @@ public class TestHBaseFsck {
}
}
@Test(timeout=60000)
@Test(timeout=180000)
public void testCheckTableLocks() throws Exception {
IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
EnvironmentEdgeManager.injectEdge(edge);
@ -2401,6 +2405,55 @@ public class TestHBaseFsck {
writeLock.release(); // release for clean state
}
/**
 * Test orphaned table ZNode (for table states).
 * Plants an ENABLING state for a table that does not exist, checks that table creation
 * is rejected and that hbck reports ORPHANED_ZK_TABLE_ENTRY, then lets hbck fix it and
 * verifies that the error is gone and the table can be created.
 */
@Test(timeout=180000)
public void testOrphanedTableZNode() throws Exception {
  TableName table = TableName.valueOf("testOrphanedZKTableEntry");

  try {
    TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
      .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);

    try {
      setupTable(table);
      Assert.fail(
        "Create table should fail when its ZNode has already existed with ENABLING state.");
    } catch(TableExistsException t) {
      // Expected exception
    }
    // The table setup was interrupted in some state that needs some cleanup.
    try {
      cleanupTable(table);
    } catch (IOException e) {
      // Because create table failed, it is expected that the cleanup table would
      // throw some exception. Ignore and continue.
    }

    HBaseFsck hbck = doFsck(conf, false);
    assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));

    // fix the orphaned ZK entry
    hbck = doFsck(conf, true);

    // check that the orphaned ZK table entry is gone.
    hbck = doFsck(conf, false);
    assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
    // Now create table should succeed.
    setupTable(table);
  } finally {
    // This code runs whether the table was created successfully or table setup
    // failed in some unknown state. Therefore, clean up can either succeed or fail.
    try {
      cleanupTable(table);
    } catch (IOException e) {
      // The cleanup table would throw some exception if create table failed in some state.
      // Ignore this exception
    }
  }
}
@Test (timeout=180000)
public void testMetaOffline() throws Exception {
// check no errors
@ -2606,7 +2659,8 @@ public class TestHBaseFsck {
// fix hole
assertErrors(
doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
doFsck(
conf, false, true, false, false, false, false, false, false, false, false, false, null),
new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
ERROR_CODE.NOT_IN_META_OR_DEPLOYED });

View File

@ -40,13 +40,14 @@ public class HbckTestingUtil {
/**
 * Runs hbck with every individual fix option set to the same value.
 * @param conf configuration handed to the full overload
 * @param fix value used for each of the fix flags
 * @param table table argument handed to the full overload
 * @return whatever the full overload returns
 * @throws Exception propagated from the full overload
 */
public static HBaseFsck doFsck(
    Configuration conf, boolean fix, TableName table) throws Exception {
  // One "fix" per boolean flag of the full overload, including fixTableZnodes.
  return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
}
public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks,
boolean fixTableZnodes,
TableName table) throws Exception {
HBaseFsck fsck = new HBaseFsck(conf, exec);
fsck.connect();
@ -62,6 +63,7 @@ public class HbckTestingUtil {
fsck.setFixReferenceFiles(fixReferenceFiles);
fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
fsck.setFixTableLocks(fixTableLocks);
fsck.setFixTableZNodes(fixTableZnodes);
if (table != null) {
fsck.includeTable(table);
}