HBASE-12070 Add an option to hbck to fix ZK inconsistencies (Stephen Yuan Jiang)

parent f886fbed79
commit 4969368bf1

@@ -120,18 +120,52 @@ public class ZKTableStateClientSideReader {
    */
   public static Set<TableName> getDisabledOrDisablingTables(ZooKeeperWatcher zkw)
       throws KeeperException, InterruptedException {
-    Set<TableName> disabledTables = new HashSet<TableName>();
-    List<String> children =
-      ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
+    return
+        getTablesInStates(
+          zkw,
+          ZooKeeperProtos.Table.State.DISABLED,
+          ZooKeeperProtos.Table.State.DISABLING);
+  }
+
+  /**
+   * Gets a list of all the tables set as enabling in zookeeper.
+   * @param zkw ZooKeeperWatcher instance to use
+   * @return Set of enabling tables, empty Set if none
+   * @throws KeeperException
+   * @throws InterruptedException
+   */
+  public static Set<TableName> getEnablingTables(ZooKeeperWatcher zkw)
+      throws KeeperException, InterruptedException {
+    return getTablesInStates(zkw, ZooKeeperProtos.Table.State.ENABLING);
+  }
+
+  /**
+   * Gets a list of tables that are set as one of the passing in states in zookeeper.
+   * @param zkw ZooKeeperWatcher instance to use
+   * @param states the list of states that a table could be in
+   * @return Set of tables in one of the states, empty Set if none
+   * @throws KeeperException
+   * @throws InterruptedException
+   */
+  private static Set<TableName> getTablesInStates(
+      ZooKeeperWatcher zkw,
+      ZooKeeperProtos.Table.State... states)
+      throws KeeperException, InterruptedException {
+    Set<TableName> tableNameSet = new HashSet<TableName>();
+    List<String> children = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
+    TableName tableName;
+    ZooKeeperProtos.Table.State tableState;
     for (String child: children) {
-      TableName tableName =
-          TableName.valueOf(child);
-      ZooKeeperProtos.Table.State state = getTableState(zkw, tableName);
-      if (state == ZooKeeperProtos.Table.State.DISABLED ||
-          state == ZooKeeperProtos.Table.State.DISABLING)
-        disabledTables.add(tableName);
+      tableName = TableName.valueOf(child);
+      tableState = getTableState(zkw, tableName);
+      for (ZooKeeperProtos.Table.State state : states) {
+        if (tableState == state) {
+          tableNameSet.add(tableName);
+          break;
+        }
+      }
     }
-    return disabledTables;
+    return tableNameSet;
   }
 
   static boolean isTableState(final ZooKeeperProtos.Table.State expectedState,
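
Not part of the commit: a minimal usage sketch of the client-side reader helpers touched above.
It assumes a reachable cluster, the ZooKeeperWatcher(Configuration, String, Abortable)
constructor, and HBaseConfiguration.create(); both helpers are static and can throw
KeeperException or InterruptedException, covered here by "throws Exception".

import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;

public class TableStateDump {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // null Abortable is used here purely for brevity in this sketch
    ZooKeeperWatcher zkw = new ZooKeeperWatcher(conf, "table-state-dump", null);
    try {
      // New helper added by this commit: tables whose znode is in ENABLING state
      Set<TableName> enabling = ZKTableStateClientSideReader.getEnablingTables(zkw);
      // Existing helper, now routed through getTablesInStates(...)
      Set<TableName> disabledOrDisabling =
          ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw);
      System.out.println("ENABLING: " + enabling);
      System.out.println("DISABLED/DISABLING: " + disabledOrDisabling);
    } finally {
      zkw.close();
    }
  }
}
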
@@ -54,7 +54,6 @@ import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
-
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -71,6 +70,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.ClusterStatus;
+import org.apache.hadoop.hbase.CoordinatedStateException;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HColumnDescriptor;
@@ -110,6 +110,7 @@ import org.apache.hadoop.hbase.master.MasterFileSystem;
 import org.apache.hadoop.hbase.master.RegionState;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService.BlockingInterface;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
@@ -124,6 +125,7 @@ import org.apache.hadoop.hbase.util.hbck.TableLockChecker;
 import org.apache.hadoop.hbase.wal.WALSplitter;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
 import org.apache.hadoop.hbase.zookeeper.ZKTableStateClientSideReader;
+import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
@@ -238,6 +240,7 @@ public class HBaseFsck extends Configured implements Closeable {
   private boolean fixReferenceFiles = false; // fix lingering reference store file
   private boolean fixEmptyMetaCells = false; // fix (remove) empty REGIONINFO_QUALIFIER rows
   private boolean fixTableLocks = false; // fix table locks which are expired
+  private boolean fixTableZNodes = false; // fix table Znodes which are orphaned
   private boolean fixAny = false; // Set to true if any of the fix is required.
 
   // limit checking/fixes to listed tables, if empty attempt to check/fix all
@@ -292,6 +295,11 @@ public class HBaseFsck extends Configured implements Closeable {
   private Map<TableName, Set<String>> orphanTableDirs =
       new HashMap<TableName, Set<String>>();
 
+  /**
+   * List of orphaned table ZNodes
+   */
+  private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
+
   /**
    * Constructor
    *
@@ -622,6 +630,9 @@ public class HBaseFsck extends Configured implements Closeable {
 
     checkAndFixTableLocks();
 
+    // Check (and fix if requested) orphaned table ZNodes
+    checkAndFixOrphanedTableZNodes();
+
     // Remove the hbck lock
     unlockHbck();
 
@@ -3011,11 +3022,69 @@ public class HBaseFsck extends Configured implements Closeable {
   }
 
   private void checkAndFixTableLocks() throws IOException {
-    TableLockChecker checker = new TableLockChecker(createZooKeeperWatcher(), errors);
-    checker.checkTableLocks();
-
-    if (this.fixTableLocks) {
-      checker.fixExpiredTableLocks();
+    ZooKeeperWatcher zkw = createZooKeeperWatcher();
+
+    try {
+      TableLockChecker checker = new TableLockChecker(zkw, errors);
+      checker.checkTableLocks();
+
+      if (this.fixTableLocks) {
+        checker.fixExpiredTableLocks();
+      }
+    } finally {
+      zkw.close();
+    }
+  }
+
+  /**
+   * Check whether a orphaned table ZNode exists and fix it if requested.
+   * @throws IOException
+   * @throws KeeperException
+   * @throws InterruptedException
+   */
+  private void checkAndFixOrphanedTableZNodes()
+      throws IOException, KeeperException, InterruptedException {
+    ZooKeeperWatcher zkw = createZooKeeperWatcher();
+
+    try {
+      Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
+      String msg;
+      TableInfo tableInfo;
+
+      for (TableName tableName : enablingTables) {
+        // Check whether the table exists in hbase
+        tableInfo = tablesInfo.get(tableName);
+        if (tableInfo != null) {
+          // Table exists. This table state is in transit. No problem for this table.
+          continue;
+        }
+
+        msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
+        LOG.warn(msg);
+        orphanedTableZNodes.add(tableName);
+        errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
+      }
+
+      if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
+        ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
+
+        for (TableName tableName : orphanedTableZNodes) {
+          try {
+            // Set the table state to be disabled so that if we made mistake, we can trace
+            // the history and figure it out.
+            // Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
+            // Both approaches works.
+            zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
+          } catch (CoordinatedStateException e) {
+            // This exception should not happen here
+            LOG.error(
+              "Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
+              e);
+          }
+        }
+      }
+    } finally {
+      zkw.close();
     }
   }
 
@@ -3533,7 +3602,7 @@ public class HBaseFsck extends Configured implements Closeable {
       FIRST_REGION_STARTKEY_NOT_EMPTY, LAST_REGION_ENDKEY_NOT_EMPTY, DUPE_STARTKEYS,
       HOLE_IN_REGION_CHAIN, OVERLAP_IN_REGION_CHAIN, REGION_CYCLE, DEGENERATE_REGION,
       ORPHAN_HDFS_REGION, LINGERING_SPLIT_PARENT, NO_TABLEINFO_FILE, LINGERING_REFERENCE_HFILE,
-      WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, BOUNDARIES_ERROR
+      WRONG_USAGE, EMPTY_META_CELL, EXPIRED_TABLE_LOCK, ORPHANED_ZK_TABLE_ENTRY, BOUNDARIES_ERROR
     }
     void clear();
     void report(String message);
@@ -3900,6 +3969,15 @@ public class HBaseFsck extends Configured implements Closeable {
     fixAny |= shouldFix;
   }
 
+  /**
+   * Set orphaned table ZNodes fix mode.
+   * Set the table state to disable in the orphaned table ZNode.
+   */
+  public void setFixTableZNodes(boolean shouldFix) {
+    fixTableZNodes = shouldFix;
+    fixAny |= shouldFix;
+  }
+
   /**
    * Check if we should rerun fsck again. This checks if we've tried to
    * fix something and we should rerun fsck tool again.
@@ -4150,13 +4228,18 @@ public class HBaseFsck extends Configured implements Closeable {
     out.println("");
     out.println(" Metadata Repair shortcuts");
     out.println(" -repair Shortcut for -fixAssignments -fixMeta -fixHdfsHoles " +
-        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps -fixReferenceFiles -fixTableLocks");
+        "-fixHdfsOrphans -fixHdfsOverlaps -fixVersionFile -sidelineBigOverlaps " +
+        "-fixReferenceFiles -fixTableLocks -fixOrphanedTableZnodes");
     out.println(" -repairHoles Shortcut for -fixAssignments -fixMeta -fixHdfsHoles");
 
     out.println("");
     out.println(" Table lock options");
     out.println(" -fixTableLocks Deletes table locks held for a long time (hbase.table.lock.expire.ms, 10min by default)");
 
+    out.println("");
+    out.println(" Table Znode options");
+    out.println(" -fixOrphanedTableZnodes Set table state in ZNode to disabled if table does not exists");
+
     out.flush();
     errors.reportError(ERROR_CODE.WRONG_USAGE, sw.toString());
 
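
Not part of the commit: a rough sketch of driving the new repair path programmatically; the
command-line equivalent is running hbck with the -fixOrphanedTableZnodes option documented in
the usage text above. The single-argument HBaseFsck constructor and the onlineHbck() entry
point are assumptions here; setFixTableZNodes(boolean) is the setter this change introduces.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.util.HBaseFsck;

public class FixOrphanedTableZnodes {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    HBaseFsck fsck = new HBaseFsck(conf);  // assumed constructor taking only a Configuration
    fsck.connect();
    fsck.setFixTableZNodes(true);          // turn on the new orphaned-table-ZNode fix
    int retCode = fsck.onlineHbck();       // assumed check-and-repair entry point
    System.exit(retCode);
  }
}
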
@@ -4290,6 +4373,7 @@ public class HBaseFsck extends Configured implements Closeable {
         setCheckHdfs(true);
         setFixReferenceFiles(true);
         setFixTableLocks(true);
+        setFixTableZNodes(true);
       } else if (cmd.equals("-repairHoles")) {
         // this will make all missing hdfs regions available but may lose data
         setFixHdfsHoles(true);
@@ -4338,6 +4422,8 @@ public class HBaseFsck extends Configured implements Closeable {
         setRegionBoundariesCheck();
       } else if (cmd.equals("-fixTableLocks")) {
         setFixTableLocks(true);
+      } else if (cmd.equals("-fixOrphanedTableZnodes")) {
+        setFixTableZNodes(true);
       } else if (cmd.startsWith("-")) {
         errors.reportError(ERROR_CODE.WRONG_USAGE, "Unrecognized option:" + cmd);
         return printUsageAndExit();
@@ -146,7 +146,23 @@ public class ZKTableStateManager implements TableStateManager {
       throws CoordinatedStateException {
     synchronized (this.cache) {
       if (isTableState(tableName, states)) {
-        return false;
+        // If the table is in the one of the states from the states list, the cache
+        // might be out-of-date, try to find it out from the master source (zookeeper server).
+        //
+        // Note: this adds extra zookeeper server calls and might have performance impact.
+        // However, this is not the happy path so we should not reach here often. Therefore,
+        // the performance impact should be minimal to none.
+        try {
+          ZooKeeperProtos.Table.State curstate = getTableState(watcher, tableName);
+
+          if (isTableInState(Arrays.asList(states), curstate)) {
+            return false;
+          }
+        } catch (KeeperException e) {
+          throw new CoordinatedStateException(e);
+        } catch (InterruptedException e) {
+          throw new CoordinatedStateException(e);
+        }
       }
       try {
         setTableStateInZK(tableName, newState);
@@ -66,6 +66,7 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.MiniHBaseCluster;
 import org.apache.hadoop.hbase.RegionLocations;
@@ -100,6 +101,7 @@ import org.apache.hadoop.hbase.master.TableLockManager;
 import org.apache.hadoop.hbase.master.TableLockManager.TableLock;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
+import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
@@ -1610,7 +1612,8 @@ public class TestHBaseFsck {
     // TODO: fixHdfsHoles does not work against splits, since the parent dir lingers on
     // for some time until children references are deleted. HBCK erroneously sees this as
     // overlapping regions
-    HBaseFsck hbck = doFsck(conf, true, true, false, false, false, true, true, true, false, false, null);
+    HBaseFsck hbck = doFsck(
+      conf, true, true, false, false, false, true, true, true, false, false, false, null);
     assertErrors(hbck, new ERROR_CODE[] {}); //no LINGERING_SPLIT_PARENT reported
 
     // assert that the split hbase:meta entry is still there.
@@ -1678,7 +1681,8 @@ public class TestHBaseFsck {
         ERROR_CODE.HOLE_IN_REGION_CHAIN }); //no LINGERING_SPLIT_PARENT
 
     // now fix it. The fix should not revert the region split, but add daughters to META
-    hbck = doFsck(conf, true, true, false, false, false, false, false, false, false, false, null);
+    hbck = doFsck(
+      conf, true, true, false, false, false, false, false, false, false, false, false, null);
     assertErrors(hbck,
       new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED, ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
         ERROR_CODE.HOLE_IN_REGION_CHAIN });
@@ -2332,7 +2336,7 @@ public class TestHBaseFsck {
     }
   }
 
-  @Test(timeout=60000)
+  @Test(timeout=180000)
   public void testCheckTableLocks() throws Exception {
     IncrementingEnvironmentEdge edge = new IncrementingEnvironmentEdge(0);
     EnvironmentEdgeManager.injectEdge(edge);
@@ -2401,6 +2405,55 @@ public class TestHBaseFsck {
     writeLock.release(); // release for clean state
   }
 
+  /**
+   * Test orphaned table ZNode (for table states)
+   */
+  @Test
+  public void testOrphanedTableZNode() throws Exception {
+    TableName table = TableName.valueOf("testOrphanedZKTableEntry");
+
+    try {
+      TEST_UTIL.getHBaseCluster().getMaster().getAssignmentManager().getTableStateManager()
+          .setTableState(table, ZooKeeperProtos.Table.State.ENABLING);
+
+      try {
+        setupTable(table);
+        Assert.fail(
+          "Create table should fail when its ZNode has already existed with ENABLING state.");
+      } catch(TableExistsException t) {
+        //Expected exception
+      }
+      // The setup table was interrupted in some state that needs to some cleanup.
+      try {
+        cleanupTable(table);
+      } catch (IOException e) {
+        // Because create table failed, it is expected that the cleanup table would
+        // throw some exception. Ignore and continue.
+      }
+
+      HBaseFsck hbck = doFsck(conf, false);
+      assertTrue(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
+
+      // fix the orphaned ZK entry
+      hbck = doFsck(conf, true);
+
+      // check that orpahned ZK table entry is gone.
+      hbck = doFsck(conf, false);
+      assertFalse(hbck.getErrors().getErrorList().contains(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY));
+      // Now create table should succeed.
+      setupTable(table);
+    } finally {
+      // This code could be called that either a table was created successfully or set up
+      // table failed in some unknown state. Therefore, clean up can either succeed or fail.
+      try {
+        cleanupTable(table);
+      } catch (IOException e) {
+        // The cleanup table would throw some exception if create table failed in some state.
+        // Ignore this exception
+      }
+    }
+  }
+
   @Test (timeout=180000)
   public void testMetaOffline() throws Exception {
     // check no errors
@@ -2606,7 +2659,8 @@ public class TestHBaseFsck {
 
     // fix hole
     assertErrors(
-      doFsck(conf, false, true, false, false, false, false, false, false, false, false, null),
+      doFsck(
+        conf, false, true, false, false, false, false, false, false, false, false, false, null),
       new ERROR_CODE[] { ERROR_CODE.NOT_IN_META_OR_DEPLOYED,
         ERROR_CODE.NOT_IN_META_OR_DEPLOYED });
 
@@ -40,13 +40,14 @@ public class HbckTestingUtil {
 
   public static HBaseFsck doFsck(
       Configuration conf, boolean fix, TableName table) throws Exception {
-    return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
+    return doFsck(conf, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, fix, table);
   }
 
   public static HBaseFsck doFsck(Configuration conf, boolean fixAssignments,
       boolean fixMeta, boolean fixHdfsHoles, boolean fixHdfsOverlaps,
       boolean fixHdfsOrphans, boolean fixTableOrphans, boolean fixVersionFile,
       boolean fixReferenceFiles, boolean fixEmptyMetaRegionInfo, boolean fixTableLocks,
+      boolean fixTableZnodes,
       TableName table) throws Exception {
     HBaseFsck fsck = new HBaseFsck(conf, exec);
     fsck.connect();
@@ -62,6 +63,7 @@ public class HbckTestingUtil {
     fsck.setFixReferenceFiles(fixReferenceFiles);
     fsck.setFixEmptyMetaCells(fixEmptyMetaRegionInfo);
     fsck.setFixTableLocks(fixTableLocks);
+    fsck.setFixTableZNodes(fixTableZnodes);
     if (table != null) {
       fsck.includeTable(table);
     }
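
Not part of the commit: purely for readability, a hypothetical call showing how the positional
booleans of the extended doFsck(...) overload line up with their parameter names (order taken
from the signature above); the new fixTableZnodes flag is the last boolean before the TableName
argument. A call like this would sit inside a test method declared with "throws Exception".

HBaseFsck hbck = HbckTestingUtil.doFsck(conf,
    true,   // fixAssignments
    true,   // fixMeta
    false,  // fixHdfsHoles
    false,  // fixHdfsOverlaps
    false,  // fixHdfsOrphans
    false,  // fixTableOrphans
    false,  // fixVersionFile
    false,  // fixReferenceFiles
    false,  // fixEmptyMetaRegionInfo
    false,  // fixTableLocks
    true,   // fixTableZnodes (added by this change)
    null);  // TableName table: null means check all tables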