HBASE-16008 A robust way to deal with early termination of HBCK (Stephen Yuan Jiang)

This commit is contained in:
Stephen Yuan Jiang 2016-07-23 20:21:32 -07:00
parent 276acdb0b0
commit a8dd359d7e
15 changed files with 1788 additions and 730 deletions

View File

@ -839,6 +839,13 @@ public interface Admin extends Abortable, Closeable {
*/
void stopMaster() throws IOException;
/**
 * Check whether Master is in maintenance mode
 *
 * @return true if the master reports that it is in maintenance mode, false otherwise
 * @throws IOException if a remote or network exception occurs
 */
boolean isMasterInMaintenanceMode() throws IOException;
/**
* Stop the designated regionserver
*

View File

@ -1862,6 +1862,13 @@ class ConnectionManager {
return stub.stopMaster(controller, request);
}
/**
 * Passes the maintenance-mode query straight through to the wrapped master stub.
 *
 * @param controller RPC controller handed to the stub unchanged
 * @param request the (empty) maintenance-mode request
 * @return whatever response the stub produces
 * @throws ServiceException if the stub call fails
 */
@Override
public MasterProtos.IsInMaintenanceModeResponse isMasterInMaintenanceMode(
    final RpcController controller, final MasterProtos.IsInMaintenanceModeRequest request)
    throws ServiceException {
  final MasterProtos.IsInMaintenanceModeResponse stubReply =
      stub.isMasterInMaintenanceMode(controller, request);
  return stubReply;
}
@Override
public BalanceResponse balance(RpcController controller,
BalanceRequest request) throws ServiceException {

View File

@ -125,6 +125,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetSchemaAlterSta
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableDescriptorsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.GetTableNamesRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsProcedureDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsRestoreSnapshotDoneRequest;
@ -2933,6 +2935,19 @@ public class HBaseAdmin implements Admin {
* @return cluster status
* @throws IOException if a remote or network exception occurs
*/
/**
 * Asks the master whether it is in maintenance mode.
 *
 * @return the {@code inMaintenanceMode} flag carried by the master's response
 * @throws IOException if a remote or network exception occurs
 */
@Override
public boolean isMasterInMaintenanceMode() throws IOException {
  final MasterCallable<IsInMaintenanceModeResponse> maintenanceQuery =
      new MasterCallable<IsInMaintenanceModeResponse>(getConnection()) {
        @Override
        public IsInMaintenanceModeResponse call(int callTimeout) throws ServiceException {
          // Each call gets its own controller, bounded by the timeout supplied by the caller.
          final PayloadCarryingRpcController rpcController =
              rpcControllerFactory.newController();
          rpcController.setCallTimeout(callTimeout);
          final IsInMaintenanceModeRequest emptyRequest =
              IsInMaintenanceModeRequest.newBuilder().build();
          return master.isMasterInMaintenanceMode(rpcController, emptyRequest);
        }
      };
  final IsInMaintenanceModeResponse masterReply = executeCallable(maintenanceQuery);
  return masterReply.getInMaintenanceMode();
}
@Override
public ClusterStatus getClusterStatus() throws IOException {
return executeCallable(new MasterCallable<ClusterStatus>(getConnection()) {

View File

@ -126,6 +126,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
public String recoveringRegionsZNode;
// znode containing namespace descriptors
public static String namespaceZNode = "namespace";
// znode indicating master maintenance mode
public static String masterMaintZNode = "masterMaintenance";
// Certain ZooKeeper nodes need to be world-readable
public static final ArrayList<ACL> CREATOR_ALL_AND_WORLD_READABLE =
@ -207,6 +209,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
ZKUtil.createAndFailSilent(this, backupMasterAddressesZNode);
ZKUtil.createAndFailSilent(this, tableLockZNode);
ZKUtil.createAndFailSilent(this, recoveringRegionsZNode);
ZKUtil.createAndFailSilent(this, masterMaintZNode);
} catch (KeeperException e) {
throw new ZooKeeperConnectionException(
prefix("Unexpected KeeperException creating base node"), e);
@ -457,6 +460,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable, Closeable {
conf.get("zookeeper.znode.recovering.regions", "recovering-regions"));
namespaceZNode = ZKUtil.joinZNode(baseZNode,
conf.get("zookeeper.znode.namespace", "namespace"));
masterMaintZNode = ZKUtil.joinZNode(baseZNode,
conf.get("zookeeper.znode.masterMaintenance", "master-maintenance"));
}
/**

View File

@ -247,6 +247,13 @@ message StopMasterRequest {
message StopMasterResponse {
}
// Request for the IsMasterInMaintenanceMode RPC; it carries no fields.
message IsInMaintenanceModeRequest {
}
// Response for the IsMasterInMaintenanceMode RPC.
message IsInMaintenanceModeResponse {
// True when the master reports that it is in maintenance mode.
required bool inMaintenanceMode = 1;
}
message BalanceRequest {
optional bool force = 1;
}
@ -618,6 +625,12 @@ service MasterService {
rpc StopMaster(StopMasterRequest)
returns(StopMasterResponse);
/**
* Query whether the Master is in maintenance mode.
*/
rpc IsMasterInMaintenanceMode(IsInMaintenanceModeRequest)
returns(IsInMaintenanceModeResponse);
/**
* Run the balancer. Will run the balancer and if regions to move, it will
* go ahead and do the reassignments. Can NOT run for various reasons.

View File

@ -111,6 +111,7 @@ public class CatalogJanitor extends ScheduledChore {
try {
AssignmentManager am = this.services.getAssignmentManager();
if (this.enabled.get()
&& !this.services.isInMaintenanceMode()
&& am != null
&& am.isFailoverCleanupDone()
&& am.getRegionStates().getRegionsInTransition().size() == 0) {
@ -242,6 +243,11 @@ public class CatalogJanitor extends ScheduledChore {
int mergeCleaned = 0;
Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
if (this.services.isInMaintenanceMode()) {
// Stop cleaning if the master is in maintenance mode
break;
}
HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(),
HConstants.MERGEA_QUALIFIER);
HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(),
@ -268,6 +274,11 @@ public class CatalogJanitor extends ScheduledChore {
// regions whose parents are still around
HashSet<String> parentNotCleaned = new HashSet<String>();
for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
if (this.services.isInMaintenanceMode()) {
// Stop cleaning if the master is in maintenance mode
break;
}
if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
cleanParent(e.getKey(), e.getValue())) {
splitCleaned++;

View File

@ -158,6 +158,7 @@ import org.apache.hadoop.hbase.util.VersionInfo;
import org.apache.hadoop.hbase.zookeeper.DrainingServerTracker;
import org.apache.hadoop.hbase.zookeeper.LoadBalancerTracker;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.MasterMaintenanceModeTracker;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.hadoop.hbase.zookeeper.RegionNormalizerTracker;
import org.apache.hadoop.hbase.zookeeper.RegionServerTracker;
@ -269,6 +270,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
/** Namespace stuff */
private TableNamespaceManager tableNamespaceManager;
//Tracker for master maintenance mode setting
private MasterMaintenanceModeTracker maintenanceModeTracker;
// Metrics for the HMaster
final MetricsMaster metricsMaster;
// file system manager for the master FS operations
@ -616,6 +620,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
this.serverManager);
this.drainingServerTracker.start();
this.maintenanceModeTracker = new MasterMaintenanceModeTracker(zooKeeper);
this.maintenanceModeTracker.start();
// Set the cluster as up. If new RSs, they'll be waiting on this before
// going ahead with their startup.
boolean wasUp = this.clusterStatusTracker.isClusterUp();
@ -1292,6 +1299,12 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
LOG.debug("Master has not been initialized, don't run balancer.");
return false;
}
if (isInMaintenanceMode()) {
LOG.info("Master is in maintenanceMode mode, don't run balancer.");
return false;
}
// Do this call outside of synchronized block.
int maximumBalanceTime = getBalancerCutoffTime();
synchronized (this.balancer) {
@ -1390,6 +1403,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
return false;
}
if (isInMaintenanceMode()) {
LOG.info("Master is in maintenance mode, don't run region normalizer.");
return false;
}
if (!this.regionNormalizerTracker.isNormalizerOn()) {
LOG.debug("Region normalization is disabled, don't run region normalizer.");
return false;
@ -1404,6 +1422,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
Collections.shuffle(allEnabledTables);
for (TableName table : allEnabledTables) {
if (isInMaintenanceMode()) {
LOG.debug("Master is in maintenance mode, stop running region normalizer.");
return false;
}
if (quotaManager.getNamespaceQuotaManager() != null &&
quotaManager.getNamespaceQuotaManager().getState(table.getNamespaceAsString()) != null){
LOG.debug("Skipping normalizing " + table + " since its namespace has quota");
@ -2384,6 +2407,16 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
return initialized.isReady();
}
/**
 * Report whether this master is in maintenance mode.
 *
 * <p>The maintenance-mode tracker field has no initializer and is only assigned while the
 * master's ZooKeeper trackers are being set up, so a caller that arrives earlier would hit a
 * {@code NullPointerException} if the field were dereferenced blindly. Like the other tracker
 * queries in this class, a missing tracker is reported as {@code false}.
 *
 * @return true if the tracker exists and reports maintenance mode; false otherwise
 */
@Override
public boolean isInMaintenanceMode() {
  // Read the field once so the null check and the call see the same reference.
  final MasterMaintenanceModeTracker tracker = maintenanceModeTracker;
  return tracker != null && tracker.isInMaintenanceMode();
}
@VisibleForTesting
public void setInitialized(boolean isInitialized) {
procedureExecutor.getEnvironment().setEventReady(initialized, isInitialized);
@ -2847,7 +2880,9 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
* @return The state of the load balancer, or false if the load balancer isn't defined.
*/
public boolean isBalancerOn() {
if (null == loadBalancerTracker) return false;
if (null == loadBalancerTracker || isInMaintenanceMode()) {
return false;
}
return loadBalancerTracker.isBalancerOn();
}
@ -2855,14 +2890,11 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
* Queries the state of the {@link RegionNormalizerTracker}. If it's not initialized,
* false is returned.
*/
public boolean isNormalizerOn() {
if (null == regionNormalizerTracker) {
return false;
}
return regionNormalizerTracker.isNormalizerOn();
public boolean isNormalizerOn() {
return (null == regionNormalizerTracker || isInMaintenanceMode()) ?
false: regionNormalizerTracker.isNormalizerOn();
}
/**
* Queries the state of the {@link SplitOrMergeTracker}. If it is not initialized,
* false is returned. If switchType is illegal, false will return.
@ -2870,7 +2902,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server {
* @return The state of the switch
*/
public boolean isSplitOrMergeEnabled(Admin.MasterSwitchType switchType) {
if (null == splitOrMergeTracker) {
if (null == splitOrMergeTracker || isInMaintenanceMode()) {
return false;
}
return splitOrMergeTracker.isSplitOrMergeEnabled(switchType);

View File

@ -109,6 +109,8 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsBalancerEnabled
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsBalancerEnabledResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsCatalogJanitorEnabledResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsInMaintenanceModeResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsNormalizerEnabledRequest;
@ -1358,6 +1360,15 @@ public class MasterRpcServices extends RSRpcServices
return StopMasterResponse.newBuilder().build();
}
/**
 * RPC endpoint that reports the master's maintenance-mode flag.
 *
 * @param controller RPC controller (not used here)
 * @param request the empty maintenance-mode request (not used here)
 * @return a response whose {@code inMaintenanceMode} field mirrors
 *         {@code master.isInMaintenanceMode()}
 * @throws ServiceException declared by the service interface
 */
@Override
public IsInMaintenanceModeResponse isMasterInMaintenanceMode(
    final RpcController controller, final IsInMaintenanceModeRequest request)
    throws ServiceException {
  final boolean inMaintenance = master.isInMaintenanceMode();
  return IsInMaintenanceModeResponse.newBuilder()
      .setInMaintenanceMode(inMaintenance)
      .build();
}
@Override
public UnassignRegionResponse unassignRegion(RpcController controller,
UnassignRegionRequest req) throws ServiceException {

View File

@ -330,6 +330,11 @@ public interface MasterServices extends Server {
final long nonceGroup,
final long nonce) throws IOException;
/**
 * Report whether the master is currently in maintenance mode.
 *
 * @return true if master is in maintenance mode
 */
boolean isInMaintenanceMode();
/**
* Abort a procedure.
* @param procId ID of the procedure

View File

@ -211,6 +211,9 @@ public class HBaseFsck extends Configured implements Closeable {
// AlreadyBeingCreatedException which implies a timeout on this operation of up to
// HdfsConstants.LEASE_SOFTLIMIT_PERIOD (60 seconds).
private static final int DEFAULT_WAIT_FOR_LOCK_TIMEOUT = 80; // seconds
private static final int DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS = 5;
private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL = 200; // milliseconds
private static final int DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME = 5000; // milliseconds
/**********************
* Internal resources
@ -238,8 +241,6 @@ public class HBaseFsck extends Configured implements Closeable {
private static boolean details = false; // do we display the full report
private long timelag = DEFAULT_TIME_LAG; // tables whose modtime is older
private static boolean forceExclusive = false; // only this hbck can modify HBase
private static boolean disableBalancer = false; // disable load balancer to keep regions stable
private static boolean disableSplitAndMerge = false; // disable split and merge
private boolean fixAssignments = false; // fix assignment errors?
private boolean fixMeta = false; // fix meta errors?
private boolean checkHdfs = true; // load and check fs consistency?
@ -315,7 +316,11 @@ public class HBaseFsck extends Configured implements Closeable {
*/
private Set<TableName> orphanedTableZNodes = new HashSet<TableName>();
private final RetryCounterFactory lockFileRetryCounterFactory;
private final RetryCounterFactory createZNodeRetryCounterFactory;
private ZooKeeperWatcher zkw = null;
private String hbckEphemeralNodePath = null;
private boolean hbckZodeCreated = false;
/**
* Constructor
@ -355,6 +360,15 @@ public class HBaseFsck extends Configured implements Closeable {
"hbase.hbck.lockfile.attempt.sleep.interval", DEFAULT_LOCK_FILE_ATTEMPT_SLEEP_INTERVAL),
getConf().getInt(
"hbase.hbck.lockfile.attempt.maxsleeptime", DEFAULT_LOCK_FILE_ATTEMPT_MAX_SLEEP_TIME));
createZNodeRetryCounterFactory = new RetryCounterFactory(
getConf().getInt("hbase.hbck.createznode.attempts", DEFAULT_MAX_CREATE_ZNODE_ATTEMPTS),
getConf().getInt(
"hbase.hbck.createznode.attempt.sleep.interval",
DEFAULT_CREATE_ZNODE_ATTEMPT_SLEEP_INTERVAL),
getConf().getInt(
"hbase.hbck.createznode.attempt.maxsleeptime",
DEFAULT_CREATE_ZNODE_ATTEMPT_MAX_SLEEP_TIME));
zkw = createZooKeeperWatcher();
}
private class FileLockCallable implements Callable<FSDataOutputStream> {
@ -503,6 +517,7 @@ public class HBaseFsck extends Configured implements Closeable {
@Override
public void run() {
IOUtils.closeQuietly(HBaseFsck.this);
cleanupHbckZnode();
unlockHbck();
}
});
@ -681,49 +696,78 @@ public class HBaseFsck extends Configured implements Closeable {
return errors.getErrorList().size();
}
/**
 * Creates an ephemeral znode named {@code hbck-<current time>} underneath
 * {@code ZooKeeperWatcher.masterMaintZNode}, retrying according to the create-znode retry
 * counter. The chosen path is remembered in {@code hbckEphemeralNodePath} and the outcome in
 * {@code hbckZodeCreated}.
 *
 * @return true if the znode was created; false if every attempt came back unsuccessful
 * @throws IOException if ZooKeeper keeps failing once the attempts are used up, or (as an
 *         {@link InterruptedIOException}) if the wait between attempts is interrupted
 */
private boolean setMasterInMaintenanceMode() throws IOException {
  final RetryCounter attempts = createZNodeRetryCounterFactory.create();
  final String nodeName = "hbck-" + EnvironmentEdgeManager.currentTime();
  hbckEphemeralNodePath = ZKUtil.joinZNode(ZooKeeperWatcher.masterMaintZNode, nodeName);

  boolean keepTrying = true;
  while (keepTrying) {
    try {
      hbckZodeCreated = ZKUtil.createEphemeralNodeAndWatch(zkw, hbckEphemeralNodePath, null);
      if (hbckZodeCreated) {
        return true;
      }
    } catch (KeeperException e) {
      final boolean outOfAttempts = attempts.getAttemptTimes() >= attempts.getMaxAttempts();
      if (outOfAttempts) {
        throw new IOException("Can't create znode " + hbckEphemeralNodePath, e);
      }
      // Attempts remain: drop through to the warning and back-off below.
    }

    LOG.warn("Fail to create znode " + hbckEphemeralNodePath + ", try=" +
        (attempts.getAttemptTimes() + 1) + " of " + attempts.getMaxAttempts());

    try {
      attempts.sleepUntilNextRetry();
    } catch (InterruptedException ie) {
      final InterruptedIOException interrupted =
          new InterruptedIOException("Can't create znode " + hbckEphemeralNodePath);
      interrupted.initCause(ie);
      throw interrupted;
    }

    keepTrying = attempts.shouldRetry();
  }
  return hbckZodeCreated;
}
/**
 * Deletes the hbck znode recorded in {@code hbckEphemeralNodePath}, but only when a ZooKeeper
 * watcher is available and this instance has marked the znode as created. On success the
 * created flag is cleared. A failure is logged as a warning unless ZooKeeper reports that the
 * node does not exist; no exception escapes this method.
 */
private void cleanupHbckZnode() {
  if (zkw == null || !hbckZodeCreated) {
    // Nothing to remove, or nothing to remove it with.
    return;
  }
  try {
    ZKUtil.deleteNode(zkw, hbckEphemeralNodePath);
    hbckZodeCreated = false;
  } catch (KeeperException e) {
    // A node that is already gone is not worth a warning.
    final boolean alreadyGone = e.code().equals(KeeperException.Code.NONODE);
    if (!alreadyGone) {
      LOG.warn("Delete HBCK znode " + hbckEphemeralNodePath + " failed ", e);
    }
  }
}
/**
* Contacts the master and prints out cluster-wide information
* @return 0 on success, non-zero on failure
*/
public int onlineHbck() throws IOException, KeeperException, InterruptedException, ServiceException {
public int onlineHbck()
throws IOException, KeeperException, InterruptedException, ServiceException {
// print hbase server version
errors.print("Version: " + status.getHBaseVersion());
offlineHdfsIntegrityRepair();
boolean oldBalancer = false;
if (shouldDisableBalancer()) {
oldBalancer = admin.setBalancerRunning(false, true);
}
boolean[] oldSplitAndMerge = null;
if (shouldDisableSplitAndMerge()) {
oldSplitAndMerge = admin.setSplitOrMergeEnabled(false, false,
Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE);
// If Master runs maintenance tasks (such as balancer, catalog janitor, etc) during online
// hbck, it is likely that hbck would be misled and report transient errors. Therefore, it
// is better to set Master into maintenance mode during online hbck.
//
if (!setMasterInMaintenanceMode()) {
LOG.warn("HBCK is running while master is not in maintenance mode, you might see transient "
+ "error. Please run HBCK multiple times to reduce the chance of transient error.");
}
try {
onlineConsistencyRepair();
}
finally {
// Only restore the balancer if it was true when we started repairing and
// we actually disabled it. Otherwise, we might clobber another run of
// hbck that has just restored it.
if (shouldDisableBalancer() && oldBalancer) {
admin.setBalancerRunning(oldBalancer, false);
}
if (shouldDisableSplitAndMerge()) {
if (oldSplitAndMerge != null) {
if (oldSplitAndMerge[0] && oldSplitAndMerge[1]) {
admin.setSplitOrMergeEnabled(true, false,
Admin.MasterSwitchType.SPLIT, Admin.MasterSwitchType.MERGE);
} else if (oldSplitAndMerge[0]) {
admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.SPLIT);
} else if (oldSplitAndMerge[1]) {
admin.setSplitOrMergeEnabled(true, false, Admin.MasterSwitchType.MERGE);
}
}
}
}
onlineConsistencyRepair();
if (checkRegionBoundaries) {
checkRegionBoundaries();
@ -738,6 +782,9 @@ public class HBaseFsck extends Configured implements Closeable {
checkAndFixReplication();
// Remove the hbck znode
cleanupHbckZnode();
// Remove the hbck lock
unlockHbck();
@ -757,9 +804,20 @@ public class HBaseFsck extends Configured implements Closeable {
@Override
public void close() throws IOException {
IOUtils.closeQuietly(admin);
IOUtils.closeQuietly(meta);
IOUtils.closeQuietly(connection);
try {
cleanupHbckZnode();
unlockHbck();
} catch (Exception io) {
LOG.warn(io);
} finally {
if (zkw != null) {
zkw.close();
zkw = null;
}
IOUtils.closeQuietly(admin);
IOUtils.closeQuietly(meta);
IOUtils.closeQuietly(connection);
}
}
private static class RegionBoundariesInformation {
@ -1644,7 +1702,6 @@ public class HBaseFsck extends Configured implements Closeable {
HConnectionManager.execute(new HConnectable<Void>(getConf()) {
@Override
public Void connect(HConnection connection) throws IOException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
try {
for (TableName tableName :
ZKTableStateClientSideReader.getDisabledOrDisablingTables(zkw)) {
@ -1654,8 +1711,6 @@ public class HBaseFsck extends Configured implements Closeable {
throw new IOException(ke);
} catch (InterruptedException e) {
throw new InterruptedIOException();
} finally {
zkw.close();
}
return null;
}
@ -1775,17 +1830,6 @@ public class HBaseFsck extends Configured implements Closeable {
});
}
private ServerName getMetaRegionServerName(int replicaId)
throws IOException, KeeperException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
ServerName sn = null;
try {
sn = new MetaTableLocator().getMetaRegionLocation(zkw, replicaId);
} finally {
zkw.close();
}
return sn;
}
/**
* Contacts each regionserver and fetches metadata about regions.
@ -3230,32 +3274,21 @@ public class HBaseFsck extends Configured implements Closeable {
}
private void checkAndFixTableLocks() throws IOException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
TableLockChecker checker = new TableLockChecker(zkw, errors);
checker.checkTableLocks();
try {
TableLockChecker checker = new TableLockChecker(zkw, errors);
checker.checkTableLocks();
if (this.fixTableLocks) {
checker.fixExpiredTableLocks();
}
} finally {
zkw.close();
if (this.fixTableLocks) {
checker.fixExpiredTableLocks();
}
}
private void checkAndFixReplication() throws IOException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
try {
ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
checker.checkUnDeletedQueues();
ReplicationChecker checker = new ReplicationChecker(getConf(), zkw, connection, errors);
checker.checkUnDeletedQueues();
if (checker.hasUnDeletedQueues() && this.fixReplication) {
checker.fixUnDeletedQueues();
setShouldRerun();
}
} finally {
zkw.close();
if (checker.hasUnDeletedQueues() && this.fixReplication) {
checker.fixUnDeletedQueues();
setShouldRerun();
}
}
@ -3267,47 +3300,41 @@ public class HBaseFsck extends Configured implements Closeable {
*/
private void checkAndFixOrphanedTableZNodes()
throws IOException, KeeperException, InterruptedException {
ZooKeeperWatcher zkw = createZooKeeperWatcher();
Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
String msg;
TableInfo tableInfo;
try {
Set<TableName> enablingTables = ZKTableStateClientSideReader.getEnablingTables(zkw);
String msg;
TableInfo tableInfo;
for (TableName tableName : enablingTables) {
// Check whether the table exists in hbase
tableInfo = tablesInfo.get(tableName);
if (tableInfo != null) {
// Table exists. This table state is in transit. No problem for this table.
continue;
}
msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
LOG.warn(msg);
orphanedTableZNodes.add(tableName);
errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
for (TableName tableName : enablingTables) {
// Check whether the table exists in hbase
tableInfo = tablesInfo.get(tableName);
if (tableInfo != null) {
// Table exists. This table state is in transit. No problem for this table.
continue;
}
if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
msg = "Table " + tableName + " not found in hbase:meta. Orphaned table ZNode found.";
LOG.warn(msg);
orphanedTableZNodes.add(tableName);
errors.reportError(ERROR_CODE.ORPHANED_ZK_TABLE_ENTRY, msg);
}
for (TableName tableName : orphanedTableZNodes) {
try {
// Set the table state to be disabled so that if we made mistake, we can trace
// the history and figure it out.
// Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
// Both approaches works.
zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
} catch (CoordinatedStateException e) {
// This exception should not happen here
LOG.error(
"Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
e);
}
if (orphanedTableZNodes.size() > 0 && this.fixTableZNodes) {
ZKTableStateManager zkTableStateMgr = new ZKTableStateManager(zkw);
for (TableName tableName : orphanedTableZNodes) {
try {
// Set the table state to be disabled so that if we made mistake, we can trace
// the history and figure it out.
// Another choice is to call checkAndRemoveTableState() to delete the orphaned ZNode.
// Both approaches works.
zkTableStateMgr.setTableState(tableName, ZooKeeperProtos.Table.State.DISABLED);
} catch (CoordinatedStateException e) {
// This exception should not happen here
LOG.error(
"Got a CoordinatedStateException while fixing the ENABLING table znode " + tableName,
e);
}
}
} finally {
zkw.close();
}
}
@ -3377,12 +3404,7 @@ public class HBaseFsck extends Configured implements Closeable {
private void unassignMetaReplica(HbckInfo hi) throws IOException, InterruptedException,
KeeperException {
undeployRegions(hi);
ZooKeeperWatcher zkw = createZooKeeperWatcher();
try {
ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
} finally {
zkw.close();
}
ZKUtil.deleteNode(zkw, zkw.getZNodeForReplica(hi.metaEntry.getReplicaId()));
}
private void assignMetaReplica(int replicaId)
@ -4250,38 +4272,6 @@ public class HBaseFsck extends Configured implements Closeable {
return fixAny || forceExclusive;
}
/**
* Disable the load balancer.
*/
public static void setDisableBalancer() {
disableBalancer = true;
}
/**
* Disable the split and merge
*/
public static void setDisableSplitAndMerge() {
disableSplitAndMerge = true;
}
/**
* The balancer should be disabled if we are modifying HBase.
* It can be disabled if you want to prevent region movement from causing
* false positives.
*/
public boolean shouldDisableBalancer() {
return fixAny || disableBalancer;
}
/**
* The split and merge should be disabled if we are modifying HBase.
* It can be disabled if you want to prevent region movement from causing
* false positives.
*/
public boolean shouldDisableSplitAndMerge() {
return fixAny || disableSplitAndMerge;
}
/**
* Set summary mode.
* Print only summary of the tables and status (OK or INCONSISTENT)
@ -4552,7 +4542,6 @@ public class HBaseFsck extends Configured implements Closeable {
out.println(" -sidelineDir <hdfs://> HDFS path to backup existing meta.");
out.println(" -boundaries Verify that regions boundaries are the same between META and store files.");
out.println(" -exclusive Abort if another hbck is exclusive or fixing.");
out.println(" -disableBalancer Disable the load balancer.");
out.println("");
out.println(" Metadata Repair options: (expert features, use with caution!)");
@ -4653,10 +4642,6 @@ public class HBaseFsck extends Configured implements Closeable {
setDisplayFullReport();
} else if (cmd.equals("-exclusive")) {
setForceExclusive();
} else if (cmd.equals("-disableBalancer")) {
setDisableBalancer();
} else if (cmd.equals("-disableSplitAndMerge")) {
setDisableSplitAndMerge();
} else if (cmd.equals("-timelag")) {
if (i == args.length - 1) {
errors.reportError(ERROR_CODE.WRONG_USAGE, "HBaseFsck: -timelag needs a value.");

View File

@ -0,0 +1,81 @@
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.zookeeper;
import java.util.List;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
/**
 * Tracks the master Maintenance Mode via ZK.
 *
 * <p>The master is considered to be in maintenance mode while the znode
 * {@link ZooKeeperWatcher#masterMaintZNode} has at least one child. The child list is re-read
 * (and the watch re-armed) whenever a node at or below that znode is created, deleted, or has
 * its children changed.
 */
@InterfaceAudience.Private
public class MasterMaintenanceModeTracker extends ZooKeeperListener {
  /**
   * Last observed answer to "does the maintenance znode have children?".
   *
   * <p>Declared {@code volatile} because it is written from the listener callbacks below and
   * read through {@link #isInMaintenanceMode()}; these presumably run on different threads
   * (ZooKeeper event delivery vs. master chores and RPC handlers), and without {@code volatile}
   * a reader is not guaranteed to ever observe an update.
   */
  private volatile boolean hasChildren;

  /**
   * @param watcher the ZooKeeper watcher used to list children and receive events
   */
  public MasterMaintenanceModeTracker(ZooKeeperWatcher watcher) {
    super(watcher);
    hasChildren = false;
  }

  /**
   * @return true if the maintenance znode had at least one child when it was last read
   */
  public boolean isInMaintenanceMode() {
    return hasChildren;
  }

  /**
   * Refreshes the cached state if the event path is at or below the maintenance znode.
   *
   * @param path znode path carried by the ZooKeeper event
   */
  private void update(String path) {
    if (path.startsWith(ZooKeeperWatcher.masterMaintZNode)) {
      update();
    }
  }

  /**
   * Re-reads the children of the maintenance znode, re-arming the watch as a side effect of
   * the listing call, and caches whether any exist. A null or empty listing means "not in
   * maintenance mode".
   */
  private void update() {
    try {
      List<String> children =
          ZKUtil.listChildrenAndWatchForNewChildren(watcher, ZooKeeperWatcher.masterMaintZNode);
      hasChildren = (children != null && children.size() > 0);
    } catch (KeeperException e) {
      // Deliberately best-effort: a ZooKeeper failure is treated as "not in maintenance mode"
      // rather than being propagated to the caller.
      hasChildren = false;
    }
  }

  /**
   * Starts the tracking of whether master is in Maintenance Mode.
   */
  public void start() {
    watcher.registerListener(this);
    update();
  }

  @Override
  public void nodeCreated(String path) {
    update(path);
  }

  @Override
  public void nodeDeleted(String path) {
    update(path);
  }

  @Override
  public void nodeChildrenChanged(String path) {
    update(path);
  }
}

View File

@ -146,6 +146,4 @@ public class SplitOrMergeTracker {
return builder.build();
}
}
}

View File

@ -534,6 +534,11 @@ public class TestCatalogJanitor {
return false;
}
// Mock implementation: always reports that the master is not in maintenance mode.
@Override
public boolean isInMaintenanceMode() {
return false;
}
@Override
public long getLastMajorCompactionTimestamp(TableName table) throws IOException {
// Auto-generated method stub

View File

@ -37,8 +37,6 @@ import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;
import java.util.concurrent.Callable;
@ -69,7 +67,6 @@ import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.MetaTableAccessor;
@ -84,12 +81,10 @@ import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.RowMutations;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.replication.ReplicationAdmin;