HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new master
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1049236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8a521615cd
commit
c0c723f660
|
@ -19,6 +19,8 @@ Release 0.91.0 - Unreleased
|
|||
Wrong (Ed Kohlwey via Stack)
|
||||
HBASE-1888 KeyValue methods throw NullPointerException instead of
|
||||
IllegalArgumentException during parameter sanity check
|
||||
HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new
|
||||
master
|
||||
|
||||
IMPROVEMENTS
|
||||
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via
|
||||
|
|
|
@ -358,6 +358,9 @@ public final class HConstants {
|
|||
public static final String
|
||||
REPLICATION_ENABLE_KEY = "hbase.replication";
|
||||
|
||||
/** HBCK special code name used as server name when manipulating ZK nodes */
|
||||
public static final String HBCK_CODE_NAME = "HBCKServerName";
|
||||
|
||||
private HConstants() {
|
||||
// Can't be instantiated with this ctor.
|
||||
}
|
||||
|
|
|
@ -40,6 +40,7 @@ import org.apache.commons.logging.Log;
|
|||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hbase.Chore;
|
||||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.HServerInfo;
|
||||
|
@ -51,10 +52,9 @@ import org.apache.hadoop.hbase.catalog.CatalogTracker;
|
|||
import org.apache.hadoop.hbase.catalog.MetaReader;
|
||||
import org.apache.hadoop.hbase.catalog.RootLocationEditor;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
|
||||
import org.apache.hadoop.hbase.executor.ExecutorService;
|
||||
import org.apache.hadoop.hbase.executor.RegionTransitionData;
|
||||
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
|
||||
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
|
||||
import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
|
||||
import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
|
||||
import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
|
||||
|
@ -65,9 +65,9 @@ import org.apache.hadoop.hbase.util.Threads;
|
|||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKTable;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
|
||||
import org.apache.hadoop.io.Writable;
|
||||
import org.apache.hadoop.ipc.RemoteException;
|
||||
import org.apache.zookeeper.AsyncCallback;
|
||||
|
@ -143,7 +143,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
* @param serverManager
|
||||
* @param catalogTracker
|
||||
* @param service
|
||||
* @throws KeeperException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
public AssignmentManager(Server master, ServerManager serverManager,
|
||||
CatalogTracker catalogTracker, final ExecutorService service)
|
||||
|
@ -337,6 +337,11 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
LOG.warn("Unexpected NULL input " + data);
|
||||
return;
|
||||
}
|
||||
// Check if this is a special HBCK transition
|
||||
if (data.getServerName().equals(HConstants.HBCK_CODE_NAME)) {
|
||||
handleHBCK(data);
|
||||
return;
|
||||
}
|
||||
// Verify this is a known server
|
||||
if (!serverManager.isServerOnline(data.getServerName()) &&
|
||||
!this.master.getServerName().equals(data.getServerName())) {
|
||||
|
@ -424,6 +429,45 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handle a ZK unassigned node transition triggered by HBCK repair tool.
|
||||
* <p>
|
||||
* This is handled in a separate code path because it breaks the normal rules.
|
||||
* @param data
|
||||
*/
|
||||
private void handleHBCK(RegionTransitionData data) {
|
||||
String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
|
||||
LOG.info("Handling HBCK triggered transition=" + data.getEventType() +
|
||||
", server=" + data.getServerName() + ", region=" +
|
||||
HRegionInfo.prettyPrint(encodedName));
|
||||
RegionState regionState = regionsInTransition.get(encodedName);
|
||||
switch (data.getEventType()) {
|
||||
case M_ZK_REGION_OFFLINE:
|
||||
HRegionInfo regionInfo = null;
|
||||
if (regionState != null) {
|
||||
regionInfo = regionState.getRegion();
|
||||
} else {
|
||||
try {
|
||||
regionInfo = MetaReader.getRegion(catalogTracker,
|
||||
data.getRegionName()).getFirst();
|
||||
} catch (IOException e) {
|
||||
LOG.info("Exception reading META doing HBCK repair operation", e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
LOG.info("HBCK repair is triggering assignment of region=" +
|
||||
regionInfo.getRegionNameAsString());
|
||||
// trigger assign, node is already in OFFLINE so don't need to update ZK
|
||||
assign(regionInfo, false);
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG.warn("Received unexpected region state from HBCK (" +
|
||||
data.getEventType() + ")");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// ZooKeeper events
|
||||
|
||||
/**
|
||||
|
@ -1001,7 +1045,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
public void unassign(HRegionInfo region, boolean force) {
|
||||
LOG.debug("Starting unassignment of region " +
|
||||
region.getRegionNameAsString() + " (offlining)");
|
||||
synchronized (this.regions) {
|
||||
synchronized (this.regions) {
|
||||
// Check if this region is currently assigned
|
||||
if (!regions.containsKey(region)) {
|
||||
LOG.debug("Attempted to unassign region " +
|
||||
|
|
|
@ -23,11 +23,11 @@ import java.io.IOException;
|
|||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
|
@ -49,19 +49,21 @@ import org.apache.hadoop.hbase.MasterNotRunningException;
|
|||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.client.HBaseAdmin;
|
||||
import org.apache.hadoop.hbase.client.HConnection;
|
||||
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||
import org.apache.hadoop.hbase.client.MetaScanner;
|
||||
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
|
||||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.regionserver.wal.HLog;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.Writables;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKTable;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* Check consistency among the in-memory states of the master and the
|
||||
* Check consistency among the in-memory states of the master and the
|
||||
* region server(s) and the state of data in HDFS.
|
||||
*/
|
||||
public class HBaseFsck {
|
||||
|
@ -75,6 +77,8 @@ public class HBaseFsck {
|
|||
|
||||
private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
|
||||
private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
|
||||
private TreeSet<byte[]> disabledTables =
|
||||
new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
|
||||
ErrorReporter errors = new PrintingErrorReporter();
|
||||
|
||||
private static boolean details = false; // do we display the full report
|
||||
|
@ -92,7 +96,7 @@ public class HBaseFsck {
|
|||
* @throws MasterNotRunningException if the master is not running
|
||||
* @throws ZooKeeperConnectionException if unable to connect to zookeeper
|
||||
*/
|
||||
public HBaseFsck(Configuration conf)
|
||||
public HBaseFsck(Configuration conf)
|
||||
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
|
||||
this.conf = conf;
|
||||
|
||||
|
@ -105,8 +109,10 @@ public class HBaseFsck {
|
|||
* Contacts the master and prints out cluster-wide information
|
||||
* @throws IOException if a remote or network exception occurs
|
||||
* @return 0 on success, non-zero on failure
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
int doWork() throws IOException {
|
||||
int doWork() throws IOException, KeeperException, InterruptedException {
|
||||
// print hbase server version
|
||||
errors.print("Version: " + status.getHBaseVersion());
|
||||
|
||||
|
@ -114,7 +120,7 @@ public class HBaseFsck {
|
|||
regionInfo.clear();
|
||||
tablesInfo.clear();
|
||||
emptyRegionInfoQualifiers.clear();
|
||||
|
||||
disabledTables.clear();
|
||||
|
||||
// get a list of all regions from the master. This involves
|
||||
// scanning the META table
|
||||
|
@ -152,7 +158,7 @@ public class HBaseFsck {
|
|||
|
||||
// From the master, get a list of all known live region servers
|
||||
Collection<HServerInfo> regionServers = status.getServerInfo();
|
||||
errors.print("Number of live region servers: " +
|
||||
errors.print("Number of live region servers: " +
|
||||
regionServers.size());
|
||||
if (details) {
|
||||
for (HServerInfo rsinfo: regionServers) {
|
||||
|
@ -162,7 +168,7 @@ public class HBaseFsck {
|
|||
|
||||
// From the master, get a list of all dead region servers
|
||||
Collection<String> deadRegionServers = status.getDeadServerNames();
|
||||
errors.print("Number of dead region servers: " +
|
||||
errors.print("Number of dead region servers: " +
|
||||
deadRegionServers.size());
|
||||
if (details) {
|
||||
for (String name: deadRegionServers) {
|
||||
|
@ -185,6 +191,9 @@ public class HBaseFsck {
|
|||
}
|
||||
}
|
||||
|
||||
// Get disabled tables from ZooKeeper
|
||||
loadDisabledTables();
|
||||
|
||||
// Check consistency
|
||||
checkConsistency();
|
||||
|
||||
|
@ -197,6 +206,31 @@ public class HBaseFsck {
|
|||
return errors.summarize();
|
||||
}
|
||||
|
||||
/**
|
||||
* Load the list of disabled tables in ZK into local set.
|
||||
* @throws ZooKeeperConnectionException
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
private void loadDisabledTables()
|
||||
throws ZooKeeperConnectionException, IOException, KeeperException {
|
||||
ZooKeeperWatcher zkw =
|
||||
HConnectionManager.getConnection(conf).getZooKeeperWatcher();
|
||||
for (String tableName : ZKTable.getDisabledOrDisablingTables(zkw)) {
|
||||
disabledTables.add(Bytes.toBytes(tableName));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the specified region's table is disabled.
|
||||
* @throws ZooKeeperConnectionException
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
private boolean isTableDisabled(HRegionInfo regionInfo) {
|
||||
return disabledTables.contains(regionInfo.getTableDesc().getName());
|
||||
}
|
||||
|
||||
/**
|
||||
* Scan HDFS for all regions, recording their information into
|
||||
* regionInfo
|
||||
|
@ -280,7 +314,7 @@ public class HBaseFsck {
|
|||
regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Contacts each regionserver and fetches metadata about regions.
|
||||
* @param regionServerList - the list of region servers to connect to
|
||||
|
@ -315,7 +349,7 @@ public class HBaseFsck {
|
|||
HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
|
||||
hbi.deployedOn.add(rsinfo.getServerAddress());
|
||||
}
|
||||
} catch (IOException e) { // unable to connect to the region server.
|
||||
} catch (IOException e) { // unable to connect to the region server.
|
||||
errors.reportError("\nRegionServer:" + rsinfo.getServerName() +
|
||||
" Unable to fetch region information. " + e);
|
||||
}
|
||||
|
@ -324,8 +358,11 @@ public class HBaseFsck {
|
|||
|
||||
/**
|
||||
* Check consistency of all regions that have been found in previous phases.
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
void checkConsistency() throws IOException {
|
||||
void checkConsistency()
|
||||
throws IOException, KeeperException, InterruptedException {
|
||||
for (java.util.Map.Entry<String, HbckInfo> e: regionInfo.entrySet()) {
|
||||
doConsistencyCheck(e.getKey(), e.getValue());
|
||||
}
|
||||
|
@ -333,9 +370,11 @@ public class HBaseFsck {
|
|||
|
||||
/**
|
||||
* Check a single region for consistency and correct deployment.
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
void doConsistencyCheck(final String key, final HbckInfo hbi)
|
||||
throws IOException {
|
||||
throws IOException, KeeperException, InterruptedException {
|
||||
String descriptiveName = hbi.toString();
|
||||
|
||||
boolean inMeta = hbi.metaEntry != null;
|
||||
|
@ -346,7 +385,7 @@ public class HBaseFsck {
|
|||
boolean deploymentMatchesMeta =
|
||||
hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
|
||||
hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
|
||||
boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
|
||||
boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
|
||||
boolean recentlyModified = hbi.foundRegionDir != null &&
|
||||
hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
|
||||
|
||||
|
@ -533,8 +572,8 @@ public class HBaseFsck {
|
|||
/**
|
||||
* Return a list of user-space table names whose metadata have not been
|
||||
* modified in the last few milliseconds specified by timelag
|
||||
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
|
||||
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
|
||||
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
|
||||
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
|
||||
* milliseconds specified by timelag, then the table is a candidate to be returned.
|
||||
* @param regionList - all entries found in .META
|
||||
* @return tables that have not been modified recently
|
||||
|
@ -580,8 +619,11 @@ public class HBaseFsck {
|
|||
* If there are inconsistencies (i.e. zero or more than one regions
|
||||
* pretend to be holding the .META.) try to fix that and report an error.
|
||||
* @throws IOException from HBaseFsckRepair functions
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
boolean checkMetaEntries() throws IOException {
|
||||
boolean checkMetaEntries()
|
||||
throws IOException, KeeperException, InterruptedException {
|
||||
List <HbckInfo> metaRegions = Lists.newArrayList();
|
||||
for (HbckInfo value : regionInfo.values()) {
|
||||
if (value.metaEntry.isMetaTable()) {
|
||||
|
@ -709,7 +751,7 @@ public class HBaseFsck {
|
|||
HServerAddress regionServer; // server hosting this region
|
||||
long modTime; // timestamp of most recent modification metadata
|
||||
|
||||
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
|
||||
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
|
||||
byte[] startCode, long modTime) {
|
||||
super(rinfo);
|
||||
this.regionServer = regionServer;
|
||||
|
@ -883,16 +925,16 @@ public class HBaseFsck {
|
|||
/**
|
||||
* Main program
|
||||
* @param args
|
||||
* @throws Exception
|
||||
*/
|
||||
public static void main(String [] args)
|
||||
throws IOException, MasterNotRunningException {
|
||||
public static void main(String [] args) throws Exception {
|
||||
|
||||
// create a fsck object
|
||||
Configuration conf = HBaseConfiguration.create();
|
||||
conf.set("fs.defaultFS", conf.get("hbase.rootdir"));
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
|
||||
// Process command-line args.
|
||||
// Process command-line args.
|
||||
for (int i = 0; i < args.length; i++) {
|
||||
String cmd = args[i];
|
||||
if (cmd.equals("-details")) {
|
||||
|
|
|
@ -26,99 +26,83 @@ import org.apache.hadoop.conf.Configuration;
|
|||
import org.apache.hadoop.hbase.HConstants;
|
||||
import org.apache.hadoop.hbase.HRegionInfo;
|
||||
import org.apache.hadoop.hbase.HServerAddress;
|
||||
import org.apache.hadoop.hbase.client.Delete;
|
||||
import org.apache.hadoop.hbase.NotServingRegionException;
|
||||
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
|
||||
import org.apache.hadoop.hbase.client.HConnectionManager;
|
||||
import org.apache.hadoop.hbase.client.HTable;
|
||||
import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
|
||||
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
|
||||
public class HBaseFsckRepair {
|
||||
|
||||
/**
|
||||
* Fix dupe assignment by doing silent closes on each RS hosting the region
|
||||
* and then force ZK unassigned node to OFFLINE to trigger assignment by
|
||||
* master.
|
||||
* @param conf
|
||||
* @param region
|
||||
* @param servers
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
* @throws InterruptedException
|
||||
*/
|
||||
public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
|
||||
List<HServerAddress> servers)
|
||||
throws IOException {
|
||||
throws IOException, KeeperException, InterruptedException {
|
||||
|
||||
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||
|
||||
// Clear status in master and zk
|
||||
clearInMaster(conf, actualRegion);
|
||||
clearInZK(conf, actualRegion);
|
||||
|
||||
// Close region on the servers
|
||||
// Close region on the servers silently
|
||||
for(HServerAddress server : servers) {
|
||||
closeRegion(conf, server, actualRegion);
|
||||
closeRegionSilentlyAndWait(conf, server, actualRegion);
|
||||
}
|
||||
|
||||
// It's unassigned so fix it as such
|
||||
fixUnassigned(conf, actualRegion);
|
||||
// Force ZK node to OFFLINE so master assigns
|
||||
forceOfflineInZK(conf, actualRegion);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fix unassigned by creating/transitioning the unassigned ZK node for this
|
||||
* region to OFFLINE state with a special flag to tell the master that this
|
||||
* is a forced operation by HBCK.
|
||||
* @param conf
|
||||
* @param region
|
||||
* @throws IOException
|
||||
* @throws KeeperException
|
||||
*/
|
||||
public static void fixUnassigned(Configuration conf, HRegionInfo region)
|
||||
throws IOException {
|
||||
|
||||
throws IOException, KeeperException {
|
||||
HRegionInfo actualRegion = new HRegionInfo(region);
|
||||
|
||||
// Clear status in master and zk
|
||||
clearInMaster(conf, actualRegion);
|
||||
clearInZK(conf, actualRegion);
|
||||
|
||||
// Clear assignment in META or ROOT
|
||||
clearAssignment(conf, actualRegion);
|
||||
// Force ZK node to OFFLINE so master assigns
|
||||
forceOfflineInZK(conf, actualRegion);
|
||||
}
|
||||
|
||||
private static void clearInMaster(Configuration conf, HRegionInfo region)
|
||||
throws IOException {
|
||||
System.out.println("Region being cleared in master: " + region);
|
||||
HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
|
||||
long masterVersion =
|
||||
master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25);
|
||||
System.out.println("Master protocol version: " + masterVersion);
|
||||
try {
|
||||
// TODO: Do we want to do it this way?
|
||||
// Better way is to tell master to fix the issue itself?
|
||||
// That way it can use in-memory state to determine best plan
|
||||
// master.clearFromTransition(region);
|
||||
} catch (Exception e) {}
|
||||
private static void forceOfflineInZK(Configuration conf, HRegionInfo region)
|
||||
throws ZooKeeperConnectionException, KeeperException, IOException {
|
||||
ZKAssign.createOrForceNodeOffline(
|
||||
HConnectionManager.getConnection(conf).getZooKeeperWatcher(),
|
||||
region, HConstants.HBCK_CODE_NAME);
|
||||
}
|
||||
|
||||
private static void clearInZK(Configuration conf, HRegionInfo region)
|
||||
throws IOException {
|
||||
ZooKeeperWatcher zkw =
|
||||
HConnectionManager.getConnection(conf).getZooKeeperWatcher();
|
||||
try {
|
||||
ZKAssign.deleteNodeFailSilent(zkw, region);
|
||||
} catch (KeeperException e) {
|
||||
throw new IOException("Unexpected ZK exception", e);
|
||||
}
|
||||
}
|
||||
|
||||
private static void closeRegion(Configuration conf, HServerAddress server,
|
||||
HRegionInfo region)
|
||||
throws IOException {
|
||||
private static void closeRegionSilentlyAndWait(Configuration conf,
|
||||
HServerAddress server, HRegionInfo region)
|
||||
throws IOException, InterruptedException {
|
||||
HRegionInterface rs =
|
||||
HConnectionManager.getConnection(conf).getHRegionConnection(server);
|
||||
rs.closeRegion(region, false);
|
||||
}
|
||||
|
||||
private static void clearAssignment(Configuration conf,
|
||||
HRegionInfo region)
|
||||
throws IOException {
|
||||
HTable ht = null;
|
||||
if (region.isMetaTable()) {
|
||||
// Clear assignment in ROOT
|
||||
ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
|
||||
long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
|
||||
long expiration = timeout + System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() < expiration) {
|
||||
try {
|
||||
HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
|
||||
if (rsRegion == null) throw new NotServingRegionException();
|
||||
} catch (Exception e) {
|
||||
return;
|
||||
}
|
||||
Thread.sleep(1000);
|
||||
}
|
||||
else {
|
||||
// Clear assignment in META
|
||||
ht = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||
}
|
||||
Delete del = new Delete(region.getRegionName());
|
||||
del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
|
||||
del.deleteColumns(HConstants.CATALOG_FAMILY,
|
||||
HConstants.STARTCODE_QUALIFIER);
|
||||
ht.delete(del);
|
||||
throw new IOException("Region " + region + " failed to close within" +
|
||||
" timeout " + timeout);
|
||||
}
|
||||
}
|
|
@ -313,4 +313,39 @@ public class ZKTable {
|
|||
}
|
||||
return disabledTables;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of all the tables set as disabled in zookeeper.
|
||||
* @return Set of disabled tables, empty Set if none
|
||||
* @throws KeeperException
|
||||
*/
|
||||
public static Set<String> getDisabledTables(ZooKeeperWatcher zkw)
|
||||
throws KeeperException {
|
||||
Set<String> disabledTables = new HashSet<String>();
|
||||
List<String> children =
|
||||
ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
|
||||
for (String child: children) {
|
||||
TableState state = getTableState(zkw, child);
|
||||
if (state == TableState.DISABLED) disabledTables.add(child);
|
||||
}
|
||||
return disabledTables;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a list of all the tables set as disabled in zookeeper.
|
||||
* @return Set of disabled tables, empty Set if none
|
||||
* @throws KeeperException
|
||||
*/
|
||||
public static Set<String> getDisabledOrDisablingTables(ZooKeeperWatcher zkw)
|
||||
throws KeeperException {
|
||||
Set<String> disabledTables = new HashSet<String>();
|
||||
List<String> children =
|
||||
ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
|
||||
for (String child: children) {
|
||||
TableState state = getTableState(zkw, child);
|
||||
if (state == TableState.DISABLED || state == TableState.DISABLING)
|
||||
disabledTables.add(child);
|
||||
}
|
||||
return disabledTables;
|
||||
}
|
||||
}
|
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.hbase.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -36,8 +36,6 @@ import org.apache.hadoop.hbase.client.Scan;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
public class TestHBaseFsck {
|
||||
|
||||
final Log LOG = LogFactory.getLog(getClass());
|
||||
|
@ -53,7 +51,7 @@ public class TestHBaseFsck {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void testHBaseFsck() throws IOException {
|
||||
public void testHBaseFsck() throws Exception {
|
||||
HBaseFsck fsck = new HBaseFsck(conf);
|
||||
fsck.displayFullReport();
|
||||
fsck.setTimeLag(0);
|
||||
|
@ -71,7 +69,7 @@ public class TestHBaseFsck {
|
|||
// point to a different region server
|
||||
HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
|
||||
ResultScanner scanner = meta.getScanner(new Scan());
|
||||
|
||||
|
||||
resforloop : for (Result res : scanner) {
|
||||
long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
|
||||
HConstants.STARTCODE_QUALIFIER));
|
||||
|
@ -100,6 +98,7 @@ public class TestHBaseFsck {
|
|||
// Fixed or not, it still reports inconsistencies
|
||||
assertEquals(-1, result);
|
||||
|
||||
Thread.sleep(15000);
|
||||
// Disabled, won't work because the region stays unassigned, see HBASE-3217
|
||||
// new HTable(conf, TABLE).getScanner(new Scan());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue