HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new master

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1049236 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jonathan Gray 2010-12-14 19:58:08 +00:00
parent 8a521615cd
commit c0c723f660
7 changed files with 208 additions and 99 deletions

View File

@ -19,6 +19,8 @@ Release 0.91.0 - Unreleased
Wrong (Ed Kohlwey via Stack)
HBASE-1888 KeyValue methods throw NullPointerException instead of
IllegalArgumentException during parameter sanity check
HBASE-3337 Restore HBCK fix of unassignment and dupe assignment for new
master
IMPROVEMENTS
HBASE-2001 Coprocessors: Colocate user code with regions (Mingjie Lai via

View File

@ -358,6 +358,9 @@ public final class HConstants {
public static final String
REPLICATION_ENABLE_KEY = "hbase.replication";
/** HBCK special code name used as server name when manipulating ZK nodes */
public static final String HBCK_CODE_NAME = "HBCKServerName";
private HConstants() {
// Can't be instantiated with this ctor.
}

View File

@ -40,6 +40,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.HServerInfo;
@ -51,10 +52,9 @@ import org.apache.hadoop.hbase.catalog.CatalogTracker;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.catalog.RootLocationEditor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.executor.ExecutorService;
import org.apache.hadoop.hbase.executor.RegionTransitionData;
import org.apache.hadoop.hbase.master.AssignmentManager.RegionState;
import org.apache.hadoop.hbase.executor.EventHandler.EventType;
import org.apache.hadoop.hbase.master.LoadBalancer.RegionPlan;
import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler;
import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler;
@ -65,9 +65,9 @@ import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.AsyncCallback;
@ -143,7 +143,7 @@ public class AssignmentManager extends ZooKeeperListener {
* @param serverManager
* @param catalogTracker
* @param service
* @throws KeeperException
* @throws KeeperException
*/
public AssignmentManager(Server master, ServerManager serverManager,
CatalogTracker catalogTracker, final ExecutorService service)
@ -337,6 +337,11 @@ public class AssignmentManager extends ZooKeeperListener {
LOG.warn("Unexpected NULL input " + data);
return;
}
// Check if this is a special HBCK transition
if (data.getServerName().equals(HConstants.HBCK_CODE_NAME)) {
handleHBCK(data);
return;
}
// Verify this is a known server
if (!serverManager.isServerOnline(data.getServerName()) &&
!this.master.getServerName().equals(data.getServerName())) {
@ -424,6 +429,45 @@ public class AssignmentManager extends ZooKeeperListener {
}
}
/**
 * Handle a ZK unassigned node transition triggered by HBCK repair tool.
 * <p>
 * This is handled in a separate code path because it breaks the normal rules.
 * Only an {@code M_ZK_REGION_OFFLINE} transition is acted upon: the region is
 * resolved from the in-memory regions-in-transition map or, failing that, from
 * META, and then handed to {@code assign}. Any other event type is logged and
 * ignored.
 * <p>
 * If the region cannot be resolved (META read fails, or no region info is
 * found) the request is logged and dropped rather than failing the caller.
 * @param data transition data read from the unassigned node; the caller has
 *   already checked that its server name is the special HBCK code name
 */
private void handleHBCK(RegionTransitionData data) {
  String encodedName = HRegionInfo.encodeRegionName(data.getRegionName());
  LOG.info("Handling HBCK triggered transition=" + data.getEventType() +
    ", server=" + data.getServerName() + ", region=" +
    HRegionInfo.prettyPrint(encodedName));
  RegionState regionState = regionsInTransition.get(encodedName);
  switch (data.getEventType()) {
    case M_ZK_REGION_OFFLINE:
      HRegionInfo regionInfo = null;
      if (regionState != null) {
        regionInfo = regionState.getRegion();
      } else {
        try {
          // Fully-qualified so this method does not depend on the import list.
          // NOTE(review): the lookup is assumed to be able to come back empty
          // (null pair) when the region has no row in META -- confirm.
          org.apache.hadoop.hbase.util.Pair<HRegionInfo, ?> metaEntry =
            MetaReader.getRegion(catalogTracker, data.getRegionName());
          if (metaEntry != null) {
            regionInfo = metaEntry.getFirst();
          }
        } catch (IOException e) {
          LOG.info("Exception reading META doing HBCK repair operation", e);
          return;
        }
      }
      if (regionInfo == null) {
        // Nothing to assign: avoid a NullPointerException on the ZK event path.
        LOG.warn("HBCK repair requested assignment of region=" +
          HRegionInfo.prettyPrint(encodedName) +
          " but no region info was found in transition or in META; ignoring");
        return;
      }
      LOG.info("HBCK repair is triggering assignment of region=" +
        regionInfo.getRegionNameAsString());
      // trigger assign, node is already in OFFLINE so don't need to update ZK
      assign(regionInfo, false);
      break;
    default:
      LOG.warn("Received unexpected region state from HBCK (" +
        data.getEventType() + ")");
      break;
  }
}
// ZooKeeper events
/**
@ -1001,7 +1045,7 @@ public class AssignmentManager extends ZooKeeperListener {
public void unassign(HRegionInfo region, boolean force) {
LOG.debug("Starting unassignment of region " +
region.getRegionNameAsString() + " (offlining)");
synchronized (this.regions) {
synchronized (this.regions) {
// Check if this region is currently assigned
if (!regions.containsKey(region)) {
LOG.debug("Attempted to unassign region " +

View File

@ -23,11 +23,11 @@ import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.logging.Log;
@ -49,19 +49,21 @@ import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.hbase.zookeeper.ZKTable;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
import com.google.common.base.Joiner;
import com.google.common.collect.Lists;
/**
* Check consistency among the in-memory states of the master and the
* Check consistency among the in-memory states of the master and the
* region server(s) and the state of data in HDFS.
*/
public class HBaseFsck {
@ -75,6 +77,8 @@ public class HBaseFsck {
private TreeMap<String, HbckInfo> regionInfo = new TreeMap<String, HbckInfo>();
private TreeMap<String, TInfo> tablesInfo = new TreeMap<String, TInfo>();
private TreeSet<byte[]> disabledTables =
new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
ErrorReporter errors = new PrintingErrorReporter();
private static boolean details = false; // do we display the full report
@ -92,7 +96,7 @@ public class HBaseFsck {
* @throws MasterNotRunningException if the master is not running
* @throws ZooKeeperConnectionException if unable to connect to zookeeper
*/
public HBaseFsck(Configuration conf)
public HBaseFsck(Configuration conf)
throws MasterNotRunningException, ZooKeeperConnectionException, IOException {
this.conf = conf;
@ -105,8 +109,10 @@ public class HBaseFsck {
* Contacts the master and prints out cluster-wide information
* @throws IOException if a remote or network exception occurs
* @return 0 on success, non-zero on failure
* @throws KeeperException
* @throws InterruptedException
*/
int doWork() throws IOException {
int doWork() throws IOException, KeeperException, InterruptedException {
// print hbase server version
errors.print("Version: " + status.getHBaseVersion());
@ -114,7 +120,7 @@ public class HBaseFsck {
regionInfo.clear();
tablesInfo.clear();
emptyRegionInfoQualifiers.clear();
disabledTables.clear();
// get a list of all regions from the master. This involves
// scanning the META table
@ -152,7 +158,7 @@ public class HBaseFsck {
// From the master, get a list of all known live region servers
Collection<HServerInfo> regionServers = status.getServerInfo();
errors.print("Number of live region servers: " +
errors.print("Number of live region servers: " +
regionServers.size());
if (details) {
for (HServerInfo rsinfo: regionServers) {
@ -162,7 +168,7 @@ public class HBaseFsck {
// From the master, get a list of all dead region servers
Collection<String> deadRegionServers = status.getDeadServerNames();
errors.print("Number of dead region servers: " +
errors.print("Number of dead region servers: " +
deadRegionServers.size());
if (details) {
for (String name: deadRegionServers) {
@ -185,6 +191,9 @@ public class HBaseFsck {
}
}
// Get disabled tables from ZooKeeper
loadDisabledTables();
// Check consistency
checkConsistency();
@ -197,6 +206,31 @@ public class HBaseFsck {
return errors.summarize();
}
/**
 * Copy the names of all tables that ZooKeeper reports as disabled or
 * disabling into the local {@code disabledTables} set, stored as byte arrays.
 * Existing entries are not removed here.
 * @throws ZooKeeperConnectionException
 * @throws IOException
 * @throws KeeperException
 */
private void loadDisabledTables()
throws ZooKeeperConnectionException, IOException, KeeperException {
  final ZooKeeperWatcher watcher =
    HConnectionManager.getConnection(conf).getZooKeeperWatcher();
  final Set<String> offlineTableNames =
    ZKTable.getDisabledOrDisablingTables(watcher);
  for (String name : offlineTableNames) {
    disabledTables.add(Bytes.toBytes(name));
  }
}
/**
 * Check if the specified region's table is disabled.
 * <p>
 * This only consults the local {@code disabledTables} set; it performs no
 * remote lookup of its own.
 * @param regionInfo region whose table name is looked up
 * @return true if the region's table name is present in {@code disabledTables}
 */
private boolean isTableDisabled(HRegionInfo regionInfo) {
  final byte[] tableName = regionInfo.getTableDesc().getName();
  return disabledTables.contains(tableName);
}
/**
* Scan HDFS for all regions, recording their information into
* regionInfo
@ -280,7 +314,7 @@ public class HBaseFsck {
regionInfo.put(rootLocation.getRegionInfo().getEncodedName(), hbInfo);
return true;
}
/**
* Contacts each regionserver and fetches metadata about regions.
* @param regionServerList - the list of region servers to connect to
@ -315,7 +349,7 @@ public class HBaseFsck {
HbckInfo hbi = getOrCreateInfo(r.getEncodedName());
hbi.deployedOn.add(rsinfo.getServerAddress());
}
} catch (IOException e) { // unable to connect to the region server.
} catch (IOException e) { // unable to connect to the region server.
errors.reportError("\nRegionServer:" + rsinfo.getServerName() +
" Unable to fetch region information. " + e);
}
@ -324,8 +358,11 @@ public class HBaseFsck {
/**
* Check consistency of all regions that have been found in previous phases.
* @throws KeeperException
* @throws InterruptedException
*/
void checkConsistency() throws IOException {
void checkConsistency()
throws IOException, KeeperException, InterruptedException {
for (java.util.Map.Entry<String, HbckInfo> e: regionInfo.entrySet()) {
doConsistencyCheck(e.getKey(), e.getValue());
}
@ -333,9 +370,11 @@ public class HBaseFsck {
/**
* Check a single region for consistency and correct deployment.
* @throws KeeperException
* @throws InterruptedException
*/
void doConsistencyCheck(final String key, final HbckInfo hbi)
throws IOException {
throws IOException, KeeperException, InterruptedException {
String descriptiveName = hbi.toString();
boolean inMeta = hbi.metaEntry != null;
@ -346,7 +385,7 @@ public class HBaseFsck {
boolean deploymentMatchesMeta =
hasMetaAssignment && isDeployed && !isMultiplyDeployed &&
hbi.metaEntry.regionServer.equals(hbi.deployedOn.get(0));
boolean shouldBeDeployed = inMeta && !hbi.metaEntry.isOffline();
boolean shouldBeDeployed = inMeta && !isTableDisabled(hbi.metaEntry);
boolean recentlyModified = hbi.foundRegionDir != null &&
hbi.foundRegionDir.getModificationTime() + timelag > System.currentTimeMillis();
@ -533,8 +572,8 @@ public class HBaseFsck {
/**
* Return a list of user-space table names whose metadata have not been
* modified in the last few milliseconds specified by timelag
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
* if any of the REGIONINFO_QUALIFIER, SERVER_QUALIFIER, STARTCODE_QUALIFIER,
* SPLITA_QUALIFIER, SPLITB_QUALIFIER have not changed in the last
* milliseconds specified by timelag, then the table is a candidate to be returned.
* @param regionList - all entries found in .META
* @return tables that have not been modified recently
@ -580,8 +619,11 @@ public class HBaseFsck {
* If there are inconsistencies (i.e. zero or more than one regions
* pretend to be holding the .META.) try to fix that and report an error.
* @throws IOException from HBaseFsckRepair functions
* @throws KeeperException
* @throws InterruptedException
*/
boolean checkMetaEntries() throws IOException {
boolean checkMetaEntries()
throws IOException, KeeperException, InterruptedException {
List <HbckInfo> metaRegions = Lists.newArrayList();
for (HbckInfo value : regionInfo.values()) {
if (value.metaEntry.isMetaTable()) {
@ -709,7 +751,7 @@ public class HBaseFsck {
HServerAddress regionServer; // server hosting this region
long modTime; // timestamp of most recent modification metadata
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
public MetaEntry(HRegionInfo rinfo, HServerAddress regionServer,
byte[] startCode, long modTime) {
super(rinfo);
this.regionServer = regionServer;
@ -883,16 +925,16 @@ public class HBaseFsck {
/**
* Main program
* @param args
* @throws Exception
*/
public static void main(String [] args)
throws IOException, MasterNotRunningException {
public static void main(String [] args) throws Exception {
// create a fsck object
Configuration conf = HBaseConfiguration.create();
conf.set("fs.defaultFS", conf.get("hbase.rootdir"));
HBaseFsck fsck = new HBaseFsck(conf);
// Process command-line args.
// Process command-line args.
for (int i = 0; i < args.length; i++) {
String cmd = args[i];
if (cmd.equals("-details")) {

View File

@ -26,99 +26,83 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HServerAddress;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HConnectionManager;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.zookeeper.ZKAssign;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.zookeeper.KeeperException;
public class HBaseFsckRepair {
/**
* Fix dupe assignment by doing silent closes on each RS hosting the region
* and then force ZK unassigned node to OFFLINE to trigger assignment by
* master.
* @param conf
* @param region
* @param servers
* @throws IOException
* @throws KeeperException
* @throws InterruptedException
*/
public static void fixDupeAssignment(Configuration conf, HRegionInfo region,
List<HServerAddress> servers)
throws IOException {
throws IOException, KeeperException, InterruptedException {
HRegionInfo actualRegion = new HRegionInfo(region);
// Clear status in master and zk
clearInMaster(conf, actualRegion);
clearInZK(conf, actualRegion);
// Close region on the servers
// Close region on the servers silently
for(HServerAddress server : servers) {
closeRegion(conf, server, actualRegion);
closeRegionSilentlyAndWait(conf, server, actualRegion);
}
// It's unassigned so fix it as such
fixUnassigned(conf, actualRegion);
// Force ZK node to OFFLINE so master assigns
forceOfflineInZK(conf, actualRegion);
}
/**
* Fix unassigned by creating/transition the unassigned ZK node for this
* region to OFFLINE state with a special flag to tell the master that this
* is a forced operation by HBCK.
* @param conf
* @param region
* @throws IOException
* @throws KeeperException
*/
public static void fixUnassigned(Configuration conf, HRegionInfo region)
throws IOException {
throws IOException, KeeperException {
HRegionInfo actualRegion = new HRegionInfo(region);
// Clear status in master and zk
clearInMaster(conf, actualRegion);
clearInZK(conf, actualRegion);
// Clear assignment in META or ROOT
clearAssignment(conf, actualRegion);
// Force ZK node to OFFLINE so master assigns
forceOfflineInZK(conf, actualRegion);
}
private static void clearInMaster(Configuration conf, HRegionInfo region)
throws IOException {
System.out.println("Region being cleared in master: " + region);
HMasterInterface master = HConnectionManager.getConnection(conf).getMaster();
long masterVersion =
master.getProtocolVersion("org.apache.hadoop.hbase.ipc.HMasterInterface", 25);
System.out.println("Master protocol version: " + masterVersion);
try {
// TODO: Do we want to do it this way?
// Better way is to tell master to fix the issue itself?
// That way it can use in-memory state to determine best plan
// master.clearFromTransition(region);
} catch (Exception e) {}
/**
 * Create, or force to OFFLINE, the unassigned ZK node for the given region,
 * using the special HBCK code name as the server name.
 * @param conf configuration used to obtain the connection's ZooKeeper watcher
 * @param region region whose unassigned node is created or forced offline
 * @throws ZooKeeperConnectionException
 * @throws KeeperException
 * @throws IOException
 */
private static void forceOfflineInZK(Configuration conf, HRegionInfo region)
throws ZooKeeperConnectionException, KeeperException, IOException {
  final ZooKeeperWatcher zkw =
    HConnectionManager.getConnection(conf).getZooKeeperWatcher();
  ZKAssign.createOrForceNodeOffline(zkw, region, HConstants.HBCK_CODE_NAME);
}
private static void clearInZK(Configuration conf, HRegionInfo region)
throws IOException {
ZooKeeperWatcher zkw =
HConnectionManager.getConnection(conf).getZooKeeperWatcher();
try {
ZKAssign.deleteNodeFailSilent(zkw, region);
} catch (KeeperException e) {
throw new IOException("Unexpected ZK exception", e);
}
}
private static void closeRegion(Configuration conf, HServerAddress server,
HRegionInfo region)
throws IOException {
private static void closeRegionSilentlyAndWait(Configuration conf,
HServerAddress server, HRegionInfo region)
throws IOException, InterruptedException {
HRegionInterface rs =
HConnectionManager.getConnection(conf).getHRegionConnection(server);
rs.closeRegion(region, false);
}
private static void clearAssignment(Configuration conf,
HRegionInfo region)
throws IOException {
HTable ht = null;
if (region.isMetaTable()) {
// Clear assignment in ROOT
ht = new HTable(conf, HConstants.ROOT_TABLE_NAME);
long timeout = conf.getLong("hbase.hbck.close.timeout", 120000);
long expiration = timeout + System.currentTimeMillis();
while (System.currentTimeMillis() < expiration) {
try {
HRegionInfo rsRegion = rs.getRegionInfo(region.getRegionName());
if (rsRegion == null) throw new NotServingRegionException();
} catch (Exception e) {
return;
}
Thread.sleep(1000);
}
else {
// Clear assignment in META
ht = new HTable(conf, HConstants.META_TABLE_NAME);
}
Delete del = new Delete(region.getRegionName());
del.deleteColumns(HConstants.CATALOG_FAMILY, HConstants.SERVER_QUALIFIER);
del.deleteColumns(HConstants.CATALOG_FAMILY,
HConstants.STARTCODE_QUALIFIER);
ht.delete(del);
throw new IOException("Region " + region + " failed to close within" +
" timeout " + timeout);
}
}

View File

@ -313,4 +313,39 @@ public class ZKTable {
}
return disabledTables;
}
/**
 * Gets the set of all tables whose state in zookeeper is DISABLED.
 * @param zkw watcher used to list and read the table znodes
 * @return Set of disabled tables, empty Set if none
 * @throws KeeperException
 */
public static Set<String> getDisabledTables(ZooKeeperWatcher zkw)
throws KeeperException {
  Set<String> disabledTables = new HashSet<String>();
  List<String> children =
    ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
  // NOTE(review): listChildrenNoWatch is expected to return null when the
  // parent znode does not exist -- confirm. Guard so that case yields the
  // documented empty Set instead of a NullPointerException.
  if (children == null) {
    return disabledTables;
  }
  for (String child: children) {
    TableState state = getTableState(zkw, child);
    if (state == TableState.DISABLED) {
      disabledTables.add(child);
    }
  }
  return disabledTables;
}
/**
 * Gets the set of all tables whose state in zookeeper is either DISABLED or
 * DISABLING.
 * @param zkw watcher used to list and read the table znodes
 * @return Set of disabled or disabling tables, empty Set if none
 * @throws KeeperException
 */
public static Set<String> getDisabledOrDisablingTables(ZooKeeperWatcher zkw)
throws KeeperException {
  Set<String> disabledTables = new HashSet<String>();
  List<String> children =
    ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode);
  // NOTE(review): listChildrenNoWatch is expected to return null when the
  // parent znode does not exist -- confirm. Guard so that case yields the
  // documented empty Set instead of a NullPointerException.
  if (children == null) {
    return disabledTables;
  }
  for (String child: children) {
    TableState state = getTableState(zkw, child);
    if (state == TableState.DISABLED || state == TableState.DISABLING) {
      disabledTables.add(child);
    }
  }
  return disabledTables;
}
}

View File

@ -19,7 +19,7 @@
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import static org.junit.Assert.assertEquals;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -36,8 +36,6 @@ import org.apache.hadoop.hbase.client.Scan;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
public class TestHBaseFsck {
final Log LOG = LogFactory.getLog(getClass());
@ -53,7 +51,7 @@ public class TestHBaseFsck {
}
@Test
public void testHBaseFsck() throws IOException {
public void testHBaseFsck() throws Exception {
HBaseFsck fsck = new HBaseFsck(conf);
fsck.displayFullReport();
fsck.setTimeLag(0);
@ -71,7 +69,7 @@ public class TestHBaseFsck {
// point to a different region server
HTable meta = new HTable(conf, HTableDescriptor.META_TABLEDESC.getName());
ResultScanner scanner = meta.getScanner(new Scan());
resforloop : for (Result res : scanner) {
long startCode = Bytes.toLong(res.getValue(HConstants.CATALOG_FAMILY,
HConstants.STARTCODE_QUALIFIER));
@ -100,6 +98,7 @@ public class TestHBaseFsck {
// Fixed or not, it still reports inconsistencies
assertEquals(-1, result);
Thread.sleep(15000);
// Disabled, won't work because the region stays unassigned, see HBASE-3217
// new HTable(conf, TABLE).getScanner(new Scan());
}