HADOOP-2017 TestRegionServerAbort failure in patch build #903 and
nightly #266 git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@583309 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5233c28aaa
commit
c9f105acd5
|
@ -72,6 +72,8 @@ Trunk (unreleased changes)
|
||||||
HADOOP-2004 webapp hql formatting bugs
|
HADOOP-2004 webapp hql formatting bugs
|
||||||
HADOOP_2011 Make hbase daemon scripts take args in same order as hadoop
|
HADOOP_2011 Make hbase daemon scripts take args in same order as hadoop
|
||||||
daemon scripts
|
daemon scripts
|
||||||
|
HADOOP-2017 TestRegionServerAbort failure in patch build #903 and
|
||||||
|
nightly #266
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HADOOP-1737 Make HColumnDescriptor data publically members settable
|
HADOOP-1737 Make HColumnDescriptor data publically members settable
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.hbase.util.Sleeper;
|
import org.apache.hadoop.hbase.util.Sleeper;
|
||||||
import org.apache.hadoop.hbase.util.Threads;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Chore is a task performed on a period in hbase. The chore is run in its own
|
* Chore is a task performed on a period in hbase. The chore is run in its own
|
||||||
|
|
|
@ -205,7 +205,7 @@ HMasterRegionInterface {
|
||||||
HRegionInterface regionServer = null;
|
HRegionInterface regionServer = null;
|
||||||
long scannerId = -1L;
|
long scannerId = -1L;
|
||||||
LOG.info(Thread.currentThread().getName() + " scanning meta region " +
|
LOG.info(Thread.currentThread().getName() + " scanning meta region " +
|
||||||
region.getRegionName() + " on " + region.getServer().toString());
|
region.toString());
|
||||||
|
|
||||||
// Array to hold list of split parents found. Scan adds to list. After
|
// Array to hold list of split parents found. Scan adds to list. After
|
||||||
// scan we go check if parents can be removed.
|
// scan we go check if parents can be removed.
|
||||||
|
@ -247,7 +247,7 @@ HMasterRegionInterface {
|
||||||
}
|
}
|
||||||
numberOfRegionsFound += 1;
|
numberOfRegionsFound += 1;
|
||||||
}
|
}
|
||||||
if (rootRegion) {
|
if (this.rootRegion) {
|
||||||
numberOfMetaRegions.set(numberOfRegionsFound);
|
numberOfMetaRegions.set(numberOfRegionsFound);
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -282,7 +282,7 @@ HMasterRegionInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG.info(Thread.currentThread().getName() + " scan of meta region " +
|
LOG.info(Thread.currentThread().getName() + " scan of meta region " +
|
||||||
region.getRegionName() + " complete");
|
region.toString() + " complete");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -405,8 +405,8 @@ HMasterRegionInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void checkAssigned(final HRegionInfo info,
|
protected void checkAssigned(final HRegionInfo info,
|
||||||
final String serverName, final long startCode) throws IOException {
|
final String serverName, final long startCode)
|
||||||
|
throws IOException {
|
||||||
// Skip region - if ...
|
// Skip region - if ...
|
||||||
if(info.offLine // offline
|
if(info.offLine // offline
|
||||||
|| killedRegions.contains(info.regionName) // queued for offline
|
|| killedRegions.contains(info.regionName) // queued for offline
|
||||||
|
@ -415,7 +415,6 @@ HMasterRegionInterface {
|
||||||
assignAttempts.remove(info.regionName);
|
assignAttempts.remove(info.regionName);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
HServerInfo storedInfo = null;
|
HServerInfo storedInfo = null;
|
||||||
if (serverName.length() != 0) {
|
if (serverName.length() != 0) {
|
||||||
Map<Text, HRegionInfo> regionsToKill = killList.get(serverName);
|
Map<Text, HRegionInfo> regionsToKill = killList.get(serverName);
|
||||||
|
@ -432,14 +431,17 @@ HMasterRegionInterface {
|
||||||
storedInfo = serversToServerInfo.get(serverName);
|
storedInfo = serversToServerInfo.get(serverName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Checking " + info.regionName + " is assigned");
|
||||||
|
}
|
||||||
if (!(unassignedRegions.containsKey(info.regionName) ||
|
if (!(unassignedRegions.containsKey(info.regionName) ||
|
||||||
pendingRegions.contains(info.regionName))
|
pendingRegions.contains(info.regionName))
|
||||||
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) {
|
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) {
|
||||||
|
|
||||||
// The current assignment is no good
|
// The current assignment is no good
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Current assignment of " + info.regionName + " is no good");
|
||||||
|
}
|
||||||
// Recover the region server's log if there is one.
|
// Recover the region server's log if there is one.
|
||||||
|
|
||||||
if (serverName.length() != 0) {
|
if (serverName.length() != 0) {
|
||||||
StringBuilder dirName = new StringBuilder("log_");
|
StringBuilder dirName = new StringBuilder("log_");
|
||||||
dirName.append(serverName.replace(":", "_"));
|
dirName.append(serverName.replace(":", "_"));
|
||||||
|
@ -449,20 +451,19 @@ HMasterRegionInterface {
|
||||||
splitLogLock.lock();
|
splitLogLock.lock();
|
||||||
try {
|
try {
|
||||||
HLog.splitLog(dir, logDir, fs, conf);
|
HLog.splitLog(dir, logDir, fs, conf);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
splitLogLock.unlock();
|
splitLogLock.unlock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Split " + logDir.toString());
|
||||||
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.warn("unable to split region server log because: ", e);
|
LOG.warn("unable to split region server log because: ", e);
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now get the region assigned
|
// Now get the region assigned
|
||||||
|
|
||||||
unassignedRegions.put(info.regionName, info);
|
unassignedRegions.put(info.regionName, info);
|
||||||
assignAttempts.put(info.regionName, Long.valueOf(0L));
|
assignAttempts.put(info.regionName, Long.valueOf(0L));
|
||||||
}
|
}
|
||||||
|
@ -479,6 +480,7 @@ HMasterRegionInterface {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void scanRoot() {
|
private void scanRoot() {
|
||||||
|
boolean succeeded = false;
|
||||||
int tries = 0;
|
int tries = 0;
|
||||||
while (!closed.get() && tries < numRetries) {
|
while (!closed.get() && tries < numRetries) {
|
||||||
synchronized (rootRegionLocation) {
|
synchronized (rootRegionLocation) {
|
||||||
|
@ -503,6 +505,7 @@ HMasterRegionInterface {
|
||||||
scanRegion(new MetaRegion(rootRegionLocation.get(),
|
scanRegion(new MetaRegion(rootRegionLocation.get(),
|
||||||
HGlobals.rootRegionInfo.regionName, null));
|
HGlobals.rootRegionInfo.regionName, null));
|
||||||
}
|
}
|
||||||
|
succeeded = true;
|
||||||
break;
|
break;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
e = RemoteExceptionHandler.checkIOException(e);
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
|
@ -511,8 +514,9 @@ HMasterRegionInterface {
|
||||||
LOG.warn("Scan ROOT region", e);
|
LOG.warn("Scan ROOT region", e);
|
||||||
} else {
|
} else {
|
||||||
LOG.error("Scan ROOT region", e);
|
LOG.error("Scan ROOT region", e);
|
||||||
if (tries == numRetries - 1) {
|
if (tries == numRetries - 1) {
|
||||||
// We ran out of tries. Make sure the file system is still available
|
// We ran out of tries. Make sure the file system is still
|
||||||
|
// available
|
||||||
if (!checkFileSystem()) {
|
if (!checkFileSystem()) {
|
||||||
continue; // Avoid sleeping.
|
continue; // Avoid sleeping.
|
||||||
}
|
}
|
||||||
|
@ -524,7 +528,13 @@ HMasterRegionInterface {
|
||||||
LOG.error("Unexpected exception", e);
|
LOG.error("Unexpected exception", e);
|
||||||
}
|
}
|
||||||
sleeper.sleep();
|
sleeper.sleep();
|
||||||
}
|
}
|
||||||
|
if (!succeeded) {
|
||||||
|
// We tried numRetries times to reach root and failed. Is it gone?
|
||||||
|
// Currently we just flounder. Should we reallocate root?
|
||||||
|
// This would be catastrophic?
|
||||||
|
// unassignRootRegion();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -564,6 +574,12 @@ HMasterRegionInterface {
|
||||||
this.startKey.set(startKey);
|
this.startKey.set(startKey);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "regionname: " + this.regionName.toString() + ", startKey: <" +
|
||||||
|
this.startKey.toString() + ">, server: " + this.server.toString() + "}";
|
||||||
|
}
|
||||||
|
|
||||||
/** @return the regionName */
|
/** @return the regionName */
|
||||||
public Text getRegionName() {
|
public Text getRegionName() {
|
||||||
|
@ -599,23 +615,28 @@ HMasterRegionInterface {
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
public int compareTo(Object o) {
|
public int compareTo(Object o) {
|
||||||
MetaRegion other = (MetaRegion)o;
|
MetaRegion other = (MetaRegion)o;
|
||||||
|
|
||||||
int result = this.regionName.compareTo(other.getRegionName());
|
int result = this.regionName.compareTo(other.getRegionName());
|
||||||
if(result == 0) {
|
if(result == 0) {
|
||||||
result = this.startKey.compareTo(other.getStartKey());
|
result = this.startKey.compareTo(other.getStartKey());
|
||||||
|
if (result == 0) {
|
||||||
|
// Might be on different host?
|
||||||
|
result = this.server.compareTo(other.server);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Set by root scanner to indicate the number of meta regions */
|
/** Set by root scanner to indicate the number of meta regions */
|
||||||
AtomicInteger numberOfMetaRegions;
|
final AtomicInteger numberOfMetaRegions = new AtomicInteger();
|
||||||
|
|
||||||
/** Work for the meta scanner is queued up here */
|
/** Work for the meta scanner is queued up here */
|
||||||
BlockingQueue<MetaRegion> metaRegionsToScan;
|
final BlockingQueue<MetaRegion> metaRegionsToScan =
|
||||||
|
new LinkedBlockingQueue<MetaRegion>();
|
||||||
|
|
||||||
/** These are the online meta regions */
|
/** These are the online meta regions */
|
||||||
SortedMap<Text, MetaRegion> onlineMetaRegions;
|
final SortedMap<Text, MetaRegion> onlineMetaRegions =
|
||||||
|
Collections.synchronizedSortedMap(new TreeMap<Text, MetaRegion>());
|
||||||
|
|
||||||
/** Set by meta scanner after initial scan */
|
/** Set by meta scanner after initial scan */
|
||||||
volatile boolean initialMetaScanComplete;
|
volatile boolean initialMetaScanComplete;
|
||||||
|
@ -657,15 +678,24 @@ HMasterRegionInterface {
|
||||||
e = RemoteExceptionHandler.checkIOException(e);
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
tries += 1;
|
tries += 1;
|
||||||
if (tries == 1) {
|
if (tries == 1) {
|
||||||
LOG.warn("Scan one META region", e);
|
LOG.warn("Scan one META region: " + region.toString(), e);
|
||||||
} else {
|
} else {
|
||||||
LOG.error("Scan one META region", e);
|
LOG.error("Scan one META region: " + region.toString(), e);
|
||||||
if (tries == numRetries - 1) {
|
}
|
||||||
// We ran out of tries. Make sure the file system is still
|
// The region may have moved (TestRegionServerAbort, etc.). If
|
||||||
// available
|
// so, either it won't be in the onlineMetaRegions list or its host
|
||||||
if (!checkFileSystem()) {
|
// address has changed and the containsValue will fail. If not
|
||||||
continue; // avoid sleeping
|
// found, best thing to do here is probably break.
|
||||||
}
|
if (!onlineMetaRegions.containsValue(region)) {
|
||||||
|
LOG.debug("Scanned region is no longer in map of online " +
|
||||||
|
"regions or its value has changed");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (tries == numRetries - 1) {
|
||||||
|
// We ran out of tries. Make sure the file system is still
|
||||||
|
// available
|
||||||
|
if (!checkFileSystem()) {
|
||||||
|
continue; // avoid sleeping
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -673,6 +703,7 @@ HMasterRegionInterface {
|
||||||
// at least log it rather than go out silently.
|
// at least log it rather than go out silently.
|
||||||
LOG.error("Unexpected exception", e);
|
LOG.error("Unexpected exception", e);
|
||||||
}
|
}
|
||||||
|
// Sleep before going around again.
|
||||||
sleeper.sleep();
|
sleeper.sleep();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -710,9 +741,8 @@ HMasterRegionInterface {
|
||||||
* regions. This wakes up any threads that were waiting for this to happen.
|
* regions. This wakes up any threads that were waiting for this to happen.
|
||||||
*/
|
*/
|
||||||
private synchronized boolean metaRegionsScanned() {
|
private synchronized boolean metaRegionsScanned() {
|
||||||
if (!rootScanned ||
|
if (!rootScanned ||
|
||||||
numberOfMetaRegions.get() != onlineMetaRegions.size()) {
|
numberOfMetaRegions.get() != onlineMetaRegions.size()) {
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
LOG.info("all meta regions scanned");
|
LOG.info("all meta regions scanned");
|
||||||
|
@ -754,14 +784,16 @@ HMasterRegionInterface {
|
||||||
* <p>Items are removed from this list when a region server reports in that
|
* <p>Items are removed from this list when a region server reports in that
|
||||||
* the region has been deployed.
|
* the region has been deployed.
|
||||||
*/
|
*/
|
||||||
SortedMap<Text, HRegionInfo> unassignedRegions;
|
final SortedMap<Text, HRegionInfo> unassignedRegions =
|
||||||
|
Collections.synchronizedSortedMap(new TreeMap<Text, HRegionInfo>());
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'assignAttempts' table maps from regions to a timestamp that indicates
|
* The 'assignAttempts' table maps from regions to a timestamp that indicates
|
||||||
* the last time we *tried* to assign the region to a RegionServer. If the
|
* the last time we *tried* to assign the region to a RegionServer. If the
|
||||||
* timestamp is out of date, then we can try to reassign it.
|
* timestamp is out of date, then we can try to reassign it.
|
||||||
*/
|
*/
|
||||||
Map<Text, Long> assignAttempts;
|
final Map<Text, Long> assignAttempts =
|
||||||
|
Collections.synchronizedMap(new HashMap<Text, Long>());
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Regions that have been assigned, and the server has reported that it has
|
* Regions that have been assigned, and the server has reported that it has
|
||||||
|
@ -897,28 +929,11 @@ HMasterRegionInterface {
|
||||||
this.rootScannerThread = new RootScanner();
|
this.rootScannerThread = new RootScanner();
|
||||||
|
|
||||||
// Scans the meta table
|
// Scans the meta table
|
||||||
|
|
||||||
this.numberOfMetaRegions = new AtomicInteger();
|
|
||||||
this.metaRegionsToScan = new LinkedBlockingQueue<MetaRegion>();
|
|
||||||
|
|
||||||
this.onlineMetaRegions =
|
|
||||||
Collections.synchronizedSortedMap(new TreeMap<Text, MetaRegion>());
|
|
||||||
|
|
||||||
this.initialMetaScanComplete = false;
|
this.initialMetaScanComplete = false;
|
||||||
|
|
||||||
this.metaScannerThread = new MetaScanner();
|
this.metaScannerThread = new MetaScanner();
|
||||||
|
|
||||||
this.unassignedRegions =
|
unassignRootRegion();
|
||||||
Collections.synchronizedSortedMap(new TreeMap<Text, HRegionInfo>());
|
|
||||||
|
|
||||||
this.unassignedRegions.put(HGlobals.rootRegionInfo.regionName,
|
|
||||||
HGlobals.rootRegionInfo);
|
|
||||||
|
|
||||||
this.assignAttempts =
|
|
||||||
Collections.synchronizedMap(new HashMap<Text, Long>());
|
|
||||||
|
|
||||||
this.assignAttempts.put(HGlobals.rootRegionInfo.regionName,
|
|
||||||
Long.valueOf(0L));
|
|
||||||
|
|
||||||
this.pendingRegions =
|
this.pendingRegions =
|
||||||
Collections.synchronizedSet(new HashSet<Text>());
|
Collections.synchronizedSet(new HashSet<Text>());
|
||||||
|
@ -943,6 +958,22 @@ HMasterRegionInterface {
|
||||||
this.closed.set(false);
|
this.closed.set(false);
|
||||||
LOG.info("HMaster initialized on " + this.address.toString());
|
LOG.info("HMaster initialized on " + this.address.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unassign the root region.
|
||||||
|
* This method would be used in case where root region server had died
|
||||||
|
* without reporting in. Currently, we just flounder and never recover. We
|
||||||
|
* could 'notice' dead region server in root scanner -- if we failed access
|
||||||
|
* multiple times -- but reassigning root is catastrophic.
|
||||||
|
*/
|
||||||
|
void unassignRootRegion() {
|
||||||
|
this.rootRegionLocation.set(null);
|
||||||
|
this.unassignedRegions.put(HGlobals.rootRegionInfo.regionName,
|
||||||
|
HGlobals.rootRegionInfo);
|
||||||
|
this.assignAttempts.put(HGlobals.rootRegionInfo.regionName,
|
||||||
|
Long.valueOf(0L));
|
||||||
|
// TODO: If the old root region server had a log, it needs splitting.
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks to see if the file system is still accessible.
|
* Checks to see if the file system is still accessible.
|
||||||
|
@ -1271,17 +1302,13 @@ HMasterRegionInterface {
|
||||||
|
|
||||||
LOG.info("Region server " + serverName +
|
LOG.info("Region server " + serverName +
|
||||||
": MSG_REPORT_EXITING -- lease cancelled");
|
": MSG_REPORT_EXITING -- lease cancelled");
|
||||||
|
|
||||||
// Get all the regions the server was serving reassigned
|
// Get all the regions the server was serving reassigned
|
||||||
// (if we are not shutting down).
|
// (if we are not shutting down).
|
||||||
|
|
||||||
if (!closed.get()) {
|
if (!closed.get()) {
|
||||||
for (int i = 1; i < msgs.length; i++) {
|
for (int i = 1; i < msgs.length; i++) {
|
||||||
HRegionInfo info = msgs[i].getRegionInfo();
|
HRegionInfo info = msgs[i].getRegionInfo();
|
||||||
|
|
||||||
if (info.tableDesc.getName().equals(ROOT_TABLE_NAME)) {
|
if (info.tableDesc.getName().equals(ROOT_TABLE_NAME)) {
|
||||||
rootRegionLocation.set(null);
|
rootRegionLocation.set(null);
|
||||||
|
|
||||||
} else if (info.tableDesc.getName().equals(META_TABLE_NAME)) {
|
} else if (info.tableDesc.getName().equals(META_TABLE_NAME)) {
|
||||||
onlineMetaRegions.remove(info.getStartKey());
|
onlineMetaRegions.remove(info.getStartKey());
|
||||||
}
|
}
|
||||||
|
@ -1448,14 +1475,11 @@ HMasterRegionInterface {
|
||||||
} else {
|
} else {
|
||||||
LOG.info(info.getServerAddress().toString() + " serving " +
|
LOG.info(info.getServerAddress().toString() + " serving " +
|
||||||
region.regionName);
|
region.regionName);
|
||||||
|
|
||||||
// Remove from unassigned list so we don't assign it to someone else
|
// Remove from unassigned list so we don't assign it to someone else
|
||||||
this.unassignedRegions.remove(region.regionName);
|
this.unassignedRegions.remove(region.regionName);
|
||||||
this.assignAttempts.remove(region.regionName);
|
this.assignAttempts.remove(region.regionName);
|
||||||
|
|
||||||
if (region.regionName.compareTo(
|
if (region.regionName.compareTo(
|
||||||
HGlobals.rootRegionInfo.regionName) == 0) {
|
HGlobals.rootRegionInfo.regionName) == 0) {
|
||||||
|
|
||||||
// Store the Root Region location (in memory)
|
// Store the Root Region location (in memory)
|
||||||
synchronized (rootRegionLocation) {
|
synchronized (rootRegionLocation) {
|
||||||
this.rootRegionLocation.
|
this.rootRegionLocation.
|
||||||
|
@ -2226,30 +2250,22 @@ HMasterRegionInterface {
|
||||||
* root region which is handled specially.
|
* root region which is handled specially.
|
||||||
*/
|
*/
|
||||||
private class PendingOpenReport extends PendingOperation {
|
private class PendingOpenReport extends PendingOperation {
|
||||||
private boolean rootRegion;
|
private final boolean rootRegion;
|
||||||
private HRegionInfo region;
|
private final HRegionInfo region;
|
||||||
private HServerAddress serverAddress;
|
private final HServerAddress serverAddress;
|
||||||
private byte [] startCode;
|
private final byte [] startCode;
|
||||||
|
|
||||||
PendingOpenReport(HServerInfo info, HRegionInfo region)
|
PendingOpenReport(HServerInfo info, HRegionInfo region)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (region.tableDesc.getName().equals(META_TABLE_NAME)) {
|
// If true, the region which just came on-line is a META region.
|
||||||
// The region which just came on-line is a META region.
|
// We need to look in the ROOT region for its information. Otherwise,
|
||||||
// We need to look in the ROOT region for its information.
|
// it's just an ordinary region. Look for it in the META table.
|
||||||
|
this.rootRegion = region.tableDesc.getName().equals(META_TABLE_NAME);
|
||||||
this.rootRegion = true;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
// Just an ordinary region. Look for it in the META table.
|
|
||||||
|
|
||||||
this.rootRegion = false;
|
|
||||||
}
|
|
||||||
this.region = region;
|
this.region = region;
|
||||||
this.serverAddress = info.getServerAddress();
|
this.serverAddress = info.getServerAddress();
|
||||||
this.startCode = Writables.longToBytes(info.getStartCode());
|
this.startCode = Writables.longToBytes(info.getStartCode());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "PendingOpenOperation from " + serverAddress.toString();
|
return "PendingOpenOperation from " + serverAddress.toString();
|
||||||
|
@ -2261,15 +2277,15 @@ HMasterRegionInterface {
|
||||||
if (closed.get()) {
|
if (closed.get()) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
LOG.info(region.getRegionName() + " open on " +
|
LOG.info(region.toString() + " open on " +
|
||||||
this.serverAddress.toString());
|
this.serverAddress.toString());
|
||||||
|
|
||||||
// Register the newly-available Region's location.
|
// Register the newly-available Region's location.
|
||||||
Text metaRegionName;
|
Text metaRegionName;
|
||||||
HRegionInterface server;
|
HRegionInterface server;
|
||||||
if (rootRegion) {
|
if (this.rootRegion) {
|
||||||
if (rootRegionLocation.get() == null || !rootScanned) {
|
if (rootRegionLocation.get() == null || !rootScanned) {
|
||||||
// We can't proceed until the root region is online and has been scanned
|
// We can't proceed until root region is online and scanned
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("root region: " +
|
LOG.debug("root region: " +
|
||||||
((rootRegionLocation.get() != null)?
|
((rootRegionLocation.get() != null)?
|
||||||
|
@ -2283,12 +2299,10 @@ HMasterRegionInterface {
|
||||||
} else {
|
} else {
|
||||||
if (!rootScanned ||
|
if (!rootScanned ||
|
||||||
numberOfMetaRegions.get() != onlineMetaRegions.size()) {
|
numberOfMetaRegions.get() != onlineMetaRegions.size()) {
|
||||||
|
|
||||||
// We can't proceed because not all of the meta regions are online.
|
// We can't proceed because not all of the meta regions are online.
|
||||||
// We can't block either because that would prevent the meta region
|
// We can't block either because that would prevent the meta region
|
||||||
// online message from being processed. So return false to have this
|
// online message from being processed. So return false to have this
|
||||||
// operation requeued.
|
// operation requeued.
|
||||||
|
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("Requeuing open because rootScanned: " +
|
LOG.debug("Requeuing open because rootScanned: " +
|
||||||
rootScanned + ", numberOfMetaRegions: " +
|
rootScanned + ", numberOfMetaRegions: " +
|
||||||
|
@ -2298,21 +2312,18 @@ HMasterRegionInterface {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
MetaRegion r = null;
|
MetaRegion r = onlineMetaRegions.containsKey(region.getRegionName())?
|
||||||
if (onlineMetaRegions.containsKey(region.getRegionName())) {
|
onlineMetaRegions.get(region.getRegionName()):
|
||||||
r = onlineMetaRegions.get(region.getRegionName());
|
onlineMetaRegions.get(onlineMetaRegions.
|
||||||
} else {
|
headMap(region.getRegionName()).lastKey());
|
||||||
r = onlineMetaRegions.get(onlineMetaRegions.headMap(
|
|
||||||
region.getRegionName()).lastKey());
|
|
||||||
}
|
|
||||||
metaRegionName = r.getRegionName();
|
metaRegionName = r.getRegionName();
|
||||||
server = connection.getHRegionConnection(r.getServer());
|
server = connection.getHRegionConnection(r.getServer());
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("updating row " + region.getRegionName() + " in table " +
|
LOG.info("updating row " + region.getRegionName() + " in table " +
|
||||||
metaRegionName + " with startcode " +
|
metaRegionName + " with startcode " +
|
||||||
Writables.bytesToLong(this.startCode) + " and server "+
|
Writables.bytesToLong(this.startCode) + " and server "+
|
||||||
serverAddress.toString());
|
serverAddress.toString());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BatchUpdate b = new BatchUpdate(rand.nextLong());
|
BatchUpdate b = new BatchUpdate(rand.nextLong());
|
||||||
long lockid = b.startUpdate(region.getRegionName());
|
long lockid = b.startUpdate(region.getRegionName());
|
||||||
|
@ -2320,34 +2331,28 @@ HMasterRegionInterface {
|
||||||
Writables.stringToBytes(serverAddress.toString()));
|
Writables.stringToBytes(serverAddress.toString()));
|
||||||
b.put(lockid, COL_STARTCODE, startCode);
|
b.put(lockid, COL_STARTCODE, startCode);
|
||||||
server.batchUpdate(metaRegionName, System.currentTimeMillis(), b);
|
server.batchUpdate(metaRegionName, System.currentTimeMillis(), b);
|
||||||
|
|
||||||
if (region.tableDesc.getName().equals(META_TABLE_NAME)) {
|
if (region.tableDesc.getName().equals(META_TABLE_NAME)) {
|
||||||
// It's a meta region.
|
// It's a meta region.
|
||||||
|
MetaRegion m = new MetaRegion(this.serverAddress,
|
||||||
MetaRegion m =
|
this.region.regionName, this.region.startKey);
|
||||||
new MetaRegion(serverAddress, region.regionName, region.startKey);
|
|
||||||
|
|
||||||
if (!initialMetaScanComplete) {
|
if (!initialMetaScanComplete) {
|
||||||
// Put it on the queue to be scanned for the first time.
|
// Put it on the queue to be scanned for the first time.
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
LOG.debug("Adding " + m.toString() + " to regions to scan");
|
||||||
metaRegionsToScan.put(m);
|
metaRegionsToScan.put(m);
|
||||||
|
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new RuntimeException(
|
throw new RuntimeException(
|
||||||
"Putting into metaRegionsToScan was interrupted.", e);
|
"Putting into metaRegionsToScan was interrupted.", e);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Add it to the online meta regions
|
// Add it to the online meta regions
|
||||||
|
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
|
||||||
onlineMetaRegions.put(region.startKey, m);
|
onlineMetaRegions.put(this.region.startKey, m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// If updated successfully, remove from pending list.
|
// If updated successfully, remove from pending list.
|
||||||
|
|
||||||
pendingRegions.remove(region.getRegionName());
|
pendingRegions.remove(region.getRegionName());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
if (tries == numRetries - 1) {
|
if (tries == numRetries - 1) {
|
||||||
throw RemoteExceptionHandler.checkIOException(e);
|
throw RemoteExceptionHandler.checkIOException(e);
|
||||||
|
|
|
@ -68,41 +68,12 @@ public class DFSAbort extends HBaseClusterTestCase {
|
||||||
cluster.getDFSCluster().shutdown();
|
cluster.getDFSCluster().shutdown();
|
||||||
// Now wait for Mini HBase Cluster to shut down
|
// Now wait for Mini HBase Cluster to shut down
|
||||||
// cluster.join();
|
// cluster.join();
|
||||||
join();
|
threadDumpingJoin();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
e.printStackTrace();
|
e.printStackTrace();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void join() {
|
|
||||||
if (this.cluster.regionThreads != null) {
|
|
||||||
synchronized(this.cluster.regionThreads) {
|
|
||||||
for(Thread t: this.cluster.regionThreads) {
|
|
||||||
join(t);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
join(this.cluster.getMasterThread());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void join(final Thread t) {
|
|
||||||
if (t == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (int i = 0; t.isAlive(); i++) {
|
|
||||||
try {
|
|
||||||
Thread.sleep(1000);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
LOG.info("Continuing...", e);
|
|
||||||
}
|
|
||||||
if (i != 0 && i % 30 == 0) {
|
|
||||||
ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
|
|
||||||
"Automatic Stack Trace every 30 seconds waiting on " +
|
|
||||||
t.getName());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param args unused
|
* @param args unused
|
||||||
|
|
|
@ -19,11 +19,19 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.hbase;
|
package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.util.ReflectionUtils;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for HBase cluster junit tests. Spins up cluster on
|
* Abstract base class for HBase cluster junit tests. Spins up cluster on
|
||||||
* {@link #setUp()} and takes it down again in {@link #tearDown()}.
|
* {@link #setUp()} and takes it down again in {@link #tearDown()}.
|
||||||
*/
|
*/
|
||||||
public abstract class HBaseClusterTestCase extends HBaseTestCase {
|
public abstract class HBaseClusterTestCase extends HBaseTestCase {
|
||||||
|
private static final Log LOG =
|
||||||
|
LogFactory.getLog(HBaseClusterTestCase.class.getName());
|
||||||
protected MiniHBaseCluster cluster;
|
protected MiniHBaseCluster cluster;
|
||||||
final boolean miniHdfs;
|
final boolean miniHdfs;
|
||||||
int regionServers;
|
int regionServers;
|
||||||
|
@ -69,7 +77,6 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
|
||||||
this.regionServers = 1;
|
this.regionServers = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
@Override
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
@ -77,7 +84,6 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
|
||||||
new MiniHBaseCluster(this.conf, this.regionServers, this.miniHdfs);
|
new MiniHBaseCluster(this.conf, this.regionServers, this.miniHdfs);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
|
||||||
@Override
|
@Override
|
||||||
protected void tearDown() throws Exception {
|
protected void tearDown() throws Exception {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
|
@ -86,4 +92,41 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
|
||||||
}
|
}
|
||||||
HConnectionManager.deleteConnection(conf);
|
HConnectionManager.deleteConnection(conf);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use this utility method when debugging why cluster won't go down. On a
|
||||||
|
* period it throws a thread dump. Method ends when all cluster
|
||||||
|
* regionservers and master threads are no longer alive.
|
||||||
|
*/
|
||||||
|
public void threadDumpingJoin() {
|
||||||
|
if (this.cluster.regionThreads != null) {
|
||||||
|
synchronized(this.cluster.regionThreads) {
|
||||||
|
for(Thread t: this.cluster.regionThreads) {
|
||||||
|
threadDumpingJoin(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
threadDumpingJoin(this.cluster.getMasterThread());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void threadDumpingJoin(final Thread t) {
|
||||||
|
if (t == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
long startTime = System.currentTimeMillis();
|
||||||
|
while (t.isAlive()) {
|
||||||
|
try {
|
||||||
|
Thread.sleep(1000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.info("Continuing...", e);
|
||||||
|
}
|
||||||
|
if (System.currentTimeMillis() - startTime > 60000) {
|
||||||
|
startTime = System.currentTimeMillis();
|
||||||
|
ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
|
||||||
|
"Automatic Stack Trace every 60 seconds waiting on " +
|
||||||
|
t.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,11 +21,8 @@ package org.apache.hadoop.hbase;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
|
||||||
import org.apache.hadoop.dfs.MiniDFSCluster;
|
import org.apache.hadoop.dfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -38,22 +35,17 @@ import org.apache.hadoop.io.WritableComparable;
|
||||||
/**
|
/**
|
||||||
* Test HStoreFile
|
* Test HStoreFile
|
||||||
*/
|
*/
|
||||||
public class TestHStoreFile extends TestCase {
|
public class TestHStoreFile extends HBaseTestCase {
|
||||||
static final Log LOG = LogFactory.getLog(TestHStoreFile.class);
|
static final Log LOG = LogFactory.getLog(TestHStoreFile.class);
|
||||||
private static String DIR = "/";
|
private static String DIR = "/";
|
||||||
private static final char FIRST_CHAR = 'a';
|
|
||||||
private static final char LAST_CHAR = 'z';
|
|
||||||
private MiniDFSCluster cluster;
|
private MiniDFSCluster cluster;
|
||||||
private FileSystem fs;
|
private FileSystem fs;
|
||||||
private Configuration conf;
|
|
||||||
private Path dir = null;
|
private Path dir = null;
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
@Override
|
@Override
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
this.conf = new HBaseConfiguration();
|
|
||||||
this.cluster = null;
|
|
||||||
this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null);
|
this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null);
|
||||||
this.fs = cluster.getFileSystem();
|
this.fs = cluster.getFileSystem();
|
||||||
this.dir = new Path(DIR, getName());
|
this.dir = new Path(DIR, getName());
|
||||||
|
|
|
@ -30,15 +30,15 @@ import org.apache.hadoop.io.Text;
|
||||||
* Tests region server failover when a region server exits.
|
* Tests region server failover when a region server exits.
|
||||||
*/
|
*/
|
||||||
public class TestRegionServerAbort extends HBaseClusterTestCase {
|
public class TestRegionServerAbort extends HBaseClusterTestCase {
|
||||||
private final Log LOG = LogFactory.getLog(this.getClass().getName());
|
final Log LOG = LogFactory.getLog(this.getClass().getName());
|
||||||
private HTable table;
|
HTable table;
|
||||||
|
|
||||||
/** constructor */
|
/** constructor */
|
||||||
public TestRegionServerAbort() {
|
public TestRegionServerAbort() {
|
||||||
super(2);
|
super(2);
|
||||||
conf.setInt("ipc.client.timeout", 5000); // reduce client timeout
|
conf.setInt("ipc.client.timeout", 10000); // reduce client timeout
|
||||||
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
|
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
|
||||||
conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries
|
conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -50,14 +50,14 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
|
||||||
// Put something into the meta table.
|
// Put something into the meta table.
|
||||||
String tableName = getName();
|
final String tableName = getName();
|
||||||
HTableDescriptor desc = new HTableDescriptor(tableName);
|
HTableDescriptor desc = new HTableDescriptor(tableName);
|
||||||
desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY.toString()));
|
desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY.toString()));
|
||||||
HBaseAdmin admin = new HBaseAdmin(conf);
|
HBaseAdmin admin = new HBaseAdmin(conf);
|
||||||
admin.createTable(desc);
|
admin.createTable(desc);
|
||||||
// put some values in the table
|
// put some values in the table
|
||||||
this.table = new HTable(conf, new Text(tableName));
|
this.table = new HTable(conf, new Text(tableName));
|
||||||
Text row = new Text("row1");
|
final Text row = new Text("row1");
|
||||||
long lockid = table.startUpdate(row);
|
long lockid = table.startUpdate(row);
|
||||||
table.put(lockid, HConstants.COLUMN_FAMILY,
|
table.put(lockid, HConstants.COLUMN_FAMILY,
|
||||||
tableName.getBytes(HConstants.UTF8_ENCODING));
|
tableName.getBytes(HConstants.UTF8_ENCODING));
|
||||||
|
@ -68,29 +68,45 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
|
||||||
// Now shutdown the region server and wait for it to go down.
|
// Now shutdown the region server and wait for it to go down.
|
||||||
this.cluster.abortRegionServer(0);
|
this.cluster.abortRegionServer(0);
|
||||||
LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown");
|
LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown");
|
||||||
HScannerInterface scanner = null;
|
// Run verification in a thread so I can concurrently run a thread-dumper
|
||||||
try {
|
// while we're waiting (because in this test sometimes the meta scanner
|
||||||
// Verify that the client can find the data after the region has moved
|
// looks to be stuck).
|
||||||
// to a different server
|
Runnable runnable = new Runnable() {
|
||||||
scanner =
|
public void run() {
|
||||||
table.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text());
|
HScannerInterface scanner = null;
|
||||||
LOG.info("Obtained scanner " + scanner);
|
try {
|
||||||
HStoreKey key = new HStoreKey();
|
// Verify that the client can find the data after the region has moved
|
||||||
TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
|
// to a different server
|
||||||
while (scanner.next(key, results)) {
|
scanner =
|
||||||
assertTrue(key.getRow().equals(row));
|
table.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text());
|
||||||
assertEquals(1, results.size());
|
LOG.info("Obtained scanner " + scanner);
|
||||||
byte[] bytes = results.get(HConstants.COLUMN_FAMILY);
|
HStoreKey key = new HStoreKey();
|
||||||
assertNotNull(bytes);
|
TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
|
||||||
assertTrue(tableName.equals(new String(bytes,
|
while (scanner.next(key, results)) {
|
||||||
HConstants.UTF8_ENCODING)));
|
assertTrue(key.getRow().equals(row));
|
||||||
|
assertEquals(1, results.size());
|
||||||
|
byte[] bytes = results.get(HConstants.COLUMN_FAMILY);
|
||||||
|
assertNotNull(bytes);
|
||||||
|
assertTrue(tableName.equals(new String(bytes,
|
||||||
|
HConstants.UTF8_ENCODING)));
|
||||||
|
}
|
||||||
|
LOG.info("Success!");
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} finally {
|
||||||
|
if (scanner != null) {
|
||||||
|
LOG.info("Closing scanner " + scanner);
|
||||||
|
try {
|
||||||
|
scanner.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
LOG.info("Success!");
|
};
|
||||||
} finally {
|
Thread t = new Thread(runnable);
|
||||||
if (scanner != null) {
|
t.start();
|
||||||
LOG.info("Closing scanner " + scanner);
|
threadDumpingJoin(t);
|
||||||
scanner.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue