HADOOP-2017 TestRegionServerAbort failure in patch build #903 and

nightly #266


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@583309 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2007-10-09 21:48:11 +00:00
parent 5233c28aaa
commit c9f105acd5
7 changed files with 200 additions and 172 deletions

View File

@ -72,6 +72,8 @@ Trunk (unreleased changes)
HADOOP-2004 webapp hql formatting bugs HADOOP-2004 webapp hql formatting bugs
HADOOP_2011 Make hbase daemon scripts take args in same order as hadoop HADOOP_2011 Make hbase daemon scripts take args in same order as hadoop
daemon scripts daemon scripts
HADOOP-2017 TestRegionServerAbort failure in patch build #903 and
nightly #266
IMPROVEMENTS IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data publically members settable HADOOP-1737 Make HColumnDescriptor data publically members settable

View File

@ -24,7 +24,6 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.util.Sleeper; import org.apache.hadoop.hbase.util.Sleeper;
import org.apache.hadoop.hbase.util.Threads;
/** /**
* Chore is a task performed on a period in hbase. The chore is run in its own * Chore is a task performed on a period in hbase. The chore is run in its own

View File

@ -205,7 +205,7 @@ HMasterRegionInterface {
HRegionInterface regionServer = null; HRegionInterface regionServer = null;
long scannerId = -1L; long scannerId = -1L;
LOG.info(Thread.currentThread().getName() + " scanning meta region " + LOG.info(Thread.currentThread().getName() + " scanning meta region " +
region.getRegionName() + " on " + region.getServer().toString()); region.toString());
// Array to hold list of split parents found. Scan adds to list. After // Array to hold list of split parents found. Scan adds to list. After
// scan we go check if parents can be removed. // scan we go check if parents can be removed.
@ -247,7 +247,7 @@ HMasterRegionInterface {
} }
numberOfRegionsFound += 1; numberOfRegionsFound += 1;
} }
if (rootRegion) { if (this.rootRegion) {
numberOfMetaRegions.set(numberOfRegionsFound); numberOfMetaRegions.set(numberOfRegionsFound);
} }
} catch (IOException e) { } catch (IOException e) {
@ -282,7 +282,7 @@ HMasterRegionInterface {
} }
} }
LOG.info(Thread.currentThread().getName() + " scan of meta region " + LOG.info(Thread.currentThread().getName() + " scan of meta region " +
region.getRegionName() + " complete"); region.toString() + " complete");
} }
/* /*
@ -405,8 +405,8 @@ HMasterRegionInterface {
} }
protected void checkAssigned(final HRegionInfo info, protected void checkAssigned(final HRegionInfo info,
final String serverName, final long startCode) throws IOException { final String serverName, final long startCode)
throws IOException {
// Skip region - if ... // Skip region - if ...
if(info.offLine // offline if(info.offLine // offline
|| killedRegions.contains(info.regionName) // queued for offline || killedRegions.contains(info.regionName) // queued for offline
@ -415,7 +415,6 @@ HMasterRegionInterface {
assignAttempts.remove(info.regionName); assignAttempts.remove(info.regionName);
return; return;
} }
HServerInfo storedInfo = null; HServerInfo storedInfo = null;
if (serverName.length() != 0) { if (serverName.length() != 0) {
Map<Text, HRegionInfo> regionsToKill = killList.get(serverName); Map<Text, HRegionInfo> regionsToKill = killList.get(serverName);
@ -432,14 +431,17 @@ HMasterRegionInterface {
storedInfo = serversToServerInfo.get(serverName); storedInfo = serversToServerInfo.get(serverName);
} }
} }
if (LOG.isDebugEnabled()) {
LOG.debug("Checking " + info.regionName + " is assigned");
}
if (!(unassignedRegions.containsKey(info.regionName) || if (!(unassignedRegions.containsKey(info.regionName) ||
pendingRegions.contains(info.regionName)) pendingRegions.contains(info.regionName))
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) { && (storedInfo == null || storedInfo.getStartCode() != startCode)) {
// The current assignment is no good // The current assignment is no good
if (LOG.isDebugEnabled()) {
LOG.debug("Current assignment of " + info.regionName + " is no good");
}
// Recover the region server's log if there is one. // Recover the region server's log if there is one.
if (serverName.length() != 0) { if (serverName.length() != 0) {
StringBuilder dirName = new StringBuilder("log_"); StringBuilder dirName = new StringBuilder("log_");
dirName.append(serverName.replace(":", "_")); dirName.append(serverName.replace(":", "_"));
@ -449,20 +451,19 @@ HMasterRegionInterface {
splitLogLock.lock(); splitLogLock.lock();
try { try {
HLog.splitLog(dir, logDir, fs, conf); HLog.splitLog(dir, logDir, fs, conf);
} finally { } finally {
splitLogLock.unlock(); splitLogLock.unlock();
} }
} }
if (LOG.isDebugEnabled()) {
LOG.debug("Split " + logDir.toString());
}
} catch (IOException e) { } catch (IOException e) {
LOG.warn("unable to split region server log because: ", e); LOG.warn("unable to split region server log because: ", e);
throw e; throw e;
} }
} }
// Now get the region assigned // Now get the region assigned
unassignedRegions.put(info.regionName, info); unassignedRegions.put(info.regionName, info);
assignAttempts.put(info.regionName, Long.valueOf(0L)); assignAttempts.put(info.regionName, Long.valueOf(0L));
} }
@ -479,6 +480,7 @@ HMasterRegionInterface {
} }
private void scanRoot() { private void scanRoot() {
boolean succeeded = false;
int tries = 0; int tries = 0;
while (!closed.get() && tries < numRetries) { while (!closed.get() && tries < numRetries) {
synchronized (rootRegionLocation) { synchronized (rootRegionLocation) {
@ -503,6 +505,7 @@ HMasterRegionInterface {
scanRegion(new MetaRegion(rootRegionLocation.get(), scanRegion(new MetaRegion(rootRegionLocation.get(),
HGlobals.rootRegionInfo.regionName, null)); HGlobals.rootRegionInfo.regionName, null));
} }
succeeded = true;
break; break;
} catch (IOException e) { } catch (IOException e) {
e = RemoteExceptionHandler.checkIOException(e); e = RemoteExceptionHandler.checkIOException(e);
@ -511,8 +514,9 @@ HMasterRegionInterface {
LOG.warn("Scan ROOT region", e); LOG.warn("Scan ROOT region", e);
} else { } else {
LOG.error("Scan ROOT region", e); LOG.error("Scan ROOT region", e);
if (tries == numRetries - 1) { if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available // We ran out of tries. Make sure the file system is still
// available
if (!checkFileSystem()) { if (!checkFileSystem()) {
continue; // Avoid sleeping. continue; // Avoid sleeping.
} }
@ -524,7 +528,13 @@ HMasterRegionInterface {
LOG.error("Unexpected exception", e); LOG.error("Unexpected exception", e);
} }
sleeper.sleep(); sleeper.sleep();
} }
if (!succeeded) {
// We tried numretries to reach root and failed. Is it gone.
// Currently we just flounder. Should we reallocate root?
// This would be catastrophic?
// unassignRootRegion();
}
} }
@Override @Override
@ -564,6 +574,12 @@ HMasterRegionInterface {
this.startKey.set(startKey); this.startKey.set(startKey);
} }
} }
/**
 * Renders this meta region for log messages.
 *
 * @return the region name, start key and server of this meta region, as
 * <code>{regionname: NAME, startKey: &lt;KEY&gt;, server: SERVER}</code>
 */
@Override
public String toString() {
  // Plain concatenation instead of explicit toString() calls: an unset
  // field prints as "null" rather than throwing from inside a log statement.
  // The leading "{" balances the closing "}" that the text already ended with.
  return "{regionname: " + this.regionName + ", startKey: <" +
    this.startKey + ">, server: " + this.server + "}";
}
/** @return the regionName */ /** @return the regionName */
public Text getRegionName() { public Text getRegionName() {
@ -599,23 +615,28 @@ HMasterRegionInterface {
/** {@inheritDoc} */ /** {@inheritDoc} */
public int compareTo(Object o) { public int compareTo(Object o) {
MetaRegion other = (MetaRegion)o; MetaRegion other = (MetaRegion)o;
int result = this.regionName.compareTo(other.getRegionName()); int result = this.regionName.compareTo(other.getRegionName());
if(result == 0) { if(result == 0) {
result = this.startKey.compareTo(other.getStartKey()); result = this.startKey.compareTo(other.getStartKey());
if (result == 0) {
// Might be on different host?
result = this.server.compareTo(other.server);
}
} }
return result; return result;
} }
} }
/** Set by root scanner to indicate the number of meta regions */ /** Set by root scanner to indicate the number of meta regions */
AtomicInteger numberOfMetaRegions; final AtomicInteger numberOfMetaRegions = new AtomicInteger();
/** Work for the meta scanner is queued up here */ /** Work for the meta scanner is queued up here */
BlockingQueue<MetaRegion> metaRegionsToScan; final BlockingQueue<MetaRegion> metaRegionsToScan =
new LinkedBlockingQueue<MetaRegion>();
/** These are the online meta regions */ /** These are the online meta regions */
SortedMap<Text, MetaRegion> onlineMetaRegions; final SortedMap<Text, MetaRegion> onlineMetaRegions =
Collections.synchronizedSortedMap(new TreeMap<Text, MetaRegion>());
/** Set by meta scanner after initial scan */ /** Set by meta scanner after initial scan */
volatile boolean initialMetaScanComplete; volatile boolean initialMetaScanComplete;
@ -657,15 +678,24 @@ HMasterRegionInterface {
e = RemoteExceptionHandler.checkIOException(e); e = RemoteExceptionHandler.checkIOException(e);
tries += 1; tries += 1;
if (tries == 1) { if (tries == 1) {
LOG.warn("Scan one META region", e); LOG.warn("Scan one META region: " + region.toString(), e);
} else { } else {
LOG.error("Scan one META region", e); LOG.error("Scan one META region: " + region.toString(), e);
if (tries == numRetries - 1) { }
// We ran out of tries. Make sure the file system is still // The region may have moved (TestRegionServerAbort, etc.). If
// available // so, either it won't be in the onlineMetaRegions list or its host
if (!checkFileSystem()) { // address has changed and the containsValue will fail. If not
continue; // avoid sleeping // found, best thing to do here is probably break.
} if (!onlineMetaRegions.containsValue(region)) {
LOG.debug("Scanned region is no longer in map of online " +
"regions or its value has changed");
break;
}
if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still
// available
if (!checkFileSystem()) {
continue; // avoid sleeping
} }
} }
} catch (Exception e) { } catch (Exception e) {
@ -673,6 +703,7 @@ HMasterRegionInterface {
// at least log it rather than go out silently. // at least log it rather than go out silently.
LOG.error("Unexpected exception", e); LOG.error("Unexpected exception", e);
} }
// Sleep before going around again.
sleeper.sleep(); sleeper.sleep();
} }
} }
@ -710,9 +741,8 @@ HMasterRegionInterface {
* regions. This wakes up any threads that were waiting for this to happen. * regions. This wakes up any threads that were waiting for this to happen.
*/ */
private synchronized boolean metaRegionsScanned() { private synchronized boolean metaRegionsScanned() {
if (!rootScanned || if (!rootScanned ||
numberOfMetaRegions.get() != onlineMetaRegions.size()) { numberOfMetaRegions.get() != onlineMetaRegions.size()) {
return false; return false;
} }
LOG.info("all meta regions scanned"); LOG.info("all meta regions scanned");
@ -754,14 +784,16 @@ HMasterRegionInterface {
* <p>Items are removed from this list when a region server reports in that * <p>Items are removed from this list when a region server reports in that
* the region has been deployed. * the region has been deployed.
*/ */
SortedMap<Text, HRegionInfo> unassignedRegions; final SortedMap<Text, HRegionInfo> unassignedRegions =
Collections.synchronizedSortedMap(new TreeMap<Text, HRegionInfo>());
/** /**
* The 'assignAttempts' table maps from regions to a timestamp that indicates * The 'assignAttempts' table maps from regions to a timestamp that indicates
* the last time we *tried* to assign the region to a RegionServer. If the * the last time we *tried* to assign the region to a RegionServer. If the
* timestamp is out of date, then we can try to reassign it. * timestamp is out of date, then we can try to reassign it.
*/ */
Map<Text, Long> assignAttempts; final Map<Text, Long> assignAttempts =
Collections.synchronizedMap(new HashMap<Text, Long>());
/** /**
* Regions that have been assigned, and the server has reported that it has * Regions that have been assigned, and the server has reported that it has
@ -897,28 +929,11 @@ HMasterRegionInterface {
this.rootScannerThread = new RootScanner(); this.rootScannerThread = new RootScanner();
// Scans the meta table // Scans the meta table
this.numberOfMetaRegions = new AtomicInteger();
this.metaRegionsToScan = new LinkedBlockingQueue<MetaRegion>();
this.onlineMetaRegions =
Collections.synchronizedSortedMap(new TreeMap<Text, MetaRegion>());
this.initialMetaScanComplete = false; this.initialMetaScanComplete = false;
this.metaScannerThread = new MetaScanner(); this.metaScannerThread = new MetaScanner();
this.unassignedRegions = unassignRootRegion();
Collections.synchronizedSortedMap(new TreeMap<Text, HRegionInfo>());
this.unassignedRegions.put(HGlobals.rootRegionInfo.regionName,
HGlobals.rootRegionInfo);
this.assignAttempts =
Collections.synchronizedMap(new HashMap<Text, Long>());
this.assignAttempts.put(HGlobals.rootRegionInfo.regionName,
Long.valueOf(0L));
this.pendingRegions = this.pendingRegions =
Collections.synchronizedSet(new HashSet<Text>()); Collections.synchronizedSet(new HashSet<Text>());
@ -943,6 +958,22 @@ HMasterRegionInterface {
this.closed.set(false); this.closed.set(false);
LOG.info("HMaster initialized on " + this.address.toString()); LOG.info("HMaster initialized on " + this.address.toString());
} }
/*
 * Put the root region back up for assignment.
 * Clears the recorded root region location, then registers the root region
 * as unassigned with an assignment-attempt timestamp of zero.
 * Meant for the case where the server hosting root died without reporting
 * in. Today we just flounder and never recover from that. The root scanner
 * could 'notice' the dead server after repeated failed accesses, but
 * reassigning root is catastrophic.
 */
void unassignRootRegion() {
  this.rootRegionLocation.set(null);
  final HRegionInfo rootInfo = HGlobals.rootRegionInfo;
  final Text rootName = rootInfo.regionName;
  this.unassignedRegions.put(rootName, rootInfo);
  this.assignAttempts.put(rootName, Long.valueOf(0L));
  // TODO: If the old root region server had a log, it needs splitting.
}
/** /**
* Checks to see if the file system is still accessible. * Checks to see if the file system is still accessible.
@ -1271,17 +1302,13 @@ HMasterRegionInterface {
LOG.info("Region server " + serverName + LOG.info("Region server " + serverName +
": MSG_REPORT_EXITING -- lease cancelled"); ": MSG_REPORT_EXITING -- lease cancelled");
// Get all the regions the server was serving reassigned // Get all the regions the server was serving reassigned
// (if we are not shutting down). // (if we are not shutting down).
if (!closed.get()) { if (!closed.get()) {
for (int i = 1; i < msgs.length; i++) { for (int i = 1; i < msgs.length; i++) {
HRegionInfo info = msgs[i].getRegionInfo(); HRegionInfo info = msgs[i].getRegionInfo();
if (info.tableDesc.getName().equals(ROOT_TABLE_NAME)) { if (info.tableDesc.getName().equals(ROOT_TABLE_NAME)) {
rootRegionLocation.set(null); rootRegionLocation.set(null);
} else if (info.tableDesc.getName().equals(META_TABLE_NAME)) { } else if (info.tableDesc.getName().equals(META_TABLE_NAME)) {
onlineMetaRegions.remove(info.getStartKey()); onlineMetaRegions.remove(info.getStartKey());
} }
@ -1448,14 +1475,11 @@ HMasterRegionInterface {
} else { } else {
LOG.info(info.getServerAddress().toString() + " serving " + LOG.info(info.getServerAddress().toString() + " serving " +
region.regionName); region.regionName);
// Remove from unassigned list so we don't assign it to someone else // Remove from unassigned list so we don't assign it to someone else
this.unassignedRegions.remove(region.regionName); this.unassignedRegions.remove(region.regionName);
this.assignAttempts.remove(region.regionName); this.assignAttempts.remove(region.regionName);
if (region.regionName.compareTo( if (region.regionName.compareTo(
HGlobals.rootRegionInfo.regionName) == 0) { HGlobals.rootRegionInfo.regionName) == 0) {
// Store the Root Region location (in memory) // Store the Root Region location (in memory)
synchronized (rootRegionLocation) { synchronized (rootRegionLocation) {
this.rootRegionLocation. this.rootRegionLocation.
@ -2226,30 +2250,22 @@ HMasterRegionInterface {
* root region which is handled specially. * root region which is handled specially.
*/ */
private class PendingOpenReport extends PendingOperation { private class PendingOpenReport extends PendingOperation {
private boolean rootRegion; private final boolean rootRegion;
private HRegionInfo region; private final HRegionInfo region;
private HServerAddress serverAddress; private final HServerAddress serverAddress;
private byte [] startCode; private final byte [] startCode;
PendingOpenReport(HServerInfo info, HRegionInfo region) PendingOpenReport(HServerInfo info, HRegionInfo region)
throws IOException { throws IOException {
if (region.tableDesc.getName().equals(META_TABLE_NAME)) { // If true, the region which just came on-line is a META region.
// The region which just came on-line is a META region. // We need to look in the ROOT region for its information. Otherwise,
// We need to look in the ROOT region for its information. // its just an ordinary region. Look for it in the META table.
this.rootRegion = region.tableDesc.getName().equals(META_TABLE_NAME);
this.rootRegion = true;
} else {
// Just an ordinary region. Look for it in the META table.
this.rootRegion = false;
}
this.region = region; this.region = region;
this.serverAddress = info.getServerAddress(); this.serverAddress = info.getServerAddress();
this.startCode = Writables.longToBytes(info.getStartCode()); this.startCode = Writables.longToBytes(info.getStartCode());
} }
/** {@inheritDoc} */
@Override @Override
public String toString() { public String toString() {
return "PendingOpenOperation from " + serverAddress.toString(); return "PendingOpenOperation from " + serverAddress.toString();
@ -2261,15 +2277,15 @@ HMasterRegionInterface {
if (closed.get()) { if (closed.get()) {
return true; return true;
} }
LOG.info(region.getRegionName() + " open on " + LOG.info(region.toString() + " open on " +
this.serverAddress.toString()); this.serverAddress.toString());
// Register the newly-available Region's location. // Register the newly-available Region's location.
Text metaRegionName; Text metaRegionName;
HRegionInterface server; HRegionInterface server;
if (rootRegion) { if (this.rootRegion) {
if (rootRegionLocation.get() == null || !rootScanned) { if (rootRegionLocation.get() == null || !rootScanned) {
// We can't proceed until the root region is online and has been scanned // We can't proceed until root region is online and scanned
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("root region: " + LOG.debug("root region: " +
((rootRegionLocation.get() != null)? ((rootRegionLocation.get() != null)?
@ -2283,12 +2299,10 @@ HMasterRegionInterface {
} else { } else {
if (!rootScanned || if (!rootScanned ||
numberOfMetaRegions.get() != onlineMetaRegions.size()) { numberOfMetaRegions.get() != onlineMetaRegions.size()) {
// We can't proceed because not all of the meta regions are online. // We can't proceed because not all of the meta regions are online.
// We can't block either because that would prevent the meta region // We can't block either because that would prevent the meta region
// online message from being processed. So return false to have this // online message from being processed. So return false to have this
// operation requeued. // operation requeued.
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("Requeuing open because rootScanned: " + LOG.debug("Requeuing open because rootScanned: " +
rootScanned + ", numberOfMetaRegions: " + rootScanned + ", numberOfMetaRegions: " +
@ -2298,21 +2312,18 @@ HMasterRegionInterface {
return false; return false;
} }
MetaRegion r = null; MetaRegion r = onlineMetaRegions.containsKey(region.getRegionName())?
if (onlineMetaRegions.containsKey(region.getRegionName())) { onlineMetaRegions.get(region.getRegionName()):
r = onlineMetaRegions.get(region.getRegionName()); onlineMetaRegions.get(onlineMetaRegions.
} else { headMap(region.getRegionName()).lastKey());
r = onlineMetaRegions.get(onlineMetaRegions.headMap(
region.getRegionName()).lastKey());
}
metaRegionName = r.getRegionName(); metaRegionName = r.getRegionName();
server = connection.getHRegionConnection(r.getServer()); server = connection.getHRegionConnection(r.getServer());
} }
LOG.info("updating row " + region.getRegionName() + " in table " + LOG.info("updating row " + region.getRegionName() + " in table " +
metaRegionName + " with startcode " + metaRegionName + " with startcode " +
Writables.bytesToLong(this.startCode) + " and server "+ Writables.bytesToLong(this.startCode) + " and server "+
serverAddress.toString()); serverAddress.toString());
try { try {
BatchUpdate b = new BatchUpdate(rand.nextLong()); BatchUpdate b = new BatchUpdate(rand.nextLong());
long lockid = b.startUpdate(region.getRegionName()); long lockid = b.startUpdate(region.getRegionName());
@ -2320,34 +2331,28 @@ HMasterRegionInterface {
Writables.stringToBytes(serverAddress.toString())); Writables.stringToBytes(serverAddress.toString()));
b.put(lockid, COL_STARTCODE, startCode); b.put(lockid, COL_STARTCODE, startCode);
server.batchUpdate(metaRegionName, System.currentTimeMillis(), b); server.batchUpdate(metaRegionName, System.currentTimeMillis(), b);
if (region.tableDesc.getName().equals(META_TABLE_NAME)) { if (region.tableDesc.getName().equals(META_TABLE_NAME)) {
// It's a meta region. // It's a meta region.
MetaRegion m = new MetaRegion(this.serverAddress,
MetaRegion m = this.region.regionName, this.region.startKey);
new MetaRegion(serverAddress, region.regionName, region.startKey);
if (!initialMetaScanComplete) { if (!initialMetaScanComplete) {
// Put it on the queue to be scanned for the first time. // Put it on the queue to be scanned for the first time.
try { try {
LOG.debug("Adding " + m.toString() + " to regions to scan");
metaRegionsToScan.put(m); metaRegionsToScan.put(m);
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new RuntimeException( throw new RuntimeException(
"Putting into metaRegionsToScan was interrupted.", e); "Putting into metaRegionsToScan was interrupted.", e);
} }
} else { } else {
// Add it to the online meta regions // Add it to the online meta regions
LOG.debug("Adding to onlineMetaRegions: " + m.toString());
onlineMetaRegions.put(region.startKey, m); onlineMetaRegions.put(this.region.startKey, m);
} }
} }
// If updated successfully, remove from pending list. // If updated successfully, remove from pending list.
pendingRegions.remove(region.getRegionName()); pendingRegions.remove(region.getRegionName());
break; break;
} catch (IOException e) { } catch (IOException e) {
if (tries == numRetries - 1) { if (tries == numRetries - 1) {
throw RemoteExceptionHandler.checkIOException(e); throw RemoteExceptionHandler.checkIOException(e);

View File

@ -68,41 +68,12 @@ public class DFSAbort extends HBaseClusterTestCase {
cluster.getDFSCluster().shutdown(); cluster.getDFSCluster().shutdown();
// Now wait for Mini HBase Cluster to shut down // Now wait for Mini HBase Cluster to shut down
// cluster.join(); // cluster.join();
join(); threadDumpingJoin();
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); e.printStackTrace();
throw e; throw e;
} }
} }
/**
 * Waits on each of the cluster's region threads (if there are any) and then
 * on the master thread.
 */
private void join() {
  if (this.cluster.regionThreads != null) {
    // Walk the region threads while holding their collection's monitor.
    synchronized (this.cluster.regionThreads) {
      for (Thread regionThread : this.cluster.regionThreads) {
        join(regionThread);
      }
    }
  }
  // Master goes last, after all region threads have been waited on.
  join(this.cluster.getMasterThread());
}
/**
 * Waits, in one-second sleeps, until the passed thread is no longer alive.
 * After every thirtieth sleep (but not the first) thread info is printed to
 * standard out.
 *
 * @param t thread to wait on; nothing is done when it is null
 */
private void join(final Thread t) {
  if (t != null) {
    int sleeps = 0;
    while (t.isAlive()) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException e) {
        LOG.info("Continuing...", e);
      }
      final boolean dumpDue = sleeps != 0 && sleeps % 30 == 0;
      if (dumpDue) {
        final String title =
          "Automatic Stack Trace every 30 seconds waiting on " + t.getName();
        ReflectionUtils.printThreadInfo(new PrintWriter(System.out), title);
      }
      sleeps++;
    }
  }
}
/** /**
* @param args unused * @param args unused

View File

@ -19,11 +19,19 @@
*/ */
package org.apache.hadoop.hbase; package org.apache.hadoop.hbase;
import java.io.PrintWriter;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.util.ReflectionUtils;
/** /**
* Abstract base class for HBase cluster junit tests. Spins up cluster on * Abstract base class for HBase cluster junit tests. Spins up cluster on
* {@link #setUp()} and takes it down again in {@link #tearDown()}. * {@link #setUp()} and takes it down again in {@link #tearDown()}.
*/ */
public abstract class HBaseClusterTestCase extends HBaseTestCase { public abstract class HBaseClusterTestCase extends HBaseTestCase {
private static final Log LOG =
LogFactory.getLog(HBaseClusterTestCase.class.getName());
protected MiniHBaseCluster cluster; protected MiniHBaseCluster cluster;
final boolean miniHdfs; final boolean miniHdfs;
int regionServers; int regionServers;
@ -69,7 +77,6 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
this.regionServers = 1; this.regionServers = 1;
} }
/** {@inheritDoc} */
@Override @Override
protected void setUp() throws Exception { protected void setUp() throws Exception {
super.setUp(); super.setUp();
@ -77,7 +84,6 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
new MiniHBaseCluster(this.conf, this.regionServers, this.miniHdfs); new MiniHBaseCluster(this.conf, this.regionServers, this.miniHdfs);
} }
/** {@inheritDoc} */
@Override @Override
protected void tearDown() throws Exception { protected void tearDown() throws Exception {
super.tearDown(); super.tearDown();
@ -86,4 +92,41 @@ public abstract class HBaseClusterTestCase extends HBaseTestCase {
} }
HConnectionManager.deleteConnection(conf); HConnectionManager.deleteConnection(conf);
} }
}
/**
 * Utility for debugging why a cluster will not go down. Waits on each of
 * the cluster's region threads (if there are any) and then on the master
 * thread, handing each one to {@link #threadDumpingJoin(Thread)}.
 * Returns once none of those threads is alive any longer.
 */
public void threadDumpingJoin() {
  if (this.cluster.regionThreads != null) {
    // Walk the region threads while holding their collection's monitor.
    synchronized (this.cluster.regionThreads) {
      for (Thread regionThread : this.cluster.regionThreads) {
        threadDumpingJoin(regionThread);
      }
    }
  }
  // Master goes last, after all region threads have been waited on.
  threadDumpingJoin(this.cluster.getMasterThread());
}
/**
 * Blocks until the passed thread is no longer alive. While waiting, thread
 * info is printed to standard out each time more than 60 seconds have
 * passed since the wait began or since the previous print.
 *
 * @param t thread to wait on; returns immediately when it is null
 */
public void threadDumpingJoin(final Thread t) {
  if (t == null) {
    return;
  }
  long startTime = System.currentTimeMillis();
  while (t.isAlive()) {
    try {
      // Timed join rather than a fixed sleep: wakes as soon as the thread
      // dies instead of always sitting out the full second.
      t.join(1000);
    } catch (InterruptedException e) {
      // Keep waiting on purpose; this helper only returns once t is dead.
      LOG.info("Continuing...", e);
    }
    if (System.currentTimeMillis() - startTime > 60000) {
      // Restart the interval so the next print is another 60 seconds away.
      startTime = System.currentTimeMillis();
      ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
        "Automatic Stack Trace every 60 seconds waiting on " +
        t.getName());
    }
  }
}
}

View File

@ -21,11 +21,8 @@ package org.apache.hadoop.hbase;
import java.io.IOException; import java.io.IOException;
import junit.framework.TestCase;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dfs.MiniDFSCluster; import org.apache.hadoop.dfs.MiniDFSCluster;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
@ -38,22 +35,17 @@ import org.apache.hadoop.io.WritableComparable;
/** /**
* Test HStoreFile * Test HStoreFile
*/ */
public class TestHStoreFile extends TestCase { public class TestHStoreFile extends HBaseTestCase {
static final Log LOG = LogFactory.getLog(TestHStoreFile.class); static final Log LOG = LogFactory.getLog(TestHStoreFile.class);
private static String DIR = "/"; private static String DIR = "/";
private static final char FIRST_CHAR = 'a';
private static final char LAST_CHAR = 'z';
private MiniDFSCluster cluster; private MiniDFSCluster cluster;
private FileSystem fs; private FileSystem fs;
private Configuration conf;
private Path dir = null; private Path dir = null;
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
this.conf = new HBaseConfiguration();
this.cluster = null;
this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null); this.cluster = new MiniDFSCluster(this.conf, 2, true, (String[])null);
this.fs = cluster.getFileSystem(); this.fs = cluster.getFileSystem();
this.dir = new Path(DIR, getName()); this.dir = new Path(DIR, getName());

View File

@ -30,15 +30,15 @@ import org.apache.hadoop.io.Text;
* Tests region server failover when a region server exits. * Tests region server failover when a region server exits.
*/ */
public class TestRegionServerAbort extends HBaseClusterTestCase { public class TestRegionServerAbort extends HBaseClusterTestCase {
private final Log LOG = LogFactory.getLog(this.getClass().getName()); final Log LOG = LogFactory.getLog(this.getClass().getName());
private HTable table; HTable table;
/** constructor */ /** constructor */
public TestRegionServerAbort() { public TestRegionServerAbort() {
super(2); super(2);
conf.setInt("ipc.client.timeout", 5000); // reduce client timeout conf.setInt("ipc.client.timeout", 10000); // reduce client timeout
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries
} }
/** /**
@ -50,14 +50,14 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
@SuppressWarnings("unused") @SuppressWarnings("unused")
HTable meta = new HTable(conf, HConstants.META_TABLE_NAME); HTable meta = new HTable(conf, HConstants.META_TABLE_NAME);
// Put something into the meta table. // Put something into the meta table.
String tableName = getName(); final String tableName = getName();
HTableDescriptor desc = new HTableDescriptor(tableName); HTableDescriptor desc = new HTableDescriptor(tableName);
desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY.toString())); desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY.toString()));
HBaseAdmin admin = new HBaseAdmin(conf); HBaseAdmin admin = new HBaseAdmin(conf);
admin.createTable(desc); admin.createTable(desc);
// put some values in the table // put some values in the table
this.table = new HTable(conf, new Text(tableName)); this.table = new HTable(conf, new Text(tableName));
Text row = new Text("row1"); final Text row = new Text("row1");
long lockid = table.startUpdate(row); long lockid = table.startUpdate(row);
table.put(lockid, HConstants.COLUMN_FAMILY, table.put(lockid, HConstants.COLUMN_FAMILY,
tableName.getBytes(HConstants.UTF8_ENCODING)); tableName.getBytes(HConstants.UTF8_ENCODING));
@ -68,29 +68,45 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
// Now shutdown the region server and wait for it to go down. // Now shutdown the region server and wait for it to go down.
this.cluster.abortRegionServer(0); this.cluster.abortRegionServer(0);
LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown"); LOG.info(this.cluster.waitOnRegionServer(0) + " has been shutdown");
HScannerInterface scanner = null; // Run verification in a thread so I can concurrently run a thread-dumper
try { // while we're waiting (because in this test sometimes the meta scanner
// Verify that the client can find the data after the region has moved // looks to be be stuck).
// to a different server Runnable runnable = new Runnable() {
scanner = public void run() {
table.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text()); HScannerInterface scanner = null;
LOG.info("Obtained scanner " + scanner); try {
HStoreKey key = new HStoreKey(); // Verify that the client can find the data after the region has moved
TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>(); // to a different server
while (scanner.next(key, results)) { scanner =
assertTrue(key.getRow().equals(row)); table.obtainScanner(HConstants.COLUMN_FAMILY_ARRAY, new Text());
assertEquals(1, results.size()); LOG.info("Obtained scanner " + scanner);
byte[] bytes = results.get(HConstants.COLUMN_FAMILY); HStoreKey key = new HStoreKey();
assertNotNull(bytes); TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
assertTrue(tableName.equals(new String(bytes, while (scanner.next(key, results)) {
HConstants.UTF8_ENCODING))); assertTrue(key.getRow().equals(row));
assertEquals(1, results.size());
byte[] bytes = results.get(HConstants.COLUMN_FAMILY);
assertNotNull(bytes);
assertTrue(tableName.equals(new String(bytes,
HConstants.UTF8_ENCODING)));
}
LOG.info("Success!");
} catch (IOException e) {
e.printStackTrace();
} finally {
if (scanner != null) {
LOG.info("Closing scanner " + scanner);
try {
scanner.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
} }
LOG.info("Success!"); };
} finally { Thread t = new Thread(runnable);
if (scanner != null) { t.start();
LOG.info("Closing scanner " + scanner); threadDumpingJoin(t);
scanner.close();
}
}
} }
} }