HADOOP-2490 Failure in nightly #346

Add one fix and more logging to help diagnose the failures on Hudson.


git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@610237 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2008-01-09 00:07:16 +00:00
parent 450fe7b100
commit a22bf5e1e2
8 changed files with 73 additions and 24 deletions

View File

@@ -105,6 +105,7 @@ Trunk (unreleased changes)
HADOOP-2507 REST servlet does not properly base64 row keys and column names
(Bryan Duxbury via Stack)
HADOOP-2530 Missing type in new hbase custom RPC serializer
HADOOP-2490 Failure in nightly #346 (Added debugging of Hudson failures).
IMPROVEMENTS
HADOOP-2401 Add convenience put method that takes writable

View File

@@ -462,12 +462,17 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
!pendingRegions.contains(info.getRegionName())
)
)
) {
) {
// The current assignment is no good
if (LOG.isDebugEnabled()) {
LOG.debug("Current assignment of " + info.getRegionName() +
" is no good");
" is no good: storedInfo: " + storedInfo + ", startCode: " +
startCode + ", storedInfo.startCode: " +
((storedInfo != null)? storedInfo.getStartCode(): -1) +
", unassignedRegions: " + unassignedRegions.containsKey(info) +
", pendingRegions: " +
pendingRegions.contains(info.getRegionName()));
}
// Recover the region server's log if there is one.
// This is only done from here if we are restarting and there is stale
@@ -1026,9 +1031,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
final String threadName = "HMaster";
Thread.currentThread().setName(threadName);
startServiceThreads();
/*
* Main processing loop
*/
/* Main processing loop */
try {
for (RegionServerOperation op = null; !closed.get(); ) {
if (shutdownRequested && serversToServerInfo.size() == 0) {
@@ -1037,7 +1040,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
}
if (rootRegionLocation.get() != null) {
// We can't process server shutdowns unless the root region is online
op = this.delayedToDoQueue.poll();
}
if (op == null ) {
@@ -1179,6 +1181,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
this.closed.set(true);
LOG.error("Failed startup", e);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Started service threads");
}
}
/*

View File

@@ -1057,7 +1057,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
*/
private MapWritable reportForDuty() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Telling master we are up");
LOG.debug("Telling master at " +
conf.get(MASTER_ADDRESS) + " that we are up");
}
// Do initial RPC setup.
this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy(

View File

@@ -20,6 +20,7 @@
package org.apache.hadoop.hbase;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
@@ -27,6 +28,7 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ReflectionUtils;
/**
* This class creates a single process HBase cluster. One thread is created for
@@ -229,7 +231,10 @@ public class LocalHBaseCluster implements HConstants {
if (this.master != null) {
while (this.master.isAlive()) {
try {
this.master.join();
// The plain join below has been replaced so we can debug occasional
// hangs at the end of tests.
// this.master.join();
threadDumpingJoin(this.master);
} catch(InterruptedException e) {
// continue
}
@@ -240,6 +245,22 @@ public class LocalHBaseCluster implements HConstants {
" " + this.regionThreads.size() + " region server(s)");
}
public void threadDumpingJoin(final Thread t) throws InterruptedException {
if (t == null) {
return;
}
long startTime = System.currentTimeMillis();
while (t.isAlive()) {
Thread.sleep(1000);
if (System.currentTimeMillis() - startTime > 60000) {
startTime = System.currentTimeMillis();
ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
"Automatic Stack Trace every 60 seconds waiting on " +
t.getName());
}
}
}
/**
* Changes <code>hbase.master</code> from 'local' to 'localhost:PORT' in
* passed Configuration instance.
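For comparison, here is a minimal, self-contained sketch of the "dump stacks while waiting" idea behind the threadDumpingJoin helper added above, using only the JDK's java.lang.management API instead of Hadoop's ReflectionUtils. It is illustrative only and not part of this commit; the class and thread names are made up.

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;

public final class ThreadDumpingJoinSketch {
  // Join t, printing a dump of all live threads every 60 seconds until it
  // exits, so a hung join leaves evidence in the test logs.
  static void threadDumpingJoin(final Thread t) throws InterruptedException {
    long last = System.currentTimeMillis();
    while (t.isAlive()) {
      Thread.sleep(1000);
      if (System.currentTimeMillis() - last > 60000) {
        last = System.currentTimeMillis();
        for (ThreadInfo info :
            ManagementFactory.getThreadMXBean().dumpAllThreads(true, true)) {
          System.out.print(info);
        }
      }
    }
  }

  public static void main(String[] args) throws InterruptedException {
    Thread worker = new Thread(new Runnable() {
      public void run() {
        try { Thread.sleep(5000); } catch (InterruptedException e) { /* exit */ }
      }
    }, "worker");
    worker.start();
    threadDumpingJoin(worker);
  }
}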

View File

@@ -37,6 +37,11 @@ import org.apache.hadoop.io.WritableComparator;
*
* <p>Equals considers a Text equal if the TextSequence brackets the same bytes.
*
* <p>TextSequence will not always work as a Text. For instance, the following
* fails <code>Text c = new Text(new TextSequence(new Text("some string")));
</code> because the Text constructor accesses private Text data members
when making the new instance from the passed 'Text'.
*
* <p>TODO: Should this be an Interface as CharSequence is?
*/
public class TextSequence extends Text {
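As a hedged illustration of the caveat documented above: a standalone Text copy of a TextSequence can be made by copying its character data rather than using the Text(Text) copy constructor. This sketch assumes TextSequence lives in the HBase io package and that its toString() returns the bracketed characters; neither is shown in this hunk, so treat it as illustrative only.

import org.apache.hadoop.hbase.io.TextSequence;  // package assumed
import org.apache.hadoop.io.Text;

public class TextSequenceCopySketch {
  public static void main(String[] args) {
    TextSequence seq = new TextSequence(new Text("some string"));
    // Copy the characters instead of using the Text(Text) constructor
    // that the javadoc above warns about.
    Text copy = new Text(seq.toString());
    // Text broken = new Text(seq);  // fails per the javadoc above
    System.out.println(copy);
  }
}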

View File

@@ -116,4 +116,14 @@
<value>/hbase</value>
<description>location of HBase instance in dfs</description>
</property>
<property>
<name>hbase.hregion.max.filesize</name>
<value>67108864</value>
<description>
Maximum desired file size for an HRegion. If filesize exceeds
value + (value / 2), the HRegion is split in two. Default: 256M.
Keep the maximum filesize small so we split more often in tests.
</description>
</property>
</configuration>
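A quick arithmetic check of the split rule quoted in the description, using the 64MB test value above (shown as a small Java snippet; the class name is made up):

public class SplitThresholdCheck {
  public static void main(String[] args) {
    long value = 67108864L;              // hbase.hregion.max.filesize set above (64MB)
    long splitAt = value + (value / 2);  // region splits once it exceeds this size
    System.out.println(splitAt);         // 100663296 bytes, i.e. 96MB
  }
}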

View File

@@ -138,7 +138,13 @@ public class StaticTestEnvironment {
}
LOG.info("Shutting down Mini DFS ");
cluster.shutdown();
try {
cluster.shutdown();
} catch (Exception e) {
// A java.lang.reflect.UndeclaredThrowableException can be thrown here
// because of an InterruptedException. Don't let exceptions here be the
// cause of a test failure.
}
}
}
}
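The same "do not let cleanup mask the test result" idea can be factored into a small helper. A hedged sketch, not part of this commit, with made-up names and commons-logging as already used elsewhere in this change:

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

public class QuietShutdownSketch {
  private static final Log LOG = LogFactory.getLog(QuietShutdownSketch.class);

  // Run a teardown step, logging rather than rethrowing anything it throws,
  // so cleanup problems do not hide the real test outcome.
  static void shutdownQuietly(Runnable shutdown, String what) {
    try {
      shutdown.run();
    } catch (RuntimeException e) {
      // e.g. an UndeclaredThrowableException wrapping an InterruptedException
      LOG.warn("Ignoring exception while shutting down " + what, e);
    }
  }
}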

View File

@@ -122,6 +122,8 @@ public class TestTableMapReduce extends MultiRegionTable {
dir = new Path("/hbase");
fs.mkdirs(dir);
// Start up HBase cluster
// Only one region server: the MultiRegionServer manufacturing code below
// depends on there being exactly one region server.
hCluster = new MiniHBaseCluster(conf, 1, dfsCluster);
LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
} catch (Exception e) {
@@ -235,7 +237,8 @@ public class TestTableMapReduce extends MultiRegionTable {
}
}
LOG.info("Print table contents before map/reduce");
LOG.info("Print table contents before map/reduce for " +
SINGLE_REGION_TABLE_NAME);
scanTable(SINGLE_REGION_TABLE_NAME, true);
@SuppressWarnings("deprecation")
@@ -252,19 +255,18 @@ public class TestTableMapReduce extends MultiRegionTable {
TableReduce.initJob(SINGLE_REGION_TABLE_NAME,
IdentityTableReduce.class, jobConf);
LOG.info("Started " + SINGLE_REGION_TABLE_NAME);
JobClient.runJob(jobConf);
LOG.info("Print table contents after map/reduce for " +
SINGLE_REGION_TABLE_NAME);
scanTable(SINGLE_REGION_TABLE_NAME, true);
// verify map-reduce results
verify(SINGLE_REGION_TABLE_NAME);
} finally {
mrCluster.shutdown();
}
LOG.info("Print table contents after map/reduce");
scanTable(SINGLE_REGION_TABLE_NAME, true);
// verify map-reduce results
verify(SINGLE_REGION_TABLE_NAME);
} finally {
table.close();
}
@@ -307,16 +309,14 @@ public class TestTableMapReduce extends MultiRegionTable {
TableReduce.initJob(MULTI_REGION_TABLE_NAME,
IdentityTableReduce.class, jobConf);
LOG.info("Started " + MULTI_REGION_TABLE_NAME);
JobClient.runJob(jobConf);
// verify map-reduce results
verify(MULTI_REGION_TABLE_NAME);
} finally {
mrCluster.shutdown();
}
// verify map-reduce results
verify(MULTI_REGION_TABLE_NAME);
} finally {
table.close();
}