HADOOP-2490 Failure in nightly #346
Add one fix and more logging to help diagnose the failures up on hudson. git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@610237 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
450fe7b100
commit
a22bf5e1e2
|
@ -105,6 +105,7 @@ Trunk (unreleased changes)
|
|||
HADOOP-2507 REST servlet does not properly base64 row keys and column names
|
||||
(Bryan Duxbury via Stack)
|
||||
HADOOP-2530 Missing type in new hbase custom RPC serializer
|
||||
HADOOP-2490 Failure in nightly #346 (Added debugging of hudson failures).
|
||||
|
||||
IMPROVEMENTS
|
||||
HADOOP-2401 Add convenience put method that takes writable
|
||||
|
|
|
@ -462,12 +462,17 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
!pendingRegions.contains(info.getRegionName())
|
||||
)
|
||||
)
|
||||
) {
|
||||
) {
|
||||
|
||||
// The current assignment is no good
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Current assignment of " + info.getRegionName() +
|
||||
" is no good");
|
||||
" is no good: storedInfo: " + storedInfo + ", startCode: " +
|
||||
startCode + ", storedInfo.startCode: " +
|
||||
((storedInfo != null)? storedInfo.getStartCode(): -1) +
|
||||
", unassignedRegions: " + unassignedRegions.containsKey(info) +
|
||||
", pendingRegions: " +
|
||||
pendingRegions.contains(info.getRegionName()));
|
||||
}
|
||||
// Recover the region server's log if there is one.
|
||||
// This is only done from here if we are restarting and there is stale
|
||||
|
@ -1026,9 +1031,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
final String threadName = "HMaster";
|
||||
Thread.currentThread().setName(threadName);
|
||||
startServiceThreads();
|
||||
/*
|
||||
* Main processing loop
|
||||
*/
|
||||
/* Main processing loop */
|
||||
try {
|
||||
for (RegionServerOperation op = null; !closed.get(); ) {
|
||||
if (shutdownRequested && serversToServerInfo.size() == 0) {
|
||||
|
@ -1037,7 +1040,6 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
}
|
||||
if (rootRegionLocation.get() != null) {
|
||||
// We can't process server shutdowns unless the root region is online
|
||||
|
||||
op = this.delayedToDoQueue.poll();
|
||||
}
|
||||
if (op == null ) {
|
||||
|
@ -1179,6 +1181,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
this.closed.set(true);
|
||||
LOG.error("Failed startup", e);
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Started service threads");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1057,7 +1057,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
*/
|
||||
private MapWritable reportForDuty() throws IOException {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Telling master we are up");
|
||||
LOG.debug("Telling master at " +
|
||||
conf.get(MASTER_ADDRESS) + " that we are up");
|
||||
}
|
||||
// Do initial RPC setup.
|
||||
this.hbaseMaster = (HMasterRegionInterface)HbaseRPC.waitForProxy(
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
package org.apache.hadoop.hbase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
@ -27,6 +28,7 @@ import java.util.List;
|
|||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.util.ReflectionUtils;
|
||||
|
||||
/**
|
||||
* This class creates a single process HBase cluster. One thread is created for
|
||||
|
@ -229,7 +231,10 @@ public class LocalHBaseCluster implements HConstants {
|
|||
if (this.master != null) {
|
||||
while (this.master.isAlive()) {
|
||||
try {
|
||||
this.master.join();
|
||||
// The below has been replaced to debug occasional hangs at the end of
|
||||
// tests.
|
||||
// this.master.join();
|
||||
threadDumpingJoin(this.master);
|
||||
} catch(InterruptedException e) {
|
||||
// continue
|
||||
}
|
||||
|
@ -240,6 +245,22 @@ public class LocalHBaseCluster implements HConstants {
|
|||
" " + this.regionThreads.size() + " region server(s)");
|
||||
}
|
||||
|
||||
/**
 * Waits for the passed thread to finish, periodically dumping thread info
 * while waiting. Used in place of a plain join so that a hang leaves
 * diagnostic output behind.
 *
 * <p>Returns immediately if <code>t</code> is null. Otherwise polls
 * <code>t.isAlive()</code>, sleeping one second between checks. Each time
 * more than 60 seconds have elapsed since the start (or since the previous
 * dump), the timer is reset and
 * <code>ReflectionUtils.printThreadInfo</code> is called with a
 * <code>PrintWriter</code> wrapping <code>System.out</code>.
 *
 * @param t Thread to wait on; may be null.
 * @throws InterruptedException if interrupted while sleeping between checks.
 */
public void threadDumpingJoin(final Thread t) throws InterruptedException {
|
||||
if (t == null) {
|
||||
return;
|
||||
}
|
||||
long startTime = System.currentTimeMillis();
|
||||
while (t.isAlive()) {
|
||||
Thread.sleep(1000);
|
||||
if (System.currentTimeMillis() - startTime > 60000) {
|
||||
startTime = System.currentTimeMillis();
|
||||
ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
|
||||
"Automatic Stack Trace every 60 seconds waiting on " +
|
||||
t.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Changes <code>hbase.master</code> from 'local' to 'localhost:PORT' in
|
||||
* passed Configuration instance.
|
||||
|
|
|
@ -37,6 +37,11 @@ import org.apache.hadoop.io.WritableComparator;
|
|||
*
|
||||
* <p>Equals considers a Text equal if the TextSequence brackets the same bytes.
|
||||
*
|
||||
* <p>TextSequence will not always work as a Text. For instance, the following
|
||||
* fails <code>Text c = new Text(new TextSequence(new Text("some string")));
|
||||
* </code> because the Text constructor accesses private Text data members
|
||||
* making the new instance from the passed 'Text'.
|
||||
*
|
||||
* <p>TODO: Should this be an Interface as CharSequence is?
|
||||
*/
|
||||
public class TextSequence extends Text {
|
||||
|
|
|
@ -116,4 +116,14 @@
|
|||
<value>/hbase</value>
|
||||
<description>location of HBase instance in dfs</description>
|
||||
</property>
|
||||
<property>
|
||||
<name>hbase.hregion.max.filesize</name>
|
||||
<value>67108864</value>
|
||||
<description>
|
||||
Maximum desired file size for an HRegion. If filesize exceeds
|
||||
value + (value / 2), the HRegion is split in two. Default: 256M.
|
||||
|
||||
Keep the maximum filesize small so we split more often in tests.
|
||||
</description>
|
||||
</property>
|
||||
</configuration>
|
||||
|
|
|
@ -138,7 +138,13 @@ public class StaticTestEnvironment {
|
|||
}
|
||||
|
||||
LOG.info("Shutting down Mini DFS ");
|
||||
cluster.shutdown();
|
||||
try {
|
||||
cluster.shutdown();
|
||||
} catch (Exception e) {
|
||||
// Can get a java.lang.reflect.UndeclaredThrowableException thrown
|
||||
// here because of an InterruptedException. Don't let exceptions in
|
||||
// here be the cause of test failure.
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,6 +122,8 @@ public class TestTableMapReduce extends MultiRegionTable {
|
|||
dir = new Path("/hbase");
|
||||
fs.mkdirs(dir);
|
||||
// Start up HBase cluster
|
||||
// Only one region server. MultiRegionServer manufacturing code below
|
||||
// depends on there being one region server only.
|
||||
hCluster = new MiniHBaseCluster(conf, 1, dfsCluster);
|
||||
LOG.info("Master is at " + this.conf.get(HConstants.MASTER_ADDRESS));
|
||||
} catch (Exception e) {
|
||||
|
@ -235,7 +237,8 @@ public class TestTableMapReduce extends MultiRegionTable {
|
|||
}
|
||||
}
|
||||
|
||||
LOG.info("Print table contents before map/reduce");
|
||||
LOG.info("Print table contents before map/reduce for " +
|
||||
SINGLE_REGION_TABLE_NAME);
|
||||
scanTable(SINGLE_REGION_TABLE_NAME, true);
|
||||
|
||||
@SuppressWarnings("deprecation")
|
||||
|
@ -252,19 +255,18 @@ public class TestTableMapReduce extends MultiRegionTable {
|
|||
|
||||
TableReduce.initJob(SINGLE_REGION_TABLE_NAME,
|
||||
IdentityTableReduce.class, jobConf);
|
||||
|
||||
LOG.info("Started " + SINGLE_REGION_TABLE_NAME);
|
||||
JobClient.runJob(jobConf);
|
||||
|
||||
LOG.info("Print table contents after map/reduce for " +
|
||||
SINGLE_REGION_TABLE_NAME);
|
||||
scanTable(SINGLE_REGION_TABLE_NAME, true);
|
||||
|
||||
// verify map-reduce results
|
||||
verify(SINGLE_REGION_TABLE_NAME);
|
||||
} finally {
|
||||
mrCluster.shutdown();
|
||||
}
|
||||
|
||||
LOG.info("Print table contents after map/reduce");
|
||||
scanTable(SINGLE_REGION_TABLE_NAME, true);
|
||||
|
||||
// verify map-reduce results
|
||||
verify(SINGLE_REGION_TABLE_NAME);
|
||||
|
||||
} finally {
|
||||
table.close();
|
||||
}
|
||||
|
@ -307,16 +309,14 @@ public class TestTableMapReduce extends MultiRegionTable {
|
|||
|
||||
TableReduce.initJob(MULTI_REGION_TABLE_NAME,
|
||||
IdentityTableReduce.class, jobConf);
|
||||
|
||||
LOG.info("Started " + MULTI_REGION_TABLE_NAME);
|
||||
JobClient.runJob(jobConf);
|
||||
|
||||
|
||||
// verify map-reduce results
|
||||
verify(MULTI_REGION_TABLE_NAME);
|
||||
} finally {
|
||||
mrCluster.shutdown();
|
||||
}
|
||||
|
||||
// verify map-reduce results
|
||||
verify(MULTI_REGION_TABLE_NAME);
|
||||
|
||||
} finally {
|
||||
table.close();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue