HDFS-4006. TestCheckpoint#testSecondaryHasVeryOutOfDateImage occasionally fails due to unexpected exit. Contributed by Todd Lipcon
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1395387 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e32af6034b
commit
5a5473b29f
|
@ -300,6 +300,9 @@ Release 2.0.3-alpha - Unreleased
|
||||||
|
|
||||||
HDFS-3999. HttpFS OPEN operation expects len parameter, it should be length. (tucu)
|
HDFS-3999. HttpFS OPEN operation expects len parameter, it should be length. (tucu)
|
||||||
|
|
||||||
|
HDFS-4006. TestCheckpoint#testSecondaryHasVeryOutOfDateImage
|
||||||
|
occasionally fails due to unexpected exit. (todd via eli)
|
||||||
|
|
||||||
Release 2.0.2-alpha - 2012-09-07
|
Release 2.0.2-alpha - 2012-09-07
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -78,6 +78,7 @@ import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.util.Time;
|
import org.apache.hadoop.util.Time;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
|
|
||||||
/**********************************************************
|
/**********************************************************
|
||||||
|
@ -122,6 +123,8 @@ public class SecondaryNameNode implements Runnable {
|
||||||
private CheckpointConf checkpointConf;
|
private CheckpointConf checkpointConf;
|
||||||
private FSNamesystem namesystem;
|
private FSNamesystem namesystem;
|
||||||
|
|
||||||
|
private Thread checkpointThread;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
@ -277,6 +280,15 @@ public class SecondaryNameNode implements Runnable {
|
||||||
*/
|
*/
|
||||||
public void shutdown() {
|
public void shutdown() {
|
||||||
shouldRun = false;
|
shouldRun = false;
|
||||||
|
if (checkpointThread != null) {
|
||||||
|
checkpointThread.interrupt();
|
||||||
|
try {
|
||||||
|
checkpointThread.join(10000);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.info("Interrupted waiting to join on checkpointer thread");
|
||||||
|
Thread.currentThread().interrupt(); // maintain status
|
||||||
|
}
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
if (infoServer != null) infoServer.stop();
|
if (infoServer != null) infoServer.stop();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -586,8 +598,16 @@ public class SecondaryNameNode implements Runnable {
|
||||||
terminate(ret);
|
terminate(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a never ending daemon
|
secondary.startCheckpointThread();
|
||||||
Daemon checkpointThread = new Daemon(secondary);
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void startCheckpointThread() {
|
||||||
|
Preconditions.checkState(checkpointThread == null,
|
||||||
|
"Should not already have a thread");
|
||||||
|
Preconditions.checkState(shouldRun, "shouldRun should be true");
|
||||||
|
|
||||||
|
checkpointThread = new Daemon(this);
|
||||||
checkpointThread.start();
|
checkpointThread.start();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -30,6 +30,8 @@ import static org.junit.Assert.fail;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
|
import java.lang.management.ThreadInfo;
|
||||||
|
import java.lang.management.ThreadMXBean;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -74,6 +76,7 @@ import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
||||||
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.mockito.ArgumentMatcher;
|
import org.mockito.ArgumentMatcher;
|
||||||
|
@ -115,6 +118,22 @@ public class TestCheckpoint {
|
||||||
CheckpointFaultInjector.instance = faultInjector;
|
CheckpointFaultInjector.instance = faultInjector;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void checkForSNNThreads() {
|
||||||
|
ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
|
||||||
|
|
||||||
|
ThreadInfo[] infos = threadBean.getThreadInfo(threadBean.getAllThreadIds(), 20);
|
||||||
|
for (ThreadInfo info : infos) {
|
||||||
|
if (info == null) continue;
|
||||||
|
LOG.info("Check thread: " + info.getThreadName());
|
||||||
|
if (info.getThreadName().contains("SecondaryNameNode")) {
|
||||||
|
fail("Leaked thread: " + info + "\n" +
|
||||||
|
Joiner.on("\n").join(info.getStackTrace()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOG.info("--------");
|
||||||
|
}
|
||||||
|
|
||||||
static void checkFile(FileSystem fileSys, Path name, int repl)
|
static void checkFile(FileSystem fileSys, Path name, int repl)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
assertTrue(fileSys.exists(name));
|
assertTrue(fileSys.exists(name));
|
||||||
|
@ -1738,7 +1757,7 @@ public class TestCheckpoint {
|
||||||
/**
|
/**
|
||||||
* Test that the 2NN triggers a checkpoint after the configurable interval
|
* Test that the 2NN triggers a checkpoint after the configurable interval
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test(timeout=30000)
|
||||||
public void testCheckpointTriggerOnTxnCount() throws Exception {
|
public void testCheckpointTriggerOnTxnCount() throws Exception {
|
||||||
MiniDFSCluster cluster = null;
|
MiniDFSCluster cluster = null;
|
||||||
SecondaryNameNode secondary = null;
|
SecondaryNameNode secondary = null;
|
||||||
|
@ -1752,8 +1771,7 @@ public class TestCheckpoint {
|
||||||
.format(true).build();
|
.format(true).build();
|
||||||
FileSystem fs = cluster.getFileSystem();
|
FileSystem fs = cluster.getFileSystem();
|
||||||
secondary = startSecondaryNameNode(conf);
|
secondary = startSecondaryNameNode(conf);
|
||||||
Thread t = new Thread(secondary);
|
secondary.startCheckpointThread();
|
||||||
t.start();
|
|
||||||
final NNStorage storage = secondary.getFSImage().getStorage();
|
final NNStorage storage = secondary.getFSImage().getStorage();
|
||||||
|
|
||||||
// 2NN should checkpoint at startup
|
// 2NN should checkpoint at startup
|
||||||
|
|
Loading…
Reference in New Issue