HBASE-5163 TestLogRolling#testLogRollOnDatanodeDeath fails sometimes on Jenkins or hadoop QA ("The directory is already locked.") (N Keywal)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1230077 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Zhihong Yu 2012-01-11 14:58:46 +00:00
parent 15c62ed21a
commit 97579d4019
1 changed files with 39 additions and 49 deletions

View File

@ -19,51 +19,32 @@
*/
package org.apache.hadoop.hbase.regionserver.wal;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.logging.impl.Log4JLogger;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.apache.hadoop.hbase.util.*;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
import org.apache.log4j.Level;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.*;
import org.junit.experimental.categories.Category;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import static org.junit.Assert.*;
/**
* Test log deletion as logs are rolled.
*/
@ -303,6 +284,7 @@ public class TestLogRolling {
return (DatanodeInfo[]) repl;
}
/**
* Tests that logs are rolled upon detecting datanode death
* Requires an HDFS jar with HDFS-826 & syncFs() support (HDFS-200)
@ -313,14 +295,10 @@ public class TestLogRolling {
* @throws IllegalArgumentException
*/
@Test
public void testLogRollOnDatanodeDeath() throws IOException,
InterruptedException, IllegalArgumentException, IllegalAccessException,
InvocationTargetException {
assertTrue("This test requires HLog file replication.",
fs.getDefaultReplication() > 1);
public void testLogRollOnDatanodeDeath() throws Exception {
assertTrue("This test requires HLog file replication set to 2.",
fs.getDefaultReplication() == 2);
LOG.info("Replication=" + fs.getDefaultReplication());
// When the META table can be opened, the region servers are running
new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
this.server = cluster.getRegionServer(0);
this.log = server.getWAL();
@ -332,6 +310,7 @@ public class TestLogRolling {
admin.createTable(desc);
HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
assertTrue(table.isAutoFlush());
server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
this.log = server.getWAL();
@ -342,27 +321,35 @@ public class TestLogRolling {
.isAppendSupported(TEST_UTIL.getConfiguration()));
// add up the datanode count, to ensure proper replication when we kill 1
// This function is synchronous; when it returns, the dfs cluster is active
// We start 3 servers and then stop 2 to avoid a directory naming conflict
// when we stop/start a namenode later, as mentioned in HBASE-5163
List<DataNode> existingNodes = dfsCluster.getDataNodes();
dfsCluster
.startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
dfsCluster.waitActive();
assertTrue(dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);
.startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);
for (DataNode dn: existingNodes){
dfsCluster.stopDataNode( dn.dnRegistration.getName() );
}
assertTrue(
dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);
writeData(table, 2);
table.setAutoFlush(true);
long curTime = System.currentTimeMillis();
long oldFilenum = log.getFilenum();
assertTrue("Log should have a timestamp older than now",
curTime > oldFilenum && oldFilenum != -1);
assertTrue("The log shouldn't have rolled yet", oldFilenum == log.getFilenum());
DatanodeInfo[] pipeline = getPipeline(log);
assertTrue("The log shouldn't have rolled yet",
oldFilenum == log.getFilenum());
final DatanodeInfo[] pipeline = getPipeline(log);
assertTrue(pipeline.length == fs.getDefaultReplication());
// kill a datanode in the pipeline to force a log roll on the next sync()
// This function is synchronous, when it returns the node is killed.
assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);
Thread.sleep(10000);
// this write should succeed, but trigger a log roll
writeData(table, 2);
long newFilenum = log.getFilenum();
@ -372,22 +359,25 @@ public class TestLogRolling {
// write some more log data (this should use a new hdfs_out)
writeData(table, 3);
assertTrue("The log should not roll again.", log.getFilenum() == newFilenum);
assertTrue("The log should not roll again.",
log.getFilenum() == newFilenum);
// kill another datanode in the pipeline, so the replicas will be lower than
// the configured value 2.
assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);
Thread.sleep(10000);
batchWriteAndWait(table, 3, false, 10000);
assertTrue("LowReplication Roller should've been disabled",
!log.isLowReplicationRollEnabled());
dfsCluster
.startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
dfsCluster.waitActive();
// Force roll writer. The new log file will have the default replications,
// and the LowReplication Roller will be enabled.
log.rollWriter(true);
batchWriteAndWait(table, 13, true, 10000);
assertTrue("New log file should have the default replication",
assertTrue("New log file should have the default replication instead of " +
log.getLogReplication(),
log.getLogReplication() == fs.getDefaultReplication());
assertTrue("LowReplication Roller should've been enabled",
log.isLowReplicationRollEnabled());