From 97579d40197fd37b7b8600339238d69d288cde25 Mon Sep 17 00:00:00 2001
From: Zhihong Yu
Date: Wed, 11 Jan 2012 14:58:46 +0000
Subject: [PATCH] HBASE-5163 TestLogRolling#testLogRollOnDatanodeDeath fails
 sometimes on Jenkins or hadoop QA ("The directory is already locked.")
 (N Keywal)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1230077 13f79535-47bb-0310-9956-ffa450edef68
---
 .../regionserver/wal/TestLogRolling.java      | 88 ++++++++-----------
 1 file changed, 39 insertions(+), 49 deletions(-)

diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
index 6e89cc4715b..f5b859f29f1 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestLogRolling.java
@@ -19,51 +19,32 @@
  */
 package org.apache.hadoop.hbase.regionserver.wal;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.lang.reflect.InvocationTargetException;
-import java.lang.reflect.Method;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.commons.logging.impl.Log4JLogger;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.*;
-import org.apache.hadoop.hbase.client.HBaseAdmin;
-import org.apache.hadoop.hbase.client.HTable;
-import org.apache.hadoop.hbase.client.Put;
-import org.apache.hadoop.hbase.client.Get;
-import org.apache.hadoop.hbase.client.Result;
-import org.apache.hadoop.hbase.client.ResultScanner;
-import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.*;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
-import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.FSUtils;
-import org.apache.hadoop.hbase.util.JVMClusterUtil;
+import org.apache.hadoop.hbase.util.*;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.datanode.DataNode;
 import org.apache.hadoop.hdfs.server.namenode.LeaseManager;
 import org.apache.log4j.Level;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.*;
 import org.junit.experimental.categories.Category;
 
+import java.io.*;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.*;
+
+import static org.junit.Assert.*;
+
 /**
  * Test log deletion as logs are rolled.
  */
@@ -303,6 +284,7 @@ public class TestLogRolling {
     return (DatanodeInfo[]) repl;
   }
 
+
   /**
    * Tests that logs are rolled upon detecting datanode death
    * Requires an HDFS jar with HDFS-826 & syncFs() support (HDFS-200)
@@ -313,14 +295,10 @@ public class TestLogRolling {
    * @throws IllegalArgumentException
    */
   @Test
-  public void testLogRollOnDatanodeDeath() throws IOException,
-      InterruptedException, IllegalArgumentException, IllegalAccessException,
-      InvocationTargetException {
-    assertTrue("This test requires HLog file replication.",
-        fs.getDefaultReplication() > 1);
+  public void testLogRollOnDatanodeDeath() throws Exception {
+    assertTrue("This test requires HLog file replication set to 2.",
+        fs.getDefaultReplication() == 2);
     LOG.info("Replication=" + fs.getDefaultReplication());
-    // When the META table can be opened, the region servers are running
-    new HTable(TEST_UTIL.getConfiguration(), HConstants.META_TABLE_NAME);
 
     this.server = cluster.getRegionServer(0);
     this.log = server.getWAL();
@@ -332,6 +310,7 @@ public class TestLogRolling {
     admin.createTable(desc);
     HTable table = new HTable(TEST_UTIL.getConfiguration(), tableName);
+    assertTrue(table.isAutoFlush());
 
     server = TEST_UTIL.getRSForFirstRegionInTable(Bytes.toBytes(tableName));
     this.log = server.getWAL();
 
@@ -342,27 +321,35 @@ public class TestLogRolling {
         .isAppendSupported(TEST_UTIL.getConfiguration()));
 
     // add up the datanode count, to ensure proper replication when we kill 1
+    // This function is synchronous; when it returns, the dfs cluster is active
+    // We start 3 servers and then stop 2 to avoid a directory naming conflict
+    // when we stop/start a datanode later, as mentioned in HBASE-5163
+    List<DataNode> existingNodes = dfsCluster.getDataNodes();
     dfsCluster
-        .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
-    dfsCluster.waitActive();
-    assertTrue(dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);
+        .startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);
+    for (DataNode dn : existingNodes) {
+      dfsCluster.stopDataNode(dn.dnRegistration.getName());
+    }
+
+    assertTrue(
+        dfsCluster.getDataNodes().size() >= fs.getDefaultReplication() + 1);
 
     writeData(table, 2);
-    table.setAutoFlush(true);
-
     long curTime = System.currentTimeMillis();
     long oldFilenum = log.getFilenum();
     assertTrue("Log should have a timestamp older than now",
         curTime > oldFilenum && oldFilenum != -1);
 
-    assertTrue("The log shouldn't have rolled yet", oldFilenum == log.getFilenum());
-    DatanodeInfo[] pipeline = getPipeline(log);
+    assertTrue("The log shouldn't have rolled yet",
+        oldFilenum == log.getFilenum());
+    final DatanodeInfo[] pipeline = getPipeline(log);
     assertTrue(pipeline.length == fs.getDefaultReplication());
 
     // kill a datanode in the pipeline to force a log roll on the next sync()
+    // This function is synchronous; when it returns, the node is killed.
    assertTrue(dfsCluster.stopDataNode(pipeline[0].getName()) != null);
-    Thread.sleep(10000);
+
     // this write should succeed, but trigger a log roll
     writeData(table, 2);
     long newFilenum = log.getFilenum();
 
@@ -372,23 +359,26 @@ public class TestLogRolling {
 
     // write some more log data (this should use a new hdfs_out)
     writeData(table, 3);
-    assertTrue("The log should not roll again.", log.getFilenum() == newFilenum);
+    assertTrue("The log should not roll again.",
+        log.getFilenum() == newFilenum);
     // kill another datanode in the pipeline, so the replicas will be lower than
     // the configured value 2.
     assertTrue(dfsCluster.stopDataNode(pipeline[1].getName()) != null);
-    Thread.sleep(10000);
+
     batchWriteAndWait(table, 3, false, 10000);
     assertTrue("LowReplication Roller should've been disabled",
         !log.isLowReplicationRollEnabled());
+
     dfsCluster
         .startDataNodes(TEST_UTIL.getConfiguration(), 1, true, null, null);
-    dfsCluster.waitActive();
+
     // Force roll writer. The new log file will have the default replication,
     // and the LowReplication Roller will be enabled.
     log.rollWriter(true);
     batchWriteAndWait(table, 13, true, 10000);
-    assertTrue("New log file should have the default replication",
-        log.getLogReplication() == fs.getDefaultReplication());
+    assertTrue("New log file should have the default replication instead of "
+        + log.getLogReplication(),
+        log.getLogReplication() == fs.getDefaultReplication());
     assertTrue("LowReplication Roller should've been enabled",
         log.isLowReplicationRollEnabled());
   }
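
For readers who want the datanode-swap trick from the hunk above in
isolation, the sketch below restates it as a standalone helper. It is a
minimal sketch assuming the Hadoop 0.20-era MiniDFSCluster test API that
this patch itself relies on (startDataNodes, getDataNodes, stopDataNode,
and the public DataNode.dnRegistration field); the class and method names
are illustrative stand-ins, not part of the patch.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.datanode.DataNode;

public class DataNodeSwapSketch {

  /**
   * Replaces a MiniDFSCluster's current datanodes with freshly started
   * ones. The replacements are started first so the write pipeline never
   * drops below the configured replication; the originals are stopped
   * afterwards so their storage directories are released, which is what
   * avoids the "The directory is already locked." failure when a datanode
   * is started again later in the test.
   */
  static void swapDataNodes(MiniDFSCluster dfsCluster, Configuration conf,
      int replacementCount) throws IOException {
    // Snapshot the original nodes before the replacements come up.
    List<DataNode> existingNodes = dfsCluster.getDataNodes();

    // startDataNodes is synchronous: when it returns, the new nodes are up.
    dfsCluster.startDataNodes(conf, replacementCount, true, null, null);

    // Stop each original node by its registration name (host:port),
    // exactly as the patched test does.
    for (DataNode dn : existingNodes) {
      dfsCluster.stopDataNode(dn.dnRegistration.getName());
    }
  }
}

The ordering is the point of the design: because MiniDFSCluster names
datanode storage directories by node index, starting the replacements
before retiring the originals also keeps the later single-node
startDataNodes call from colliding with a still-locked directory, per the
comment in the patch.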