HBASE-2437 Refactor HLog splitLog
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@950174 13f79535-47bb-0310-9956-ffa450edef68
parent edf52b76df
commit dacf350835
@@ -654,6 +654,7 @@ Release 0.21.0 - Unreleased
 HBASE-2630  HFile should use toStringBinary in various places
 HBASE-2632  Shell should autodetect terminal width
 HBASE-2636  Upgrade Jetty to 6.1.24
+HBASE-2437  Refactor HLog splitLog (Cosmin Lehene via Stack)

 NEW FEATURES
 HBASE-1961  HBase EC2 scripts
pom.xml
@@ -448,6 +448,7 @@
     <slf4j.version>1.5.8</slf4j.version>
     <stax-api>1.0.1</stax-api>
     <thrift.version>0.2.0</thrift.version>
+    <guava.version>r03</guava.version>
   </properties>

   <dependencyManagement>
@@ -697,10 +698,15 @@
       <version>${commons-math.version}</version>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-test</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+    </dependency>
   </dependencies>

   <!--
@@ -30,6 +30,8 @@ import java.lang.reflect.Method;
 import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.ListIterator;
@@ -37,9 +39,12 @@ import java.util.Map;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentSkipListMap;
-import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
@@ -73,6 +78,9 @@ import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
+import com.google.common.util.concurrent.NamingThreadFactory;

+import static org.apache.hadoop.hbase.util.FSUtils.recoverFileLease;

 /**
  * HLog stores all the edits to the HStore. Its the hbase write-ahead-log
@@ -131,8 +139,9 @@ public class HLog implements HConstants, Syncable {
   private final List<LogActionsListener> actionListeners =
     Collections.synchronizedList(new ArrayList<LogActionsListener>());

-  private static Class logWriterClass;
-  private static Class logReaderClass;
+  private static Class<? extends Writer> logWriterClass;
+  private static Class<? extends Reader> logReaderClass;

   private OutputStream hdfs_out; // OutputStream associated with the current SequenceFile.writer
   private int initialReplication; // initial replication factor of SequenceFile.writer
@@ -484,23 +493,23 @@ public class HLog implements HConstants, Syncable {
    * @return A WAL reader. Close when done with it.
    * @throws IOException
    */
-  @SuppressWarnings("unchecked")
   public static Reader getReader(final FileSystem fs,
     final Path path, Configuration conf)
   throws IOException {
     try {
       if (logReaderClass == null) {
-        logReaderClass = Class.forName(conf.get("hbase.regionserver.hlog.reader.impl",
-          SequenceFileLogReader.class.getCanonicalName()));
+        logReaderClass = conf.getClass("hbase.regionserver.hlog.reader.impl",
+          SequenceFileLogReader.class, Reader.class);
       }

-      HLog.Reader reader = (HLog.Reader) logReaderClass.newInstance();
+      HLog.Reader reader = logReaderClass.newInstance();
       reader.init(fs, path, conf);
       return reader;
-    } catch (Exception e) {
-      IOException ie = new IOException("cannot get log reader");
-      ie.initCause(e);
-      throw ie;
+    } catch (IOException e) {
+      throw e;
+    }
+    catch (Exception e) {
+      throw new IOException("Cannot get log reader", e);
     }
   }

@@ -511,14 +520,13 @@ public class HLog implements HConstants, Syncable {
    * @return A WAL writer. Close when done with it.
    * @throws IOException
    */
-  @SuppressWarnings("unchecked")
   public static Writer createWriter(final FileSystem fs,
     final Path path, Configuration conf)
   throws IOException {
     try {
       if (logWriterClass == null) {
-        logWriterClass = Class.forName(conf.get("hbase.regionserver.hlog.writer.impl",
-          SequenceFileLogWriter.class.getCanonicalName()));
+        logWriterClass = conf.getClass("hbase.regionserver.hlog.writer.impl",
+          SequenceFileLogWriter.class, Writer.class);
       }
       HLog.Writer writer = (HLog.Writer) logWriterClass.newInstance();
       writer.init(fs, path, conf);
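Both factory methods now resolve their implementation class through Hadoop's Configuration.getClass(name, defaultImpl, xface), which performs the interface check that the old Class.forName() plus cast did by hand. A minimal sketch of the same pattern for any pluggable component follows; the helper name and sample key are illustrative only and not part of this patch:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;

// Sketch only: resolve an implementation class from configuration with a typed
// default, then instantiate it reflectively (implementations need a no-arg constructor).
final class PluggableFactory {
  static <T> T newInstance(Configuration conf, String key,
      Class<? extends T> defaultImpl, Class<T> xface) throws IOException {
    Class<? extends T> clazz = conf.getClass(key, defaultImpl, xface);
    try {
      return clazz.newInstance();
    } catch (Exception e) {
      throw new IOException("Cannot instantiate " + clazz.getName(), e);
    }
  }
}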
@@ -1145,10 +1153,11 @@ public class HLog implements HConstants, Syncable {
    * @param rootDir qualified root directory of the HBase instance
    * @param srcDir Directory of log files to split: e.g.
    *               <code>${ROOTDIR}/log_HOST_PORT</code>
-   * @param oldLogDir
+   * @param oldLogDir directory where processed (split) logs will be archived to
    * @param fs FileSystem
    * @param conf Configuration
-   * @throws IOException
+   * @throws IOException will throw if corrupted hlogs aren't tolerated
+   * @return the list of splits
    */
   public static List<Path> splitLog(final Path rootDir, final Path srcDir,
     Path oldLogDir, final FileSystem fs, final Configuration conf)
@@ -1167,18 +1176,10 @@ public class HLog implements HConstants, Syncable {
     }
     LOG.info("Splitting " + logfiles.length + " hlog(s) in " +
       srcDir.toString());
-    splits = splitLog(rootDir, oldLogDir, logfiles, fs, conf);
+    splits = splitLog(rootDir, srcDir, oldLogDir, logfiles, fs, conf);
     try {
-      FileStatus[] files = fs.listStatus(srcDir);
-      for(FileStatus file : files) {
-        Path newPath = getHLogArchivePath(oldLogDir, file.getPath());
-        LOG.debug("Moving " + FSUtils.getPath(file.getPath()) + " to " +
-          FSUtils.getPath(newPath));
-        fs.rename(file.getPath(), newPath);
-      }
-      LOG.debug("Moved " + files.length + " log files to " +
-        FSUtils.getPath(oldLogDir));
-      fs.delete(srcDir, true);
+      LOG.info("Spliting is done. Removing old log dir "+srcDir);
+      fs.delete(srcDir, false);
     } catch (IOException e) {
       e = RemoteExceptionHandler.checkIOException(e);
       IOException io = new IOException("Cannot delete: " + srcDir);
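The public entry point keeps the same shape, so a caller such as the master's log-recovery path invokes it in one shot and gets the per-region split files back. A brief usage sketch, assuming fs and conf are already in scope; the directory names are illustrative:

// Sketch: split every hlog left behind by a dead regionserver.
Path rootDir   = new Path("/hbase");                       // hbase.rootdir
Path srcDir    = new Path("/hbase/.logs/server1,60020");   // that server's log directory
Path oldLogDir = new Path("/hbase/.oldlogs");              // archive target
List<Path> splits = HLog.splitLog(rootDir, srcDir, oldLogDir, fs, conf);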
@@ -1218,313 +1219,99 @@
       }
     }

-  /*
-   * @param rootDir
-   * @param logfiles
+  /**
+   * Sorts the HLog edits in the given list of logfiles (that are a mix of edits on multiple regions)
+   * by region and then splits them per region directories, in batches of (hbase.hlog.split.batch.size)
+   *
+   * A batch consists of a set of log files that will be sorted in a single map of edits indexed by region
+   * the resulting map will be concurrently written by multiple threads to their corresponding regions
+   *
+   * Each batch consists of more more log files that are
+   * - recovered (files is opened for append then closed to ensure no process is writing into it)
+   * - parsed (each edit in the log is appended to a list of edits indexed by region
+   *   see {@link #parseHLog} for more details)
+   * - marked as either processed or corrupt depending on parsing outcome
+   * - the resulting edits indexed by region are concurrently written to their corresponding region
+   *   region directories
+   * - original files are then archived to a different directory
+   *
+   * @param rootDir hbase directory
+   * @param srcDir logs directory
+   * @param oldLogDir directory where processed logs are archived to
+   * @param logfiles the list of log files to split
    * @param fs
    * @param conf
+   * @return
    * @throws IOException
-   * @return List of splits made.
    */
-  private static List<Path> splitLog(final Path rootDir,
+  private static List<Path> splitLog(final Path rootDir, final Path srcDir,
     Path oldLogDir, final FileStatus[] logfiles, final FileSystem fs,
     final Configuration conf)
   throws IOException {
+    List<Path> processedLogs = new ArrayList<Path>();
+    List<Path> corruptedLogs = new ArrayList<Path>();
     final Map<byte [], WriterAndPath> logWriters =
       Collections.synchronizedMap(
         new TreeMap<byte [], WriterAndPath>(Bytes.BYTES_COMPARATOR));
     List<Path> splits = null;

-    // Number of threads to use when log splitting to rewrite the logs.
-    // More means faster but bigger mem consumption.
-    int logWriterThreads =
-      conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
-
-    // Number of logs to read into memory before writing to their appropriate
-    // regions when log splitting. More means faster but bigger mem consumption
+    // Number of logs in a read batch
+    // More means faster but bigger mem consumption
+    //TODO make a note on the conf rename and update hbase-site.xml if needed
     int logFilesPerStep =
-      conf.getInt("hbase.regionserver.hlog.splitlog.reader.threads", 3);
-
-    // append support = we can avoid data loss (yay)
-    // we open for append, then close to recover the correct file length
-    final boolean appendSupport = isAppend(conf);
-
-    // store corrupt logs for post-mortem analysis (empty string = discard)
-    final String corruptDir =
-      conf.get("hbase.regionserver.hlog.splitlog.corrupt.dir", ".corrupt");
-
-    List<Path> finishedFiles = new LinkedList<Path>();
-    List<Path> corruptFiles = new LinkedList<Path>();
+      conf.getInt("hbase.hlog.split.batch.size", 3);
+    boolean skipErrors = conf.getBoolean("hbase.hlog.split.skip.errors", false);

     try {
-      int maxSteps = Double.valueOf(Math.ceil((logfiles.length * 1.0) /
-        logFilesPerStep)).intValue();
-      for (int step = 0; step < maxSteps; step++) {
-        // Step 1: read N log files into memory
-        final Map<byte[], LinkedList<HLog.Entry>> logEntries =
-          new TreeMap<byte[], LinkedList<HLog.Entry>>(Bytes.BYTES_COMPARATOR);
-        int endIndex = step == maxSteps - 1? logfiles.length:
-          step * logFilesPerStep + logFilesPerStep;
-        for (int i = (step * logFilesPerStep); i < endIndex; i++) {
-          Path curLogFile = logfiles[i].getPath();
-
-          // make sure we get the right file length before opening for read
-          recoverLog(fs, curLogFile, appendSupport);
-
-          long length = fs.getFileStatus(curLogFile).getLen();
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
-              ": " + curLogFile + ", length=" + length);
-          }
+      int i = -1;
+      while (i < logfiles.length) {
+        final Map<byte[], LinkedList<Entry>> editsByRegion =
+          new TreeMap<byte[], LinkedList<Entry>>(Bytes.BYTES_COMPARATOR);
+        for (int j = 0; j < logFilesPerStep; j++) {
+          i++;
+          if (i == logfiles.length) {
+            break;
           }
-          Reader in = null;
-          boolean cleanRead = false;
-          int count = 0;
+          FileStatus log = logfiles[i];
+          Path logPath = log.getPath();
+          long logLength = log.getLen();
+          LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
+            ": " + logPath + ", length=" + logLength );
           try {
-            in = HLog.getReader(fs, curLogFile, conf);
-            try {
-              Entry entry;
-              while ((entry = in.next()) != null) {
-                byte [] regionName = entry.getKey().getRegionName();
-                LinkedList<HLog.Entry> queue = logEntries.get(regionName);
-                if (queue == null) {
-                  queue = new LinkedList<HLog.Entry>();
-                  LOG.debug("Adding queue for " + Bytes.toStringBinary(regionName));
-                  logEntries.put(regionName, queue);
+            recoverFileLease(fs, logPath, conf);
+            parseHLog(log, editsByRegion, fs, conf);
+            processedLogs.add(logPath);
+          } catch (IOException e) {
+            if (skipErrors) {
+              LOG.warn("Got while parsing hlog " + logPath +
+                ". Marking as corrupted", e);
+              corruptedLogs.add(logPath);
+            } else {
+              throw e;
             }
-                queue.push(entry);
-                count++;
-              }
-              LOG.debug("Pushed=" + count + " entries from " + curLogFile);
-              cleanRead = true;
-            } catch (IOException e) {
-              LOG.debug("IOE Pushed=" + count + " entries from " + curLogFile);
-              e = RemoteExceptionHandler.checkIOException(e);
-              if (!(e instanceof EOFException)) {
-                String msg = "Exception processing " + curLogFile +
-                  " -- continuing. Possible DATA LOSS!";
-                if (corruptDir.length() > 0) {
-                  msg += " Storing in hlog corruption directory.";
-                }
-                LOG.warn(msg, e);
-              }
-            }
-          } catch (IOException e) {
-            if (length <= 0) {
-              LOG.warn("Empty hlog, continuing: " + logfiles[i]);
-              cleanRead = true;
-              continue;
-            }
-            throw e;
-          } finally {
-            try {
-              if (in != null) {
-                in.close();
-              }
-            } catch (IOException e) {
-              LOG.warn("File.close() threw exception -- continuing, "
-                + "but marking file as corrupt.", e);
-              cleanRead = false;
-            }
-            if (cleanRead) {
-              finishedFiles.add(curLogFile);
-            } else {
-              corruptFiles.add(curLogFile);
-            }
-            /* TODO FOR J-D REVIEW
-            // Archive the input file now so we do not replay edits. We could
-            // have gotten here because of an exception. If so, probably
-            // nothing we can do about it. Replaying it, it could work but we
-            // could be stuck replaying for ever. Just continue though we
-            // could have lost some edits.
-            fs.rename(logfiles[i].getPath(),
-              getHLogArchivePath(oldLogDir, logfiles[i].getPath()));
-            */
           }
         }
+        writeEditsBatchToRegions(editsByRegion, logWriters, rootDir, fs, conf);
-        // Step 2: Some regionserver log files have been read into memory.
-        // Assign them to the appropriate region directory.
-        class ThreadWithException extends Thread {
-          ThreadWithException(String name) { super(name); }
-          public IOException exception = null;
-        }
-        List<ThreadWithException> threadList =
-          new ArrayList<ThreadWithException>(logEntries.size());
-        ExecutorService threadPool =
-          Executors.newFixedThreadPool(logWriterThreads);
-        for (final byte [] region: logEntries.keySet()) {
-          ThreadWithException thread =
-            new ThreadWithException(Bytes.toStringBinary(region)) {
-            @Override
-            public void run() {
-              LinkedList<HLog.Entry> entries = logEntries.get(region);
-              LOG.debug("Thread got " + entries.size() + " to process");
-              if(entries.size() <= 0) {
-                LOG.warn("Got a region with no entries to process.");
-                return;
-              }
-              long threadTime = System.currentTimeMillis();
-              try {
-                int count = 0;
-                // get the logfile associated with this region. 2 logs often
-                // write to the same region, so persist this info across logs
-                WriterAndPath wap = logWriters.get(region);
-                if (wap == null) {
-                  // first write to this region, make new logfile
-                  assert entries.size() > 0;
-                  Path logfile = new Path(HRegion.getRegionDir(HTableDescriptor
-                    .getTableDir(rootDir,
-                      entries.getFirst().getKey().getTablename()),
-                    HRegionInfo.encodeRegionName(region)),
-                    HREGION_OLDLOGFILE_NAME);
-
-                  // If splitLog() was running when the user restarted his
-                  // cluster, then we could already have a 'logfile'.
-                  // Since we don't delete logs until everything is written to
-                  // their respective regions, we can safely remove this tmp.
-                  if (fs.exists(logfile)) {
-                    LOG.warn("Deleting old hlog file: " + logfile);
-                    // TODO: Archive?
-                    fs.delete(logfile, true);
-                  }
-
-                  // associate an OutputStream with this logfile
-                  Writer w = createWriter(fs, logfile, conf);
-                  wap = new WriterAndPath(logfile, w);
-                  logWriters.put(region, wap);
-                  if (LOG.isDebugEnabled()) {
-                    LOG.debug("Creating new hlog file writer for path "
-                      + logfile + " and region " + Bytes.toStringBinary(region));
-                  }
-                }
-
-                // Items were added to the linkedlist oldest first. Pull them
-                // out in that order.
-                for (ListIterator<HLog.Entry> i = entries.listIterator(entries.size());
-                    i.hasPrevious();) {
-                  wap.w.append(i.previous());
-                  count++;
-                }
-
-                if (LOG.isDebugEnabled()) {
-                  LOG.debug("Applied " + count + " total edits to "
-                    + Bytes.toStringBinary(region) + " in "
-                    + (System.currentTimeMillis() - threadTime) + "ms");
-                }
-              } catch (IOException e) {
-                e = RemoteExceptionHandler.checkIOException(e);
-                LOG.warn("Got while writing region "
-                  + Bytes.toStringBinary(region) + " log " + e);
-                e.printStackTrace();
-                exception = e;
-              }
-            }
-          };
-          threadList.add(thread);
-          threadPool.execute(thread);
-        }
-        threadPool.shutdown();
-        // Wait for all threads to terminate
-        try {
-          for(int i = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS); i++) {
-            LOG.debug("Waiting for hlog writers to terminate, iteration #" + i);
-          }
-        } catch (InterruptedException ex) {
-          LOG.warn("Hlog writers were interrupted during splitLog(). "
-            +"Retaining log files to avoid data loss.");
-          throw new IOException(ex.getMessage(), ex.getCause());
-        }
-        // throw an exception if one of the threads reported one
-        for (ThreadWithException t : threadList) {
-          if (t.exception != null) {
-            throw t.exception;
-          }
-        }
-
-        // End of for loop. Rinse and repeat
       }
+      if (fs.listStatus(srcDir).length > processedLogs.size() + corruptedLogs.size()) {
+        throw new IOException("Discovered orphan hlog after split. Maybe " +
+          "HRegionServer was not dead when we started");
+      }
+      archiveLogs(corruptedLogs, processedLogs, oldLogDir, fs, conf);
     } finally {
       splits = new ArrayList<Path>(logWriters.size());
       for (WriterAndPath wap : logWriters.values()) {
         wap.w.close();
-        LOG.debug("Closed " + wap.p);
         splits.add(wap.p);
+        LOG.debug("Closed " + wap.p);
       }
     }

-    // Step 3: All writes succeeded! Get rid of the now-unnecessary logs
-    for(Path p : finishedFiles) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Successfully split Hlog file. Deleting " + p);
-      }
-      try {
-        if (!fs.delete(p, true) && LOG.isDebugEnabled()) {
-          LOG.debug("Delete of split Hlog (" + p + ") failed.");
-        }
-      } catch (IOException e) {
-        // don't throw error here. worst case = double-read
-        LOG.warn("Error deleting successfully split Hlog (" + p + ") -- " + e);
-      }
-    }
-    for (Path p : corruptFiles) {
-      if (corruptDir.length() > 0) {
-        // store any corrupt logs for later analysis
-        Path cp = new Path(conf.get(HBASE_DIR), corruptDir);
-        if(!fs.exists(cp)) {
-          fs.mkdirs(cp);
-        }
-        Path newp = new Path(cp, p.getName());
-        if (!fs.exists(newp)) {
-          if (!fs.rename(p, newp)) {
-            LOG.warn("Rename of " + p + " to " + newp + " failed.");
-          } else {
-            LOG.warn("Corrupt Hlog (" + p + ") moved to " + newp);
-          }
-        } else {
-          LOG.warn("Corrupt Hlog (" + p + ") already moved to " + newp +
-            ". Ignoring");
-        }
-      } else {
-        // data loss is less important than disk space, delete
-        try {
-          if (!fs.delete(p, true) ) {
-            if (LOG.isDebugEnabled()) {
-              LOG.debug("Delete of split Hlog " + p + " failed.");
-            }
-          } else {
-            LOG.warn("Corrupt Hlog (" + p + ") deleted!");
-          }
-        } catch (IOException e) {
-          LOG.warn("Error deleting corrupt Hlog (" + p + ") -- " + e);
-        }
-      }
-    }
-
     return splits;
   }

-  /*
-   * @param conf
-   * @return True if append enabled and we have the syncFs in our path.
-   */
-  static boolean isAppend(final Configuration conf) {
-    boolean append = conf.getBoolean("dfs.support.append", false);
-    if (append) {
-      try {
-        // TODO: The implementation that comes back when we do a createWriter
-        // may not be using SequenceFile so the below is not a definitive test.
-        // Will do for now (hdfs-200).
-        SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
-        append = true;
-      } catch (SecurityException e) {
-      } catch (NoSuchMethodException e) {
-        append = false;
-      }
-    }
-    return append;
-  }

   /**
    * Utility class that lets us keep track of the edit with it's key
@@ -1582,64 +1369,6 @@ public class HLog implements HConstants, Syncable {
     }
   }

-  /*
-   * Recover log.
-   * Try and open log in append mode.
-   * Doing this, we get a hold of the file that crashed writer
-   * was writing to. Once we have it, close it. This will
-   * allow subsequent reader to see up to last sync.
-   * @param fs
-   * @param p
-   * @param append
-   */
-  public static void recoverLog(final FileSystem fs, final Path p,
-      final boolean append) throws IOException {
-    if (!append) {
-      return;
-    }
-
-    // lease recovery not needed for local file system case.
-    // currently, local file system doesn't implement append either.
-    if (!(fs instanceof DistributedFileSystem)) {
-      return;
-    }
-
-    LOG.debug("Recovering DFS lease for path " + p);
-    long startWaiting = System.currentTimeMillis();
-
-    // Trying recovery
-    boolean recovered = false;
-    while (!recovered) {
-      try {
-        FSDataOutputStream out = fs.append(p);
-        out.close();
-        recovered = true;
-      } catch (IOException e) {
-        e = RemoteExceptionHandler.checkIOException(e);
-        if (e instanceof AlreadyBeingCreatedException) {
-          // We expect that we'll get this message while the lease is still
-          // within its soft limit, but if we get it past that, it means
-          // that the RS is holding onto the file even though it lost its
-          // znode. We could potentially abort after some time here.
-          long waitedFor = System.currentTimeMillis() - startWaiting;
-          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
-            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p
-              + ":" + e.getMessage());
-          }
-          try {
-            Thread.sleep(1000);
-          } catch (InterruptedException ex) {
-            // ignore it and try again
-          }
-        } else {
-          throw new IOException("Failed to open " + p + " for append", e);
-        }
-      }
-    }
-    LOG.info("Past out lease recovery");
-  }

   /**
    * Construct the HLog directory name
    *
@@ -1687,28 +1416,247 @@
     return new Path(oldLogDir, p.getName());
   }

-  private static void usage() {
-    System.err.println("Usage: java org.apache.hbase.HLog" +
-      " {--dump <logfile>... | --split <logdir>...}");
+  /**
+   * Takes splitLogsMap and concurrently writes them to region directories using a thread pool
+   *
+   * @param splitLogsMap map that contains the log splitting result indexed by region
+   * @param logWriters map that contains a writer per region
+   * @param rootDir hbase root dir
+   * @param fs
+   * @param conf
+   * @throws IOException
+   */
+  private static void writeEditsBatchToRegions(
+    final Map<byte[], LinkedList<Entry>> splitLogsMap,
+    final Map<byte[], WriterAndPath> logWriters,
+    final Path rootDir, final FileSystem fs, final Configuration conf)
+  throws IOException {
+    // Number of threads to use when log splitting to rewrite the logs.
+    // More means faster but bigger mem consumption.
+    int logWriterThreads =
+      conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
+    boolean skipErrors = conf.getBoolean("hbase.skip.errors", false);
+    HashMap<byte[], Future> writeFutureResult = new HashMap<byte[], Future>();
+    NamingThreadFactory f = new NamingThreadFactory(
+      "SplitWriter-%1$d", Executors.defaultThreadFactory());
+    ThreadPoolExecutor threadPool = (ThreadPoolExecutor)Executors.newFixedThreadPool(logWriterThreads, f);
+    for (final byte[] region : splitLogsMap.keySet()) {
+      Callable splitter = createNewSplitter(rootDir, logWriters, splitLogsMap, region, fs, conf);
+      writeFutureResult.put(region, threadPool.submit(splitter));
+    }
+
+    threadPool.shutdown();
+    // Wait for all threads to terminate
+    try {
+      for (int j = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS); j++) {
+        String message = "Waiting for hlog writers to terminate, elapsed " + j * 5 + " seconds";
+        if (j < 30) {
+          LOG.debug(message);
+        } else {
+          LOG.info(message);
+        }
+      }
+    } catch(InterruptedException ex) {
+      LOG.warn("Hlog writers were interrupted, possible data loss!");
+      if (!skipErrors) {
+        throw new IOException("Could not finish writing log entries", ex);
+        //TODO maybe we should fail here regardless if skipErrors is active or not
+      }
+    }
+
+    for (Map.Entry<byte[], Future> entry : writeFutureResult.entrySet()) {
+      try {
+        entry.getValue().get();
+      } catch (ExecutionException e) {
+        throw (new IOException(e.getCause()));
+      } catch (InterruptedException e1) {
+        LOG.warn("Writer for region " + Bytes.toString(entry.getKey()) +
+          " was interrupted, however the write process should have " +
+          "finished. Throwing up ", e1);
+        throw (new IOException(e1.getCause()));
+      }
+    }
+  }

+  /*
+   * Parse a single hlog and put the edits in @splitLogsMap
+   *
+   * @param logfile to split
+   * @param splitLogsMap output parameter: a map with region names as keys and a
+   * list of edits as values
+   * @param fs the filesystem
+   * @param conf the configuration
+   * @throws IOException if hlog is corrupted, or can't be open
+   */
+  private static void parseHLog(final FileStatus logfile,
+    final Map<byte[], LinkedList<Entry>> splitLogsMap, final FileSystem fs,
+    final Configuration conf)
+  throws IOException {
+    // Check for possibly empty file. With appends, currently Hadoop reports a
+    // zero length even if the file has been sync'd. Revisit if HDFS-376 or
+    // HDFS-878 is committed.
+    long length = logfile.getLen();
+    if (length <= 0) {
+      LOG.warn("File " + logfile.getPath() + " might be still open, length is 0");
+    }
+    Path path = logfile.getPath();
+    Reader in;
+    int editsCount = 0;
+    try {
+      in = HLog.getReader(fs, path, conf);
+    } catch (EOFException e) {
+      if (length <= 0) {
+        //TODO should we ignore an empty, not-last log file if skip.errors is false?
+        //Either way, the caller should decide what to do. E.g. ignore if this is the last
+        //log in sequence.
+        //TODO is this scenario still possible if the log has been recovered (i.e. closed)
+        LOG.warn("Could not open " + path + " for reading. File is empty" + e);
+        return;
+      } else {
+        throw e;
+      }
+    }
+    try {
+      Entry entry;
+      while ((entry = in.next()) != null) {
+        byte[] region = entry.getKey().getRegionName();
+        LinkedList<Entry> queue = splitLogsMap.get(region);
+        if (queue == null) {
+          queue = new LinkedList<Entry>();
+          splitLogsMap.put(region, queue);
+        }
+        queue.addFirst(entry);
+        editsCount++;
+      }
+      LOG.debug("Pushed=" + editsCount + " entries from " + path);
+    } finally {
+      try {
+        if (in != null) {
+          in.close();
+        }
+      } catch (IOException e) {
+        LOG.warn("Close log reader in finally threw exception -- continuing", e);
+      }
+    }
+  }

+  private static Callable<Void> createNewSplitter(final Path rootDir,
+    final Map<byte[], WriterAndPath> logWriters,
+    final Map<byte[], LinkedList<Entry>> logEntries,
+    final byte[] region, final FileSystem fs, final Configuration conf) {
+    return new Callable<Void>() {
+      public String getName() {
+        return "Split writer thread for region " + Bytes.toStringBinary(region);
+      }
+
+      @Override
+      public Void call() throws IOException {
+        LinkedList<Entry> entries = logEntries.get(region);
+        LOG.debug(this.getName()+" got " + entries.size() + " to process");
+        long threadTime = System.currentTimeMillis();
+        try {
+          int editsCount = 0;
+          WriterAndPath wap = logWriters.get(region);
+          for (ListIterator<Entry> iterator = entries.listIterator();
+               iterator.hasNext();) {
+            Entry logEntry = iterator.next();
+
+            if (wap == null) {
+              Path logFile = getRegionLogPath(logEntry, rootDir);
+              if (fs.exists(logFile)) {
+                LOG.warn("Found existing old hlog file. It could be the result of a previous" +
+                  "failed split attempt. Deleting " + logFile +
+                  ", length=" + fs.getFileStatus(logFile).getLen());
+                fs.delete(logFile, false);
+              }
+              Writer w = createWriter(fs, logFile, conf);
+              wap = new WriterAndPath(logFile, w);
+              logWriters.put(region, wap);
+              LOG.debug("Creating writer path=" + logFile +
+                " region=" + Bytes.toStringBinary(region));
+            }
+
+            wap.w.append(logEntry);
+            editsCount++;
+          }
+          LOG.debug(this.getName() + " Applied " + editsCount +
+            " total edits to " + Bytes.toStringBinary(region) +
+            " in " + (System.currentTimeMillis() - threadTime) + "ms");
+        } catch (IOException e) {
+          e = RemoteExceptionHandler.checkIOException(e);
+          LOG.fatal(this.getName() + " Got while writing log entry to log", e);
+          throw e;
+        }
+        return null;
+      }
+    };
   }

   /**
+   * Moves processed logs to a oldLogDir after successful processing
+   * Moves corrupted logs (any log that couldn't be successfully parsed
+   * to corruptDir (.corrupt) for later investigation
    *
-   * @param list
+   * @param corruptedLogs
+   * @param processedLogs
+   * @param oldLogDir
+   * @param fs
+   * @param conf
+   * @throws IOException
    */
+  private static void archiveLogs(final List<Path> corruptedLogs,
+    final List<Path> processedLogs, final Path oldLogDir,
+    final FileSystem fs, final Configuration conf)
+  throws IOException{
+    final Path corruptDir = new Path(conf.get(HBASE_DIR),
+      conf.get("hbase.regionserver.hlog.splitlog.corrupt.dir", ".corrupt"));
+
+    fs.mkdirs(corruptDir);
+    fs.mkdirs(oldLogDir);
+
+    for (Path corrupted: corruptedLogs) {
+      Path p = new Path(corruptDir, corrupted.getName());
+      LOG.info("Moving corrupted log " + corrupted + " to " + p);
+      fs.rename(corrupted, p);
+    }
+
+    for (Path p: processedLogs) {
+      Path newPath = getHLogArchivePath(oldLogDir, p);
+      fs.rename(p, newPath);
+      LOG.info("Archived processed log " + p + " to " + newPath);
+    }
+  }
+
+  private static Path getRegionLogPath(Entry logEntry, Path rootDir) {
+    Path tableDir =
+      HTableDescriptor.getTableDir(rootDir, logEntry.getKey().getTablename());
+    Path regionDir =
+      HRegion.getRegionDir(tableDir, HRegionInfo.encodeRegionName(logEntry.getKey().getRegionName()));
+    return new Path(regionDir, HREGION_OLDLOGFILE_NAME);
+  }

   public void addLogActionsListerner(LogActionsListener list) {
     LOG.info("Adding a listener");
     this.actionListeners.add(list);
   }

-  /**
-   *
-   * @param list
-   */
   public boolean removeLogActionsListener(LogActionsListener list) {
     return this.actionListeners.remove(list);
   }

+  private static void usage() {
+    System.err.println("Usage: java org.apache.hbase.HLog" +
+      " {--dump <logfile>... | --split <logdir>...}");
+  }
+
   /**
    * Pass one or more log file names and it will either dump out a text version
    * on <code>stdout</code> or split the specified log files.
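For operators, the refactor replaces the old per-split reader/writer thread knobs with batch-oriented settings. A minimal sketch of tuning them in code, assuming an org.apache.hadoop.conf.Configuration named conf is in scope; the values shown are simply the defaults taken from the hunks above:

// Sketch: configuration keys introduced or kept by this change (values illustrative).
conf.setInt("hbase.hlog.split.batch.size", 3);                       // log files read per batch
conf.setBoolean("hbase.hlog.split.skip.errors", false);              // tolerate corrupted hlogs?
conf.setInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);   // concurrent per-region writers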
@@ -34,7 +34,9 @@ import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
+import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.io.SequenceFile;

 import java.io.DataInputStream;
 import java.io.IOException;
@@ -557,4 +559,88 @@ public class FSUtils {
     return isdir;
   }

+  /**
+   * Heuristic to determine whether is safe or not to open a file for append
+   * Looks both for dfs.support.append and use reflection to search
+   * for SequenceFile.Writer.syncFs() or FSDataOutputStream.hflush()
+   * @param conf
+   * @return True if append support
+   */
+  public static boolean isAppendSupported(final Configuration conf) {
+    boolean append = conf.getBoolean("dfs.support.append", false);
+    if (append) {
+      try {
+        // TODO: The implementation that comes back when we do a createWriter
+        // may not be using SequenceFile so the below is not a definitive test.
+        // Will do for now (hdfs-200).
+        SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
+        append = true;
+      } catch (SecurityException e) {
+      } catch (NoSuchMethodException e) {
+        append = false;
+      }
+    } else {
+      try {
+        FSDataOutputStream.class.getMethod("hflush", new Class<?> []{});
+      } catch (NoSuchMethodException e) {
+        append = false;
+      }
+    }
+    return append;
+  }
+
+  /*
+   * Recover file lease. Used when a file might be suspect to be had been left open by another process. <code>p</code>
+   * @param fs
+   * @param p
+   * @param append True if append supported
+   * @throws IOException
+   */
+  public static void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
+  throws IOException{
+    if (!isAppendSupported(conf)) {
+      return;
+    }
+    // lease recovery not needed for local file system case.
+    // currently, local file system doesn't implement append either.
+    if (!(fs instanceof DistributedFileSystem)) {
+      return;
+    }
+    LOG.info("Recovering file" + p);
+    long startWaiting = System.currentTimeMillis();
+
+    // Trying recovery
+    boolean recovered = false;
+    while (!recovered) {
+      try {
+        FSDataOutputStream out = fs.append(p);
+        out.close();
+        recovered = true;
+      } catch (IOException e) {
+        e = RemoteExceptionHandler.checkIOException(e);
+        if (e instanceof AlreadyBeingCreatedException) {
+          // We expect that we'll get this message while the lease is still
+          // within its soft limit, but if we get it past that, it means
+          // that the RS is holding onto the file even though it lost its
+          // znode. We could potentially abort after some time here.
+          long waitedFor = System.currentTimeMillis() - startWaiting;
+          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
+              ":" + e.getMessage());
+          }
+          try {
+            Thread.sleep(1000);
+          } catch (InterruptedException ex) {
+            // ignore it and try again
+          }
+        } else {
+          throw new IOException("Failed to open " + p + " for append", e);
+        }
+      }
+    }
+    LOG.info("Finished lease recover attempt for " + p);
+  }

 }
@@ -0,0 +1,37 @@
/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import java.io.IOException;

import org.apache.hadoop.hbase.util.Bytes;

public class InstrumentedSequenceFileLogWriter extends SequenceFileLogWriter {

  public static boolean activateFailure = false;
  @Override
  public void append(HLog.Entry entry) throws IOException {
    super.append(entry);
    if (activateFailure && Bytes.equals(entry.getKey().getRegionName(), "break".getBytes())) {
      System.out.println(getClass().getName() + ": I will throw an exception now...");
      throw(new IOException("This exception is instrumented and should only be thrown for testing"));
    }
  }
}
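This instrumented writer is only reachable because createWriter() now resolves its class from configuration; the test below installs it the same way. A sketch of that wiring, taken from the test's setup, with the failure switch armed for illustration (arming it is not part of the setup itself):

// Sketch: swap the instrumented writer in for tests via the pluggable writer key,
// then arm the failure for entries whose region name is "break".
conf.setClass("hbase.regionserver.hlog.writer.impl",
    InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);
InstrumentedSequenceFileLogWriter.activateFailure = true;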
@@ -0,0 +1,735 @@
/**
 * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.wal;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import javax.naming.InsufficientResourcesException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.ipc.RemoteException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Testing {@link HLog} splitting code.
 */
public class TestHLogSplit {

  private Configuration conf;
  private FileSystem fs;

  private final static HBaseTestingUtility
          TEST_UTIL = new HBaseTestingUtility();

  private static final Path hbaseDir = new Path("/hbase");
  private static final Path hlogDir = new Path(hbaseDir, "hlog");
  private static final Path oldLogDir = new Path(hbaseDir, "hlog.old");
  private static final Path corruptDir = new Path(hbaseDir, ".corrupt");

  private static final int NUM_WRITERS = 10;
  private static final int ENTRIES = 10; // entries per writer per region

  private HLog.Writer[] writer = new HLog.Writer[NUM_WRITERS];
  private long seq = 0;
  private static final byte[] TABLE_NAME = "t1".getBytes();
  private static final byte[] FAMILY = "f1".getBytes();
  private static final byte[] QUALIFIER = "q1".getBytes();
  private static final byte[] VALUE = "v1".getBytes();
  private static final String HLOG_FILE_PREFIX = "hlog.dat.";
  private static List<String> regions;
  private static final String HBASE_SKIP_ERRORS = "hbase.hlog.split.skip.errors";

  static enum Corruptions {
    INSERT_GARBAGE_ON_FIRST_LINE,
    INSERT_GARBAGE_IN_THE_MIDDLE,
    APPEND_GARBAGE,
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    TEST_UTIL.getConfiguration().
            setInt("hbase.regionserver.flushlogentries", 1);
    TEST_UTIL.getConfiguration().
            setBoolean("dfs.support.append", true);
    TEST_UTIL.getConfiguration().
            setStrings("hbase.rootdir", hbaseDir.toString());
    TEST_UTIL.getConfiguration().
            setClass("hbase.regionserver.hlog.writer.impl",
                InstrumentedSequenceFileLogWriter.class, HLog.Writer.class);

    TEST_UTIL.startMiniDFSCluster(2);
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniDFSCluster();
  }

  @Before
  public void setUp() throws Exception {
    conf = TEST_UTIL.getConfiguration();
    fs = TEST_UTIL.getDFSCluster().getFileSystem();
    FileStatus[] entries = fs.listStatus(new Path("/"));
    for (FileStatus dir : entries){
      fs.delete(dir.getPath(), true);
    }
    seq = 0;
    regions = new ArrayList<String>();
    Collections.addAll(regions, "bbb", "ccc");
    InstrumentedSequenceFileLogWriter.activateFailure = false;
    // Set the soft lease for hdfs to be down from default of 5 minutes or so.
    // TODO: If 0.20 hadoop do one thing, if 0.21 hadoop do another.
    // Not available in 0.20 hdfs
    //  TEST_UTIL.getDFSCluster().getNamesystem().leaseManager.
    //      setLeasePeriod(100, 50000);
    // Use reflection to get at the 0.20 version of above.
    MiniDFSCluster dfsCluster = TEST_UTIL.getDFSCluster();
    //  private NameNode nameNode;
    Field field = dfsCluster.getClass().getDeclaredField("nameNode");
    field.setAccessible(true);
    NameNode nn = (NameNode)field.get(dfsCluster);
    nn.namesystem.leaseManager.setLeasePeriod(100, 50000);
  }

  @After
  public void tearDown() throws Exception {
  }

  @Test(expected = IOException.class)
  public void testSplitFailsIfNewHLogGetsCreatedAfterSplitStarted()
  throws IOException {
    AtomicBoolean stop = new AtomicBoolean(false);
    generateHLogs(-1);
    fs.initialize(fs.getUri(), conf);
    try {
      (new ZombieNewLogWriterRegionServer(stop)).start();
      HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
    } finally {
      stop.set(true);
    }
  }

  //TODO: check the edits order is respected (scenarios)

  @Test
  public void testEmptyLogFiles() throws IOException {

    injectEmptyFile(".empty", true);
    generateHLogs(Integer.MAX_VALUE);
    injectEmptyFile("empty", true);

    // make fs act as a different client now
    // initialize will create a new DFSClient with a new client ID
    fs.initialize(fs.getUri(), conf);

    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals(NUM_WRITERS * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testEmptyOpenLogFiles() throws IOException {
    injectEmptyFile(".empty", false);
    generateHLogs(Integer.MAX_VALUE);
    injectEmptyFile("empty", false);

    // make fs act as a different client now
    // initialize will create a new DFSClient with a new client ID
    fs.initialize(fs.getUri(), conf);

    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals(NUM_WRITERS * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testOpenZeroLengthReportedFileButWithDataGetsSplit() throws IOException {
    // generate logs but leave hlog.dat.5 open.
    generateHLogs(5);

    fs.initialize(fs.getUri(), conf);

    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals(NUM_WRITERS * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

|
  @Test
  public void testTralingGarbageCorruptionFileSkipErrorsPasses() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    generateHLogs(Integer.MAX_VALUE);
    corruptHLog(new Path(hlogDir, HLOG_FILE_PREFIX + "5"),
        Corruptions.APPEND_GARBAGE, true, fs);
    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals(NUM_WRITERS * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testFirstLineCorruptionLogFileSkipErrorsPasses() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    generateHLogs(Integer.MAX_VALUE);
    corruptHLog(new Path(hlogDir, HLOG_FILE_PREFIX + "5"),
        Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);
    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals((NUM_WRITERS - 1) * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testMiddleGarbageCorruptionSkipErrorsReadsHalfOfFile() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);
    generateHLogs(Integer.MAX_VALUE);
    corruptHLog(new Path(hlogDir, HLOG_FILE_PREFIX + "5"),
        Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false, fs);
    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      // the entries in the original logs are alternating regions
      // considering the sequence file header, the middle corruption should
      // affect at least half of the entries
      int goodEntries = (NUM_WRITERS - 1) * ENTRIES;
      int firstHalfEntries = (int) Math.ceil(ENTRIES / 2) - 1;
      assertTrue("The file up to the corrupted area hasn't been parsed",
          goodEntries + firstHalfEntries <= countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testCorruptedFileGetsArchivedIfSkipErrors() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, true);

    Path c1 = new Path(hlogDir, HLOG_FILE_PREFIX + "0");
    Path c2 = new Path(hlogDir, HLOG_FILE_PREFIX + "5");
    Path c3 = new Path(hlogDir, HLOG_FILE_PREFIX + (NUM_WRITERS - 1));
    generateHLogs(-1);
    corruptHLog(c1, Corruptions.INSERT_GARBAGE_IN_THE_MIDDLE, false, fs);
    corruptHLog(c2, Corruptions.APPEND_GARBAGE, true, fs);
    corruptHLog(c3, Corruptions.INSERT_GARBAGE_ON_FIRST_LINE, true, fs);

    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    FileStatus[] archivedLogs = fs.listStatus(corruptDir);

    assertEquals("expected a different file", c1.getName(), archivedLogs[0].getPath().getName());
    assertEquals("expected a different file", c2.getName(), archivedLogs[1].getPath().getName());
    assertEquals("expected a different file", c3.getName(), archivedLogs[2].getPath().getName());
    assertEquals(archivedLogs.length, 3);
  }

  @Test
  public void testLogsGetArchivedAfterSplit() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, false);

    generateHLogs(-1);

    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    FileStatus[] archivedLogs = fs.listStatus(oldLogDir);

    assertEquals("wrong number of files in the archive log", NUM_WRITERS, archivedLogs.length);
  }

  @Test(expected = IOException.class)
  public void testTrailingGarbageCorruptionLogFileSkipErrorsFalseThrows() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, false);
    generateHLogs(Integer.MAX_VALUE);
    corruptHLog(new Path(hlogDir, HLOG_FILE_PREFIX + "5"),
        Corruptions.APPEND_GARBAGE, true, fs);

    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
  }

  @Test
  public void testCorruptedLogFilesSkipErrorsFalseDoesNotTouchLogs() throws IOException {
    conf.setBoolean(HBASE_SKIP_ERRORS, false);
    generateHLogs(-1);
    corruptHLog(new Path(hlogDir, HLOG_FILE_PREFIX + "5"),
        Corruptions.APPEND_GARBAGE, true, fs);
    fs.initialize(fs.getUri(), conf);
    try {
      HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
    } catch (IOException e) {/* expected */}

    assertEquals("if skip.errors is false all files should remain in place",
        NUM_WRITERS, fs.listStatus(hlogDir).length);
  }

  @Test
  public void testSplit() throws IOException {
    generateHLogs(-1);
    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    for (String region : regions) {
      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, region);
      assertEquals(NUM_WRITERS * ENTRIES, countHLog(logfile, fs, conf));
    }
  }

  @Test
  public void testLogDirectoryShouldBeDeletedAfterSuccessfulSplit()
      throws IOException {
    generateHLogs(-1);
    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
    FileStatus [] statuses = null;
    try {
      statuses = fs.listStatus(hlogDir);
      assertNull(statuses);
    } catch (FileNotFoundException e) {
      // hadoop 0.21 throws FNFE whereas hadoop 0.20 returns null
    }
  }

  /* DISABLED for now. TODO: HBASE-2645
  @Test
  public void testLogCannotBeWrittenOnceParsed() throws IOException {
    AtomicLong counter = new AtomicLong(0);
    AtomicBoolean stop = new AtomicBoolean(false);
    generateHLogs(9);
    fs.initialize(fs.getUri(), conf);

    Thread zombie = new ZombieLastLogWriterRegionServer(writer[9], counter, stop);

    try {
      zombie.start();

      HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

      Path logfile = getLogForRegion(hbaseDir, TABLE_NAME, "juliet");

      // It's possible that the writer got an error while appending and didn't count it
      // however the entry will in fact be written to file and split with the rest
      long numberOfEditsInRegion = countHLog(logfile, fs, conf);
      assertTrue("The log file could have at most 1 extra log entry, but " +
          "can't have less. Zombie could write " + counter.get() +
          " and logfile had only " + numberOfEditsInRegion + " " + logfile,
          counter.get() == numberOfEditsInRegion ||
          counter.get() + 1 == numberOfEditsInRegion);
    } finally {
      stop.set(true);
    }
  }
  */

  @Test
  public void testSplitWillNotTouchLogsIfNewHLogGetsCreatedAfterSplitStarted()
      throws IOException {
    AtomicBoolean stop = new AtomicBoolean(false);
    generateHLogs(-1);
    fs.initialize(fs.getUri(), conf);
    Thread zombie = new ZombieNewLogWriterRegionServer(stop);

    try {
      zombie.start();
      try {
        HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
      } catch (IOException ex) {/* expected */}
      int logFilesNumber = fs.listStatus(hlogDir).length;

      assertEquals("Log files should not be archived if there's an extra file after split",
          NUM_WRITERS + 1, logFilesNumber);
    } finally {
      stop.set(true);
    }
  }

  @Test(expected = IOException.class)
  public void testSplitWillFailIfWritingToRegionFails() throws Exception {
    // leave the 5th log open so we can append the "trap" entry below
    generateHLogs(4);

    fs.initialize(fs.getUri(), conf);

    InstrumentedSequenceFileLogWriter.activateFailure = false;
    appendEntry(writer[4], TABLE_NAME, Bytes.toBytes("break"), ("r" + 999).getBytes(),
        FAMILY, QUALIFIER, VALUE, 0);
    writer[4].close();

    try {
      InstrumentedSequenceFileLogWriter.activateFailure = true;
      HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
    } catch (IOException e) {
      assertEquals("java.io.IOException: This exception is instrumented and should only be thrown for testing", e.getMessage());
      throw e;
    } finally {
      InstrumentedSequenceFileLogWriter.activateFailure = false;
    }
  }

  // @Test
  public void testSplittingLargeNumberOfRegionsConsistency() throws IOException {
    regions.removeAll(regions);
    for (int i = 0; i < 500; i++) {
      regions.add("region__" + i);
    }

    generateHLogs(1, 100, -1);
    fs.initialize(fs.getUri(), conf);

    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);
    fs.rename(oldLogDir, hlogDir);
    Path firstSplitPath = new Path(hbaseDir, Bytes.toString(TABLE_NAME) + ".first");
    Path splitPath = new Path(hbaseDir, Bytes.toString(TABLE_NAME));
    fs.rename(splitPath, firstSplitPath);

    fs.initialize(fs.getUri(), conf);
    HLog.splitLog(hbaseDir, hlogDir, oldLogDir, fs, conf);

    FileStatus[] f1 = fs.listStatus(firstSplitPath);
    FileStatus[] f2 = fs.listStatus(splitPath);

    for (int i = 0; i < f1.length; i++) {
      HLog.Reader in1, in2;
      in1 = HLog.getReader(fs, new Path(f1[i].getPath(), "oldlogfile.log"), conf);
      in2 = HLog.getReader(fs, new Path(f2[i].getPath(), "oldlogfile.log"), conf);
      HLog.Entry entry1;
      HLog.Entry entry2;
      while ((entry1 = in1.next()) != null) {
        entry2 = in2.next();
        assertEquals(0, entry1.getKey().compareTo(entry2.getKey()));
        assertEquals(entry1.getEdit().toString(), entry2.getEdit().toString());
      }
    }
  }

  /**
   * This thread will keep writing to the file after the split process has started.
   * It simulates a region server that was considered dead but woke up and wrote
   * some more to the last log entry.
   */
  class ZombieLastLogWriterRegionServer extends Thread {
    AtomicLong editsCount;
    AtomicBoolean stop;
    Path log;
    HLog.Writer lastLogWriter;

    public ZombieLastLogWriterRegionServer(HLog.Writer writer, AtomicLong counter, AtomicBoolean stop) {
      this.stop = stop;
      this.editsCount = counter;
      this.lastLogWriter = writer;
    }

    @Override
    public void run() {
      if (stop.get()) {
        return;
      }
      flushToConsole("starting");
      while (true) {
        try {
          appendEntry(lastLogWriter, TABLE_NAME, "juliet".getBytes(),
              ("r" + editsCount).getBytes(), FAMILY, QUALIFIER, VALUE, 0);
          lastLogWriter.sync();
          editsCount.incrementAndGet();
          try {
            Thread.sleep(1);
          } catch (InterruptedException e) {
            //
          }
        } catch (IOException ex) {
          if (ex instanceof RemoteException) {
            flushToConsole("Juliet: got RemoteException " +
                ex.getMessage() + " while writing " + (editsCount.get() + 1));
            break;
          } else {
            assertTrue("Failed to write " + editsCount.get(), false);
          }
        }
      }
    }
  }

  /**
   * This thread will keep adding new log files.
   * It simulates a region server that was considered dead but woke up and wrote
   * some more to a new hlog.
   */
  class ZombieNewLogWriterRegionServer extends Thread {
    AtomicBoolean stop;

    public ZombieNewLogWriterRegionServer(AtomicBoolean stop) {
      super("ZombieNewLogWriterRegionServer");
      this.stop = stop;
    }

    @Override
    public void run() {
      if (stop.get()) {
        return;
      }
      boolean splitStarted = false;
      Path p = new Path(hbaseDir, new String(TABLE_NAME));
      while (!splitStarted) {
        try {
          FileStatus [] statuses = fs.listStatus(p);
          // In 0.20, listStatus comes back with a null if the file doesn't exist.
          // In 0.21, it throws FNFE.
          if (statuses != null && statuses.length > 0) {
            // Done.
            break;
          }
        } catch (FileNotFoundException e) {
          // Expected in hadoop 0.21
        } catch (IOException e1) {
          assertTrue("Failed to list status ", false);
        }
        flushToConsole("Juliet: split not started, sleeping a bit...");
        Threads.sleep(100);
      }

      Path julietLog = new Path(hlogDir, HLOG_FILE_PREFIX + ".juliet");
      try {
        HLog.Writer writer = HLog.createWriter(fs,
            julietLog, conf);
        appendEntry(writer, "juliet".getBytes(), ("juliet").getBytes(),
            ("r").getBytes(), FAMILY, QUALIFIER, VALUE, 0);
        writer.close();
        flushToConsole("Juliet file creator: created file " + julietLog);
      } catch (IOException e1) {
        assertTrue("Failed to create file " + julietLog, false);
      }
    }
  }

  private void flushToConsole(String s) {
    System.out.println(s);
    System.out.flush();
  }

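  // Creates one hlog file per writer under hlogDir, each holding `entries` edits
  // for every region; the writer whose index equals `leaveOpen` is left open
  // (pass -1 to close them all).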
  private void generateHLogs(int leaveOpen) throws IOException {
    generateHLogs(NUM_WRITERS, ENTRIES, leaveOpen);
  }

  private void generateHLogs(int writers, int entries, int leaveOpen) throws IOException {
    for (int i = 0; i < writers; i++) {
      writer[i] = HLog.createWriter(fs, new Path(hlogDir, HLOG_FILE_PREFIX + i), conf);
      for (int j = 0; j < entries; j++) {
        int prefix = 0;
        for (String region : regions) {
          String row_key = region + prefix++ + i + j;
          appendEntry(writer[i], TABLE_NAME, region.getBytes(),
              row_key.getBytes(), FAMILY, QUALIFIER, VALUE, seq);
        }
      }
      if (i != leaveOpen) {
        writer[i].close();
        flushToConsole("Closing writer " + i);
      }
    }
  }

  private Path getLogForRegion(Path rootdir, byte[] table, String region) {
    return new Path(HRegion.getRegionDir(HTableDescriptor
        .getTableDir(rootdir, table),
        HRegionInfo.encodeRegionName(region.getBytes())),
        "oldlogfile.log");
  }

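  // Corrupts an existing hlog file in one of three ways: appending garbage at the
  // end, prepending a garbage byte before the original content, or splicing a
  // garbage byte into the middle; the stream is then closed or only flushed,
  // depending on `close`.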
  private void corruptHLog(Path path, Corruptions corruption, boolean close,
      FileSystem fs) throws IOException {

    FSDataOutputStream out;
    int fileSize = (int) fs.listStatus(path)[0].getLen();

    FSDataInputStream in = fs.open(path);
    byte[] corrupted_bytes = new byte[fileSize];
    in.readFully(0, corrupted_bytes, 0, fileSize);
    in.close();

    switch (corruption) {
      case APPEND_GARBAGE:
        out = fs.append(path);
        out.write("-----".getBytes());
        closeOrFlush(close, out);
        break;

      case INSERT_GARBAGE_ON_FIRST_LINE:
        fs.delete(path, false);
        out = fs.create(path);
        out.write(0);
        out.write(corrupted_bytes);
        closeOrFlush(close, out);
        break;

      case INSERT_GARBAGE_IN_THE_MIDDLE:
        fs.delete(path, false);
        out = fs.create(path);
        int middle = (int) Math.floor(corrupted_bytes.length / 2);
        out.write(corrupted_bytes, 0, middle);
        out.write(0);
        out.write(corrupted_bytes, middle, corrupted_bytes.length - middle);
        closeOrFlush(close, out);
        break;
    }
  }

  private void closeOrFlush(boolean close, FSDataOutputStream out)
      throws IOException {
    if (close) {
      out.close();
    } else {
      out.sync();
      // out.hflush() is not available in hadoop 0.20, so sync() is used instead
    }
  }

  @SuppressWarnings("unused")
  private void dumpHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
    HLog.Entry entry;
    HLog.Reader in = HLog.getReader(fs, log, conf);
    while ((entry = in.next()) != null) {
      System.out.println(entry);
    }
  }

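  // Returns the number of entries that can be read back from the given log file.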
  private int countHLog(Path log, FileSystem fs, Configuration conf) throws IOException {
    int count = 0;
    HLog.Reader in = HLog.getReader(fs, log, conf);
    while (in.next() != null) {
      count++;
    }
    return count;
  }

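  // Appends a single Put edit for the given table/region/row to the writer,
  // syncs it, and returns the (incremented) sequence number that was used.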
  public long appendEntry(HLog.Writer writer, byte[] table, byte[] region,
      byte[] row, byte[] family, byte[] qualifier,
      byte[] value, long seq)
      throws IOException {

    long time = System.nanoTime();
    WALEdit edit = new WALEdit();
    seq++;
    edit.add(new KeyValue(row, family, qualifier, time, KeyValue.Type.Put, value));
    writer.append(new HLog.Entry(new HLogKey(region, table, seq, time), edit));
    writer.sync();
    return seq;
  }

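  // Creates an hlog file that contains no edits; when closeFile is false the
  // writer is left open, mimicking a log a region server is still writing to.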
  private void injectEmptyFile(String suffix, boolean closeFile)
      throws IOException {
    HLog.Writer writer = HLog.createWriter(
        fs, new Path(hlogDir, HLOG_FILE_PREFIX + suffix), conf);
    if (closeFile) writer.close();
  }

  @SuppressWarnings("unused")
  private void listLogs(FileSystem fs, Path dir) throws IOException {
    for (FileStatus file : fs.listStatus(dir)) {
      System.out.println(file.getPath());
    }
  }

}
@ -31,13 +31,13 @@ import org.apache.hadoop.hbase.HBaseClusterTestCase;
 import org.apache.hadoop.hbase.HColumnDescriptor;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HTableDescriptor;
-import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hdfs.DFSClient;
 import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;
 import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
@ -220,7 +220,7 @@ public class TestLogRolling extends HBaseClusterTestCase {
 
     assertTrue("Need HDFS-826 for this test", log.canGetCurReplicas());
     // don't run this test without append support (HDFS-200 & HDFS-142)
-    assertTrue("Need append support for this test", HLog.isAppend(conf));
+    assertTrue("Need append support for this test", FSUtils.isAppendSupported(conf));
 
     // add up the datanode count, to ensure proper replication when we kill 1
     dfsCluster.startDataNodes(conf, 1, true, null, null);