HBASE-2544 Forward port branch 0.20 WAL to TRUNK

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@944063 13f79535-47bb-0310-9956-ffa450edef68
2010-05-13 23:59:05 +00:00 · 2010-05-13 23:59:05 +00:00 · 635768d28a
parent eb11dd6c82
commit 635768d28a
5 changed files with 527 additions and 173 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -308,6 +308,7 @@ Release 0.21.0 - Unreleased
   HBASE-2431  Master does not respect generation stamps, may result in meta
               getting permanently offlined
   HBASE-2515  ChangeTableState considers split&&offline regions as being served
   HBASE-2544  Forward port branch 0.20 WAL to TRUNK
  IMPROVEMENTS
   HBASE-1760  Cleanup TODOs in HTable
--- a/core/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
+++ b/core/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
@ -1,5 +1,5 @@
 /**
- * Copyright 2007 The Apache Software Foundation
+ * Copyright 2010 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
@ -19,34 +19,14 @@
 */
 package org.apache.hadoop.hbase.regionserver.wal;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Syncable;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.io.Writable;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.EOFException;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.io.UnsupportedEncodingException;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.LinkedList;
@ -66,6 +46,32 @@ import java.util.concurrent.locks.Condition;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.Syncable;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HServerInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
 import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
 /**
 * HLog stores all the edits to the HStore.  Its the hbase write-ahead-log
 * implementation.
@ -120,20 +126,21 @@ public class HLog implements HConstants, Syncable {
  private final AtomicInteger unflushedEntries = new AtomicInteger(0);
  private final Path oldLogDir;
  private OutputStream hdfs_out;     // OutputStream associated with the current SequenceFile.writer
  private int initialReplication;    // initial replication factor of SequenceFile.writer
  private Method getNumCurrentReplicas; // refers to DFSOutputStream.getNumCurrentReplicas
  final static Object [] NO_ARGS = new Object []{};
  // used to indirectly tell syncFs to force the sync
  private boolean forceSync = false;
  public interface Reader {
    void init(FileSystem fs, Path path, Configuration c) throws IOException;
    void close() throws IOException;
    Entry next() throws IOException;
    Entry next(Entry reuse) throws IOException;
    void seek(long pos) throws IOException;
    long getPosition() throws IOException;
  }
  public interface Writer {
@ -144,9 +151,6 @@ public class HLog implements HConstants, Syncable {
    long getLength() throws IOException;
  }
  // used to indirectly tell syncFs to force the sync
  private boolean forceSync = false;
  /*
   * Current log file.
   */
@ -168,11 +172,14 @@ public class HLog implements HConstants, Syncable {
  private final AtomicLong logSeqNum = new AtomicLong(0);
  // The timestamp (in ms) when the log file was created.
  private volatile long filenum = -1;
  //number of transactions in the current Hlog.
  private final AtomicInteger numEntries = new AtomicInteger(0);
-  // If > than this size, roll the log.
+  // If > than this size, roll the log. This is typically 0.95 times the size
  // of the default Hdfs block size.
  private final long logrollsize;
  // This lock prevents starting a log roll during a cache flush.
@ -272,7 +279,7 @@ public class HLog implements HConstants, Syncable {
    }
    fs.mkdirs(dir);
    this.oldLogDir = oldLogDir;
-    if(!fs.exists(oldLogDir)) {
+    if (!fs.exists(oldLogDir)) {
      fs.mkdirs(this.oldLogDir);
    }
    this.maxLogs = conf.getInt("hbase.regionserver.maxlogs", 32);
@ -282,7 +289,29 @@ public class HLog implements HConstants, Syncable {
      ", enabled=" + this.enabled +
      ", flushlogentries=" + this.flushlogentries +
      ", optionallogflushinternal=" + this.optionalFlushInterval + "ms");
    // rollWriter sets this.hdfs_out if it can.
    rollWriter();
    // handle the reflection necessary to call getNumCurrentReplicas()
    this.getNumCurrentReplicas = null;
    if(this.hdfs_out != null) {
      try {
        this.getNumCurrentReplicas = this.hdfs_out.getClass().
          getMethod("getNumCurrentReplicas", new Class<?> []{});
        this.getNumCurrentReplicas.setAccessible(true);
      } catch (NoSuchMethodException e) {
        // Thrown if getNumCurrentReplicas() function isn't available
      } catch (SecurityException e) {
        // Thrown if we can't get access to getNumCurrentReplicas()
        this.getNumCurrentReplicas = null; // could happen on setAccessible()
      }
    }
    if(this.getNumCurrentReplicas != null) {
      LOG.info("Using getNumCurrentReplicas--HDFS-826");
    } else {
      LOG.info("getNumCurrentReplicas--HDFS-826 not available" );
    }
    logSyncerThread = new LogSyncer(this.optionalFlushInterval);
    Threads.setDaemonThreadRunning(logSyncerThread,
        Thread.currentThread().getName() + ".logSyncer");
@ -355,6 +384,17 @@ public class HLog implements HConstants, Syncable {
        this.filenum = System.currentTimeMillis();
        Path newPath = computeFilename(this.filenum);
        this.writer = createWriter(fs, newPath, HBaseConfiguration.create(conf));
        this.initialReplication = fs.getFileStatus(newPath).getReplication();
        // Can we get at the dfsclient outputstream?  If an instance of
        // SFLW, it'll have done the necessary reflection to get at the
        // protected field name.
        this.hdfs_out = null;
        if (this.writer instanceof SequenceFileLogWriter) {
          this.hdfs_out =
            ((SequenceFileLogWriter)this.writer).getDFSCOutputStream();
        }
        LOG.info((oldFile != null?
            "Roll " + FSUtils.getPath(oldFile) + ", entries=" +
            this.numEntries.get() +
@ -628,7 +668,7 @@ public class HLog implements HConstants, Syncable {
  throws IOException {
    byte [] regionName = regionInfo.getRegionName();
    byte [] tableName = regionInfo.getTableDesc().getName();
-    this.append(regionInfo, makeKey(regionName, tableName, -1, now), logEdit, isMetaRegion);
+    this.append(regionInfo, makeKey(regionName, tableName, -1, now), logEdit);
  }
  /**
@ -650,8 +690,7 @@ public class HLog implements HConstants, Syncable {
   * @param logKey
   * @throws IOException
   */
-  public void append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit,
+  public void append(HRegionInfo regionInfo, HLogKey logKey, WALEdit logEdit)
                     final boolean isMetaRegion)
  throws IOException {
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
@ -672,7 +711,7 @@ public class HLog implements HConstants, Syncable {
    }
    // sync txn to file system
-    this.sync(isMetaRegion);
+    this.sync(regionInfo.isMetaRegion());
  }
  /**
@ -705,15 +744,14 @@ public class HLog implements HConstants, Syncable {
    if (this.closed) {
      throw new IOException("Cannot append; log is closed");
    }
    long seqNum = obtainSeqNum();
    synchronized (this.updateLock) {
      long seqNum = obtainSeqNum();
      // The 'lastSeqWritten' map holds the sequence number of the oldest
      // write for each region (i.e. the first edit added to the particular
      // memstore). . When the cache is flushed, the entry for the
      // region being flushed is removed if the sequence number of the flush
      // is greater than or equal to the value in lastSeqWritten.
      this.lastSeqWritten.putIfAbsent(regionName, seqNum);
      int counter = 0;
      HLogKey logKey = makeKey(regionName, tableName, seqNum, now);
      doWrite(info, logKey, edits);
      this.numEntries.incrementAndGet();
@ -799,10 +837,6 @@ public class HLog implements HConstants, Syncable {
      }
      lock.lock();
      try {
        if (syncerShuttingDown) {
          LOG.warn(getName() + " was shut down while waiting for sync");
          return;
        }
        if (syncerShuttingDown) {
          LOG.warn(getName() + " was shut down while waiting for sync");
          return;
@ -852,7 +886,24 @@ public class HLog implements HConstants, Syncable {
          syncOps++;
          this.forceSync = false;
          this.unflushedEntries.set(0);
-          // TODO: HBASE-2401
+
          // if the number of replicas in HDFS has fallen below the initial
          // value, then roll logs.
          try {
            int numCurrentReplicas = getLogReplication();
            if (numCurrentReplicas != 0 &&
                numCurrentReplicas < this.initialReplication) {
              LOG.warn("HDFS pipeline error detected. " +
                  "Found " + numCurrentReplicas + " replicas but expecting " +
                  this.initialReplication + " replicas. " +
                  " Requesting close of hlog.");
              requestLogRoll();
              logRollRequested = true;
            }
          } catch (Exception e) {
              LOG.warn("Unable to invoke DFSOutputStream.getNumCurrentReplicas" + e +
                       " still proceeding ahead...");
          }
        } catch (IOException e) {
          LOG.fatal("Could not append. Requesting close of hlog", e);
          requestLogRoll();
@ -866,6 +917,29 @@ public class HLog implements HConstants, Syncable {
    }
  }
  /**
   * This method gets the datanode replication count for the current HLog.
   *
   * If the pipeline isn't started yet or is empty, you will get the default
   * replication factor.  Therefore, if this function returns 0, it means you
   * are not properly running with the HDFS-826 patch.
   *
   * @throws Exception
   */
  int getLogReplication() throws Exception {
    if(this.getNumCurrentReplicas != null && this.hdfs_out != null) {
      Object repl = this.getNumCurrentReplicas.invoke(this.hdfs_out, NO_ARGS);
      if (repl instanceof Integer) {
        return ((Integer)repl).intValue();
      }
    }
    return 0;
  }
  boolean canGetCurReplicas() {
    return this.getNumCurrentReplicas != null;
  }
  public void hsync() throws IOException {
    // Not yet implemented up in hdfs so just call hflush.
    hflush();
@ -916,20 +990,6 @@ public class HLog implements HConstants, Syncable {
    return outputfiles.size();
  }
  /*
   * Obtain a specified number of sequence numbers
   *
   * @param num number of sequence numbers to obtain
   * @return array of sequence numbers
   */
  private long [] obtainSeqNum(int num) {
    long [] results = new long[num];
    for (int i = 0; i < num; i++) {
      results[i] = this.logSeqNum.incrementAndGet();
    }
    return results;
  }
  /**
   * By acquiring a log sequence ID, we can allow log messages to continue while
   * we flush the cache.
@ -968,10 +1028,10 @@ public class HLog implements HConstants, Syncable {
      }
      synchronized (updateLock) {
        long now = System.currentTimeMillis();
-        WALEdit edits = completeCacheFlushLogEdit();
+        WALEdit edit = completeCacheFlushLogEdit();
-        this.writer.append(new HLog.Entry(
+        HLogKey key = makeKey(regionName, tableName, logSeqId,
-          makeKey(regionName, tableName, logSeqId, System.currentTimeMillis()),
+            System.currentTimeMillis());
-          edits));
+        this.writer.append(new Entry(key, edit));
        writeTime += System.currentTimeMillis() - now;
        writeOps++;
        this.numEntries.incrementAndGet();
@ -1024,11 +1084,12 @@ public class HLog implements HConstants, Syncable {
   *                <code>${ROOTDIR}/log_HOST_PORT</code>
   * @param oldLogDir
   * @param fs FileSystem
-   * @param conf HBaseConfiguration
+   * @param conf Configuration
   * @throws IOException
   */
  public static List<Path> splitLog(final Path rootDir, final Path srcDir,
-    Path oldLogDir, final FileSystem fs, final Configuration conf) throws IOException {
+    Path oldLogDir, final FileSystem fs, final Configuration conf)
  throws IOException {
    long millis = System.currentTimeMillis();
    List<Path> splits = null;
@ -1104,7 +1165,8 @@ public class HLog implements HConstants, Syncable {
   */
  private static List<Path> splitLog(final Path rootDir,
    Path oldLogDir, final FileStatus[] logfiles, final FileSystem fs,
-    final Configuration conf) throws IOException {
+    final Configuration conf)
  throws IOException {
    final Map<byte [], WriterAndPath> logWriters =
      Collections.synchronizedMap(
        new TreeMap<byte [], WriterAndPath>(Bytes.BYTES_COMPARATOR));
@ -1115,35 +1177,51 @@ public class HLog implements HConstants, Syncable {
    int logWriterThreads =
      conf.getInt("hbase.regionserver.hlog.splitlog.writer.threads", 3);
-    // Number of logs to read concurrently when log splitting.
+    // Number of logs to read into memory before writing to their appropriate
-    // More means faster but bigger mem consumption  */
+    // regions when log splitting.  More means faster but bigger mem consumption
-    int concurrentLogReads =
+    int logFilesPerStep =
      conf.getInt("hbase.regionserver.hlog.splitlog.reader.threads", 3);
-    // Is append supported?
+
    // append support = we can avoid data loss (yay)
    // we open for append, then close to recover the correct file length
    final boolean appendSupport = isAppend(conf);
    // store corrupt logs for post-mortem analysis (empty string = discard)
    final String corruptDir =
      conf.get("hbase.regionserver.hlog.splitlog.corrupt.dir", ".corrupt");
    List<Path> finishedFiles = new LinkedList<Path>();
    List<Path> corruptFiles = new LinkedList<Path>();
    try {
      int maxSteps = Double.valueOf(Math.ceil((logfiles.length * 1.0) /
-          concurrentLogReads)).intValue();
+          logFilesPerStep)).intValue();
      for (int step = 0; step < maxSteps; step++) {
        // Step 1: read N log files into memory
        final Map<byte[], LinkedList<HLog.Entry>> logEntries =
          new TreeMap<byte[], LinkedList<HLog.Entry>>(Bytes.BYTES_COMPARATOR);
        // Stop at logfiles.length when it's the last step
        int endIndex = step == maxSteps - 1? logfiles.length:
-          step * concurrentLogReads + concurrentLogReads;
+          step * logFilesPerStep + logFilesPerStep;
-        for (int i = (step * concurrentLogReads); i < endIndex; i++) {
+        for (int i = (step * logFilesPerStep); i < endIndex; i++) {
-          // Check for possibly empty file. With appends, currently Hadoop
+          Path curLogFile = logfiles[i].getPath();
-          // reports a zero length even if the file has been sync'd. Revisit if
+
-          // HADOOP-4751 is committed.
+          // make sure we get the right file length before opening for read
-          long length = logfiles[i].getLen();
+          recoverLog(fs, curLogFile, appendSupport);
          long length = fs.getFileStatus(curLogFile).getLen();
          if (LOG.isDebugEnabled()) {
            LOG.debug("Splitting hlog " + (i + 1) + " of " + logfiles.length +
-              ": " + logfiles[i].getPath() + ", length=" + logfiles[i].getLen());
+              ": " + curLogFile + ", length=" + length);
          }
          Reader in = null;
          boolean cleanRead = false;
          int count = 0;
          try {
-            in = HLog.getReader(fs, logfiles[i].getPath(), conf);
+            in = HLog.getReader(fs, curLogFile, conf);
            try {
-              HLog.Entry entry;
+              Entry entry;
              while ((entry = in.next()) != null) {
                byte [] regionName = entry.getKey().getRegionName();
                LinkedList<HLog.Entry> queue = logEntries.get(regionName);
@ -1155,20 +1233,24 @@ public class HLog implements HConstants, Syncable {
                queue.push(entry);
                count++;
              }
-              LOG.debug("Pushed=" + count + " entries from " +
+              LOG.debug("Pushed=" + count + " entries from " + curLogFile);
-                logfiles[i].getPath());
+              cleanRead = true;
            } catch (IOException e) {
-              LOG.debug("IOE Pushed=" + count + " entries from " +
+              LOG.debug("IOE Pushed=" + count + " entries from " + curLogFile);
                logfiles[i].getPath());
              e = RemoteExceptionHandler.checkIOException(e);
              if (!(e instanceof EOFException)) {
-                LOG.warn("Exception processing " + logfiles[i].getPath() +
+                String msg = "Exception processing " + curLogFile +
-                    " -- continuing. Possible DATA LOSS!", e);
+                             " -- continuing. Possible DATA LOSS!";
                if (corruptDir.length() > 0) {
                  msg += "  Storing in hlog corruption directory.";
                }
                LOG.warn(msg, e);
              }
            }
          } catch (IOException e) {
            if (length <= 0) {
-              LOG.warn("Empty hlog, continuing: " + logfiles[i] + " count=" + count, e);
+              LOG.warn("Empty hlog, continuing: " + logfiles[i]);
              cleanRead = true;
              continue;
            }
            throw e;
@ -1178,8 +1260,16 @@ public class HLog implements HConstants, Syncable {
                in.close();
              }
            } catch (IOException e) {
-              LOG.warn("Close in finally threw exception -- continuing", e);
+              LOG.warn("File.close() threw exception -- continuing, "
                     + "but marking file as corrupt.", e);
              cleanRead = false;
            }
            if (cleanRead) {
              finishedFiles.add(curLogFile);
            } else {
              corruptFiles.add(curLogFile);
            }
            /* TODO FOR J-D REVIEW
            // Archive the input file now so we do not replay edits. We could
            // have gotten here because of an exception. If so, probably
            // nothing we can do about it. Replaying it, it could work but we
@ -1187,85 +1277,89 @@ public class HLog implements HConstants, Syncable {
            // could have lost some edits.
            fs.rename(logfiles[i].getPath(),
                getHLogArchivePath(oldLogDir, logfiles[i].getPath()));
            */
          }
        }
        // Step 2: Some regionserver log files have been read into memory.
        //         Assign them to the appropriate region directory.
        class ThreadWithException extends Thread {
          ThreadWithException(String name) { super(name); }
          public IOException exception = null;
        }
        List<ThreadWithException> threadList =
          new ArrayList<ThreadWithException>(logEntries.size());
        ExecutorService threadPool =
          Executors.newFixedThreadPool(logWriterThreads);
-        for (final byte[] key : logEntries.keySet()) {
+        for (final byte [] region: logEntries.keySet()) {
-          Thread thread = new Thread(Bytes.toStringBinary(key)) {
+          ThreadWithException thread =
              new ThreadWithException(Bytes.toStringBinary(region)) {
            @Override
            public void run() {
-              LinkedList<HLog.Entry> entries = logEntries.get(key);
+              LinkedList<HLog.Entry> entries = logEntries.get(region);
              LOG.debug("Thread got " + entries.size() + " to process");
              if(entries.size() <= 0) {
                LOG.warn("Got a region with no entries to process.");
                return;
              }
              long threadTime = System.currentTimeMillis();
              try {
                int count = 0;
-                // Items were added to the linkedlist oldest first. Pull them
+                // get the logfile associated with this region.  2 logs often
-                // out in that order.
+                // write to the same region, so persist this info across logs
-                for (ListIterator<HLog.Entry> i =
+                WriterAndPath wap = logWriters.get(region);
                  entries.listIterator(entries.size());
                    i.hasPrevious();) {
                  HLog.Entry logEntry = i.previous();
                  WriterAndPath wap = logWriters.get(key);
                if (wap == null) {
                  // first write to this region, make new logfile
                  assert entries.size() > 0;
                  Path logfile = new Path(HRegion.getRegionDir(HTableDescriptor
-                        .getTableDir(rootDir, logEntry.getKey().getTablename()),
+                      .getTableDir(rootDir,
-                        HRegionInfo.encodeRegionName(key)),
+                                   entries.getFirst().getKey().getTablename()),
                      HRegionInfo.encodeRegionName(region)),
                      HREGION_OLDLOGFILE_NAME);
-                    Path oldlogfile = null;
+
-                    Reader old = null;
+                  // If splitLog() was running when the user restarted his
                  // cluster, then we could already have a 'logfile'.
                  // Since we don't delete logs until everything is written to
                  // their respective regions, we can safely remove this tmp.
                  if (fs.exists(logfile)) {
-                      FileStatus stat = fs.getFileStatus(logfile);
+                    LOG.warn("Deleting old hlog file: " + logfile);
-                      if (stat.getLen() <= 0) {
+                    // TODO: Archive? 
-                        LOG.warn("Old hlog file " + logfile + " is zero " +
+                    fs.delete(logfile, true);
                          "length. Deleting existing file");
                        fs.delete(logfile, false);
                      } else {
                        LOG.warn("Old hlog file " + logfile + " already " +
                          "exists. Copying existing file to new file");
                        oldlogfile = new Path(logfile.toString() + ".old");
                        fs.rename(logfile, oldlogfile);
                        old = getReader(fs, oldlogfile, conf);
                      }
                    }
                    Writer w = createWriter(fs, logfile, conf);
                    wap = new WriterAndPath(logfile, w);
                    logWriters.put(key, wap);
                    if (LOG.isDebugEnabled()) {
                      LOG.debug("Creating new hlog file writer for path "
                          + logfile + " and region " + Bytes.toStringBinary(key));
                  }
-                    if (old != null) {
+                  // associate an OutputStream with this logfile
-                      // Copy from existing log file
+                  Writer w = createWriter(fs, logfile, conf);
-                      HLog.Entry entry;
+                  wap = new WriterAndPath(logfile, w);
-                      for (; (entry = old.next()) != null; count++) {
+                  logWriters.put(region, wap);
-                        if (LOG.isDebugEnabled() && count > 0
+                  if (LOG.isDebugEnabled()) {
-                            && count % 10000 == 0) {
+                    LOG.debug("Creating new hlog file writer for path "
-                          LOG.debug("Copied " + count + " edits");
+                        + logfile + " and region " + Bytes.toStringBinary(region));
                        }
                        w.append(entry);
                      }
                      old.close();
                      fs.delete(oldlogfile, true);
                  }
                }
-                  wap.w.append(logEntry);
+
                // Items were added to the linkedlist oldest first. Pull them
                // out in that order.
                for (ListIterator<HLog.Entry> i = entries.listIterator(entries.size());
                    i.hasPrevious();) {
                  wap.w.append(i.previous());
                  count++;
                }
                if (LOG.isDebugEnabled()) {
                  LOG.debug("Applied " + count + " total edits to "
-                      + Bytes.toStringBinary(key) + " in "
+                      + Bytes.toStringBinary(region) + " in "
                      + (System.currentTimeMillis() - threadTime) + "ms");
                }
              } catch (IOException e) {
                e = RemoteExceptionHandler.checkIOException(e);
-                LOG.warn("Got while writing region " + Bytes.toStringBinary(key)
+                LOG.warn("Got while writing region "
-                    + " log " + e);
+                    + Bytes.toStringBinary(region) + " log " + e);
                e.printStackTrace();
                exception = e;
              }
            }
          };
          threadList.add(thread);
          threadPool.execute(thread);
        }
        threadPool.shutdown();
@ -1274,9 +1368,19 @@ public class HLog implements HConstants, Syncable {
          for(int i = 0; !threadPool.awaitTermination(5, TimeUnit.SECONDS); i++) {
            LOG.debug("Waiting for hlog writers to terminate, iteration #" + i);
          }
-        }catch(InterruptedException ex) {
+        } catch (InterruptedException ex) {
-          LOG.warn("Hlog writers were interrupted, possible data loss!");
+          LOG.warn("Hlog writers were interrupted during splitLog().  "
              +"Retaining log files to avoid data loss.");
          throw new IOException(ex.getMessage(), ex.getCause());
        }
        // throw an exception if one of the threads reported one
        for (ThreadWithException t : threadList) {
          if (t.exception != null) {
            throw t.exception;
          }
        }
        // End of for loop. Rinse and repeat
      }
    } finally {
      splits = new ArrayList<Path>(logWriters.size());
@ -1286,9 +1390,79 @@ public class HLog implements HConstants, Syncable {
        splits.add(wap.p);
      }
    }
    // Step 3: All writes succeeded!  Get rid of the now-unnecessary logs
    for(Path p : finishedFiles) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Successfully split Hlog file.  Deleting " + p);
      }
      try {
        if (!fs.delete(p, true) && LOG.isDebugEnabled()) {
          LOG.debug("Delete of split Hlog (" + p + ") failed.");
        }
      } catch (IOException e) {
        // don't throw error here. worst case = double-read
        LOG.warn("Error deleting successfully split Hlog (" + p + ") -- " + e);
      }
    }
    for (Path p : corruptFiles) {
      if (corruptDir.length() > 0) {
        // store any corrupt logs for later analysis
        Path cp = new Path(conf.get(HBASE_DIR), corruptDir);
        if(!fs.exists(cp)) {
          fs.mkdirs(cp);
        }
        Path newp = new Path(cp, p.getName());
        if (!fs.exists(newp)) {
          if (!fs.rename(p, newp)) {
            LOG.warn("Rename of " + p + " to " + newp + " failed.");
          } else {
            LOG.warn("Corrupt Hlog (" + p + ") moved to " + newp);
          }
        } else {
          LOG.warn("Corrupt Hlog (" + p + ") already moved to " + newp +
                   ".  Ignoring");
        }
      } else {
        // data loss is less important than disk space, delete
        try {
          if (!fs.delete(p, true) ) {
            if (LOG.isDebugEnabled()) {
              LOG.debug("Delete of split Hlog " + p + " failed.");
            }
          } else {
            LOG.warn("Corrupt Hlog (" + p + ") deleted!");
          }
        } catch (IOException e) {
          LOG.warn("Error deleting corrupt Hlog (" + p + ") -- " + e);
        }
      }
    }
    return splits;
  }
  /*
   * @param conf
   * @return True if append enabled and we have the syncFs in our path.
   */
  static boolean isAppend(final Configuration conf) {
    boolean append = conf.getBoolean("dfs.support.append", false);
    if (append) {
      try {
        // TODO: The implementation that comes back when we do a createWriter
        // may not be using SequenceFile so the below is not a definitive test.
        // Will do for now (hdfs-200).
        SequenceFile.Writer.class.getMethod("syncFs", new Class<?> []{});
        append = true;
      } catch (SecurityException e) {
      } catch (NoSuchMethodException e) {
        append = false;
      }
    }
    return append;
  }
  /**
   * Utility class that lets us keep track of the edit with it's key
   * Only used when splitting logs
@ -1345,6 +1519,64 @@ public class HLog implements HConstants, Syncable {
    }
  }
  /*
   * Recover log.
   * Try and open log in append mode.
   * Doing this, we get a hold of the file that crashed writer
   * was writing to.  Once we have it, close it.  This will
   * allow subsequent reader to see up to last sync.
   * @param fs
   * @param p
   * @param append
   */
  public static void recoverLog(final FileSystem fs, final Path p,
      final boolean append) throws IOException {
    if (!append) {
      return;
    }
    // lease recovery not needed for local file system case.
    // currently, local file system doesn't implement append either.
    if (!(fs instanceof DistributedFileSystem)) {
      return;
    }
    LOG.debug("Recovering DFS lease for path " + p);
    long startWaiting = System.currentTimeMillis();
    // Trying recovery
    boolean recovered = false;
    while (!recovered) {
      try {
        FSDataOutputStream out = fs.append(p);
        out.close();
        recovered = true;
      } catch (IOException e) {
        e = RemoteExceptionHandler.checkIOException(e);
        if (e instanceof AlreadyBeingCreatedException) {
          // We expect that we'll get this message while the lease is still
          // within its soft limit, but if we get it past that, it means
          // that the RS is holding onto the file even though it lost its
          // znode. We could potentially abort after some time here.
          long waitedFor = System.currentTimeMillis() - startWaiting;
          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p
            + ":" + e.getMessage());
          }
          try {
            Thread.sleep(1000);
          } catch (InterruptedException ex) {
            // ignore it and try again
          }
        } else {
          throw new IOException("Failed to open " + p + " for append", e);
        }
      }
    }
    LOG.info("Past out lease recovery");
  }
  /**
   * Construct the HLog directory name
   *
--- a/core/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
+++ b/core/src/main/java/org/apache/hadoop/hbase/regionserver/wal/SequenceFileLogWriter.java
@ -21,29 +21,42 @@
 package org.apache.hadoop.hbase.regionserver.wal;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.lang.reflect.Field;
 import java.lang.reflect.Method;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.regionserver.wal.HLog;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.SequenceFile.Metadata;
 import org.apache.hadoop.io.compress.DefaultCodec;
 /**
 * Implementation of {@link HLog.Writer} that delegates to
 * {@link SequenceFile.Writer}.
 */
 public class SequenceFileLogWriter implements HLog.Writer {
  private final Log LOG = LogFactory.getLog(this.getClass());
  // The sequence file we delegate to.
  private SequenceFile.Writer writer;
  // The dfsclient out stream gotten made accessible or null if not available.
  private OutputStream dfsClient_out;
  // The syncFs method from hdfs-200 or null if not available.
  private Method syncFs;
-  SequenceFile.Writer writer;
+  public SequenceFileLogWriter() {
-  FSDataOutputStream writer_out;
+    super();
-
+  }
  public SequenceFileLogWriter() { }
  @Override
  public void init(FileSystem fs, Path path, Configuration conf)
      throws IOException {
-    writer = SequenceFile.createWriter(fs, conf, path,
+    // Create a SF.Writer instance.
    this.writer = SequenceFile.createWriter(fs, conf, path,
      HLog.getKeyClass(conf), WALEdit.class,
      fs.getConf().getInt("io.file.buffer.size", 4096),
      (short) conf.getInt("hbase.regionserver.hlog.replication",
@ -58,19 +71,39 @@ public class SequenceFileLogWriter implements HLog.Writer {
    // Get at the private FSDataOutputStream inside in SequenceFile so we can
    // call sync on it.  Make it accessible.  Stash it aside for call up in
    // the sync method.
-    final Field fields[] = writer.getClass().getDeclaredFields();
+    final Field fields [] = this.writer.getClass().getDeclaredFields();
    final String fieldName = "out";
    for (int i = 0; i < fields.length; ++i) {
      if (fieldName.equals(fields[i].getName())) {
        try {
          // Make the 'out' field up in SF.Writer accessible.
          fields[i].setAccessible(true);
-          this.writer_out = (FSDataOutputStream)fields[i].get(writer);
+          FSDataOutputStream out =
            (FSDataOutputStream)fields[i].get(this.writer);
          this.dfsClient_out = out.getWrappedStream();
          break;
        } catch (IllegalAccessException ex) {
          throw new IOException("Accessing " + fieldName, ex);
        }
      }
    }
    // Now do dirty work to see if syncFs is available.
    // Test if syncfs is available.
    Method m = null;
    if (conf.getBoolean("dfs.support.append", false)) {
      try {
        // function pointer to writer.syncFs()
        m = this.writer.getClass().getMethod("syncFs", new Class<?> []{});
      } catch (SecurityException e) {
        throw new IOException("Failed test for syncfs", e);
      } catch (NoSuchMethodException e) {
        // Not available
      }
    }
    this.syncFs = m;
    LOG.info((this.syncFs != null)?
      "Using syncFs -- HDFS-200": "syncFs -- HDFS-200 -- not available");
  }
  @Override
@ -86,12 +119,25 @@ public class SequenceFileLogWriter implements HLog.Writer {
  @Override
  public void sync() throws IOException {
    this.writer.sync();
-
+    if (this.syncFs != null) {
-    this.writer.syncFs();
+      try {
       this.syncFs.invoke(this.writer, HLog.NO_ARGS);
      } catch (Exception e) {
        throw new IOException("Reflection", e);
      }
    }
  }
  @Override
  public long getLength() throws IOException {
    return this.writer.getLength();
  }
  /**
   * @return The dfsclient out stream up inside SF.Writer made accessible, or
   * null if not available.
   */
  public OutputStream getDFSCOutputStream() {
    return this.dfsClient_out;
  }
 }
--- a/core/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/core/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@ -28,15 +28,14 @@ import java.util.concurrent.ConcurrentHashMap;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.io.MapWritable;
 import org.apache.hadoop.security.UnixUserGroupInformation;
@ -129,6 +128,7 @@ public class MiniHBaseCluster implements HConstants {
   */
  public static class MiniHBaseClusterRegionServer extends HRegionServer {
    private static int index = 0;
    private Thread shutdownThread = null;
    public MiniHBaseClusterRegionServer(Configuration conf)
        throws IOException {
@ -159,11 +159,20 @@ public class MiniHBaseCluster implements HConstants {
    @Override
    protected void init(MapWritable c) throws IOException {
      super.init(c);
-      // Change shutdown hook to only shutdown the FileSystem added above by
+      // Run this thread to shutdown our filesystem on way out.
-      // {@link #getFileSystem(HBaseConfiguration)
+      this.shutdownThread = new SingleFileSystemShutdownThread(getFileSystem());
-      if (getFileSystem() instanceof DistributedFileSystem) {
+    }
-        Thread t = new SingleFileSystemShutdownThread(getFileSystem());
+
-        Runtime.getRuntime().addShutdownHook(t);
+    @Override
    public void run() {
      try {
        super.run();
      } finally {
        // Run this on the way out.
        if (this.shutdownThread != null) {
          this.shutdownThread.start();
          Threads.shutdown(this.shutdownThread, 30000);
        }
      }
    }
--- a/core/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
+++ b/core/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestHLog.java
@ -19,6 +19,11 @@
 */
 package org.apache.hadoop.hbase.regionserver.wal;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
@ -27,15 +32,10 @@ import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HRegionInfo;
 import org.apache.hadoop.hbase.HTableDescriptor;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 /** JUnit test case for HLog */
 public class TestHLog extends HBaseTestCase implements HConstants {
  private Path dir;
@ -322,4 +322,70 @@ public class TestHLog extends HBaseTestCase implements HConstants {
      }
    }
  }
  /**
   * @throws IOException
   */
  public void testAppend() throws IOException {
    final int COL_COUNT = 10;
    final byte [] tableName = Bytes.toBytes("tablename");
    final byte [] row = Bytes.toBytes("row");
    this.conf.setBoolean("dfs.support.append", true);
    Reader reader = null;
    HLog log = new HLog(this.fs, dir, this.oldLogDir, this.conf, null);
    try {
      // Write columns named 1, 2, 3, etc. and then values of single byte
      // 1, 2, 3...
      long timestamp = System.currentTimeMillis();
      WALEdit cols = new WALEdit();
      for (int i = 0; i < COL_COUNT; i++) {
        cols.add(new KeyValue(row, Bytes.toBytes("column"),
          Bytes.toBytes(Integer.toString(i)),
          timestamp, new byte[] { (byte)(i + '0') }));
      }
      HRegionInfo hri = new HRegionInfo(new HTableDescriptor(tableName),
          HConstants.EMPTY_START_ROW, HConstants.EMPTY_END_ROW);
      log.append(hri, tableName, cols, System.currentTimeMillis());
      long logSeqId = log.startCacheFlush();
      log.completeCacheFlush(hri.getRegionName(), tableName, logSeqId, false);
      log.close();
      Path filename = log.computeFilename(log.getFilenum());
      log = null;
      // Now open a reader on the log and assert append worked.
      reader = HLog.getReader(fs, filename, conf);
      HLog.Entry entry = reader.next();
      assertEquals(COL_COUNT, entry.getEdit().size());
      int idx = 0;
      for (KeyValue val : entry.getEdit().getKeyValues()) {
        assertTrue(Bytes.equals(hri.getRegionName(),
          entry.getKey().getRegionName()));
        assertTrue(Bytes.equals(tableName, entry.getKey().getTablename()));
        assertTrue(Bytes.equals(row, val.getRow()));
        assertEquals((byte)(idx + '0'), val.getValue()[0]);
        System.out.println(entry.getKey() + " " + val);
        idx++;
      }
      // Get next row... the meta flushed row.
      entry = reader.next();
      assertEquals(1, entry.getEdit().size());
      for (KeyValue val : entry.getEdit().getKeyValues()) {
        assertTrue(Bytes.equals(hri.getRegionName(),
          entry.getKey().getRegionName()));
        assertTrue(Bytes.equals(tableName, entry.getKey().getTablename()));
        assertTrue(Bytes.equals(HLog.METAROW, val.getRow()));
        assertTrue(Bytes.equals(HLog.METAFAMILY, val.getFamily()));
        assertEquals(0, Bytes.compareTo(HLog.COMPLETE_CACHE_FLUSH,
          val.getValue()));
        System.out.println(entry.getKey() + " " + val);
      }
    } finally {
      if (log != null) {
        log.closeAndDelete();
      }
      if (reader != null) {
        reader.close();
      }
    }
  }
 }