HBASE-5542 Unify HRegion.mutateRowsWithLocks() and HRegion.processRow() (Scott Chen) part 2

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1303920 13f79535-47bb-0310-9956-ffa450edef68
larsh 2012-03-22 17:51:37 +00:00
parent 6fb055da00
commit 536ff21825
8 changed files with 1478 additions and 343 deletions

View File

@ -0,0 +1,57 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.coprocessor;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RowProcessor;
/**
* This class demonstrates how to implement atomic read-modify-writes
* using {@link HRegion#processRowsWithLocks()} and Coprocessor endpoints.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class BaseRowProcessorEndpoint extends BaseEndpointCoprocessor
implements RowProcessorProtocol {
/**
* Pass a processor to HRegion to process multiple rows atomically.
*
* The RowProcessor implementations should be the inner classes of your
* RowProcessorEndpoint. This way the RowProcessor can be class-loaded
* together with the Coprocessor endpoint.
*
* See {@link TestRowProcessorEndpoint} for example.
*
* @param processor The object that defines the read-modify-write procedure
* @return The processing result
*/
@Override
public <T> T process(RowProcessor<T> processor)
throws IOException {
HRegion region =
((RegionCoprocessorEnvironment) getEnvironment()).getRegion();
region.processRowsWithLocks(processor);
return processor.getResult();
}
}
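For orientation, here is a minimal client-side sketch of invoking such an endpoint, mirroring the coprocessorProxy pattern used by TestRowProcessorEndpoint later in this commit. The conf and row variables, the table name, and the MyProcessor class (assumed to implement RowProcessor<Integer> plus Writable) are hypothetical:

  // Client-side sketch (hypothetical names). The proxy is keyed by a row so
  // the call is routed to the region hosting that row; the processor is
  // serialized, shipped to the region server, and executed under row locks.
  HTable table = new HTable(conf, "mytable");
  RowProcessorProtocol protocol =
      table.coprocessorProxy(RowProcessorProtocol.class, row);
  MyProcessor processor = new MyProcessor(row);
  Integer result = protocol.process(processor);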

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.coprocessor;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.RowProcessor;
/**
* Defines a protocol to perform multi-row transactions.
* See {@link BaseRowProcessorEndpoint} for the implementation.
* See {@link HRegion#processRowsWithLocks()} for details.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public interface RowProcessorProtocol extends CoprocessorProtocol {
/**
* @param processor The processor that defines how to process the rows
*/
<T> T process(RowProcessor<T> processor) throws IOException;
}
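Deploying an implementation is a matter of registering the endpoint as a region coprocessor. A hedged sketch, mirroring the mini-cluster setup in TestRowProcessorEndpoint later in this commit (the MyRowProcessorEndpoint class name is hypothetical):

  // Load the endpoint on every region via the configuration.
  Configuration conf = HBaseConfiguration.create();
  conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
      MyRowProcessorEndpoint.class.getName());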

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.UUID;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
/**
* Base class for RowProcessor with some default implementations.
*/
public abstract class BaseRowProcessor<T> implements RowProcessor<T> {
@Override
public T getResult() {
return null;
}
@Override
public void preProcess(HRegion region, WALEdit walEdit) throws IOException {
}
@Override
public void postProcess(HRegion region, WALEdit walEdit) throws IOException {
}
@Override
public UUID getClusterId() {
return HConstants.DEFAULT_CLUSTER_ID;
}
}
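To make the contract concrete, here is a hedged sketch of a processor built on BaseRowProcessor; the CellCountProcessor class, column family and qualifier are hypothetical. The scan uses READ_UNCOMMITTED as the RowProcessor javadoc advises, because MVCC is advanced after the locks are released; an endpoint-shipped processor would additionally implement Writable, as the processors in TestRowProcessorEndpoint do:

  package org.apache.hadoop.hbase.regionserver;

  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.Collection;
  import java.util.Collections;
  import java.util.List;
  import org.apache.hadoop.hbase.KeyValue;
  import org.apache.hadoop.hbase.client.IsolationLevel;
  import org.apache.hadoop.hbase.client.Scan;
  import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
  import org.apache.hadoop.hbase.util.Bytes;

  // Hypothetical sketch: count the cells in one row under its row lock and
  // record the count as a new cell in both memstore and WAL.
  public class CellCountProcessor extends BaseRowProcessor<Integer> {
    private final byte[] row;
    private int count = 0;

    public CellCountProcessor(byte[] row) {
      this.row = row;
    }

    @Override
    public Collection<byte[]> getRowsToLock() {
      return Collections.singleton(row);
    }

    @Override
    public Integer getResult() {
      return count;
    }

    @Override
    public boolean readOnly() {
      return false;
    }

    @Override
    public void process(long now, HRegion region,
        List<KeyValue> mutations, WALEdit walEdit) throws IOException {
      // Scan the row; READ_UNCOMMITTED because MVCC is advanced after the
      // locks are released (see the RowProcessor javadoc in this commit).
      Scan scan = new Scan(row, row);
      scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
      List<KeyValue> kvs = new ArrayList<KeyValue>();
      InternalScanner scanner = region.getScanner(scan);
      try {
        scanner.next(kvs);
      } finally {
        scanner.close();
      }
      count = kvs.size();
      // Emit the new cell to both the memstore (mutations) and the WAL.
      KeyValue kv = new KeyValue(row, Bytes.toBytes("f"), // hypothetical cf
          Bytes.toBytes("cellcount"), now, Bytes.toBytes(count));
      mutations.add(kv);
      walEdit.add(kv);
    }
  }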

View File

@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
/**
* A <code>RowProcessor</code> that performs multiple puts and deletes.
*/
class MultiRowMutationProcessor extends BaseRowProcessor<Void> {
Collection<byte[]> rowsToLock;
Collection<Mutation> mutations;
MultiRowMutationProcessor(Collection<Mutation> mutations,
Collection<byte[]> rowsToLock) {
this.rowsToLock = rowsToLock;
this.mutations = mutations;
}
@Override
public Collection<byte[]> getRowsToLock() {
return rowsToLock;
}
@Override
public boolean readOnly() {
return false;
}
@Override
public void process(long now,
HRegion region,
List<KeyValue> mutationKvs,
WALEdit walEdit) throws IOException {
byte[] byteNow = Bytes.toBytes(now);
// Check mutations and apply edits to a single WALEdit
for (Mutation m : mutations) {
if (m instanceof Put) {
Map<byte[], List<KeyValue>> familyMap = m.getFamilyMap();
region.checkFamilies(familyMap.keySet());
region.checkTimestamps(familyMap, now);
region.updateKVTimestamps(familyMap.values(), byteNow);
} else if (m instanceof Delete) {
Delete d = (Delete) m;
region.prepareDelete(d);
region.prepareDeleteTimestamps(d, byteNow);
} else {
throw new DoNotRetryIOException(
"Action must be Put or Delete. But was: "
+ m.getClass().getName());
}
for (List<KeyValue> edits : m.getFamilyMap().values()) {
boolean writeToWAL = m.getWriteToWAL();
for (KeyValue kv : edits) {
mutationKvs.add(kv);
if (writeToWAL) {
walEdit.add(kv);
}
}
}
}
}
@Override
public void preProcess(HRegion region, WALEdit walEdit) throws IOException {
RegionCoprocessorHost coprocessorHost = region.getCoprocessorHost();
if (coprocessorHost != null) {
for (Mutation m : mutations) {
if (m instanceof Put) {
if (coprocessorHost.prePut((Put) m, walEdit, m.getWriteToWAL())) {
// bypass everything
return;
}
} else if (m instanceof Delete) {
Delete d = (Delete) m;
region.prepareDelete(d);
if (coprocessorHost.preDelete(d, walEdit, d.getWriteToWAL())) {
// bypass everything
return;
}
}
}
}
}
@Override
public void postProcess(HRegion region, WALEdit walEdit) throws IOException {
RegionCoprocessorHost coprocessorHost = region.getCoprocessorHost();
if (coprocessorHost != null) {
for (Mutation m : mutations) {
if (m instanceof Put) {
coprocessorHost.postPut((Put) m, walEdit, m.getWriteToWAL());
} else if (m instanceof Delete) {
coprocessorHost.postDelete((Delete) m, walEdit, m.getWriteToWAL());
}
}
}
}
}
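As a usage sketch: since the class is package-private, a server-side caller in this package (for example a coprocessor endpoint) could apply a batch atomically as below; r1, r2, FAM, QUAL, VALUE and the region reference are assumptions:

  // Hedged server-side sketch: atomically apply a Put to row r1 and a
  // Delete to row r2, holding both row locks for the duration.
  List<Mutation> mutations = new ArrayList<Mutation>();
  Put put = new Put(r1);
  put.add(FAM, QUAL, VALUE);
  mutations.add(put);
  mutations.add(new Delete(r2));
  List<byte[]> rowsToLock = Arrays.asList(r1, r2);
  region.processRowsWithLocks(
      new MultiRowMutationProcessor(mutations, rowsToLock));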

View File

@ -15,30 +15,40 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.hadoop.hbase.coprocessor;
+package org.apache.hadoop.hbase.regionserver;
 import java.io.IOException;
+import java.util.Collection;
 import java.util.List;
 import java.util.UUID;
 import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
-import org.apache.hadoop.io.Writable;
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
 /**
  * Defines the procedure to atomically perform multiple scans and mutations
- * on one single row. The generic type parameter T is the return type of
+ * on an HRegion.
+ *
+ * This is invoked by {@link HRegion#processRowsWithLocks()}.
+ * This class performs scans and generates mutations and WAL edits.
+ * The locks and MVCC will be handled by HRegion.
+ *
+ * The generic type parameter T is the return type of
  * RowProcessor.getResult().
  */
-@InterfaceAudience.Public
-public interface RowProcessor<T> extends Writable {
+public interface RowProcessor<T> {
 /**
- * Which row to perform the read-write
+ * The rows to lock while the operation runs. They have to be sorted
+ * to avoid deadlock.
  */
-byte[] getRow();
+Collection<byte[]> getRowsToLock();
 /**
  * Obtain the processing result
@ -53,29 +63,40 @@ public interface RowProcessor<T> extends Writable {
 boolean readOnly();
 /**
- * HRegion calls this to process a row. You should override this to create
- * your own RowProcessor.
+ * HRegion handles the locks and MVCC and invokes this method properly.
+ *
+ * You should override this to create your own RowProcessor.
+ *
+ * If you are doing read-modify-write here, you should consider using
+ * <code>IsolationLevel.READ_UNCOMMITTED</code> for the scan because
+ * we advance MVCC after releasing the locks for optimization purposes.
  *
  * @param now the current system millisecond
- * @param scanner the call back object the can be used to scan the row
- * @param mutations the mutations for HRegion to do
- * @param walEdit the wal edit here allows inject some other meta data
+ * @param region the HRegion
+ * @param mutations the output mutations to apply to memstore
+ * @param walEdit the output WAL edits to apply to the write ahead log
  */
 void process(long now,
-             RowProcessor.RowScanner scanner,
+             HRegion region,
              List<KeyValue> mutations,
              WALEdit walEdit) throws IOException;
 /**
- * The call back provided by HRegion to perform the scans on the row
+ * The hook to be executed before process().
+ *
+ * @param region the HRegion
+ * @param walEdit the output WAL edits to apply to the write ahead log
  */
-public interface RowScanner {
-  /**
-   * @param scan The object defines what to read
-   * @param result The scan results will be added here
-   */
-  void doScan(Scan scan, List<KeyValue> result) throws IOException;
-}
+void preProcess(HRegion region, WALEdit walEdit) throws IOException;
+/**
+ * The hook to be executed after process().
+ *
+ * @param region the HRegion
+ * @param walEdit the output WAL edits to apply to the write ahead log
+ */
+void postProcess(HRegion region, WALEdit walEdit) throws IOException;
 /**
  * @return The replication cluster id.

View File

@ -0,0 +1,564 @@
/**
* Copyright 2011 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.*;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.master.SplitLogManager;
import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog.TaskState;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;
/**
* This worker is spawned in every regionserver (should we also spawn one in
* the master?). The Worker waits for log splitting tasks to be put up by the
* {@link SplitLogManager} running in the master and races with other workers
* in other servers to acquire those tasks. The coordination is done via
* zookeeper. All the action takes place at /hbase/splitlog znode.
* <p>
* If a worker has successfully moved the task from state UNASSIGNED to
* OWNED then it owns the task. It keeps heartbeating the manager by
* periodically re-asserting the OWNED state on the task node. On success it
* moves the task to TASK_DONE. On unrecoverable error it moves task state to
* ERR. If it cannot continue but wants the master to retry the task then it
* moves the task state to RESIGNED.
* <p>
* The manager can take a task away from a worker by moving the task from
* OWNED to UNASSIGNED. In the absence of a global lock there is an
* unavoidable race here - a worker might have just finished its task when it
* is stripped of its ownership. Here we rely on the idempotency of the log
* splitting task for correctness.
*/
@InterfaceAudience.Private
public class SplitLogWorker extends ZooKeeperListener implements Runnable {
private static final Log LOG = LogFactory.getLog(SplitLogWorker.class);
Thread worker;
private final String serverName;
private final TaskExecutor splitTaskExecutor;
private long zkretries;
private Object taskReadyLock = new Object();
volatile int taskReadySeq = 0;
private volatile String currentTask = null;
private int currentVersion;
private volatile boolean exitWorker;
private Object grabTaskLock = new Object();
private boolean workerInGrabTask = false;
public SplitLogWorker(ZooKeeperWatcher watcher, Configuration conf,
String serverName, TaskExecutor splitTaskExecutor) {
super(watcher);
this.serverName = serverName;
this.splitTaskExecutor = splitTaskExecutor;
this.zkretries = conf.getLong("hbase.splitlog.zk.retries", 3);
}
public SplitLogWorker(ZooKeeperWatcher watcher, final Configuration conf,
final String serverName) {
this(watcher, conf, serverName, new TaskExecutor () {
@Override
public Status exec(String filename, CancelableProgressable p) {
Path rootdir;
FileSystem fs;
try {
rootdir = FSUtils.getRootDir(conf);
fs = rootdir.getFileSystem(conf);
} catch (IOException e) {
LOG.warn("could not find root dir or fs", e);
return Status.RESIGNED;
}
// TODO have to correctly figure out when log splitting has been
// interrupted or has encountered a transient error and when it has
// encountered a bad non-retry-able persistent error.
try {
String tmpname =
ZKSplitLog.getSplitLogDirTmpComponent(serverName, filename);
if (HLogSplitter.splitLogFileToTemp(rootdir, tmpname,
fs.getFileStatus(new Path(filename)), fs, conf, p) == false) {
return Status.PREEMPTED;
}
} catch (InterruptedIOException iioe) {
LOG.warn("log splitting of " + filename + " interrupted, resigning",
iioe);
return Status.RESIGNED;
} catch (IOException e) {
Throwable cause = e.getCause();
if (cause instanceof InterruptedException) {
LOG.warn("log splitting of " + filename + " interrupted, resigning",
e);
return Status.RESIGNED;
}
LOG.warn("log splitting of " + filename + " failed, returning error",
e);
return Status.ERR;
}
return Status.DONE;
}
});
}
@Override
public void run() {
try {
LOG.info("SplitLogWorker " + this.serverName + " starting");
this.watcher.registerListener(this);
int res;
// wait for master to create the splitLogZnode
res = -1;
while (res == -1) {
try {
res = ZKUtil.checkExists(watcher, watcher.splitLogZNode);
} catch (KeeperException e) {
// ignore
LOG.warn("Exception when checking for " + watcher.splitLogZNode +
" ... retrying", e);
}
if (res == -1) {
try {
LOG.info(watcher.splitLogZNode + " znode does not exist," +
" waiting for master to create one");
Thread.sleep(1000);
} catch (InterruptedException e) {
LOG.debug("Interrupted while waiting for " + watcher.splitLogZNode);
assert exitWorker == true;
}
}
}
taskLoop();
} catch (Throwable t) {
// Only a logical error can cause control to reach here. Printing it out
// to make debugging easier
LOG.error("unexpected error ", t);
} finally {
LOG.info("SplitLogWorker " + this.serverName + " exiting");
}
}
/**
* Wait for tasks to become available at /hbase/splitlog zknode. Grab a task
* one at a time. This policy puts an upper limit on the number of
* simultaneous log splits that could be happening in a cluster.
* <p>
* Synchronization using {@link #taskReadySeq} ensures that it will
* try to grab every task that has been put up
*/
private void taskLoop() {
while (true) {
int seq_start = taskReadySeq;
List<String> paths = getTaskList();
if (paths == null) {
LOG.warn("Could not get tasks, did someone remove " +
this.watcher.splitLogZNode + " ... worker thread exiting.");
return;
}
int offset = (int)(Math.random() * paths.size());
for (int i = 0; i < paths.size(); i ++) {
int idx = (i + offset) % paths.size();
// don't call ZKSplitLog.getNodeName() because that will lead to
// double encoding of the path name
grabTask(ZKUtil.joinZNode(watcher.splitLogZNode, paths.get(idx)));
if (exitWorker == true) {
return;
}
}
synchronized (taskReadyLock) {
while (seq_start == taskReadySeq) {
try {
taskReadyLock.wait();
} catch (InterruptedException e) {
LOG.info("SplitLogWorker interrupted while waiting for task," +
" exiting: " + e.toString());
assert exitWorker == true;
return;
}
}
}
}
}
/**
* try to grab a 'lock' on the task zk node to own and execute the task.
* <p>
* @param path zk node for the task
*/
private void grabTask(String path) {
Stat stat = new Stat();
long t = -1;
byte[] data;
synchronized (grabTaskLock) {
currentTask = path;
workerInGrabTask = true;
if (Thread.interrupted()) {
return;
}
}
try {
try {
if ((data = ZKUtil.getDataNoWatch(this.watcher, path, stat)) == null) {
tot_wkr_failed_to_grab_task_no_data.incrementAndGet();
return;
}
} catch (KeeperException e) {
LOG.warn("Failed to get data for znode " + path, e);
tot_wkr_failed_to_grab_task_exception.incrementAndGet();
return;
}
if (TaskState.TASK_UNASSIGNED.equals(data) == false) {
tot_wkr_failed_to_grab_task_owned.incrementAndGet();
return;
}
currentVersion = stat.getVersion();
if (attemptToOwnTask(true) == false) {
tot_wkr_failed_to_grab_task_lost_race.incrementAndGet();
return;
}
if (ZKSplitLog.isRescanNode(watcher, currentTask)) {
endTask(TaskState.TASK_DONE, tot_wkr_task_acquired_rescan);
return;
}
LOG.info("worker " + serverName + " acquired task " + path);
tot_wkr_task_acquired.incrementAndGet();
getDataSetWatchAsync();
t = System.currentTimeMillis();
TaskExecutor.Status status;
status = splitTaskExecutor.exec(ZKSplitLog.getFileName(currentTask),
new CancelableProgressable() {
@Override
public boolean progress() {
if (attemptToOwnTask(false) == false) {
LOG.warn("Failed to heartbeat the task " + currentTask);
return false;
}
return true;
}
});
switch (status) {
case DONE:
endTask(TaskState.TASK_DONE, tot_wkr_task_done);
break;
case PREEMPTED:
tot_wkr_preempt_task.incrementAndGet();
LOG.warn("task execution preempted " + path);
break;
case ERR:
if (!exitWorker) {
endTask(TaskState.TASK_ERR, tot_wkr_task_err);
break;
}
// if the RS is exiting then there is probably tons of stuff
// that can go wrong. Resign instead of signaling error.
//$FALL-THROUGH$
case RESIGNED:
if (exitWorker) {
LOG.info("task execution interrupted because worker is exiting " +
path);
endTask(TaskState.TASK_RESIGNED, tot_wkr_task_resigned);
} else {
tot_wkr_preempt_task.incrementAndGet();
LOG.info("task execution interrupted via zk by manager " +
path);
}
break;
}
} finally {
if (t > 0) {
LOG.info("worker " + serverName + " done with task " + path +
" in " + (System.currentTimeMillis() - t) + "ms");
}
synchronized (grabTaskLock) {
workerInGrabTask = false;
// clear the interrupt from stopTask() otherwise the next task will
// suffer
Thread.interrupted();
}
}
return;
}
/**
* Try to own the task by transitioning the zk node data from UNASSIGNED to
* OWNED.
* <p>
* This method is also used to periodically heartbeat the task progress by
* transitioning the node from OWNED to OWNED.
* <p>
* @return true if task path is successfully locked
*/
private boolean attemptToOwnTask(boolean isFirstTime) {
try {
Stat stat = this.watcher.getRecoverableZooKeeper().setData(currentTask,
TaskState.TASK_OWNED.get(serverName), currentVersion);
if (stat == null) {
LOG.warn("zk.setData() returned null for path " + currentTask);
tot_wkr_task_heartbeat_failed.incrementAndGet();
return (false);
}
currentVersion = stat.getVersion();
tot_wkr_task_heartbeat.incrementAndGet();
return (true);
} catch (KeeperException e) {
if (!isFirstTime) {
if (e.code().equals(KeeperException.Code.NONODE)) {
LOG.warn("NONODE failed to assert ownership for " + currentTask, e);
} else if (e.code().equals(KeeperException.Code.BADVERSION)) {
LOG.warn("BADVERSION failed to assert ownership for " +
currentTask, e);
} else {
LOG.warn("failed to assert ownership for " + currentTask, e);
}
}
} catch (InterruptedException e1) {
LOG.warn("Interrupted while trying to assert ownership of " +
currentTask + " " + StringUtils.stringifyException(e1));
Thread.currentThread().interrupt();
}
tot_wkr_task_heartbeat_failed.incrementAndGet();
return (false);
}
/**
* endTask() can fail and the only way to recover from it is for the
* {@link SplitLogManager} to time out the task node.
* @param ts
* @param ctr
*/
private void endTask(ZKSplitLog.TaskState ts, AtomicLong ctr) {
String path = currentTask;
currentTask = null;
try {
if (ZKUtil.setData(this.watcher, path, ts.get(serverName),
currentVersion)) {
LOG.info("successfully transitioned task " + path +
" to final state " + ts);
ctr.incrementAndGet();
return;
}
LOG.warn("failed to transition task " + path + " to end state " + ts +
" because of version mismatch ");
} catch (KeeperException.BadVersionException bve) {
LOG.warn("transition of task " + path + " to " + ts +
" failed because of version mismatch", bve);
} catch (KeeperException.NoNodeException e) {
LOG.fatal("logic error - end task " + path + " " + ts +
" failed because task doesn't exist", e);
} catch (KeeperException e) {
LOG.warn("failed to end task, " + path + " " + ts, e);
}
tot_wkr_final_transistion_failed.incrementAndGet();
return;
}
void getDataSetWatchAsync() {
this.watcher.getRecoverableZooKeeper().getZooKeeper().
getData(currentTask, this.watcher,
new GetDataAsyncCallback(), null);
tot_wkr_get_data_queued.incrementAndGet();
}
void getDataSetWatchSuccess(String path, byte[] data) {
synchronized (grabTaskLock) {
if (workerInGrabTask) {
// currentTask can change but that's ok
String taskpath = currentTask;
if (taskpath != null && taskpath.equals(path)) {
// have to compare data. cannot compare version because then there
// will be race with attemptToOwnTask()
// cannot just check whether the node has been transitioned to
// UNASSIGNED because by the time this worker sets the data watch
// the node might have made two transitions - from owned by this
// worker to unassigned to owned by another worker
if (! TaskState.TASK_OWNED.equals(data, serverName) &&
! TaskState.TASK_DONE.equals(data, serverName) &&
! TaskState.TASK_ERR.equals(data, serverName) &&
! TaskState.TASK_RESIGNED.equals(data, serverName)) {
LOG.info("task " + taskpath + " preempted from " +
serverName + ", current task state and owner=" +
new String(data));
stopTask();
}
}
}
}
}
void getDataSetWatchFailure(String path) {
synchronized (grabTaskLock) {
if (workerInGrabTask) {
// currentTask can change but that's ok
String taskpath = currentTask;
if (taskpath != null && taskpath.equals(path)) {
LOG.info("retrying data watch on " + path);
tot_wkr_get_data_retry.incrementAndGet();
getDataSetWatchAsync();
} else {
// no point setting a watch on the task which this worker is not
// working upon anymore
}
}
}
}
@Override
public void nodeDataChanged(String path) {
// there will be a self generated dataChanged event every time attemptToOwnTask()
// heartbeats the task znode by upping its version
synchronized (grabTaskLock) {
if (workerInGrabTask) {
// currentTask can change
String taskpath = currentTask;
if (taskpath!= null && taskpath.equals(path)) {
getDataSetWatchAsync();
}
}
}
}
private List<String> getTaskList() {
for (int i = 0; i < zkretries; i++) {
try {
return (ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
this.watcher.splitLogZNode));
} catch (KeeperException e) {
LOG.warn("Could not get children of znode " +
this.watcher.splitLogZNode, e);
try {
Thread.sleep(1000);
} catch (InterruptedException e1) {
LOG.warn("Interrupted while trying to get task list ...", e1);
Thread.currentThread().interrupt();
return null;
}
}
}
LOG.warn("Tried " + zkretries + " times, still couldn't fetch " +
"children of " + watcher.splitLogZNode + " giving up");
return null;
}
@Override
public void nodeChildrenChanged(String path) {
if(path.equals(watcher.splitLogZNode)) {
LOG.debug("tasks arrived or departed");
synchronized (taskReadyLock) {
taskReadySeq++;
taskReadyLock.notify();
}
}
}
/**
* If the worker is doing a task, i.e. splitting a log file, then stop the task.
* It doesn't exit the worker thread.
*/
void stopTask() {
LOG.info("Sending interrupt to stop the worker thread");
worker.interrupt(); // TODO interrupt often gets swallowed, do what else?
}
/**
* start the SplitLogWorker thread
*/
public void start() {
worker = new Thread(null, this, "SplitLogWorker-" + serverName);
exitWorker = false;
worker.start();
return;
}
/**
* stop the SplitLogWorker thread
*/
public void stop() {
exitWorker = true;
stopTask();
}
/**
* Asynchronous handler for zk get-data-set-watch on node results.
*/
class GetDataAsyncCallback implements AsyncCallback.DataCallback {
private final Log LOG = LogFactory.getLog(GetDataAsyncCallback.class);
@Override
public void processResult(int rc, String path, Object ctx, byte[] data,
Stat stat) {
tot_wkr_get_data_result.incrementAndGet();
if (rc != 0) {
LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " + path);
getDataSetWatchFailure(path);
return;
}
data = watcher.getRecoverableZooKeeper().removeMetaData(data);
getDataSetWatchSuccess(path, data);
return;
}
}
/**
* Objects implementing this interface actually do the task that has been
* acquired by a {@link SplitLogWorker}. Since there isn't a water-tight
* guarantee that two workers will not be executing the same task, it is
* better to have workers prepare the task and then have the
* {@link SplitLogManager} commit the work in SplitLogManager.TaskFinisher
*/
static public interface TaskExecutor {
static public enum Status {
DONE(),
ERR(),
RESIGNED(),
PREEMPTED();
}
public Status exec(String name, CancelableProgressable p);
}
}
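For context, a hedged sketch of how a region server might drive this worker's lifecycle; the zooKeeperWatcher, conf and serverName values are assumptions, and the three-argument constructor installs the default HLogSplitter-backed TaskExecutor shown above:

  SplitLogWorker splitLogWorker =
      new SplitLogWorker(zooKeeperWatcher, conf, serverName);
  splitLogWorker.start(); // spawns the "SplitLogWorker-<serverName>" thread
  // ... later, during shutdown:
  splitLogWorker.stop();  // sets exitWorker and interrupts any running task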

View File

@ -1,321 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.coprocessor;
import static org.junit.Assert.assertEquals;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CountDownLatch;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Verifies ProcessRowEndpoint works.
* The tested RowProcessor performs two scans and a read-modify-write.
*/
@Category(SmallTests.class)
public class TestProcessRowEndpoint {
static final Log LOG = LogFactory.getLog(TestProcessRowEndpoint.class);
private static final byte[] TABLE = Bytes.toBytes("testtable");
private static final byte[] TABLE2 = Bytes.toBytes("testtable2");
private final static byte[] ROW = Bytes.toBytes("testrow");
private final static byte[] FAM = Bytes.toBytes("friendlist");
// Column names
private final static byte[] A = Bytes.toBytes("a");
private final static byte[] B = Bytes.toBytes("b");
private final static byte[] C = Bytes.toBytes("c");
private final static byte[] D = Bytes.toBytes("d");
private final static byte[] E = Bytes.toBytes("e");
private final static byte[] F = Bytes.toBytes("f");
private final static byte[] G = Bytes.toBytes("g");
private final static byte[] REQUESTS = Bytes.toBytes("requests");
private static HBaseTestingUtility util = new HBaseTestingUtility();
private volatile int numRequests;
private CountDownLatch startSignal;
private CountDownLatch doneSignal;
@BeforeClass
public static void setupBeforeClass() throws Exception {
Configuration conf = util.getConfiguration();
conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
FriendsOfFriendsEndpoint.class.getName());
util.startMiniCluster();
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
util.shutdownMiniCluster();
}
@Test
public void testSingle() throws Throwable {
HTable table = prepareTestData(TABLE, util);
verifyProcessRow(table);
assertEquals(1, numRequests);
}
private void verifyProcessRow(HTable table) throws Throwable {
FriendsOfFriendsProtocol processor =
table.coprocessorProxy(FriendsOfFriendsProtocol.class, ROW);
Result result = processor.query(ROW, A);
Set<String> friendsOfFriends = new HashSet<String>();
for (KeyValue kv : result.raw()) {
if (Bytes.equals(kv.getQualifier(), REQUESTS)) {
numRequests = Bytes.toInt(kv.getValue());
continue;
}
for (byte val : kv.getValue()) {
friendsOfFriends.add((char)val + "");
}
}
Set<String> expected =
new HashSet<String>(Arrays.asList(new String[]{"d", "e", "f", "g"}));
assertEquals(expected, friendsOfFriends);
}
@Test
public void testThreads() throws Exception {
HTable table = prepareTestData(TABLE2, util);
int numThreads = 1000;
startSignal = new CountDownLatch(numThreads);
doneSignal = new CountDownLatch(numThreads);
for (int i = 0; i < numThreads; ++i) {
new Thread(new QueryRunner(table)).start();
startSignal.countDown();
}
doneSignal.await();
Get get = new Get(ROW);
LOG.debug("row keyvalues:" + stringifyKvs(table.get(get).list()));
assertEquals(numThreads, numRequests);
}
class QueryRunner implements Runnable {
final HTable table;
QueryRunner(final HTable table) {
this.table = table;
}
@Override
public void run() {
try {
startSignal.await();
verifyProcessRow(table);
} catch (Throwable e) {
e.printStackTrace();
}
doneSignal.countDown();
}
}
static HTable prepareTestData(byte[] tableName, HBaseTestingUtility util)
throws Exception {
HTable table = util.createTable(tableName, FAM);
Put put = new Put(ROW);
put.add(FAM, A, Bytes.add(B, C)); // B, C are friends of A
put.add(FAM, B, Bytes.add(D, E, F)); // D, E, F are friends of B
put.add(FAM, C, G); // G is a friend of C
table.put(put);
return table;
}
/**
* Coprocessor protocol that finds friends of friends of a person and
* update the number of requests.
*/
public static interface FriendsOfFriendsProtocol extends CoprocessorProtocol {
/**
* Query a person's friends of friends
*/
Result query(byte[] row, byte[] person) throws IOException;
}
/**
* Finds friends of friends of a person and updates the number of requests.
*/
public static class FriendsOfFriendsEndpoint extends BaseEndpointCoprocessor
implements FriendsOfFriendsProtocol, RowProcessor<Result> {
byte[] row = null;
byte[] person = null;
Result result = null;
//
// FriendsOfFriendsProtocol method
//
@Override
public Result query(byte[] row, byte[] person) throws IOException {
this.row = row;
this.person = person;
HRegion region =
((RegionCoprocessorEnvironment) getEnvironment()).getRegion();
region.processRow(this);
return this.getResult();
}
//
// RowProcessor methods
//
FriendsOfFriendsEndpoint() {
}
@Override
public byte[] getRow() {
return row;
}
@Override
public Result getResult() {
return result;
}
@Override
public boolean readOnly() {
return false;
}
@Override
public void process(long now, RowProcessor.RowScanner scanner,
List<KeyValue> mutations, WALEdit walEdit) throws IOException {
List<KeyValue> kvs = new ArrayList<KeyValue>();
{ // First scan to get friends of the person and numRequests
Scan scan = new Scan(row, row);
scan.addColumn(FAM, person);
scan.addColumn(FAM, REQUESTS);
scanner.doScan(scan, kvs);
}
LOG.debug("first scan:" + stringifyKvs(kvs));
int numRequests = 0;
// Second scan to get friends of friends
Scan scan = new Scan(row, row);
for (KeyValue kv : kvs) {
if (Bytes.equals(kv.getQualifier(), REQUESTS)) {
numRequests = Bytes.toInt(kv.getValue());
continue;
}
byte[] friends = kv.getValue();
for (byte f : friends) {
scan.addColumn(FAM, new byte[]{f});
}
}
scanner.doScan(scan, kvs);
LOG.debug("second scan:" + stringifyKvs(kvs));
numRequests += 1;
// Construct mutations and Result
KeyValue kv = new KeyValue(
row, FAM, REQUESTS, now, Bytes.toBytes(numRequests));
mutations.clear();
mutations.add(kv);
kvs.add(kv);
LOG.debug("final result:" + stringifyKvs(kvs) +
" mutations:" + stringifyKvs(mutations));
result = new Result(kvs);
// Inject some meta data to the walEdit
KeyValue metaKv = new KeyValue(
getRow(), HLog.METAFAMILY,
Bytes.toBytes("FriendsOfFriends query"),
person);
walEdit.add(metaKv);
}
@Override
public void readFields(DataInput in) throws IOException {
this.person = Bytes.readByteArray(in);
this.row = Bytes.readByteArray(in);
this.result = new Result();
result.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, person);
Bytes.writeByteArray(out, row);
if (result == null) {
new Result().write(out);
} else {
result.write(out);
}
}
@Override
public UUID getClusterId() {
return HConstants.DEFAULT_CLUSTER_ID;
}
}
static String stringifyKvs(Collection<KeyValue> kvs) {
StringBuilder out = new StringBuilder();
out.append("[");
for (KeyValue kv : kvs) {
byte[] col = kv.getQualifier();
byte[] val = kv.getValue();
if (Bytes.equals(col, REQUESTS)) {
out.append(Bytes.toStringBinary(col) + ":" +
Bytes.toInt(val) + " ");
} else {
out.append(Bytes.toStringBinary(col) + ":" +
Bytes.toStringBinary(val) + " ");
}
}
out.append("]");
return out.toString();
}
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}

View File

@ -0,0 +1,598 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.coprocessor;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.IsolationLevel;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.BaseRowProcessor;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Verifies RowProcessorEndpoint works.
* The tested RowProcessor performs two scans and a read-modify-write.
*/
@Category(SmallTests.class)
public class TestRowProcessorEndpoint {
static final Log LOG = LogFactory.getLog(TestRowProcessorEndpoint.class);
private static final byte[] TABLE = Bytes.toBytes("testtable");
private final static byte[] ROW = Bytes.toBytes("testrow");
private final static byte[] ROW2 = Bytes.toBytes("testrow2");
private final static byte[] FAM = Bytes.toBytes("friendlist");
// Column names
private final static byte[] A = Bytes.toBytes("a");
private final static byte[] B = Bytes.toBytes("b");
private final static byte[] C = Bytes.toBytes("c");
private final static byte[] D = Bytes.toBytes("d");
private final static byte[] E = Bytes.toBytes("e");
private final static byte[] F = Bytes.toBytes("f");
private final static byte[] G = Bytes.toBytes("g");
private final static byte[] COUNTER = Bytes.toBytes("counter");
private final static AtomicLong myTimer = new AtomicLong(0);
private final AtomicInteger failures = new AtomicInteger(0);
private static HBaseTestingUtility util = new HBaseTestingUtility();
private static volatile int expectedCounter = 0;
private static int rowSize, row2Size;
private volatile static HTable table = null;
private volatile static boolean swapped = false;
private volatile CountDownLatch startSignal;
private volatile CountDownLatch doneSignal;
@BeforeClass
public static void setupBeforeClass() throws Exception {
Configuration conf = util.getConfiguration();
conf.set(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
RowProcessorEndpoint.class.getName());
conf.setInt("hbase.client.retries.number", 1);
conf.setLong("hbase.hregion.row.processor.timeout", 1000L);
util.startMiniCluster();
}
@AfterClass
public static void tearDownAfterClass() throws Exception {
util.shutdownMiniCluster();
}
public void prepareTestData() throws Exception {
try {
util.getHBaseAdmin().disableTable(TABLE);
util.getHBaseAdmin().deleteTable(TABLE);
} catch (Exception e) {
// ignore table not found
}
table = util.createTable(TABLE, FAM);
{
Put put = new Put(ROW);
put.add(FAM, A, Bytes.add(B, C)); // B, C are friends of A
put.add(FAM, B, Bytes.add(D, E, F)); // D, E, F are friends of B
put.add(FAM, C, G); // G is a friend of C
table.put(put);
rowSize = put.size();
}
Put put = new Put(ROW2);
put.add(FAM, D, E);
put.add(FAM, F, G);
table.put(put);
row2Size = put.size();
}
@Test
public void testDoubleScan() throws Throwable {
prepareTestData();
RowProcessorProtocol protocol =
table.coprocessorProxy(RowProcessorProtocol.class, ROW);
RowProcessorEndpoint.FriendsOfFriendsProcessor processor =
new RowProcessorEndpoint.FriendsOfFriendsProcessor(ROW, A);
Set<String> result = protocol.process(processor);
Set<String> expected =
new HashSet<String>(Arrays.asList(new String[]{"d", "e", "f", "g"}));
Get get = new Get(ROW);
LOG.debug("row keyvalues:" + stringifyKvs(table.get(get).list()));
assertEquals(expected, result);
}
@Test
public void testReadModifyWrite() throws Throwable {
prepareTestData();
failures.set(0);
int numThreads = 1000;
concurrentExec(new IncrementRunner(), numThreads);
Get get = new Get(ROW);
LOG.debug("row keyvalues:" + stringifyKvs(table.get(get).list()));
int finalCounter = incrementCounter(table);
assertEquals(numThreads + 1, finalCounter);
assertEquals(0, failures.get());
}
class IncrementRunner implements Runnable {
@Override
public void run() {
try {
incrementCounter(table);
} catch (Throwable e) {
e.printStackTrace();
}
}
}
private int incrementCounter(HTable table) throws Throwable {
RowProcessorProtocol protocol =
table.coprocessorProxy(RowProcessorProtocol.class, ROW);
RowProcessorEndpoint.IncrementCounterProcessor processor =
new RowProcessorEndpoint.IncrementCounterProcessor(ROW);
int counterValue = protocol.process(processor);
return counterValue;
}
private void concurrentExec(
final Runnable task, final int numThreads) throws Throwable {
startSignal = new CountDownLatch(numThreads);
doneSignal = new CountDownLatch(numThreads);
for (int i = 0; i < numThreads; ++i) {
new Thread(new Runnable() {
@Override
public void run() {
try {
startSignal.countDown();
startSignal.await();
task.run();
} catch (Throwable e) {
failures.incrementAndGet();
e.printStackTrace();
}
doneSignal.countDown();
}
}).start();
}
doneSignal.await();
}
@Test
public void testMultipleRows() throws Throwable {
prepareTestData();
failures.set(0);
int numThreads = 1000;
concurrentExec(new SwapRowsRunner(), numThreads);
LOG.debug("row keyvalues:" +
stringifyKvs(table.get(new Get(ROW)).list()));
LOG.debug("row2 keyvalues:" +
stringifyKvs(table.get(new Get(ROW2)).list()));
assertEquals(rowSize, table.get(new Get(ROW)).list().size());
assertEquals(row2Size, table.get(new Get(ROW2)).list().size());
assertEquals(0, failures.get());
}
class SwapRowsRunner implements Runnable {
@Override
public void run() {
try {
swapRows(table);
} catch (Throwable e) {
e.printStackTrace();
}
}
}
private void swapRows(HTable table) throws Throwable {
RowProcessorProtocol protocol =
table.coprocessorProxy(RowProcessorProtocol.class, ROW);
RowProcessorEndpoint.RowSwapProcessor processor =
new RowProcessorEndpoint.RowSwapProcessor(ROW, ROW2);
protocol.process(processor);
}
@Test
public void testTimeout() throws Throwable {
prepareTestData();
RowProcessorProtocol protocol =
table.coprocessorProxy(RowProcessorProtocol.class, ROW);
RowProcessorEndpoint.TimeoutProcessor processor =
new RowProcessorEndpoint.TimeoutProcessor(ROW);
boolean exceptionCaught = false;
try {
protocol.process(processor);
} catch (Exception e) {
exceptionCaught = true;
}
assertTrue(exceptionCaught);
}
/**
* This class defines two RowProcessors:
* IncrementCounterProcessor and FriendsOfFriendsProcessor.
*
* We define the RowProcessors as inner classes of the endpoint so they
* can be class-loaded together with the endpoint on the coprocessor.
*/
public static class RowProcessorEndpoint extends BaseRowProcessorEndpoint
implements RowProcessorProtocol {
public static class IncrementCounterProcessor extends
BaseRowProcessor<Integer> implements Writable {
int counter = 0;
byte[] row = new byte[0];
/**
* Empty constructor for Writable
*/
IncrementCounterProcessor() {
}
IncrementCounterProcessor(byte[] row) {
this.row = row;
}
@Override
public Collection<byte[]> getRowsToLock() {
return Collections.singleton(row);
}
@Override
public Integer getResult() {
return counter;
}
@Override
public boolean readOnly() {
return false;
}
@Override
public void process(long now, HRegion region,
List<KeyValue> mutations, WALEdit walEdit) throws IOException {
// Scan current counter
List<KeyValue> kvs = new ArrayList<KeyValue>();
Scan scan = new Scan(row, row);
scan.addColumn(FAM, COUNTER);
doScan(region, scan, kvs);
counter = kvs.size() == 0 ? 0 :
Bytes.toInt(kvs.iterator().next().getValue());
// Assert counter value
assertEquals(expectedCounter, counter);
// Increment counter and send it to both memstore and wal edit
counter += 1;
expectedCounter += 1;
KeyValue kv =
new KeyValue(row, FAM, COUNTER, now, Bytes.toBytes(counter));
mutations.add(kv);
walEdit.add(kv);
// We can also inject some meta data to the walEdit
KeyValue metaKv = new KeyValue(
row, HLog.METAFAMILY,
Bytes.toBytes("I just increment counter"),
Bytes.toBytes(counter));
walEdit.add(metaKv);
}
@Override
public void readFields(DataInput in) throws IOException {
this.row = Bytes.readByteArray(in);
this.counter = in.readInt();
}
@Override
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, row);
out.writeInt(counter);
}
}
public static class FriendsOfFriendsProcessor extends
BaseRowProcessor<Set<String>> implements Writable {
byte[] row = null;
byte[] person = null;
final Set<String> result = new HashSet<String>();
/**
* Empty constructor for Writable
*/
FriendsOfFriendsProcessor() {
}
FriendsOfFriendsProcessor(byte[] row, byte[] person) {
this.row = row;
this.person = person;
}
@Override
public Collection<byte[]> getRowsToLock() {
return Collections.singleton(row);
}
@Override
public Set<String> getResult() {
return result;
}
@Override
public boolean readOnly() {
return true;
}
@Override
public void process(long now, HRegion region,
List<KeyValue> mutations, WALEdit walEdit) throws IOException {
List<KeyValue> kvs = new ArrayList<KeyValue>();
{ // First scan to get friends of the person
Scan scan = new Scan(row, row);
scan.addColumn(FAM, person);
doScan(region, scan, kvs);
}
// Second scan to get friends of friends
Scan scan = new Scan(row, row);
for (KeyValue kv : kvs) {
byte[] friends = kv.getValue();
for (byte f : friends) {
scan.addColumn(FAM, new byte[]{f});
}
}
doScan(region, scan, kvs);
// Collect result
result.clear();
for (KeyValue kv : kvs) {
for (byte b : kv.getValue()) {
result.add((char)b + "");
}
}
}
@Override
public void readFields(DataInput in) throws IOException {
this.person = Bytes.readByteArray(in);
this.row = Bytes.readByteArray(in);
int size = in.readInt();
result.clear();
for (int i = 0; i < size; ++i) {
result.add(Text.readString(in));
}
}
@Override
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, person);
Bytes.writeByteArray(out, row);
out.writeInt(result.size());
for (String s : result) {
Text.writeString(out, s);
}
}
}
public static class RowSwapProcessor extends
BaseRowProcessor<Set<String>> implements Writable {
byte[] row1 = new byte[0];
byte[] row2 = new byte[0];
/**
* Empty constructor for Writable
*/
RowSwapProcessor() {
}
RowSwapProcessor(byte[] row1, byte[] row2) {
this.row1 = row1;
this.row2 = row2;
}
@Override
public Collection<byte[]> getRowsToLock() {
List<byte[]> rows = new ArrayList<byte[]>();
rows.add(row1);
rows.add(row2);
return rows;
}
@Override
public boolean readOnly() {
return false;
}
@Override
public void process(long now, HRegion region,
List<KeyValue> mutations, WALEdit walEdit) throws IOException {
// Override the time to avoid race conditions in the unit test caused by
// inaccurate timers on some machines
now = myTimer.getAndIncrement();
// Scan both rows
List<KeyValue> kvs1 = new ArrayList<KeyValue>();
List<KeyValue> kvs2 = new ArrayList<KeyValue>();
doScan(region, new Scan(row1, row1), kvs1);
doScan(region, new Scan(row2, row2), kvs2);
// Assert swapped
if (swapped) {
assertEquals(rowSize, kvs2.size());
assertEquals(row2Size, kvs1.size());
} else {
assertEquals(rowSize, kvs1.size());
assertEquals(row2Size, kvs2.size());
}
swapped = !swapped;
// Add and delete keyvalues
List<List<KeyValue>> kvs = new ArrayList<List<KeyValue>>();
kvs.add(kvs1);
kvs.add(kvs2);
byte[][] rows = new byte[][]{row1, row2};
for (int i = 0; i < kvs.size(); ++i) {
for (KeyValue kv : kvs.get(i)) {
// Delete from the current row and add to the other row
KeyValue kvDelete =
new KeyValue(rows[i], kv.getFamily(), kv.getQualifier(),
kv.getTimestamp(), KeyValue.Type.Delete);
KeyValue kvAdd =
new KeyValue(rows[1 - i], kv.getFamily(), kv.getQualifier(),
now, kv.getValue());
mutations.add(kvDelete);
walEdit.add(kvDelete);
mutations.add(kvAdd);
walEdit.add(kvAdd);
}
}
}
@Override
public void readFields(DataInput in) throws IOException {
this.row1 = Bytes.readByteArray(in);
this.row2 = Bytes.readByteArray(in);
}
@Override
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, row1);
Bytes.writeByteArray(out, row2);
}
}
public static class TimeoutProcessor extends
BaseRowProcessor<Void> implements Writable {
byte[] row = new byte[0];
/**
* Empty constructor for Writable
*/
public TimeoutProcessor() {
}
public TimeoutProcessor(byte[] row) {
this.row = row;
}
public Collection<byte[]> getRowsToLock() {
return Collections.singleton(row);
}
@Override
public void process(long now, HRegion region,
List<KeyValue> mutations, WALEdit walEdit) throws IOException {
try {
// Sleep for a long time so the call times out
Thread.sleep(100 * 1000L);
} catch (Exception e) {
throw new IOException(e);
}
}
@Override
public boolean readOnly() {
return true;
}
@Override
public void readFields(DataInput in) throws IOException {
this.row = Bytes.readByteArray(in);
}
@Override
public void write(DataOutput out) throws IOException {
Bytes.writeByteArray(out, row);
}
}
public static void doScan(
HRegion region, Scan scan, List<KeyValue> result) throws IOException {
InternalScanner scanner = null;
try {
scan.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED);
scanner = region.getScanner(scan);
result.clear();
scanner.next(result);
} finally {
if (scanner != null) scanner.close();
}
}
}
static String stringifyKvs(Collection<KeyValue> kvs) {
StringBuilder out = new StringBuilder();
out.append("[");
if (kvs != null) {
for (KeyValue kv : kvs) {
byte[] col = kv.getQualifier();
byte[] val = kv.getValue();
if (Bytes.equals(col, COUNTER)) {
out.append(Bytes.toStringBinary(col) + ":" +
Bytes.toInt(val) + " ");
} else {
out.append(Bytes.toStringBinary(col) + ":" +
Bytes.toStringBinary(val) + " ");
}
}
}
out.append("]");
return out.toString();
}
@org.junit.Rule
public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
}