HBASE-5542 Addendum - accidentally checked in .orig file
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1307562 13f79535-47bb-0310-9956-ffa450edef68
parent 06e70aa877
commit f1a9b5bed1
@@ -1,564 +0,0 @@
/**
 * Copyright 2011 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.zookeeper.ZKSplitLog.Counters.*;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.master.SplitLogManager;
import org.apache.hadoop.hbase.regionserver.wal.HLogSplitter;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog;
import org.apache.hadoop.hbase.zookeeper.ZKSplitLog.TaskState;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.AsyncCallback;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;

/**
 * This worker is spawned in every regionserver (should we also spawn one in
 * the master?). The worker waits for log splitting tasks to be put up by the
 * {@link SplitLogManager} running in the master and races with workers on
 * other servers to acquire those tasks. The coordination is done via
 * zookeeper. All the action takes place at the /hbase/splitlog znode.
 * <p>
 * If a worker has successfully moved a task from state UNASSIGNED to
 * OWNED then it owns the task. It keeps heartbeating the manager by
 * periodically re-asserting the OWNED state. On success it
 * moves the task to TASK_DONE. On an unrecoverable error it moves the task
 * state to ERR. If it cannot continue but wants the master to retry the task
 * then it moves the task state to RESIGNED.
 * <p>
 * The manager can take a task away from a worker by moving the task from
 * OWNED to UNASSIGNED. In the absence of a global lock there is an
 * unavoidable race here - a worker might have just finished its task when it
 * is stripped of its ownership. Here we rely on the idempotency of the log
 * splitting task for correctness.
 */
@InterfaceAudience.Private
public class SplitLogWorker extends ZooKeeperListener implements Runnable {
  private static final Log LOG = LogFactory.getLog(SplitLogWorker.class);

  Thread worker;
  private final String serverName;
  private final TaskExecutor splitTaskExecutor;
  private long zkretries;

  private Object taskReadyLock = new Object();
  volatile int taskReadySeq = 0;
  private volatile String currentTask = null;
  private int currentVersion;
  private volatile boolean exitWorker;
  private Object grabTaskLock = new Object();
  private boolean workerInGrabTask = false;

  public SplitLogWorker(ZooKeeperWatcher watcher, Configuration conf,
      String serverName, TaskExecutor splitTaskExecutor) {
    super(watcher);
    this.serverName = serverName;
    this.splitTaskExecutor = splitTaskExecutor;
    this.zkretries = conf.getLong("hbase.splitlog.zk.retries", 3);
  }

  public SplitLogWorker(ZooKeeperWatcher watcher, final Configuration conf,
      final String serverName) {
    this(watcher, conf, serverName, new TaskExecutor() {
      @Override
      public Status exec(String filename, CancelableProgressable p) {
        Path rootdir;
        FileSystem fs;
        try {
          rootdir = FSUtils.getRootDir(conf);
          fs = rootdir.getFileSystem(conf);
        } catch (IOException e) {
          LOG.warn("could not find root dir or fs", e);
          return Status.RESIGNED;
        }
        // TODO have to correctly figure out when log splitting has been
        // interrupted or has encountered a transient error and when it has
        // encountered a bad non-retry-able persistent error.
        try {
          String tmpname =
            ZKSplitLog.getSplitLogDirTmpComponent(serverName, filename);
          if (HLogSplitter.splitLogFileToTemp(rootdir, tmpname,
              fs.getFileStatus(new Path(filename)), fs, conf, p) == false) {
            return Status.PREEMPTED;
          }
        } catch (InterruptedIOException iioe) {
          LOG.warn("log splitting of " + filename + " interrupted, resigning",
              iioe);
          return Status.RESIGNED;
        } catch (IOException e) {
          Throwable cause = e.getCause();
          if (cause instanceof InterruptedException) {
            LOG.warn("log splitting of " + filename + " interrupted, resigning",
                e);
            return Status.RESIGNED;
          }
          LOG.warn("log splitting of " + filename + " failed, returning error",
              e);
          return Status.ERR;
        }
        return Status.DONE;
      }
    });
  }

  @Override
  public void run() {
    try {
      LOG.info("SplitLogWorker " + this.serverName + " starting");
      this.watcher.registerListener(this);
      int res;
      // wait for master to create the splitLogZnode
      res = -1;
      while (res == -1) {
        try {
          res = ZKUtil.checkExists(watcher, watcher.splitLogZNode);
        } catch (KeeperException e) {
          // ignore
          LOG.warn("Exception when checking for " + watcher.splitLogZNode +
              " ... retrying", e);
        }
        if (res == -1) {
          try {
            LOG.info(watcher.splitLogZNode + " znode does not exist," +
                " waiting for master to create one");
            Thread.sleep(1000);
          } catch (InterruptedException e) {
            LOG.debug("Interrupted while waiting for " + watcher.splitLogZNode);
            assert exitWorker == true;
          }
        }
      }

      taskLoop();
    } catch (Throwable t) {
      // only a logical error can get us here. Printing it out
      // to make debugging easier
      LOG.error("unexpected error ", t);
    } finally {
      LOG.info("SplitLogWorker " + this.serverName + " exiting");
    }
  }

  /**
   * Wait for tasks to become available at the /hbase/splitlog znode. Grab a
   * task one at a time. This policy puts an upper limit on the number of
   * log-splitting tasks executing simultaneously in a cluster.
   * <p>
   * Synchronization using {@link #taskReadySeq} ensures that it will
   * try to grab every task that has been put up.
   */
  private void taskLoop() {
    while (true) {
      int seq_start = taskReadySeq;
      List<String> paths = getTaskList();
      if (paths == null) {
        LOG.warn("Could not get tasks, did someone remove " +
            this.watcher.splitLogZNode + " ... worker thread exiting.");
        return;
      }
      int offset = (int)(Math.random() * paths.size());
      for (int i = 0; i < paths.size(); i++) {
        int idx = (i + offset) % paths.size();
        // don't call ZKSplitLog.getNodeName() because that will lead to
        // double encoding of the path name
        grabTask(ZKUtil.joinZNode(watcher.splitLogZNode, paths.get(idx)));
        if (exitWorker == true) {
          return;
        }
      }
      synchronized (taskReadyLock) {
        while (seq_start == taskReadySeq) {
          try {
            taskReadyLock.wait();
          } catch (InterruptedException e) {
            LOG.info("SplitLogWorker interrupted while waiting for task," +
                " exiting: " + e.toString());
            assert exitWorker == true;
            return;
          }
        }
      }
    }
  }

  /**
   * Try to grab a 'lock' on the task zk node to own and execute the task.
   * <p>
   * @param path zk node for the task
   */
  private void grabTask(String path) {
    Stat stat = new Stat();
    long t = -1;
    byte[] data;
    synchronized (grabTaskLock) {
      currentTask = path;
      workerInGrabTask = true;
      if (Thread.interrupted()) {
        return;
      }
    }
    try {
      try {
        if ((data = ZKUtil.getDataNoWatch(this.watcher, path, stat)) == null) {
          tot_wkr_failed_to_grab_task_no_data.incrementAndGet();
          return;
        }
      } catch (KeeperException e) {
        LOG.warn("Failed to get data for znode " + path, e);
        tot_wkr_failed_to_grab_task_exception.incrementAndGet();
        return;
      }
      if (TaskState.TASK_UNASSIGNED.equals(data) == false) {
        tot_wkr_failed_to_grab_task_owned.incrementAndGet();
        return;
      }

      currentVersion = stat.getVersion();
      if (attemptToOwnTask(true) == false) {
        tot_wkr_failed_to_grab_task_lost_race.incrementAndGet();
        return;
      }

      if (ZKSplitLog.isRescanNode(watcher, currentTask)) {
        endTask(TaskState.TASK_DONE, tot_wkr_task_acquired_rescan);
        return;
      }
      LOG.info("worker " + serverName + " acquired task " + path);
      tot_wkr_task_acquired.incrementAndGet();
      getDataSetWatchAsync();

      t = System.currentTimeMillis();
      TaskExecutor.Status status;

      status = splitTaskExecutor.exec(ZKSplitLog.getFileName(currentTask),
          new CancelableProgressable() {

        @Override
        public boolean progress() {
          if (attemptToOwnTask(false) == false) {
            LOG.warn("Failed to heartbeat the task " + currentTask);
            return false;
          }
          return true;
        }
      });
      switch (status) {
        case DONE:
          endTask(TaskState.TASK_DONE, tot_wkr_task_done);
          break;
        case PREEMPTED:
          tot_wkr_preempt_task.incrementAndGet();
          LOG.warn("task execution preempted " + path);
          break;
        case ERR:
          if (!exitWorker) {
            endTask(TaskState.TASK_ERR, tot_wkr_task_err);
            break;
          }
          // if the RS is exiting then there is probably tons of stuff
          // that can go wrong. Resign instead of signaling error.
          //$FALL-THROUGH$
        case RESIGNED:
          if (exitWorker) {
            LOG.info("task execution interrupted because worker is exiting " +
                path);
            endTask(TaskState.TASK_RESIGNED, tot_wkr_task_resigned);
          } else {
            tot_wkr_preempt_task.incrementAndGet();
            LOG.info("task execution interrupted via zk by manager " +
                path);
          }
          break;
      }
    } finally {
      if (t > 0) {
        LOG.info("worker " + serverName + " done with task " + path +
            " in " + (System.currentTimeMillis() - t) + "ms");
      }
      synchronized (grabTaskLock) {
        workerInGrabTask = false;
        // clear the interrupt from stopTask() otherwise the next task will
        // suffer
        Thread.interrupted();
      }
    }
    return;
  }

  /**
   * Try to own the task by transitioning the zk node data from UNASSIGNED to
   * OWNED.
   * <p>
   * This method is also used to periodically heartbeat the task progress by
   * transitioning the node from OWNED to OWNED.
   * <p>
   * @return true if task path is successfully locked
   */
  private boolean attemptToOwnTask(boolean isFirstTime) {
    try {
      Stat stat = this.watcher.getRecoverableZooKeeper().setData(currentTask,
          TaskState.TASK_OWNED.get(serverName), currentVersion);
      if (stat == null) {
        LOG.warn("zk.setData() returned null for path " + currentTask);
        tot_wkr_task_heartbeat_failed.incrementAndGet();
        return (false);
      }
      currentVersion = stat.getVersion();
      tot_wkr_task_heartbeat.incrementAndGet();
      return (true);
    } catch (KeeperException e) {
      if (!isFirstTime) {
        if (e.code().equals(KeeperException.Code.NONODE)) {
          LOG.warn("NONODE failed to assert ownership for " + currentTask, e);
        } else if (e.code().equals(KeeperException.Code.BADVERSION)) {
          LOG.warn("BADVERSION failed to assert ownership for " +
              currentTask, e);
        } else {
          LOG.warn("failed to assert ownership for " + currentTask, e);
        }
      }
    } catch (InterruptedException e1) {
      LOG.warn("Interrupted while trying to assert ownership of " +
          currentTask + " " + StringUtils.stringifyException(e1));
      Thread.currentThread().interrupt();
    }
    tot_wkr_task_heartbeat_failed.incrementAndGet();
    return (false);
  }

  /**
   * endTask() can fail and the only way to recover out of it is for the
   * {@link SplitLogManager} to timeout the task node.
   * @param ts final task state to record
   * @param ctr counter to increment on a successful transition
   */
  private void endTask(ZKSplitLog.TaskState ts, AtomicLong ctr) {
    String path = currentTask;
    currentTask = null;
    try {
      if (ZKUtil.setData(this.watcher, path, ts.get(serverName),
          currentVersion)) {
        LOG.info("successfully transitioned task " + path +
            " to final state " + ts);
        ctr.incrementAndGet();
        return;
      }
      LOG.warn("failed to transition task " + path + " to end state " + ts +
          " because of version mismatch ");
    } catch (KeeperException.BadVersionException bve) {
      LOG.warn("transition task " + path + " to " + ts +
          " failed because of version mismatch", bve);
    } catch (KeeperException.NoNodeException e) {
      LOG.fatal("logic error - end task " + path + " " + ts +
          " failed because task doesn't exist", e);
    } catch (KeeperException e) {
      LOG.warn("failed to end task, " + path + " " + ts, e);
    }
    tot_wkr_final_transistion_failed.incrementAndGet();
    return;
  }

  void getDataSetWatchAsync() {
    this.watcher.getRecoverableZooKeeper().getZooKeeper().
        getData(currentTask, this.watcher,
            new GetDataAsyncCallback(), null);
    tot_wkr_get_data_queued.incrementAndGet();
  }

  void getDataSetWatchSuccess(String path, byte[] data) {
    synchronized (grabTaskLock) {
      if (workerInGrabTask) {
        // currentTask can change but that's ok
        String taskpath = currentTask;
        if (taskpath != null && taskpath.equals(path)) {
          // have to compare data. cannot compare version because then there
          // will be race with attemptToOwnTask()
          // cannot just check whether the node has been transitioned to
          // UNASSIGNED because by the time this worker sets the data watch
          // the node might have made two transitions - from owned by this
          // worker to unassigned to owned by another worker
          if (! TaskState.TASK_OWNED.equals(data, serverName) &&
              ! TaskState.TASK_DONE.equals(data, serverName) &&
              ! TaskState.TASK_ERR.equals(data, serverName) &&
              ! TaskState.TASK_RESIGNED.equals(data, serverName)) {
            LOG.info("task " + taskpath + " preempted from " +
                serverName + ", current task state and owner=" +
                new String(data));
            stopTask();
          }
        }
      }
    }
  }

  void getDataSetWatchFailure(String path) {
    synchronized (grabTaskLock) {
      if (workerInGrabTask) {
        // currentTask can change but that's ok
        String taskpath = currentTask;
        if (taskpath != null && taskpath.equals(path)) {
          LOG.info("retrying data watch on " + path);
          tot_wkr_get_data_retry.incrementAndGet();
          getDataSetWatchAsync();
        } else {
          // no point setting a watch on the task which this worker is not
          // working upon anymore
        }
      }
    }
  }

  @Override
  public void nodeDataChanged(String path) {
    // there will be a self generated dataChanged event every time attemptToOwnTask()
    // heartbeats the task znode by upping its version
    synchronized (grabTaskLock) {
      if (workerInGrabTask) {
        // currentTask can change
        String taskpath = currentTask;
        if (taskpath != null && taskpath.equals(path)) {
          getDataSetWatchAsync();
        }
      }
    }
  }

  private List<String> getTaskList() {
    for (int i = 0; i < zkretries; i++) {
      try {
        return (ZKUtil.listChildrenAndWatchForNewChildren(this.watcher,
            this.watcher.splitLogZNode));
      } catch (KeeperException e) {
        LOG.warn("Could not get children of znode " +
            this.watcher.splitLogZNode, e);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e1) {
          LOG.warn("Interrupted while trying to get task list ...", e1);
          Thread.currentThread().interrupt();
          return null;
        }
      }
    }
    LOG.warn("Tried " + zkretries + " times, still couldn't fetch " +
        "children of " + watcher.splitLogZNode + " giving up");
    return null;
  }

  @Override
  public void nodeChildrenChanged(String path) {
    if (path.equals(watcher.splitLogZNode)) {
      LOG.debug("tasks arrived or departed");
      synchronized (taskReadyLock) {
        taskReadySeq++;
        taskReadyLock.notify();
      }
    }
  }

  /**
   * If the worker is doing a task i.e. splitting a log file then stop the
   * task. It doesn't exit the worker thread.
   */
  void stopTask() {
    LOG.info("Sending interrupt to stop the worker thread");
    worker.interrupt(); // TODO interrupt often gets swallowed, do what else?
  }

  /**
   * start the SplitLogWorker thread
   */
  public void start() {
    worker = new Thread(null, this, "SplitLogWorker-" + serverName);
    exitWorker = false;
    worker.start();
    return;
  }

  /**
   * stop the SplitLogWorker thread
   */
  public void stop() {
    exitWorker = true;
    stopTask();
  }

  /**
   * Asynchronous handler for zk get-data-set-watch on node results.
   */
  class GetDataAsyncCallback implements AsyncCallback.DataCallback {
    private final Log LOG = LogFactory.getLog(GetDataAsyncCallback.class);

    @Override
    public void processResult(int rc, String path, Object ctx, byte[] data,
        Stat stat) {
      tot_wkr_get_data_result.incrementAndGet();
      if (rc != 0) {
        LOG.warn("getdata rc = " + KeeperException.Code.get(rc) + " " + path);
        getDataSetWatchFailure(path);
        return;
      }
      data = watcher.getRecoverableZooKeeper().removeMetaData(data);
      getDataSetWatchSuccess(path, data);
      return;
    }
  }

  /**
   * Objects implementing this interface actually do the task that has been
   * acquired by a {@link SplitLogWorker}. Since there isn't a water-tight
   * guarantee that two workers will not be executing the same task, it is
   * better to have workers prepare the task and then have the
   * {@link SplitLogManager} commit the work in SplitLogManager.TaskFinisher.
   */
  static public interface TaskExecutor {
    static public enum Status {
      DONE(),
      ERR(),
      RESIGNED(),
      PREEMPTED();
    }
    public Status exec(String name, CancelableProgressable p);
  }
}
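For reference, callers consumed the removed class through its TaskExecutor hook and start/stop lifecycle. A minimal sketch of that wiring, assuming watcher, conf, and serverName are already available in scope (this snippet is illustrative and not part of the commit):

    // Illustrative only: wiring a SplitLogWorker to a custom TaskExecutor.
    // watcher, conf, and serverName are assumed placeholders.
    SplitLogWorker slw = new SplitLogWorker(watcher, conf, serverName,
        new SplitLogWorker.TaskExecutor() {
          @Override
          public Status exec(String filename, CancelableProgressable p) {
            // p.progress() heartbeats the task znode; a false return means
            // ownership was lost and splitting should stop.
            if (!p.progress()) {
              return Status.PREEMPTED;
            }
            // ... split the log named by filename ...
            return Status.DONE;
          }
        });
    slw.start();  // spawns the "SplitLogWorker-<serverName>" thread
    // later, on regionserver shutdown:
    slw.stop();   // sets exitWorker and interrupts any in-flight task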