fix bug with worker disappearance

This commit is contained in:
Fangjin Yang 2013-03-01 19:51:33 -08:00
parent 7f6747194d
commit bb83ddb564
1 changed file with 10 additions and 6 deletions

View File

@@ -478,7 +478,7 @@ public class RemoteTaskRunner implements TaskRunner
log.info("Task %s just disappeared!", taskId); log.info("Task %s just disappeared!", taskId);
retryTask(runningTasks.get(taskId), worker.getHost()); retryTask(runningTasks.get(taskId), worker.getHost());
} else { } else {
log.info("Lost a task I didn't know about: %s", taskId); log.info("A task disappeared I didn't know about: %s", taskId);
} }
} }
} }
@@ -513,19 +513,23 @@ public class RemoteTaskRunner implements TaskRunner
ZkWorker zkWorker = zkWorkers.get(worker.getHost()); ZkWorker zkWorker = zkWorkers.get(worker.getHost());
if (zkWorker != null) { if (zkWorker != null) {
try { try {
Set<String> tasksPending = Sets.newHashSet( Set<String> tasksToRetry = Sets.newHashSet(
cf.getChildren() cf.getChildren()
.forPath(JOINER.join(config.getTaskPath(), worker.getHost())) .forPath(JOINER.join(config.getTaskPath(), worker.getHost()))
); );
log.info("%s had %d tasks pending", worker.getHost(), tasksPending.size()); tasksToRetry.addAll(
cf.getChildren()
.forPath(JOINER.join(config.getStatusPath(), worker.getHost()))
);
log.info("%s has %d tasks to retry", worker.getHost(), tasksToRetry.size());
for (String taskId : tasksPending) { for (String taskId : tasksToRetry) {
TaskRunnerWorkItem taskRunnerWorkItem = pendingTasks.get(taskId); TaskRunnerWorkItem taskRunnerWorkItem = runningTasks.get(taskId);
if (taskRunnerWorkItem != null) { if (taskRunnerWorkItem != null) {
cf.delete().guaranteed().forPath(JOINER.join(config.getTaskPath(), worker.getHost(), taskId)); cf.delete().guaranteed().forPath(JOINER.join(config.getTaskPath(), worker.getHost(), taskId));
retryTask(taskRunnerWorkItem, worker.getHost()); retryTask(taskRunnerWorkItem, worker.getHost());
} else { } else {
log.warn("RemoteTaskRunner has no knowledge of pending task %s", taskId); log.warn("RemoteTaskRunner has no knowledge of task %s", taskId);
} }
} }