mirror of https://github.com/apache/druid.git
Fix NPE in RemoteTaskRunner event handler that causes JVM shutdown (#9610)
* Fix NPE in RemoteTaskRunner event handler that causes JVM shutdown * address comments * fix compile * fix checkstyle * fix lgtm * fix merge * fix test * fix tests * change scope * address comments * address comments
This commit is contained in:
parent
6e50d29b4e
commit
b95a1b9878
|
@ -131,6 +131,18 @@
|
||||||
<artifactId>JUnitParams</artifactId>
|
<artifactId>JUnitParams</artifactId>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-core-asl</artifactId>
|
||||||
|
<version>${codehaus.jackson.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.codehaus.jackson</groupId>
|
||||||
|
<artifactId>jackson-mapper-asl</artifactId>
|
||||||
|
<version>${codehaus.jackson.version}</version>
|
||||||
|
<scope>test</scope>
|
||||||
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
|
|
|
@ -45,6 +45,7 @@ import com.google.common.util.concurrent.SettableFuture;
|
||||||
import org.apache.commons.lang.mutable.MutableInt;
|
import org.apache.commons.lang.mutable.MutableInt;
|
||||||
import org.apache.curator.framework.CuratorFramework;
|
import org.apache.curator.framework.CuratorFramework;
|
||||||
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
|
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
|
||||||
|
import org.apache.curator.framework.recipes.cache.PathChildrenCacheListener;
|
||||||
import org.apache.curator.utils.ZKPaths;
|
import org.apache.curator.utils.ZKPaths;
|
||||||
import org.apache.druid.concurrent.LifecycleLock;
|
import org.apache.druid.concurrent.LifecycleLock;
|
||||||
import org.apache.druid.curator.CuratorUtils;
|
import org.apache.druid.curator.CuratorUtils;
|
||||||
|
@ -969,8 +970,19 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
||||||
);
|
);
|
||||||
|
|
||||||
// Add status listener to the watcher for status changes
|
// Add status listener to the watcher for status changes
|
||||||
zkWorker.addListener(
|
zkWorker.addListener(getStatusListener(worker, zkWorker, retVal));
|
||||||
(client, event) -> {
|
zkWorker.start();
|
||||||
|
return retVal;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
PathChildrenCacheListener getStatusListener(final Worker worker, final ZkWorker zkWorker, final SettableFuture<ZkWorker> retVal)
|
||||||
|
{
|
||||||
|
return (client, event) -> {
|
||||||
final String taskId;
|
final String taskId;
|
||||||
final RemoteTaskRunnerWorkItem taskRunnerWorkItem;
|
final RemoteTaskRunnerWorkItem taskRunnerWorkItem;
|
||||||
synchronized (statusLock) {
|
synchronized (statusLock) {
|
||||||
|
@ -978,6 +990,14 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
||||||
switch (event.getType()) {
|
switch (event.getType()) {
|
||||||
case CHILD_ADDED:
|
case CHILD_ADDED:
|
||||||
case CHILD_UPDATED:
|
case CHILD_UPDATED:
|
||||||
|
if (event.getData() == null) {
|
||||||
|
log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString());
|
||||||
|
log.makeAlert("Unexpected null for event.getData() in handle new worker status")
|
||||||
|
.addData("worker", zkWorker.getWorker().getHost())
|
||||||
|
.addData("eventType", event.getType().toString())
|
||||||
|
.emit();
|
||||||
|
return;
|
||||||
|
}
|
||||||
taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
|
taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
|
||||||
final TaskAnnouncement announcement = jsonMapper.readValue(
|
final TaskAnnouncement announcement = jsonMapper.readValue(
|
||||||
event.getData().getData(), TaskAnnouncement.class
|
event.getData().getData(), TaskAnnouncement.class
|
||||||
|
@ -1032,6 +1052,14 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case CHILD_REMOVED:
|
case CHILD_REMOVED:
|
||||||
|
if (event.getData() == null) {
|
||||||
|
log.error("Unexpected null for event.getData() in handle new worker status for [%s]", event.getType().toString());
|
||||||
|
log.makeAlert("Unexpected null for event.getData() in handle new worker status")
|
||||||
|
.addData("worker", zkWorker.getWorker().getHost())
|
||||||
|
.addData("eventType", event.getType().toString())
|
||||||
|
.emit();
|
||||||
|
return;
|
||||||
|
}
|
||||||
taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
|
taskId = ZKPaths.getNodeFromPath(event.getData().getPath());
|
||||||
taskRunnerWorkItem = runningTasks.remove(taskId);
|
taskRunnerWorkItem = runningTasks.remove(taskId);
|
||||||
if (taskRunnerWorkItem != null) {
|
if (taskRunnerWorkItem != null) {
|
||||||
|
@ -1047,7 +1075,7 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
||||||
retVal.set(zkWorker);
|
retVal.set(zkWorker);
|
||||||
} else {
|
} else {
|
||||||
final String message = StringUtils.format(
|
final String message = StringUtils.format(
|
||||||
"WTF?! Tried to add already-existing worker[%s]",
|
"This should not happen...tried to add already-existing worker[%s]",
|
||||||
worker.getHost()
|
worker.getHost()
|
||||||
);
|
);
|
||||||
log.makeAlert(message)
|
log.makeAlert(message)
|
||||||
|
@ -1065,20 +1093,18 @@ public class RemoteTaskRunner implements WorkerTaskRunner, TaskLogStreamer
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
|
String znode = null;
|
||||||
|
if (event.getData() != null) {
|
||||||
|
znode = event.getData().getPath();
|
||||||
|
}
|
||||||
log.makeAlert(e, "Failed to handle new worker status")
|
log.makeAlert(e, "Failed to handle new worker status")
|
||||||
.addData("worker", zkWorker.getWorker().getHost())
|
.addData("worker", zkWorker.getWorker().getHost())
|
||||||
.addData("znode", event.getData().getPath())
|
.addData("znode", znode)
|
||||||
|
.addData("eventType", event.getType().toString())
|
||||||
.emit();
|
.emit();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
);
|
|
||||||
zkWorker.start();
|
|
||||||
return retVal;
|
|
||||||
}
|
|
||||||
catch (Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -29,6 +29,7 @@ import com.google.common.collect.Lists;
|
||||||
import com.google.common.collect.Sets;
|
import com.google.common.collect.Sets;
|
||||||
import com.google.common.util.concurrent.ListenableFuture;
|
import com.google.common.util.concurrent.ListenableFuture;
|
||||||
import org.apache.curator.framework.CuratorFramework;
|
import org.apache.curator.framework.CuratorFramework;
|
||||||
|
import org.apache.curator.framework.recipes.cache.PathChildrenCache;
|
||||||
import org.apache.druid.indexer.TaskState;
|
import org.apache.druid.indexer.TaskState;
|
||||||
import org.apache.druid.indexer.TaskStatus;
|
import org.apache.druid.indexer.TaskStatus;
|
||||||
import org.apache.druid.indexing.common.IndexingServiceCondition;
|
import org.apache.druid.indexing.common.IndexingServiceCondition;
|
||||||
|
@ -44,6 +45,7 @@ import org.apache.druid.java.util.common.StringUtils;
|
||||||
import org.apache.druid.java.util.emitter.EmittingLogger;
|
import org.apache.druid.java.util.emitter.EmittingLogger;
|
||||||
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
|
import org.apache.druid.java.util.emitter.service.ServiceEmitter;
|
||||||
import org.apache.druid.testing.DeadlockDetectingTimeout;
|
import org.apache.druid.testing.DeadlockDetectingTimeout;
|
||||||
|
import org.easymock.Capture;
|
||||||
import org.easymock.EasyMock;
|
import org.easymock.EasyMock;
|
||||||
import org.joda.time.Period;
|
import org.joda.time.Period;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
@ -55,6 +57,7 @@ import org.junit.rules.TestRule;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.concurrent.Future;
|
import java.util.concurrent.Future;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
@ -944,4 +947,37 @@ public class RemoteTaskRunnerTest
|
||||||
Assert.assertTrue(taskFuture2.get().isSuccess());
|
Assert.assertTrue(taskFuture2.get().isSuccess());
|
||||||
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
|
Assert.assertEquals(0, remoteTaskRunner.getBlackListedWorkers().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStatusListenerEventDataNullShouldNotThrowException() throws Exception
|
||||||
|
{
|
||||||
|
// Set up mock emitter to verify log alert when exception is thrown inside the status listener
|
||||||
|
Worker worker = EasyMock.createMock(Worker.class);
|
||||||
|
EasyMock.expect(worker.getHost()).andReturn("host").atLeastOnce();
|
||||||
|
EasyMock.replay(worker);
|
||||||
|
ServiceEmitter emitter = EasyMock.createMock(ServiceEmitter.class);
|
||||||
|
Capture<EmittingLogger.EmittingAlertBuilder> capturedArgument = Capture.newInstance();
|
||||||
|
emitter.emit(EasyMock.capture(capturedArgument));
|
||||||
|
EasyMock.expectLastCall().atLeastOnce();
|
||||||
|
EmittingLogger.registerEmitter(emitter);
|
||||||
|
EasyMock.replay(emitter);
|
||||||
|
|
||||||
|
PathChildrenCache cache = new PathChildrenCache(cf, "/test", true);
|
||||||
|
testStartWithNoWorker();
|
||||||
|
cache.getListenable().addListener(remoteTaskRunner.getStatusListener(worker, new ZkWorker(worker, cache, jsonMapper), null));
|
||||||
|
cache.start(PathChildrenCache.StartMode.POST_INITIALIZED_EVENT);
|
||||||
|
|
||||||
|
// Status listener will receive an event with null data
|
||||||
|
Assert.assertTrue(
|
||||||
|
TestUtils.conditionValid(() -> cache.getCurrentData().size() == 1)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Verify that the log emitter was called
|
||||||
|
EasyMock.verify(worker);
|
||||||
|
EasyMock.verify(emitter);
|
||||||
|
Map<String, Object> alertDataMap = capturedArgument.getValue().build(null).getDataMap();
|
||||||
|
Assert.assertTrue(alertDataMap.containsKey("znode"));
|
||||||
|
Assert.assertNull(alertDataMap.get("znode"));
|
||||||
|
// Status listener should complete successfully without throwing an exception
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1315,7 +1315,7 @@ name: Apache Curator
|
||||||
license_category: binary
|
license_category: binary
|
||||||
module: java-core
|
module: java-core
|
||||||
license_name: Apache License version 2.0
|
license_name: Apache License version 2.0
|
||||||
version: 4.1.0
|
version: 4.3.0
|
||||||
libraries:
|
libraries:
|
||||||
- org.apache.curator: curator-client
|
- org.apache.curator: curator-client
|
||||||
- org.apache.curator: curator-framework
|
- org.apache.curator: curator-framework
|
||||||
|
|
2
pom.xml
2
pom.xml
|
@ -76,7 +76,7 @@
|
||||||
<java.version>8</java.version>
|
<java.version>8</java.version>
|
||||||
<project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
|
<project.build.resourceEncoding>UTF-8</project.build.resourceEncoding>
|
||||||
<aether.version>0.9.0.M2</aether.version>
|
<aether.version>0.9.0.M2</aether.version>
|
||||||
<apache.curator.version>4.1.0</apache.curator.version>
|
<apache.curator.version>4.3.0</apache.curator.version>
|
||||||
<apache.curator.test.version>2.12.0</apache.curator.test.version>
|
<apache.curator.test.version>2.12.0</apache.curator.test.version>
|
||||||
<apache.kafka.version>2.2.2</apache.kafka.version>
|
<apache.kafka.version>2.2.2</apache.kafka.version>
|
||||||
<apache.ranger.version>2.0.0</apache.ranger.version>
|
<apache.ranger.version>2.0.0</apache.ranger.version>
|
||||||
|
|
|
@ -482,6 +482,18 @@ public class DiscoveryModule implements Module
|
||||||
{
|
{
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServiceProviderBuilder<T> executorService(ExecutorService executorService)
|
||||||
|
{
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServiceProviderBuilder<T> executorService(CloseableExecutorService closeableExecutorService)
|
||||||
|
{
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class NoopServiceProvider<T> implements ServiceProvider<T>
|
private static class NoopServiceProvider<T> implements ServiceProvider<T>
|
||||||
|
|
Loading…
Reference in New Issue