mirror of https://github.com/apache/jclouds.git
Issue 989: handles transient node-not-found in nodeRunning
Sometimes on aws-ec2, the client returns null (i.e. node not found). This fix will cause us retry with the correct node-id, rather than "forgetting" the original nodeId and retrying with null each time.
This commit is contained in:
parent
1c6e2c64d3
commit
65b813b1be
|
@ -57,7 +57,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
||||||
@Singleton
|
@Singleton
|
||||||
@Named(TIMEOUT_NODE_RUNNING)
|
@Named(TIMEOUT_NODE_RUNNING)
|
||||||
protected Predicate<AtomicReference<NodeMetadata>> nodeRunning(AtomicNodeRunning statusRunning, Timeouts timeouts) {
|
protected Predicate<AtomicReference<NodeMetadata>> nodeRunning(AtomicNodeRunning statusRunning, Timeouts timeouts) {
|
||||||
return timeouts.nodeRunning == 0 ? statusRunning : new RetryablePredicate<AtomicReference<NodeMetadata>>(statusRunning,
|
return timeouts.nodeRunning == 0 ? statusRunning : new RetryablePredicateGuardingNull<NodeMetadata>(statusRunning,
|
||||||
timeouts.nodeRunning);
|
timeouts.nodeRunning);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
||||||
@Singleton
|
@Singleton
|
||||||
@Named(TIMEOUT_NODE_SUSPENDED)
|
@Named(TIMEOUT_NODE_SUSPENDED)
|
||||||
protected Predicate<AtomicReference<NodeMetadata>> serverSuspended(AtomicNodeSuspended statusSuspended, Timeouts timeouts) {
|
protected Predicate<AtomicReference<NodeMetadata>> serverSuspended(AtomicNodeSuspended statusSuspended, Timeouts timeouts) {
|
||||||
return timeouts.nodeSuspended == 0 ? statusSuspended : new RetryablePredicate<AtomicReference<NodeMetadata>>(statusSuspended,
|
return timeouts.nodeSuspended == 0 ? statusSuspended : new RetryablePredicateGuardingNull<NodeMetadata>(statusSuspended,
|
||||||
timeouts.nodeSuspended);
|
timeouts.nodeSuspended);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
||||||
@Singleton
|
@Singleton
|
||||||
@Named(TIMEOUT_IMAGE_AVAILABLE)
|
@Named(TIMEOUT_IMAGE_AVAILABLE)
|
||||||
protected Predicate<AtomicReference<Image>> imageAvailable(AtomicImageAvailable statusAvailable, Timeouts timeouts) {
|
protected Predicate<AtomicReference<Image>> imageAvailable(AtomicImageAvailable statusAvailable, Timeouts timeouts) {
|
||||||
return timeouts.imageAvailable == 0 ? statusAvailable : new RetryablePredicate<AtomicReference<Image>>(statusAvailable,
|
return timeouts.imageAvailable == 0 ? statusAvailable : new RetryablePredicateGuardingNull<Image>(statusAvailable,
|
||||||
timeouts.imageAvailable);
|
timeouts.imageAvailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,4 +106,46 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
||||||
protected void configure() {
|
protected void configure() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Avoids "losing" the ComputeMetadata if client returns null temporarily (issue #989).
|
||||||
|
* Ensures we always pass a non-null to the wrapped predicate, but will propagate the null to
|
||||||
|
* the caller qt the end.
|
||||||
|
*
|
||||||
|
* @author Aled Sage
|
||||||
|
*/
|
||||||
|
private static class RetryablePredicateGuardingNull<T> implements Predicate<AtomicReference<T>> {
|
||||||
|
private class AtomicRefAndOrig {
|
||||||
|
private final T orig;
|
||||||
|
private final AtomicReference<T> ref;
|
||||||
|
|
||||||
|
AtomicRefAndOrig(T orig, AtomicReference<T> ref) {
|
||||||
|
this.orig = orig;
|
||||||
|
this.ref = ref;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final RetryablePredicate<AtomicRefAndOrig> retryablePredicate;
|
||||||
|
|
||||||
|
public RetryablePredicateGuardingNull(final Predicate<AtomicReference<T>> predicate, long maxWait) {
|
||||||
|
Predicate<AtomicRefAndOrig> nonNullThingPredicate = new Predicate<AtomicRefAndOrig>() {
|
||||||
|
@Override
|
||||||
|
public boolean apply(AtomicRefAndOrig input) {
|
||||||
|
AtomicReference<T> ref = (input.ref.get() != null) ? input.ref : new AtomicReference<T>(input.orig);
|
||||||
|
try {
|
||||||
|
return predicate.apply(ref);
|
||||||
|
} finally {
|
||||||
|
input.ref.set(ref.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
retryablePredicate = new RetryablePredicate<AtomicRefAndOrig>(nonNullThingPredicate, maxWait);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean apply(AtomicReference<T> input) {
|
||||||
|
AtomicRefAndOrig refAndOrig = new AtomicRefAndOrig(input.get(), input);
|
||||||
|
return retryablePredicate.apply(refAndOrig);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -31,10 +31,13 @@ import java.util.Set;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
|
import org.easymock.EasyMock;
|
||||||
|
import org.easymock.IAnswer;
|
||||||
|
import org.jclouds.compute.config.ComputeServiceTimeoutsModule;
|
||||||
import org.jclouds.compute.config.CustomizationResponse;
|
import org.jclouds.compute.config.CustomizationResponse;
|
||||||
import org.jclouds.compute.domain.NodeMetadata;
|
import org.jclouds.compute.domain.NodeMetadata;
|
||||||
import org.jclouds.compute.domain.NodeMetadataBuilder;
|
|
||||||
import org.jclouds.compute.domain.NodeMetadata.Status;
|
import org.jclouds.compute.domain.NodeMetadata.Status;
|
||||||
|
import org.jclouds.compute.domain.NodeMetadataBuilder;
|
||||||
import org.jclouds.compute.functions.TemplateOptionsToStatement;
|
import org.jclouds.compute.functions.TemplateOptionsToStatement;
|
||||||
import org.jclouds.compute.options.TemplateOptions;
|
import org.jclouds.compute.options.TemplateOptions;
|
||||||
import org.jclouds.compute.predicates.AtomicNodeRunning;
|
import org.jclouds.compute.predicates.AtomicNodeRunning;
|
||||||
|
@ -45,7 +48,9 @@ import org.testng.Assert;
|
||||||
import org.testng.annotations.Test;
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
import com.google.common.base.Function;
|
import com.google.common.base.Function;
|
||||||
|
import com.google.common.base.Predicate;
|
||||||
import com.google.common.collect.ImmutableSet;
|
import com.google.common.collect.ImmutableSet;
|
||||||
|
import com.google.common.collect.Iterables;
|
||||||
import com.google.common.collect.LinkedHashMultimap;
|
import com.google.common.collect.LinkedHashMultimap;
|
||||||
import com.google.common.collect.Maps;
|
import com.google.common.collect.Maps;
|
||||||
import com.google.common.collect.Multimap;
|
import com.google.common.collect.Multimap;
|
||||||
|
@ -190,4 +195,58 @@ public class CustomizeNodeAndAddToGoodMapOrPutExceptionIntoBadMapTest {
|
||||||
// verify mocks
|
// verify mocks
|
||||||
verify(initScriptRunnerFactory, openSocketFinder);
|
verify(initScriptRunnerFactory, openSocketFinder);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRecoversWhenTemporarilyNodeNotFound() {
|
||||||
|
String nodeId = "myid";
|
||||||
|
|
||||||
|
InitializeRunScriptOnNodeOrPlaceInBadMap.Factory initScriptRunnerFactory = createMock(InitializeRunScriptOnNodeOrPlaceInBadMap.Factory.class);
|
||||||
|
OpenSocketFinder openSocketFinder = createMock(OpenSocketFinder.class);
|
||||||
|
Timeouts timeouts = new Timeouts();
|
||||||
|
Function<TemplateOptions, Statement> templateOptionsToStatement = new TemplateOptionsToStatement();
|
||||||
|
Set<NodeMetadata> goodNodes = Sets.newLinkedHashSet();
|
||||||
|
Map<NodeMetadata, Exception> badNodes = Maps.newLinkedHashMap();
|
||||||
|
Multimap<NodeMetadata, CustomizationResponse> customizationResponses = LinkedHashMultimap.create();
|
||||||
|
TemplateOptions options = new TemplateOptions();
|
||||||
|
|
||||||
|
final NodeMetadata pendingNode = new NodeMetadataBuilder().ids(nodeId).status(Status.PENDING).build();
|
||||||
|
final NodeMetadata runningNode = new NodeMetadataBuilder().ids(nodeId).status(Status.RUNNING).build();
|
||||||
|
GetNodeMetadataStrategy nodeClient = createMock(GetNodeMetadataStrategy.class);
|
||||||
|
AtomicNodeRunning nodeRunning = new AtomicNodeRunning(nodeClient);
|
||||||
|
Predicate<AtomicReference<NodeMetadata>> retryableNodeRunning = new ComputeServiceTimeoutsModule() {
|
||||||
|
public Predicate<AtomicReference<NodeMetadata>> nodeRunning(AtomicNodeRunning statusRunning, Timeouts timeouts) {
|
||||||
|
return super.nodeRunning(statusRunning, timeouts);
|
||||||
|
}
|
||||||
|
}.nodeRunning(nodeRunning, timeouts);
|
||||||
|
AtomicReference<NodeMetadata> atomicNode = new AtomicReference<NodeMetadata>(pendingNode);
|
||||||
|
|
||||||
|
// Simulate transient error: first call returns null; subsequent calls return the running node
|
||||||
|
EasyMock.expect(nodeClient.getNode(nodeId)).andAnswer(new IAnswer<NodeMetadata>() {
|
||||||
|
private int count = 0;
|
||||||
|
@Override
|
||||||
|
public NodeMetadata answer() throws Throwable {
|
||||||
|
count++;
|
||||||
|
if (count <= 1) {
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
return runningNode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}).anyTimes();
|
||||||
|
|
||||||
|
// replay mocks
|
||||||
|
replay(initScriptRunnerFactory, openSocketFinder, nodeClient);
|
||||||
|
|
||||||
|
// run
|
||||||
|
new CustomizeNodeAndAddToGoodMapOrPutExceptionIntoBadMap(retryableNodeRunning, openSocketFinder, timeouts,
|
||||||
|
templateOptionsToStatement, initScriptRunnerFactory, options, atomicNode, goodNodes, badNodes,
|
||||||
|
customizationResponses).apply(atomicNode);
|
||||||
|
|
||||||
|
if (badNodes.size() > 0) Iterables.get(badNodes.values(), 0).printStackTrace();
|
||||||
|
assertEquals(badNodes.size(), 0);
|
||||||
|
assertEquals(goodNodes, ImmutableSet.of(runningNode));
|
||||||
|
assertEquals(customizationResponses.size(), 0);
|
||||||
|
|
||||||
|
// verify mocks
|
||||||
|
verify(initScriptRunnerFactory, openSocketFinder, nodeClient);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue