mirror of https://github.com/apache/jclouds.git
Issue 989: handles transient node-not-found in nodeRunning
Sometimes on aws-ec2, the client returns null (i.e. node not found). This fix will cause us retry with the correct node-id, rather than "forgetting" the original nodeId and retrying with null each time.
This commit is contained in:
parent
1c6e2c64d3
commit
65b813b1be
|
@ -57,7 +57,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
|||
@Singleton
|
||||
@Named(TIMEOUT_NODE_RUNNING)
|
||||
protected Predicate<AtomicReference<NodeMetadata>> nodeRunning(AtomicNodeRunning statusRunning, Timeouts timeouts) {
|
||||
return timeouts.nodeRunning == 0 ? statusRunning : new RetryablePredicate<AtomicReference<NodeMetadata>>(statusRunning,
|
||||
return timeouts.nodeRunning == 0 ? statusRunning : new RetryablePredicateGuardingNull<NodeMetadata>(statusRunning,
|
||||
timeouts.nodeRunning);
|
||||
}
|
||||
|
||||
|
@ -74,7 +74,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
|||
@Singleton
|
||||
@Named(TIMEOUT_NODE_SUSPENDED)
|
||||
protected Predicate<AtomicReference<NodeMetadata>> serverSuspended(AtomicNodeSuspended statusSuspended, Timeouts timeouts) {
|
||||
return timeouts.nodeSuspended == 0 ? statusSuspended : new RetryablePredicate<AtomicReference<NodeMetadata>>(statusSuspended,
|
||||
return timeouts.nodeSuspended == 0 ? statusSuspended : new RetryablePredicateGuardingNull<NodeMetadata>(statusSuspended,
|
||||
timeouts.nodeSuspended);
|
||||
}
|
||||
|
||||
|
@ -90,7 +90,7 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
|||
@Singleton
|
||||
@Named(TIMEOUT_IMAGE_AVAILABLE)
|
||||
protected Predicate<AtomicReference<Image>> imageAvailable(AtomicImageAvailable statusAvailable, Timeouts timeouts) {
|
||||
return timeouts.imageAvailable == 0 ? statusAvailable : new RetryablePredicate<AtomicReference<Image>>(statusAvailable,
|
||||
return timeouts.imageAvailable == 0 ? statusAvailable : new RetryablePredicateGuardingNull<Image>(statusAvailable,
|
||||
timeouts.imageAvailable);
|
||||
}
|
||||
|
||||
|
@ -106,4 +106,46 @@ public class ComputeServiceTimeoutsModule extends AbstractModule {
|
|||
protected void configure() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Avoids "losing" the ComputeMetadata if client returns null temporarily (issue #989).
|
||||
* Ensures we always pass a non-null to the wrapped predicate, but will propagate the null to
|
||||
* the caller qt the end.
|
||||
*
|
||||
* @author Aled Sage
|
||||
*/
|
||||
private static class RetryablePredicateGuardingNull<T> implements Predicate<AtomicReference<T>> {
|
||||
private class AtomicRefAndOrig {
|
||||
private final T orig;
|
||||
private final AtomicReference<T> ref;
|
||||
|
||||
AtomicRefAndOrig(T orig, AtomicReference<T> ref) {
|
||||
this.orig = orig;
|
||||
this.ref = ref;
|
||||
}
|
||||
}
|
||||
|
||||
private final RetryablePredicate<AtomicRefAndOrig> retryablePredicate;
|
||||
|
||||
public RetryablePredicateGuardingNull(final Predicate<AtomicReference<T>> predicate, long maxWait) {
|
||||
Predicate<AtomicRefAndOrig> nonNullThingPredicate = new Predicate<AtomicRefAndOrig>() {
|
||||
@Override
|
||||
public boolean apply(AtomicRefAndOrig input) {
|
||||
AtomicReference<T> ref = (input.ref.get() != null) ? input.ref : new AtomicReference<T>(input.orig);
|
||||
try {
|
||||
return predicate.apply(ref);
|
||||
} finally {
|
||||
input.ref.set(ref.get());
|
||||
}
|
||||
}
|
||||
};
|
||||
retryablePredicate = new RetryablePredicate<AtomicRefAndOrig>(nonNullThingPredicate, maxWait);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean apply(AtomicReference<T> input) {
|
||||
AtomicRefAndOrig refAndOrig = new AtomicRefAndOrig(input.get(), input);
|
||||
return retryablePredicate.apply(refAndOrig);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -31,10 +31,13 @@ import java.util.Set;
|
|||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
|
||||
import org.easymock.EasyMock;
|
||||
import org.easymock.IAnswer;
|
||||
import org.jclouds.compute.config.ComputeServiceTimeoutsModule;
|
||||
import org.jclouds.compute.config.CustomizationResponse;
|
||||
import org.jclouds.compute.domain.NodeMetadata;
|
||||
import org.jclouds.compute.domain.NodeMetadataBuilder;
|
||||
import org.jclouds.compute.domain.NodeMetadata.Status;
|
||||
import org.jclouds.compute.domain.NodeMetadataBuilder;
|
||||
import org.jclouds.compute.functions.TemplateOptionsToStatement;
|
||||
import org.jclouds.compute.options.TemplateOptions;
|
||||
import org.jclouds.compute.predicates.AtomicNodeRunning;
|
||||
|
@ -45,7 +48,9 @@ import org.testng.Assert;
|
|||
import org.testng.annotations.Test;
|
||||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.LinkedHashMultimap;
|
||||
import com.google.common.collect.Maps;
|
||||
import com.google.common.collect.Multimap;
|
||||
|
@ -190,4 +195,58 @@ public class CustomizeNodeAndAddToGoodMapOrPutExceptionIntoBadMapTest {
|
|||
// verify mocks
|
||||
verify(initScriptRunnerFactory, openSocketFinder);
|
||||
}
|
||||
|
||||
public void testRecoversWhenTemporarilyNodeNotFound() {
|
||||
String nodeId = "myid";
|
||||
|
||||
InitializeRunScriptOnNodeOrPlaceInBadMap.Factory initScriptRunnerFactory = createMock(InitializeRunScriptOnNodeOrPlaceInBadMap.Factory.class);
|
||||
OpenSocketFinder openSocketFinder = createMock(OpenSocketFinder.class);
|
||||
Timeouts timeouts = new Timeouts();
|
||||
Function<TemplateOptions, Statement> templateOptionsToStatement = new TemplateOptionsToStatement();
|
||||
Set<NodeMetadata> goodNodes = Sets.newLinkedHashSet();
|
||||
Map<NodeMetadata, Exception> badNodes = Maps.newLinkedHashMap();
|
||||
Multimap<NodeMetadata, CustomizationResponse> customizationResponses = LinkedHashMultimap.create();
|
||||
TemplateOptions options = new TemplateOptions();
|
||||
|
||||
final NodeMetadata pendingNode = new NodeMetadataBuilder().ids(nodeId).status(Status.PENDING).build();
|
||||
final NodeMetadata runningNode = new NodeMetadataBuilder().ids(nodeId).status(Status.RUNNING).build();
|
||||
GetNodeMetadataStrategy nodeClient = createMock(GetNodeMetadataStrategy.class);
|
||||
AtomicNodeRunning nodeRunning = new AtomicNodeRunning(nodeClient);
|
||||
Predicate<AtomicReference<NodeMetadata>> retryableNodeRunning = new ComputeServiceTimeoutsModule() {
|
||||
public Predicate<AtomicReference<NodeMetadata>> nodeRunning(AtomicNodeRunning statusRunning, Timeouts timeouts) {
|
||||
return super.nodeRunning(statusRunning, timeouts);
|
||||
}
|
||||
}.nodeRunning(nodeRunning, timeouts);
|
||||
AtomicReference<NodeMetadata> atomicNode = new AtomicReference<NodeMetadata>(pendingNode);
|
||||
|
||||
// Simulate transient error: first call returns null; subsequent calls return the running node
|
||||
EasyMock.expect(nodeClient.getNode(nodeId)).andAnswer(new IAnswer<NodeMetadata>() {
|
||||
private int count = 0;
|
||||
@Override
|
||||
public NodeMetadata answer() throws Throwable {
|
||||
count++;
|
||||
if (count <= 1) {
|
||||
return null;
|
||||
} else {
|
||||
return runningNode;
|
||||
}
|
||||
}
|
||||
}).anyTimes();
|
||||
|
||||
// replay mocks
|
||||
replay(initScriptRunnerFactory, openSocketFinder, nodeClient);
|
||||
|
||||
// run
|
||||
new CustomizeNodeAndAddToGoodMapOrPutExceptionIntoBadMap(retryableNodeRunning, openSocketFinder, timeouts,
|
||||
templateOptionsToStatement, initScriptRunnerFactory, options, atomicNode, goodNodes, badNodes,
|
||||
customizationResponses).apply(atomicNode);
|
||||
|
||||
if (badNodes.size() > 0) Iterables.get(badNodes.values(), 0).printStackTrace();
|
||||
assertEquals(badNodes.size(), 0);
|
||||
assertEquals(goodNodes, ImmutableSet.of(runningNode));
|
||||
assertEquals(customizationResponses.size(), 0);
|
||||
|
||||
// verify mocks
|
||||
verify(initScriptRunnerFactory, openSocketFinder, nodeClient);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue