Issue 445: retry EC2ComputeService.cleanUpIncidentalResources due to failure if done too soon after last VM terminates!

This commit is contained in:
Aled Sage 2012-03-13 09:25:28 +00:00
parent 538c2129a8
commit 1ea877354b
5 changed files with 314 additions and 11 deletions

View File

@ -64,6 +64,7 @@ import org.jclouds.ec2.compute.domain.RegionNameAndIngressRules;
import org.jclouds.ec2.compute.options.EC2TemplateOptions;
import org.jclouds.ec2.domain.KeyPair;
import org.jclouds.ec2.domain.RunningInstance;
import org.jclouds.predicates.Retryables;
import org.jclouds.scriptbuilder.functions.InitAdminAccess;
import com.google.common.annotations.VisibleForTesting;
@ -195,7 +196,7 @@ public class EC2ComputeService extends BaseComputeService {
}
}
protected void cleanUpIncidentalResources(String region, String group){
protected void cleanUpIncidentalResources(final String region, final String group){
// For issue #445, tries to delete security groups first: ec2 throws exception if in use, but
// deleting a key pair does not.
// This is "belt-and-braces" because deleteKeyPair also does extractIdsFromInstances & usingKeyPairAndNotDead
@ -203,15 +204,27 @@ public class EC2ComputeService extends BaseComputeService {
// There is (probably?) still a race if someone is creating instances at the same time as deleting them:
// we may delete the key-pair just when the node-being-created was about to rely on the incidental
// resources existing.
// Also in #445, in aws-ec2 the deleteSecurityGroup sometimes fails after terminating the final VM using a
// given security group, if called very soon after the VM's state reports terminated. Emprically, it seems that
// waiting a small time (e.g. enabling logging or debugging!) then the tests pass. We therefore retry.
final int maxAttempts = 3;
Retryables.retryNumTimes(new Predicate<Void>() {
@Override
public boolean apply(Void input) {
try {
logger.debug(">> deleting incidentalResources(%s @ %s)", region, group);
deleteSecurityGroup(region, group);
deleteKeyPair(region, group); // not executed if securityGroup was in use
logger.debug("<< deleted incidentalResources(%s @ %s)", region, group);
return true;
} catch (IllegalStateException e) {
logger.debug("<< inUse incidentalResources(%s @ %s)", region, group);
return false;
}
}
}, (Void)null, maxAttempts);
}
/**
* returns template options, except of type {@link EC2TemplateOptions}.

View File

@ -0,0 +1,112 @@
/**
* Licensed to jclouds, Inc. (jclouds) under one or more
* contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. jclouds licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.jclouds.predicates;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.jclouds.util.Throwables2.getFirstThrowableOfType;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.annotation.Resource;
import org.jclouds.logging.Logger;
import com.google.common.base.Predicate;
/**
*
* Retries a condition until it is met or the max number of retries have occurred.
* maxAttempts parameter is required.
* Initial retry period and retry maxPeriod are optionally configurable,
* defaulting to 50ms and 1000ms respectively,
* with the retrier increasing the interval by a factor of 1.5 each time within these constraints.
*
* @author Aled Sage
*/
public class RetryableNumTimesPredicate<T> implements Predicate<T> {
private final int maxAttempts;
private final long period;
private final long maxPeriod;
private final Predicate<T> predicate;
@Resource
protected Logger logger = Logger.NULL;
public RetryableNumTimesPredicate(Predicate<T> predicate, int maxAttempts, long period, long maxPeriod, TimeUnit unit) {
this.predicate = checkNotNull(predicate);
this.maxAttempts = maxAttempts;
this.period = unit.toMillis(period);
this.maxPeriod = unit.toMillis(maxPeriod);
checkArgument(maxAttempts >= 0, "maxAttempts must be greater than zero, but was "+maxAttempts);
checkArgument(period >= 0, "period must be greater than zero, but was "+period);
checkArgument(maxPeriod >= 0, "maxPeriod must be greater than zero, but was "+maxPeriod);
checkArgument(maxPeriod >= period, "maxPeriod must be greater than or equal to period, but was "+maxPeriod+" < "+period);
}
public RetryableNumTimesPredicate(Predicate<T> predicate, int maxAttempts, long period, TimeUnit unit) {
this(predicate, maxAttempts, period, period*10, unit);
}
public RetryableNumTimesPredicate(Predicate<T> predicate, int maxAttempts) {
this(predicate, maxAttempts, 50l, 1000l, TimeUnit.MILLISECONDS);
}
@Override
public boolean apply(T input) {
try {
for (int i = 1; i <= maxAttempts; Thread.sleep(nextMaxInterval(i++))) {
if (predicate.apply(input)) {
return true;
}
}
return false;
} catch (InterruptedException e) {
logger.warn(e, "predicate %s on %s interrupted, returning false", input, predicate);
Thread.currentThread().interrupt();
} catch (RuntimeException e) {
if (getFirstThrowableOfType(e, ExecutionException.class) != null) {
logger.warn(e, "predicate %s on %s errored [%s], returning false", input, predicate, e.getMessage());
return false;
} else if (getFirstThrowableOfType(e, IllegalStateException.class) != null) {
logger.warn(e, "predicate %s on %s illegal state [%s], returning false", input, predicate, e.getMessage());
return false;
} else if (getFirstThrowableOfType(e, CancellationException.class) != null) {
logger.warn(e, "predicate %s on %s cancelled [%s], returning false", input, predicate, e.getMessage());
return false;
} else if (getFirstThrowableOfType(e, TimeoutException.class) != null) {
logger.warn(e, "predicate %s on %s timed out [%s], returning false", input, predicate, e.getMessage());
return false;
} else
throw e;
}
return false;
}
protected long nextMaxInterval(long attempt) {
// Interval increases exponentially, at a rate of nextInterval *= 1.5
// Note that attempt starts counting at 1
long interval = (long) (period * Math.pow(1.5, (attempt-1)));
return (interval > maxPeriod ? maxPeriod : interval);
}
}

View File

@ -39,6 +39,14 @@ public class Retryables {
return new RetryablePredicate<Input>(predicate, maxWait, period, unit).apply(input);
}
public static <Input> boolean retryNumTimes(Predicate<Input> predicate, Input input, int maxAttempts) {
return new RetryableNumTimesPredicate<Input>(predicate, maxAttempts).apply(input);
}
public static <Input> boolean retryNumTimes(Predicate<Input> predicate, Input input, int maxAttempts, long period, long maxPeriod, TimeUnit unit) {
return new RetryableNumTimesPredicate<Input>(predicate, maxAttempts, period, maxPeriod, unit).apply(input);
}
public static <Input> void assertEventually(Predicate<Input> predicate, Input input,
long maxWaitMillis, String failureMessage) {
if (!new RetryablePredicate<Input>(predicate, maxWaitMillis).apply(input))
@ -51,11 +59,11 @@ public class Retryables {
throw (AssertionError)new AssertionError(failureMessage).initCause(predicate.getLastFailure());
return predicate.getResult();
}
public static <Input,Result> Result retryGettingResultOrFailing(PredicateWithResult<Input,Result> predicate,
Input input, long maxWait, long period, TimeUnit unit, String failureMessage) {
if (!new RetryablePredicate<Input>(predicate, maxWait, period, unit).apply(input))
throw (AssertionError)new AssertionError(failureMessage).initCause(predicate.getLastFailure());
return predicate.getResult();
}
}

View File

@ -0,0 +1,170 @@
/**
* Licensed to jclouds, Inc. (jclouds) under one or more
* contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. jclouds licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.jclouds.predicates;
import static org.jclouds.predicates.RetryablePredicateTest.assertCallTimes;
import static org.testng.Assert.fail;
import java.util.Arrays;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import org.jclouds.predicates.RetryablePredicateTest.RepeatedAttemptsPredicate;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Stopwatch;
import com.google.common.base.Supplier;
/**
*
* @author Adrian Cole
*/
@Test(groups = "unit", singleThreaded = true)
public class RetryableNumTimesPredicateTest {
// Grace must be reasonably big; Thread.sleep can take a bit longer to wake up sometimes...
public static int SLOW_BUILD_SERVER_GRACE = 250;
// Sometimes returns sooner than timer would predict (e.g. observed 2999ms, when expected 3000ms)
public static int EARLY_RETURN_GRACE = 10;
private Stopwatch stopwatch;
@BeforeMethod
public void setUp() {
stopwatch = new Stopwatch();
}
@Test
void testFalseOnIllegalStateExeception() {
ensureImmediateReturnFor(new IllegalStateException());
}
@SuppressWarnings("serial")
@Test
void testFalseOnExecutionException() {
ensureImmediateReturnFor(new ExecutionException() {
});
}
@SuppressWarnings("serial")
@Test
void testFalseOnTimeoutException() {
ensureImmediateReturnFor(new TimeoutException() {
});
}
@SuppressWarnings("serial")
@Test(expectedExceptions = RuntimeException.class)
void testPropagateOnException() {
ensureImmediateReturnFor(new Exception() {
});
}
private void ensureImmediateReturnFor(final Exception ex) {
RetryableNumTimesPredicate<Supplier<String>> predicate = new RetryableNumTimesPredicate<Supplier<String>>(
new Predicate<Supplier<String>>() {
@Override
public boolean apply(Supplier<String> input) {
return "goo".equals(input.get());
}
}, 3, 1L, TimeUnit.SECONDS);
stopwatch.start();
assert !predicate.apply(new Supplier<String>() {
@Override
public String get() {
throw new RuntimeException(ex);
}
});
long duration = stopwatch.elapsedMillis();
assertOrdered(duration, SLOW_BUILD_SERVER_GRACE);
}
@Test
void testAlwaysTrue() {
// will call once immediately
RetryableNumTimesPredicate<String> predicate = new RetryableNumTimesPredicate<String>(Predicates.<String> alwaysTrue(),
1, 1L, TimeUnit.SECONDS);
stopwatch.start();
predicate.apply("");
long duration = stopwatch.elapsedMillis();
assertOrdered(duration, SLOW_BUILD_SERVER_GRACE);
}
@Test
void testAlwaysFalseMillis() {
// maxAttempts=3; period=1; maxPeriod defaults to 1*10
// will call at 0, 1, 1+(1*1.5) = 2.5secs
RetryableNumTimesPredicate<String> predicate = new RetryableNumTimesPredicate<String>(Predicates.<String> alwaysFalse(),
3, 1L, TimeUnit.SECONDS);
stopwatch.start();
predicate.apply("");
long duration = stopwatch.elapsedMillis();
assertOrdered(2500-EARLY_RETURN_GRACE, duration, 2500+SLOW_BUILD_SERVER_GRACE);
}
@Test
void testThirdTimeTrue() {
// maxAttempts=3; period=1; maxPeriod defaults to 1*10
// will call at 0, 1, 1+(1*1.5)
RepeatedAttemptsPredicate rawPredicate = new RepeatedAttemptsPredicate(2);
RetryableNumTimesPredicate<String> predicate = new RetryableNumTimesPredicate<String>(rawPredicate,
4, 1, TimeUnit.SECONDS);
stopwatch.start();
predicate.apply("");
long duration = stopwatch.elapsedMillis();
assertOrdered(2500-EARLY_RETURN_GRACE, duration, 2500+SLOW_BUILD_SERVER_GRACE);
assertCallTimes(rawPredicate.callTimes, 0, 1000, 1000+1500);
}
@Test
void testThirdTimeTrueLimitedMaxInterval() {
// maxAttempts=3; period=1; maxPeriod=1
// will call at 0, 1, 1+1
RepeatedAttemptsPredicate rawPredicate = new RepeatedAttemptsPredicate(2);
RetryableNumTimesPredicate<String> predicate = new RetryableNumTimesPredicate<String>(rawPredicate,
3, 1L, 1L, TimeUnit.SECONDS);
stopwatch.start();
predicate.apply("");
long duration = stopwatch.elapsedMillis();
assertOrdered(2000-EARLY_RETURN_GRACE, duration, 2000+SLOW_BUILD_SERVER_GRACE);
assertCallTimes(rawPredicate.callTimes, 0, 1000, 2000);
}
private static void assertOrdered(long... values) {
long prevVal = values[0];
for (long val : values) {
if (val < prevVal) {
fail(String.format("%s should be ordered", Arrays.toString(values)));
}
}
}
}

View File

@ -159,7 +159,7 @@ public class RetryablePredicateTest {
assertCallTimes(rawPredicate.callTimes, 0, 1000, 2000);
}
private static class RepeatedAttemptsPredicate implements Predicate<String> {
public static class RepeatedAttemptsPredicate implements Predicate<String> {
final List<Long> callTimes = new ArrayList<Long>();
private final int succeedOnAttempt;
private final Stopwatch stopwatch;