JCLOUDS-529: fix aws-ec2 cleanupIncidentalResources

- Some users get a DependencyVioloation, rather than InvalidGroup.InUse,
  when attempting to delete the security group. This caused
  cleanupIncidentalResources to propagate an exception.
- Fixes it by converting this to an IllegalStateException (in same
  way as is done for “InUse”)
- Adds tests (using MockWebServer) for happy-path and for failing
  to delete the security group with each of InUse and DependencyViolation
  responses.
- Adds Timeouts.cleanupIncidentalResources
- Use that timeout in EC2, when retrying the deletion of security group
  on VM deletion (previously hard-coded as 3 seconds).
- Configure that timeout in the tests, so deterministic number of retries
This commit is contained in:
Aled Sage 2014-12-29 21:04:43 +00:00 committed by Ignasi Barrera
parent 55f7e48d89
commit bdfd1facb9
11 changed files with 120 additions and 5 deletions

View File

@ -22,7 +22,6 @@ import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.collect.Lists.newArrayList;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static org.jclouds.compute.config.ComputeServiceProperties.RESOURCENAME_DELIMITER;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_NODE_RUNNING;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_NODE_SUSPENDED;
@ -108,6 +107,7 @@ public class EC2ComputeService extends BaseComputeService {
private final LoadingCache<RegionAndName, String> securityGroupMap;
private final Factory namingConvention;
private final boolean generateInstanceNames;
private final Timeouts timeouts;
@Inject
protected EC2ComputeService(ComputeServiceContext context, Map<String, Credentials> credentialStore,
@ -140,6 +140,7 @@ public class EC2ComputeService extends BaseComputeService {
this.securityGroupMap = securityGroupMap;
this.namingConvention = namingConvention;
this.generateInstanceNames = generateInstanceNames;
this.timeouts = timeouts;
}
@Override
@ -301,6 +302,7 @@ public class EC2ComputeService extends BaseComputeService {
// given security group, if called very soon after the VM's state reports terminated. Empirically, it seems that
// waiting a small time (e.g. enabling logging or debugging!) then the tests pass. We therefore retry.
// TODO: this could be moved to a config module, also the narrative above made more concise
long timeout = timeouts.cleanupIncidentalResources;
retry(new Predicate<RegionAndName>() {
public boolean apply(RegionAndName input) {
try {
@ -314,7 +316,7 @@ public class EC2ComputeService extends BaseComputeService {
return false;
}
}
}, SECONDS.toMillis(3), 50, 1000, MILLISECONDS).apply(new RegionAndName(region, group));
}, timeout, 50, 1000, MILLISECONDS).apply(new RegionAndName(region, group));
}
/**

View File

@ -0,0 +1,4 @@
<DeleteKeyPairResponse xmlns="http://ec2.amazonaws.com/doc/2014-10-01/">
<requestId>d4904fd9-82c2-4ea5-adfe-a9cc3EXAMPLE</requestId>
<return>true</return>
</DeleteKeyPairResponse>

View File

@ -0,0 +1,4 @@
<DeletePlacementGroupResponse xmlns="http://ec2.amazonaws.com/doc/2014-10-01/">
<requestId>d4904fd9-82c2-4ea5-adfe-a9cc3EXAMPLE</requestId>
<return>true</return>
</DeletePlacementGroupResponse>

View File

@ -0,0 +1,15 @@
<DescribeInstancesResponse xmlns="http://ec2.amazonaws.com/doc/2009-11-30/">
<reservationSet>
<item>
<reservationId>r-44a5402d</reservationId>
<ownerId>UYY3TLBUXIEON5NQVUUX6OMPWBZIQNFM</ownerId>
<groupSet>
<item>
<groupId>default</groupId>
</item>
</groupSet>
<instancesSet>
</instancesSet>
</item>
</reservationSet>
</DescribeInstancesResponse>

View File

@ -0,0 +1,8 @@
<DescribeKeyPairsResponse xmlns="http://ec2.amazonaws.com/doc/2009-11-30/">
<keySet>
<item>
<keyName>jclouds#sg-3c6ef654</keyName>
<keyFingerprint>1f:51:ae:28:bf:89:e9:d8:1f:25:5d:37:2d:7d:b8:ca:9f:f5:f1:6f</keyFingerprint>
</item>
</keySet>
</DescribeKeyPairsResponse>

View File

@ -107,8 +107,8 @@ public class ParseAWSErrorFromXmlContent implements HttpErrorHandler {
else if (errorCode != null && (errorCode.indexOf("NotFound") != -1 || errorCode.endsWith(".Unknown")))
exception = new ResourceNotFoundException(message, exception);
else if ("IncorrectState".equals(errorCode)
|| (errorCode != null && (error.getCode().endsWith(".Duplicate") || error.getCode().endsWith(
".InUse")))
|| (errorCode != null && (error.getCode().endsWith(".Duplicate")
|| error.getCode().endsWith(".InUse") || error.getCode().equals("DependencyViolation")))
|| (message != null && (message.indexOf("already exists") != -1 || message.indexOf("is in use") != -1)))
exception = new IllegalStateException(message, exception);
else if (errorCode != null && errorCode.indexOf("AuthFailure") != -1)

View File

@ -60,6 +60,15 @@ public final class ComputeServiceProperties {
*/
public static final String TIMEOUT_IMAGE_DELETED = "jclouds.compute.timeout.image-deleted";
/**
* time in milliseconds to try to clean up incidental resources
* (e.g. security groups, key-pairs, etc).
*
* Override {@link Timeouts#cleanupIncidentalResources default} by setting this property using
* {@link ContextBuilder#overrides}
*/
public static final String TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES = "jclouds.compute.timeout.cleanup-incidental-resources";
/**
* overrides the default specified in the subclass of
* {@link BaseComputeServiceContextModule#provideTemplate}

View File

@ -20,6 +20,7 @@ import static org.jclouds.compute.config.ComputeServiceProperties.INIT_STATUS_MA
import static org.jclouds.compute.config.ComputeServiceProperties.OS_VERSION_MAP_JSON;
import static org.jclouds.compute.config.ComputeServiceProperties.POLL_INITIAL_PERIOD;
import static org.jclouds.compute.config.ComputeServiceProperties.POLL_MAX_PERIOD;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_IMAGE_AVAILABLE;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_IMAGE_DELETED;
import static org.jclouds.compute.config.ComputeServiceProperties.TIMEOUT_NODE_RUNNING;
@ -126,6 +127,14 @@ public final class ComputeServiceConstants {
@Inject(optional = true)
@Named(TIMEOUT_IMAGE_AVAILABLE)
public long imageAvailable = TimeUnit.MINUTES.toMillis(45);
/**
* current value of {@link ComputeServiceProperties#TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES} defaults to 3
* seconds.
*/
@Inject(optional = true)
@Named(TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES)
public long cleanupIncidentalResources = TimeUnit.SECONDS.toMillis(3);
}
private ComputeServiceConstants() {

View File

@ -234,4 +234,61 @@ public class AWSEC2ComputeServiceApiMockTest extends BaseAWSEC2ApiMockTest {
assertPosted(DEFAULT_REGION, "Action=DescribeImages&ImageId.1=ami-aecd60c7");
assertPosted(DEFAULT_REGION, "Action=DescribeSpotInstanceRequests");
}
public void deleteIncidentalResourcesSuccessfully() throws Exception {
enqueueRegions(DEFAULT_REGION);
enqueueXml(DEFAULT_REGION, "/describe_securitygroups_extension_single.xml");
enqueueXml(DEFAULT_REGION, "/delete_securitygroup.xml");
enqueueXml(DEFAULT_REGION, "/describe_keypairs_jcloudssingle.xml");
enqueueXml(DEFAULT_REGION, "/describe_instances_empty.xml");
enqueueXml(DEFAULT_REGION, "/delete_keypair.xml");
enqueueXml(DEFAULT_REGION, "/describe_placement_groups.xml");
enqueueXml(DEFAULT_REGION, "/delete_placementgroup.xml");
enqueueXml(DEFAULT_REGION, "/describe_placement_groups_empty.xml");
AWSEC2ComputeService computeService = (AWSEC2ComputeService) computeService();
computeService.cleanUpIncidentalResources(DEFAULT_REGION, "sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DescribeRegions");
assertPosted(DEFAULT_REGION, "Action=DescribeSecurityGroups&GroupName.1=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DeleteSecurityGroup&GroupName=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DescribeKeyPairs&Filter.1.Name=key-name&Filter.1.Value.1=jclouds%23sg-3c6ef654%23us-east-1%2A");
assertPosted(DEFAULT_REGION, "Action=DescribeInstances&Filter.1.Name=instance-state-name&Filter.1.Value.1=terminated&Filter.1.Value.2=shutting-down&Filter.2.Name=key-name&Filter.2.Value.1=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DeleteKeyPair&KeyName=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DescribePlacementGroups&GroupName.1=jclouds%23sg-3c6ef654%23us-east-1");
assertPosted(DEFAULT_REGION, "Action=DeletePlacementGroup&GroupName=jclouds%23sg-3c6ef654%23us-east-1");
assertPosted(DEFAULT_REGION, "Action=DescribePlacementGroups&GroupName.1=jclouds%23sg-3c6ef654%23us-east-1");
}
public void deleteIncidentalResourcesGivingDependencyViolationForSecurityGroup() throws Exception {
runDeleteIncidentalResourcesGivingErrForSecurityGroup("DependencyViolation");
}
public void deleteIncidentalResourcesGivingInUseForSecurityGroup() throws Exception {
runDeleteIncidentalResourcesGivingErrForSecurityGroup("InvalidGroup.InUse");
}
protected void runDeleteIncidentalResourcesGivingErrForSecurityGroup(String errCode) throws Exception {
// Does not return delete_securitygroup.xml, but instead gives a 400 error.
// Because super.builder has set TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES to 0, it will not retry.
enqueueRegions(DEFAULT_REGION);
enqueueXml(DEFAULT_REGION, "/describe_securitygroups_extension_single.xml");
enqueue(DEFAULT_REGION, new MockResponse().setResponseCode(400).setBody("<Response><Errors><Error><Code>" + errCode + "</Code><Message>resource sg-3c6ef654 has a dependent object</Message></Error></Errors><RequestID>e4f4c78f-4455-43dd-b5cb-9af0bc4bc804</RequestID></Response>"));
enqueueXml(DEFAULT_REGION, "/describe_placement_groups.xml");
enqueueXml(DEFAULT_REGION, "/delete_placementgroup.xml");
enqueueXml(DEFAULT_REGION, "/describe_placement_groups_empty.xml");
AWSEC2ComputeService computeService = (AWSEC2ComputeService) computeService();
computeService.cleanUpIncidentalResources(DEFAULT_REGION, "sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DescribeRegions");
assertPosted(DEFAULT_REGION, "Action=DescribeSecurityGroups&GroupName.1=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DeleteSecurityGroup&GroupName=jclouds%23sg-3c6ef654");
assertPosted(DEFAULT_REGION, "Action=DescribePlacementGroups&GroupName.1=jclouds%23sg-3c6ef654%23us-east-1");
assertPosted(DEFAULT_REGION, "Action=DeletePlacementGroup&GroupName=jclouds%23sg-3c6ef654%23us-east-1");
assertPosted(DEFAULT_REGION, "Action=DescribePlacementGroups&GroupName.1=jclouds%23sg-3c6ef654%23us-east-1");
}
}

View File

@ -22,7 +22,6 @@ import static com.google.common.net.HttpHeaders.CONTENT_TYPE;
import static com.google.common.util.concurrent.MoreExecutors.sameThreadExecutor;
import static javax.ws.rs.core.MediaType.APPLICATION_XML;
import static org.assertj.core.api.Assertions.assertThat;
import static org.jclouds.aws.filters.FormSignerV4.ServiceAndRegion;
import static org.jclouds.util.Strings2.toStringAndClose;
import static org.testng.Assert.assertEquals;
@ -36,8 +35,10 @@ import org.jclouds.ContextBuilder;
import org.jclouds.aws.ec2.AWSEC2Api;
import org.jclouds.aws.ec2.AWSEC2ProviderMetadata;
import org.jclouds.aws.ec2.config.AWSEC2HttpApiModule;
import org.jclouds.aws.filters.FormSignerV4.ServiceAndRegion;
import org.jclouds.compute.ComputeService;
import org.jclouds.compute.ComputeServiceContext;
import org.jclouds.compute.config.ComputeServiceProperties;
import org.jclouds.concurrent.config.ExecutorServiceModule;
import org.jclouds.date.DateService;
import org.jclouds.rest.ConfiguresHttpApi;
@ -77,6 +78,7 @@ public class BaseAWSEC2ApiMockTest {
protected ContextBuilder builder(Properties overrides) {
MockWebServer defaultServer = regionToServers.get(DEFAULT_REGION);
overrides.setProperty(Constants.PROPERTY_MAX_RETRIES, "1");
overrides.setProperty(ComputeServiceProperties.TIMEOUT_CLEANUP_INCIDENTAL_RESOURCES, "0");
return ContextBuilder.newBuilder(new AWSEC2ProviderMetadata())
.credentials(ACCESS_KEY, SECRET_KEY)
.endpoint(defaultServer.getUrl("").toString())

View File

@ -0,0 +1,5 @@
<DescribePlacementGroupsResponse xmlns="http://ec2.amazonaws.com/doc/2010-06-15/">
<requestID>d4904fd9-82c2-4ea5-adfe-a9cc3EXAMPLE</requestID>
<placementGroupSet>
</placementGroupSet>
</DescribePlacementGroupsResponse>