ec2 suspend nodes may take up to 2 minutes

This commit is contained in:
Adrian Cole 2010-11-07 22:42:34 +01:00
parent 5f5b01ad35
commit 1347165118
3 changed files with 78 additions and 52 deletions

View File

@ -26,6 +26,7 @@ import static org.jclouds.aws.ec2.reference.EC2Constants.PROPERTY_EC2_CC_AMIs;
import static org.jclouds.aws.ec2.reference.EC2Constants.PROPERTY_ELB_ENDPOINT;
import static org.jclouds.aws.reference.AWSConstants.PROPERTY_AUTH_TAG;
import static org.jclouds.aws.reference.AWSConstants.PROPERTY_HEADER_TAG;
import static org.jclouds.compute.reference.ComputeServiceConstants.PROPERTY_TIMEOUT_NODE_SUSPENDED;
import java.util.Properties;
@ -49,11 +50,15 @@ public class EC2PropertiesBuilder extends PropertiesBuilder {
properties.setProperty(PROPERTY_EC2_AMI_OWNERS, "137112412989,063491364108,099720109477,411009282317");
// amis that work with the cluster instances
properties.setProperty(PROPERTY_EC2_CC_AMIs, "us-east-1/ami-7ea24a17");
// sometimes, like in ec2, stop takes a very long time, perhaps
// due to volume management. one example spent 2 minutes moving
// from stopping->stopped state on an ec2 micro
properties.setProperty(PROPERTY_TIMEOUT_NODE_SUSPENDED, 120 * 1000 + "");
// auth fail sometimes happens in EC2, as the rc.local script that injects the
// authorized key executes after ssh has started
properties.setProperty("jclouds.ssh.max_retries", "7");
properties.setProperty("jclouds.ssh.retryable_messages",
"Auth fail,invalid data,End of IO Stream Read,Connection reset,socket is not established");
"Auth fail,invalid data,End of IO Stream Read,Connection reset,socket is not established");
return properties;
}

View File

@ -137,7 +137,7 @@ public interface ComputeService {
* default, equivalent to {@code templateBuilder().any().options(templateOptions)}.
*/
Set<? extends NodeMetadata> runNodesWithTag(String tag, int count, TemplateOptions templateOptions)
throws RunNodesException;
throws RunNodesException;
/**
* Like {@link ComputeService#runNodesWithTag(String,int,TemplateOptions)}, except that the
@ -148,6 +148,10 @@ public interface ComputeService {
/**
* resume the node from {@link org.jclouds.compute.domain.NodeState#SUSPENDED suspended} state,
* given its id.
*
* <h4>note</h4>
*
* affected nodes may not resume with the same IP address(es)
*/
void resumeNode(String id);
@ -155,6 +159,10 @@ public interface ComputeService {
* nodes matching the filter are treated as a logical set. Using the resume command, you can save
* time by resumeing the nodes in parallel.
*
* <h4>note</h4>
*
* affected nodes may not resume with the same IP address(es)
*
* @throws UnsupportedOperationException
* if the underlying provider doesn't support suspend/resume
*/
@ -164,6 +172,10 @@ public interface ComputeService {
* suspend the node, given its id. This will result in
* {@link org.jclouds.compute.domain.NodeState#SUSPENDED suspended} state.
*
* <h4>note</h4>
*
* affected nodes may not resume with the same IP address(es)
*
* @throws UnsupportedOperationException
* if the underlying provider doesn't support suspend/resume
*/
@ -173,6 +185,9 @@ public interface ComputeService {
* nodes matching the filter are treated as a logical set. Using the suspend command, you can
* save time by suspending the nodes in parallel.
*
* <h4>note</h4>
*
* affected nodes may not resume with the same IP address(es)
*/
void suspendNodesMatching(Predicate<NodeMetadata> filter);
@ -224,7 +239,7 @@ public interface ComputeService {
* @see org.jclouds.compute.predicates.NodePredicates#runningWithTag(String)
*/
Map<? extends NodeMetadata, ExecResponse> runScriptOnNodesMatching(Predicate<NodeMetadata> filter, Payload runScript)
throws RunScriptOnNodesException;
throws RunScriptOnNodesException;
/**
* Run the script on all nodes with the specific tag.
@ -243,6 +258,6 @@ public interface ComputeService {
* @see org.jclouds.io.Payloads
*/
Map<? extends NodeMetadata, ExecResponse> runScriptOnNodesMatching(Predicate<NodeMetadata> filter,
Payload runScript, RunScriptOptions options) throws RunScriptOnNodesException;
Payload runScript, RunScriptOptions options) throws RunScriptOnNodesException;
}

View File

@ -161,8 +161,8 @@ public abstract class BaseComputeServiceLiveTest {
if (context != null)
context.close();
Properties props = setupProperties();
context = new ComputeServiceContextFactory().createContext(provider, ImmutableSet.of(new Log4JLoggingModule(),
getSshModule()), props);
context = new ComputeServiceContextFactory().createContext(provider,
ImmutableSet.of(new Log4JLoggingModule(), getSshModule()), props);
client = context.getComputeService();
}
@ -173,8 +173,8 @@ public abstract class BaseComputeServiceLiveTest {
public void testCorrectAuthException() throws Exception {
ComputeServiceContext context = null;
try {
context = new ComputeServiceContextFactory().createContext(provider, "MOMMA", "MIA", ImmutableSet
.<Module> of(new Log4JLoggingModule()));
context = new ComputeServiceContextFactory().createContext(provider, "MOMMA", "MIA",
ImmutableSet.<Module> of(new Log4JLoggingModule()));
context.getComputeService().listNodes();
} catch (AuthorizationException e) {
throw e;
@ -217,7 +217,7 @@ public abstract class BaseComputeServiceLiveTest {
OperatingSystem os = get(nodes, 0).getOperatingSystem();
try {
Map<? extends NodeMetadata, ExecResponse> responses = runScriptWithCreds(tag, os, new Credentials(
good.identity, "romeo"));
good.identity, "romeo"));
assert false : "shouldn't pass with a bad password\n" + responses;
} catch (RunScriptOnNodesException e) {
assert getRootCause(e).getMessage().contains("Auth fail") : e;
@ -276,7 +276,7 @@ public abstract class BaseComputeServiceLiveTest {
template = buildTemplate(client.templateBuilder());
template.getOptions().installPrivateKey(keyPair.get("private")).authorizePublicKey(keyPair.get("public"))
.runScript(buildScript(template.getImage().getOperatingSystem()));
.runScript(buildScript(template.getImage().getOperatingSystem()));
}
protected void checkImageIdMatchesTemplate(NodeMetadata node) {
@ -287,8 +287,8 @@ public abstract class BaseComputeServiceLiveTest {
protected void checkOsMatchesTemplate(NodeMetadata node) {
if (node.getOperatingSystem() != null)
assert node.getOperatingSystem().getFamily().equals(template.getImage().getOperatingSystem().getFamily()) : String
.format("expecting family %s but got %s", template.getImage().getOperatingSystem().getFamily(), node
.getOperatingSystem());
.format("expecting family %s but got %s", template.getImage().getOperatingSystem().getFamily(),
node.getOperatingSystem());
}
void assertLocationSameOrChild(Location test, Location expected) {
@ -320,10 +320,11 @@ public abstract class BaseComputeServiceLiveTest {
}
protected Map<? extends NodeMetadata, ExecResponse> runScriptWithCreds(final String tag, OperatingSystem os,
Credentials creds) throws RunScriptOnNodesException {
Credentials creds) throws RunScriptOnNodesException {
try {
return client.runScriptOnNodesMatching(runningWithTag(tag), newStringPayload(buildScript(os).render(
OsFamily.UNIX)), overrideCredentialsWith(creds).nameTask("runScriptWithCreds"));
return client.runScriptOnNodesMatching(runningWithTag(tag),
newStringPayload(buildScript(os).render(OsFamily.UNIX)),
overrideCredentialsWith(creds).nameTask("runScriptWithCreds"));
} catch (SshException e) {
throw e;
}
@ -353,16 +354,16 @@ public abstract class BaseComputeServiceLiveTest {
@Test(enabled = true, dependsOnMethods = "testCreateAnotherNodeWithANewContextToEnsureSharedMemIsntRequired")
public void testGet() throws Exception {
Map<String, ? extends NodeMetadata> metadataMap = newLinkedHashMap(uniqueIndex(filter(client
.listNodesDetailsMatching(all()), and(withTag(tag), not(TERMINATED))),
new Function<NodeMetadata, String>() {
Map<String, ? extends NodeMetadata> metadataMap = newLinkedHashMap(uniqueIndex(
filter(client.listNodesDetailsMatching(all()), and(withTag(tag), not(TERMINATED))),
new Function<NodeMetadata, String>() {
@Override
public String apply(NodeMetadata from) {
return from.getId();
}
@Override
public String apply(NodeMetadata from) {
return from.getId();
}
}));
}));
for (NodeMetadata node : nodes) {
metadataMap.remove(node.getId());
NodeMetadata metadata = client.getNodeMetadata(node.getId());
@ -372,15 +373,16 @@ public abstract class BaseComputeServiceLiveTest {
checkImageIdMatchesTemplate(metadata);
checkOsMatchesTemplate(metadata);
assertEquals(metadata.getState(), NodeState.RUNNING);
assertEquals(metadata.getPrivateAddresses(), node.getPrivateAddresses());
assertEquals(metadata.getPublicAddresses(), node.getPublicAddresses());
// due to DHCP the addresses can actually change in-between runs.
assertEquals(metadata.getPrivateAddresses().size(), node.getPrivateAddresses().size());
assertEquals(metadata.getPublicAddresses().size(), node.getPublicAddresses().size());
}
assertNodeZero(metadataMap.values());
}
protected void assertNodeZero(Collection<? extends NodeMetadata> metadataSet) {
assert metadataSet.size() == 0 : String.format("nodes left in set: [%s] which didn't match set: [%s]",
metadataSet, nodes);
metadataSet, nodes);
}
@Test(enabled = true, dependsOnMethods = "testGet")
@ -393,16 +395,20 @@ public abstract class BaseComputeServiceLiveTest {
@Test(enabled = true, dependsOnMethods = "testReboot")
public void testSuspendResume() throws Exception {
client.suspendNodesMatching(withTag(tag));
Set<? extends NodeMetadata> stoppedNodes = refreshNodes();
assert Iterables.all(stoppedNodes, new Predicate<NodeMetadata>() {
@Override
public boolean apply(NodeMetadata input) {
return input.getState() == NodeState.SUSPENDED;
boolean returnVal = input.getState() == NodeState.SUSPENDED;
if (!returnVal)
System.err.printf("warning: node %s in state %s%n", input.getId(), input.getState());
return returnVal;
}
}) : nodes;
}) : stoppedNodes;
client.resumeNodesMatching(withTag(tag));
testGet();
@ -462,11 +468,11 @@ public abstract class BaseComputeServiceLiveTest {
}
template = client.templateBuilder().options(blockOnComplete(false).blockOnPort(8080, 600).inboundPorts(22, 8080))
.build();
.build();
// note this is a dependency on the template resolution
template.getOptions().runScript(
RunScriptData.createScriptInstallAndStartJBoss(keyPair.get("public"), template.getImage()
.getOperatingSystem()));
RunScriptData.createScriptInstallAndStartJBoss(keyPair.get("public"), template.getImage()
.getOperatingSystem()));
try {
NodeMetadata node = getOnlyElement(client.runNodesWithTag(tag, 1, template));
@ -500,26 +506,26 @@ public abstract class BaseComputeServiceLiveTest {
assert location != location.getParent() : location;
assert location.getScope() != null : location;
switch (location.getScope()) {
case PROVIDER:
assertProvider(location);
break;
case REGION:
assertProvider(location.getParent());
break;
case ZONE:
Location provider = location.getParent().getParent();
// zone can be a direct descendant of provider
if (provider == null)
provider = location.getParent();
assertProvider(provider);
break;
case HOST:
Location provider2 = location.getParent().getParent().getParent();
// zone can be a direct descendant of provider
if (provider2 == null)
provider2 = location.getParent().getParent();
assertProvider(provider2);
break;
case PROVIDER:
assertProvider(location);
break;
case REGION:
assertProvider(location.getParent());
break;
case ZONE:
Location provider = location.getParent().getParent();
// zone can be a direct descendant of provider
if (provider == null)
provider = location.getParent();
assertProvider(provider);
break;
case HOST:
Location provider2 = location.getParent().getParent().getParent();
// zone can be a direct descendant of provider
if (provider2 == null)
provider2 = location.getParent().getParent();
assertProvider(provider2);
break;
}
}
}
@ -607,7 +613,7 @@ public abstract class BaseComputeServiceLiveTest {
assertEquals(hello.getOutput().trim(), "hello");
ExecResponse exec = ssh.exec("java -version");
assert exec.getError().indexOf("1.6") != -1 || exec.getOutput().indexOf("1.6") != -1 : exec + "\n"
+ ssh.exec("cat /tmp/bootstrap/stdout.log /tmp/bootstrap/stderr.log");
+ ssh.exec("cat /tmp/bootstrap/stdout.log /tmp/bootstrap/stderr.log");
} finally {
if (ssh != null)
ssh.disconnect();