Issue 244: fixed jsch bug and refactored ssh error handling

This commit is contained in:
Adrian Cole 2010-05-10 17:41:36 -07:00
parent 7363ae6e87
commit 6c14ae1831
16 changed files with 181 additions and 61 deletions

View File

@ -43,6 +43,11 @@ public class EC2PropertiesBuilder extends PropertiesBuilder {
properties.setProperty(PROPERTY_AWS_EXPIREINTERVAL, "60");
// alestic and canonical
properties.setProperty(PROPERTY_EC2_AMI_OWNERS, "063491364108,099720109477");
// auth fail sometimes happens in EC2, as the rc.local script that injects the
// authorized key executes after ssh has started
properties.setProperty("jclouds.ssh.max_retries", "6");
properties.setProperty("jclouds.ssh.retryable_messages",
"Auth fail,invalid data,End of IO Stream Read,Connection reset");
return properties;
}

View File

@ -18,8 +18,6 @@
*/
package org.jclouds.aws.ec2.compute;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutorService;
@ -30,9 +28,6 @@ import javax.inject.Named;
import javax.inject.Provider;
import javax.inject.Singleton;
import com.google.common.base.Function;
import com.google.common.base.Predicates;
import com.google.common.collect.Sets;
import org.jclouds.Constants;
import org.jclouds.aws.ec2.EC2Client;
import org.jclouds.aws.ec2.compute.config.EC2ComputeServiceContextModule.GetRegionFromNodeOrDefault;
@ -43,11 +38,9 @@ import org.jclouds.aws.ec2.domain.RunningInstance;
import org.jclouds.compute.ComputeServiceContext;
import org.jclouds.compute.domain.Image;
import org.jclouds.compute.domain.NodeMetadata;
import org.jclouds.compute.domain.NodeState;
import org.jclouds.compute.domain.Size;
import org.jclouds.compute.domain.TemplateBuilder;
import org.jclouds.compute.internal.BaseComputeService;
import org.jclouds.compute.predicates.NodePredicates;
import org.jclouds.compute.strategy.DestroyNodeStrategy;
import org.jclouds.compute.strategy.GetNodeMetadataStrategy;
import org.jclouds.compute.strategy.ListNodesStrategy;
@ -56,8 +49,10 @@ import org.jclouds.compute.strategy.RunNodesAndAddToSetStrategy;
import org.jclouds.compute.util.ComputeUtils;
import org.jclouds.domain.Location;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
/**
* @author Adrian Cole
@ -72,19 +67,19 @@ public class EC2ComputeService extends BaseComputeService {
@Inject
protected EC2ComputeService(ComputeServiceContext context,
Provider<Set<? extends Image>> images, Provider<Set<? extends Size>> sizes,
Provider<Set<? extends Location>> locations, ListNodesStrategy listNodesStrategy,
GetNodeMetadataStrategy getNodeMetadataStrategy,
RunNodesAndAddToSetStrategy runNodesAndAddToSetStrategy,
RebootNodeStrategy rebootNodeStrategy, DestroyNodeStrategy destroyNodeStrategy,
Provider<TemplateBuilder> templateBuilderProvider, ComputeUtils utils,
@Named(Constants.PROPERTY_USER_THREADS) ExecutorService executor, EC2Client ec2Client,
GetRegionFromNodeOrDefault getRegionFromNodeOrDefault,
Map<RegionTag, KeyPair> credentialsMap, Map<PortsRegionTag, String> securityGroupMap,
@Named("TERMINATED") Predicate<RunningInstance> instanceStateTerminated) {
Provider<Set<? extends Image>> images, Provider<Set<? extends Size>> sizes,
Provider<Set<? extends Location>> locations, ListNodesStrategy listNodesStrategy,
GetNodeMetadataStrategy getNodeMetadataStrategy,
RunNodesAndAddToSetStrategy runNodesAndAddToSetStrategy,
RebootNodeStrategy rebootNodeStrategy, DestroyNodeStrategy destroyNodeStrategy,
Provider<TemplateBuilder> templateBuilderProvider, ComputeUtils utils,
@Named(Constants.PROPERTY_USER_THREADS) ExecutorService executor, EC2Client ec2Client,
GetRegionFromNodeOrDefault getRegionFromNodeOrDefault,
Map<RegionTag, KeyPair> credentialsMap, Map<PortsRegionTag, String> securityGroupMap,
@Named("TERMINATED") Predicate<RunningInstance> instanceStateTerminated) {
super(context, images, sizes, locations, listNodesStrategy, getNodeMetadataStrategy,
runNodesAndAddToSetStrategy, rebootNodeStrategy, destroyNodeStrategy,
templateBuilderProvider, utils, executor);
runNodesAndAddToSetStrategy, rebootNodeStrategy, destroyNodeStrategy,
templateBuilderProvider, utils, executor);
this.ec2Client = ec2Client;
this.getRegionFromNodeOrDefault = getRegionFromNodeOrDefault;
this.credentialsMap = credentialsMap;
@ -118,13 +113,13 @@ public class EC2ComputeService extends BaseComputeService {
public void destroyNodesWithTag(String tag) {
super.destroyNodesWithTag(tag);
Set<String> regions = Sets.newHashSet(Iterables.transform(listNodesWithTag(tag),
new Function<NodeMetadata, String>() {
@Override
public String apply(@Nullable NodeMetadata nodeMetadata) {
return getRegionFromNodeOrDefault.apply(nodeMetadata);
}
}));
for(String region : regions) {
new Function<NodeMetadata, String>() {
@Override
public String apply(@Nullable NodeMetadata nodeMetadata) {
return getRegionFromNodeOrDefault.apply(nodeMetadata);
}
}));
for (String region : regions) {
deleteKeyPair(region, tag);
deleteSecurityGroup(region, tag);
}

View File

@ -39,7 +39,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface AMIClient {
/**

View File

@ -36,7 +36,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface AvailabilityZoneAndRegionClient {
/**

View File

@ -38,7 +38,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface ElasticBlockStoreClient {
/**

View File

@ -34,7 +34,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface ElasticIPAddressClient {
/**

View File

@ -32,7 +32,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface KeyPairClient {
/**

View File

@ -33,7 +33,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface MonitoringClient {
/**

View File

@ -34,7 +34,7 @@ import org.jclouds.concurrent.Timeout;
*
* @author Adrian Cole
*/
@Timeout(duration = 30, timeUnit = TimeUnit.SECONDS)
@Timeout(duration = 45, timeUnit = TimeUnit.SECONDS)
public interface SecurityGroupClient {
/**

View File

@ -24,6 +24,7 @@ import org.jclouds.compute.BaseComputeServiceLiveTest;
import org.jclouds.compute.domain.Architecture;
import org.jclouds.compute.domain.OsFamily;
import org.jclouds.compute.domain.Template;
import org.jclouds.compute.domain.TemplateBuilder;
import org.jclouds.ssh.jsch.config.JschSshClientModule;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@ -67,4 +68,9 @@ public class EC2ComputeServiceLiveTest extends BaseComputeServiceLiveTest {
return new JschSshClientModule();
}
@Override
protected Template buildTemplate(TemplateBuilder templateBuilder) {
return templateBuilder.imageId("ami-714ba518").build();
}
}

View File

@ -114,5 +114,4 @@ public abstract class BaseEC2AsyncClientTest<T> extends RestClientTest<T> {
}
};
}
}

View File

@ -78,8 +78,8 @@ public class AzureBlobClientErrorRetryHandler implements HttpRetryHandler {
AzureStorageError error = utils.parseAzureStorageErrorFromContent(command, response,
new ByteArrayInputStream(content));
if ("ContainerBeingDeleted".equals(error.getCode())) {
backoffHandler.imposeBackoffExponentialDelay(100L, 3, command.getFailureCount(),
command.toString());
backoffHandler.imposeBackoffExponentialDelay(100L, 3, retryCountLimit, command
.getFailureCount(), command.toString());
return true;
}
} catch (HttpException e) {

View File

@ -183,7 +183,13 @@ public abstract class BaseComputeServiceLiveTest {
template.getOptions().installPrivateKey(keyPair.get("private")).authorizePublicKey(
keyPair.get("public")).runScript(
buildScript(template.getImage().getOsFamily()).getBytes());
nodes = Sets.newTreeSet(client.runNodesWithTag(tag, 2, template));
try {
nodes = Sets.newTreeSet(client.runNodesWithTag(tag, 2, template));
} catch (RunNodesException e) {
nodes = Sets.newTreeSet(Iterables.concat(e.getSuccessfulNodes(), e.getNodeErrors()
.keySet()));
throw e;
}
assertEquals(nodes.size(), 2);
checkNodes(nodes, tag);
NodeMetadata node1 = nodes.first();

View File

@ -101,10 +101,10 @@ public class BackoffLimitedRetryHandler implements HttpRetryHandler {
}
public void imposeBackoffExponentialDelay(int failureCount, String commandDescription) {
imposeBackoffExponentialDelay(50L, 2, failureCount, commandDescription);
imposeBackoffExponentialDelay(50L, 2, failureCount, retryCountLimit, commandDescription);
}
public void imposeBackoffExponentialDelay(long period, int pow, int failureCount,
public void imposeBackoffExponentialDelay(long period, int pow, int failureCount, int max,
String commandDescription) {
long delayMs = (long) (period * Math.pow(failureCount, pow));
logger.debug("Retry %d/%d: delaying for %d ms: %s", failureCount, retryCountLimit, delayMs,

View File

@ -21,12 +21,18 @@ package org.jclouds.ssh.jsch;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Predicates.instanceOf;
import static com.google.common.base.Predicates.or;
import static com.google.common.base.Throwables.getCausalChain;
import static com.google.common.base.Throwables.getRootCause;
import static com.google.common.collect.Iterables.any;
import java.io.IOException;
import java.io.InputStream;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.util.Arrays;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
@ -35,7 +41,6 @@ import javax.inject.Named;
import org.apache.commons.io.input.ProxyInputStream;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.jclouds.Constants;
import org.jclouds.http.handlers.BackoffLimitedRetryHandler;
import org.jclouds.logging.Logger;
import org.jclouds.ssh.ExecResponse;
@ -43,7 +48,9 @@ import org.jclouds.ssh.SshClient;
import org.jclouds.ssh.SshException;
import org.jclouds.util.Utils;
import com.google.common.base.Throwables;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.io.Closeables;
import com.google.inject.Inject;
@ -82,10 +89,21 @@ public class JschSshClient implements SshClient {
private final int port;
private final String username;
private final String password;
@Inject(optional = true)
@Named(Constants.PROPERTY_MAX_RETRIES)
private int sshRetries = 5;
@Inject(optional = true)
@Named("jclouds.ssh.max_retries")
@VisibleForTesting
int sshRetries = 5;
@Inject(optional = true)
@Named("jclouds.ssh.retryable_messages")
@VisibleForTesting
String retryableMessages = "invalid data,End of IO Stream Read,Connection reset";
@Inject(optional = true)
@Named("jclouds.ssh.retry_predicate")
private Predicate<Throwable> retryPredicate = or(instanceOf(ConnectException.class),
instanceOf(IOException.class));
@Resource
protected Logger logger = Logger.NULL;
private Session session;
@ -171,25 +189,15 @@ public class JschSshClient implements SshClient {
} catch (Exception from) {
e = from;
disconnect();
String rootMessage = Throwables.getRootCause(from).getMessage();
if (i == sshRetries)
throw propagate(from);
if (Iterables.size(Iterables.filter(Throwables.getCausalChain(from),
ConnectException.class)) >= 1
|| Iterables.size(Iterables.filter(Throwables.getCausalChain(from),
IOException.class)) >= 1
|| rootMessage.indexOf("invalid privatekey") != -1
|| rootMessage.indexOf("Auth fail") != -1// auth fail sometimes happens in EC2,
// as the script that injects the
// authorized key executes after ssh
// has started
|| rootMessage.indexOf("invalid data") != -1
|| rootMessage.indexOf("End of IO Stream Read") != -1) {
backoffLimitedRetryHandler.imposeBackoffExponentialDelay(200L, 2, i + 1, String
.format("%s@%s:%d: connection error: %s", username, host.getHostAddress(),
port, from.getMessage()));
if (shouldRetry(from)) {
backoffForAttempt(i + 1, from.getMessage());
continue;
}
throw propagate(from);
}
}
@ -197,6 +205,26 @@ public class JschSshClient implements SshClient {
throw propagate(e);
}
@VisibleForTesting
boolean shouldRetry(Exception from) {
final String rootMessage = getRootCause(from).getMessage();
return any(getCausalChain(from), retryPredicate)
|| Iterables.any(Splitter.on(",").split(retryableMessages), new Predicate<String>() {
@Override
public boolean apply(String input) {
return rootMessage.indexOf(input) != -1;
}
});
}
private void backoffForAttempt(int retryAttempt, String rootMessage) {
backoffLimitedRetryHandler.imposeBackoffExponentialDelay(200L, 2, retryAttempt, sshRetries,
String.format("%s@%s:%d: connection error: %s", username, host.getHostAddress(),
port, rootMessage));
}
private void newSession() throws JSchException {
JSch jsch = new JSch();
session = null;
@ -208,7 +236,9 @@ public class JschSshClient implements SshClient {
if (password != null) {
session.setPassword(password);
} else {
jsch.addIdentity(username, privateKey, null, emptyPassPhrase);
// jsch wipes out your private key
jsch.addIdentity(username, Arrays.copyOf(privateKey, privateKey.length), null,
emptyPassPhrase);
}
} catch (JSchException e) {
throw new SshException(String.format("%s@%s:%d: Error creating session.", username, host
@ -228,8 +258,10 @@ public class JschSshClient implements SshClient {
@PreDestroy
public void disconnect() {
if (session != null && session.isConnected())
if (session != null && session.isConnected()) {
session.disconnect();
session = null;
}
}
public ExecResponse exec(String command) {

View File

@ -0,0 +1,77 @@
/**
*
* Copyright (C) 2009 Cloud Conscious, LLC. <info@cloudconscious.com>
*
* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.jclouds.ssh.jsch;
import java.io.IOException;
import java.net.ConnectException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import org.jclouds.ssh.SshClient;
import org.jclouds.ssh.jsch.config.JschSshClientModule;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.jcraft.jsch.JSchException;
/**
*
* @author Adrian Cole
*/
@Test
public class JschSshClientTest {
protected JschSshClient ssh;
@BeforeTest
public void setupSsh() throws UnknownHostException {
ssh = createClient();
}
protected JschSshClient createClient() throws UnknownHostException {
Injector i = Guice.createInjector(module());
SshClient.Factory factory = i.getInstance(SshClient.Factory.class);
JschSshClient ssh = JschSshClient.class.cast(factory.create(new InetSocketAddress(InetAddress
.getLocalHost(), 22), "username", "password"));
return ssh;
}
protected Module module() {
return new JschSshClientModule();
}
public void testExceptionClassesRetry() {
assert ssh.shouldRetry(new JSchException("io error", new IOException("socket closed")));
assert ssh.shouldRetry(new JSchException("connect error", new ConnectException("problem")));
}
public void testExceptionMessagesRetry() {
assert ssh.shouldRetry(new JSchException(
"Session.connect: java.io.IOException: End of IO Stream Read"));
assert ssh.shouldRetry(new JSchException("Session.connect: invalid data"));
assert ssh.shouldRetry(new JSchException(
"Session.connect: java.net.SocketException: Connection reset"));
}
}