Issue 636: hone ssh code config + tests to make it easier to isolate cause of stderr hang

This commit is contained in:
Adrian Cole 2011-07-25 03:10:55 -07:00
parent efa574c175
commit 732491fe17
9 changed files with 64 additions and 109 deletions

View File

@ -138,6 +138,8 @@ public class RunScriptOnNodeAsInitScriptUsingSsh implements RunScriptOnNode {
} catch (SshException e) {
// If there's a problem with the sftp configuration, we can try via ssh exec
logger.warn(e, "<< (%s) problem using sftp [%s], attempting via sshexec", ssh.toString(), e.getMessage());
ssh.disconnect();
ssh.connect();
ssh.exec("rm " + initFile);
ssh.exec(Statements.appendFile(initFile, Splitter.on('\n').split(init.render(OsFamily.UNIX)),
AppendFile.MARKER + "_" + init.getInstanceName()).render(OsFamily.UNIX));

View File

@ -1,45 +0,0 @@
/**
*
* Copyright (C) 2011 Cloud Conscious, LLC. <info@cloudconscious.com>
*
* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.jclouds.ssh;
/**
* @author Adrian Cole
*/
public class ChannelNotOpenException extends SshException {
/** The serialVersionUID */
private static final long serialVersionUID = 129347129837129837l;
public ChannelNotOpenException() {
super();
}
public ChannelNotOpenException(String arg0, Throwable arg1) {
super(arg0, arg1);
}
public ChannelNotOpenException(String arg0) {
super(arg0);
}
public ChannelNotOpenException(Throwable arg0) {
super(arg0);
}
}

View File

@ -20,7 +20,6 @@ package org.jclouds.compute;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Predicates.and;
import static com.google.common.base.Predicates.not;
import static com.google.common.base.Throwables.getRootCause;
import static com.google.common.collect.Iterables.concat;
import static com.google.common.collect.Iterables.get;
import static com.google.common.collect.Iterables.getOnlyElement;
@ -266,7 +265,8 @@ public abstract class BaseComputeServiceLiveTest {
good.identity, "romeo"));
assert responses.size() == 0 : "shouldn't pass with a bad password\n" + responses;
} catch (RunScriptOnNodesException e) {
assert getRootCause(e).getMessage().contains("Auth fail") : e;
assert Iterables.any(e.getNodeErrors().values(), Predicates.instanceOf(AuthorizationException.class)) : e
.getNodeErrors().values() + "not authexception!";
}
runScriptWithCreds(group, os, good);
@ -401,12 +401,8 @@ public abstract class BaseComputeServiceLiveTest {
protected Map<? extends NodeMetadata, ExecResponse> runScriptWithCreds(final String group, OperatingSystem os,
Credentials creds) throws RunScriptOnNodesException {
try {
return client.runScriptOnNodesMatching(runningInGroup(group), buildScript(os), overrideCredentialsWith(creds)
.nameTask("runScriptWithCreds"));
} catch (SshException e) {
throw e;
}
return client.runScriptOnNodesMatching(runningInGroup(group), buildScript(os), overrideCredentialsWith(creds)
.nameTask("runScriptWithCreds"));
}
protected void checkNodes(Iterable<? extends NodeMetadata> nodes, String group) throws IOException {

View File

@ -43,11 +43,11 @@ import org.jclouds.io.Payload;
import org.jclouds.net.IPSocket;
import org.jclouds.predicates.RetryablePredicate;
import org.jclouds.predicates.SocketOpen;
import org.jclouds.rest.AuthorizationException;
import org.jclouds.rest.RestContext;
import org.jclouds.scriptbuilder.statements.login.AdminAccess;
import org.jclouds.scriptbuilder.statements.login.AdminAccess.Configuration;
import org.jclouds.ssh.SshClient;
import org.jclouds.ssh.SshException;
import org.jclouds.util.Strings2;
import org.testng.annotations.Test;
@ -154,7 +154,7 @@ public class StubComputeServiceIntegrationTest extends BaseComputeServiceLiveTes
expect(factory.create(new IPSocket("144.175.1.2", 22), new Credentials("foo", "privateKey"))).andReturn(
client2Foo);
expect(factory.create(new IPSocket("144.175.1.2", 22), new Credentials("root", "romeo"))).andThrow(
new SshException("Auth fail"));
new AuthorizationException("Auth fail", null));
// run script without backgrounding (via predicate)
client2.connect();

View File

@ -44,13 +44,14 @@ import org.jclouds.io.Payload;
import org.jclouds.io.Payloads;
import org.jclouds.logging.Logger;
import org.jclouds.net.IPSocket;
import org.jclouds.ssh.ChannelNotOpenException;
import org.jclouds.rest.AuthorizationException;
import org.jclouds.ssh.SshClient;
import org.jclouds.ssh.SshException;
import org.jclouds.util.Strings2;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Splitter;
import com.google.common.io.Closeables;
import com.google.inject.Inject;
@ -90,18 +91,23 @@ public class JschSshClient implements SshClient {
private final String password;
@Inject(optional = true)
@Named("jclouds.ssh.max_retries")
@Named("jclouds.ssh.max-retries")
@VisibleForTesting
int sshRetries = 5;
@Inject(optional = true)
@Named("jclouds.ssh.retryable_messages")
@Named("jclouds.ssh.retry-auth")
@VisibleForTesting
boolean retryAuth;
@Inject(optional = true)
@Named("jclouds.ssh.retryable-messages")
@VisibleForTesting
String retryableMessages = "failed to send channel request,channel is not opened,invalid data,End of IO Stream Read,Connection reset,connection is closed by foreign host,socket is not established";
@Inject(optional = true)
@Named("jclouds.ssh.retry_predicate")
private Predicate<Throwable> retryPredicate = or(instanceOf(ConnectException.class), instanceOf(IOException.class));
@Named("jclouds.ssh.retry-predicate")
Predicate<Throwable> retryPredicate = or(instanceOf(ConnectException.class), instanceOf(IOException.class));
@Resource
@Named("jclouds.ssh")
@ -166,7 +172,7 @@ public class JschSshClient implements SshClient {
java.util.Properties config = new java.util.Properties();
config.put("StrictHostKeyChecking", "no");
session.setConfig(config);
session.connect();
session.connect(timeout);
return session;
}
@ -178,7 +184,6 @@ public class JschSshClient implements SshClient {
protected <T, C extends Connection<T>> T acquire(C connection) {
connection.clear();
Exception e = null;
String errorMessage = String.format("(%s) error acquiring %s", toString(), connection);
for (int i = 0; i < sshRetries; i++) {
try {
@ -187,22 +192,18 @@ public class JschSshClient implements SshClient {
logger.debug("<< (%s) acquired %s", toString(), returnVal);
return returnVal;
} catch (Exception from) {
e = from;
connection.clear();
if (i == sshRetries)
if (i + 1 == sshRetries) {
throw propagate(from, errorMessage);
if (shouldRetry(from)) {
logger.warn("<< " + errorMessage + ": " + from.getMessage());
} else if (shouldRetry(from)) {
logger.warn(from, "<< " + errorMessage + ": " + from.getMessage());
backoffForAttempt(i + 1, errorMessage + ": " + from.getMessage());
continue;
}
throw propagate(from, errorMessage);
}
}
if (e != null)
throw propagate(e, errorMessage);
assert false : "should not reach here";
return null;
}
@ -226,11 +227,7 @@ public class JschSshClient implements SshClient {
checkConnected();
String channel = "sftp";
sftp = (ChannelSftp) session.openChannel(channel);
try {
sftp.connect();
} catch (JSchException e) {
throwChannelNotOpenExceptionOrPropagate(channel, e);
}
sftp.connect();
return sftp;
}
@ -240,11 +237,6 @@ public class JschSshClient implements SshClient {
}
};
public void throwChannelNotOpenExceptionOrPropagate(String channel, JSchException e) throws JSchException {
if (e.getMessage().indexOf("channel is not opened") != -1)
throw new ChannelNotOpenException(String.format("(%s) channel %s is not open", toString(), channel), e);
}
class GetConnection implements Connection<Payload> {
private final String path;
private ChannelSftp sftp;
@ -299,7 +291,6 @@ public class JschSshClient implements SshClient {
sftp.put(is, path);
} finally {
Closeables.closeQuietly(contents);
clear();
}
return null;
}
@ -317,8 +308,13 @@ public class JschSshClient implements SshClient {
@VisibleForTesting
boolean shouldRetry(Exception from) {
return any(getCausalChain(from), retryPredicate)
|| any(Splitter.on(",").split(retryableMessages), causalChainHasMessageContaining(from));
Predicate<Throwable> predicate = retryAuth ? Predicates.<Throwable>or(retryPredicate, instanceOf(AuthorizationException.class))
: retryPredicate;
if (any(getCausalChain(from), predicate))
return true;
if (!retryableMessages.equals(""))
return any(Splitter.on(",").split(retryableMessages), causalChainHasMessageContaining(from));
return false;
}
@VisibleForTesting
@ -344,9 +340,10 @@ public class JschSshClient implements SshClient {
backoffLimitedRetryHandler.imposeBackoffExponentialDelay(200L, 2, retryAttempt, sshRetries, message);
}
private SshException propagate(Exception e, String message) {
SshException propagate(Exception e, String message) {
message += ": " + e.getMessage();
logger.error(e, "<< " + message);
if (e.getMessage() != null && e.getMessage().indexOf("Auth fail") != -1)
throw new AuthorizationException("(" + toString() + ") " + message, e);
throw e instanceof SshException ? SshException.class.cast(e) : new SshException(
"(" + toString() + ") " + message, e);
}
@ -382,11 +379,7 @@ public class JschSshClient implements SshClient {
executor.setCommand(command);
ByteArrayOutputStream error = new ByteArrayOutputStream();
executor.setErrStream(error);
try {
executor.connect();
} catch (JSchException e) {
throwChannelNotOpenExceptionOrPropagate("exec", e);
}
executor.connect();
return executor;
}
@ -414,8 +407,8 @@ public class JschSshClient implements SshClient {
@Override
public ExecResponse create() throws Exception {
executor = acquire(execConnection(command));
try {
executor = acquire(execConnection(command));
String outputString = Strings2.toStringAndClose(executor.getInputStream());
int errorStatus = executor.getExitStatus();
int i = 0;
@ -434,8 +427,7 @@ public class JschSshClient implements SshClient {
String errorString = "";
return new ExecResponse(outputString, errorString, errorStatus);
} finally {
if (executor != null)
executor.disconnect();
clear();
}
}

View File

@ -24,6 +24,7 @@ import java.net.UnknownHostException;
import org.jclouds.domain.Credentials;
import org.jclouds.net.IPSocket;
import org.jclouds.rest.AuthorizationException;
import org.jclouds.ssh.SshClient;
import org.jclouds.ssh.jsch.config.JschSshClientModule;
import org.testng.annotations.BeforeTest;
@ -62,16 +63,28 @@ public class JschSshClientTest {
return new JschSshClientModule();
}
public void testExceptionClassesRetry() {
@Test(expectedExceptions = AuthorizationException.class)
public void testPropateConvertsAuthException() {
ssh.propagate(new JSchException("Auth fail"), "");
}
public void testExceptionClassesRetry() {
assert ssh.shouldRetry(new JSchException("io error", new IOException("socket closed")));
assert ssh.shouldRetry(new JSchException("connect error", new ConnectException("problem")));
assert ssh.shouldRetry(new IOException("channel %s is not open", new NullPointerException()));
assert ssh.shouldRetry(new IOException("channel %s is not open", new NullPointerException(null)));
}
public void testOnlyRetryAuthWhenSet() throws UnknownHostException {
JschSshClient ssh1 = createClient();
assert !ssh1.shouldRetry(new AuthorizationException("problem", null));
ssh1.retryAuth = true;
assert ssh1.shouldRetry(new AuthorizationException("problem", null));
}
public void testExceptionMessagesRetry() {
assert !ssh.shouldRetry(new NullPointerException(""));
assert !ssh.shouldRetry(new NullPointerException((String) null));
assert ssh.shouldRetry(new JSchException("Session.connect: java.io.IOException: End of IO Stream Read"));
assert ssh.shouldRetry(new JSchException("Session.connect: invalid data"));
assert ssh.shouldRetry(new JSchException("Session.connect: java.net.SocketException: Connection reset"));
@ -81,7 +94,7 @@ public class JschSshClientTest {
// http://sourceforge.net/mailarchive/forum.php?thread_name=CAARMrHVhASeku48xoAgWEb-nEpUuYkMA03PoA5TvvFdk%3DjGKMA%40mail.gmail.com&forum_name=jsch-users
assert !ssh.shouldRetry(new SftpException(ChannelSftp.SSH_FX_FAILURE, new NullPointerException().toString()));
}
public void testCausalChainHasMessageContaining() {
assert ssh.causalChainHasMessageContaining(
new JSchException("Session.connect: java.io.IOException: End of IO Stream Read")).apply(

View File

@ -46,11 +46,8 @@ public class AWSEC2PropertiesBuilder extends org.jclouds.ec2.EC2PropertiesBuilde
properties.setProperty(PROPERTY_TIMEOUT_NODE_SUSPENDED, 120 * 1000 + "");
// auth fail sometimes happens in EC2, as the rc.local script that injects the
// authorized key executes after ssh has started
properties.setProperty("jclouds.ssh.max_retries", "7");
properties
.setProperty(
"jclouds.ssh.retryable_messages",
"Auth fail,failed to send channel request,channel is not opened,invalid data,End of IO Stream Read,Connection reset,socket is not established,connection is closed by foreign host,socket is not established");
properties.setProperty("jclouds.ssh.max-retries", "7");
properties.setProperty("jclouds.ssh.retry-auth", "true");
properties.setProperty(PROPERTY_ENDPOINT, "https://ec2.us-east-1.amazonaws.com");
properties.putAll(Region.regionProperties());
properties.remove(PROPERTY_EC2_AMI_OWNERS);

View File

@ -72,7 +72,7 @@ public class AWSEC2ComputeServiceContextModule extends BaseComputeServiceContext
install(new EC2BindComputeSuppliersByClass());
bind(ReviseParsedImage.class).to(AWSEC2ReviseParsedImage.class);
bind(CreateKeyPairAndSecurityGroupsAsNeededAndReturnRunOptions.class).to(
CreateKeyPairPlacementAndSecurityGroupsAsNeededAndReturnRunOptions.class);
CreateKeyPairPlacementAndSecurityGroupsAsNeededAndReturnRunOptions.class);
bind(EC2HardwareSupplier.class).to(AWSEC2HardwareSupplier.class);
bind(EC2TemplateBuilderImpl.class).to(AWSEC2TemplateBuilderImpl.class);
bind(EC2GetNodeMetadataStrategy.class).to(AWSEC2GetNodeMetadataStrategy.class);
@ -90,14 +90,14 @@ public class AWSEC2ComputeServiceContextModule extends BaseComputeServiceContext
@Provides
@Singleton
protected Supplier<Map<RegionAndName, ? extends Image>> provideRegionAndNameToImageSupplierCache(
@Named(PROPERTY_SESSION_INTERVAL) long seconds, final AWSRegionAndNameToImageSupplier supplier) {
@Named(PROPERTY_SESSION_INTERVAL) long seconds, final AWSRegionAndNameToImageSupplier supplier) {
return new MemoizedRetryOnTimeOutButNotOnAuthorizationExceptionSupplier<Map<RegionAndName, ? extends Image>>(
authException, seconds, new Supplier<Map<RegionAndName, ? extends Image>>() {
@Override
public Map<RegionAndName, ? extends Image> get() {
return supplier.get();
}
});
authException, seconds, new Supplier<Map<RegionAndName, ? extends Image>>() {
@Override
public Map<RegionAndName, ? extends Image> get() {
return supplier.get();
}
});
}
@Override

View File

@ -38,7 +38,7 @@ public class SlicehostPropertiesBuilder extends PropertiesBuilder {
properties.setProperty(PROPERTY_ISO3166_CODES, "US-IL,US-TX,US-MO");
properties.setProperty(PROPERTY_ENDPOINT, "https://api.slicehost.com");
properties.setProperty(PROPERTY_API_VERSION, "1.4.1.1");
properties.setProperty("jclouds.ssh.max_retries", "8");
properties.setProperty("jclouds.ssh.max-retries", "8");
return properties;
}