Remove logic to retry a connection that reuses an existing SSM Session

After testing it was found that once an session is terminated via an
instance restart, console termination, or SSM agent restart. Any active
session will essentially be terminated and unusable. So knowing that it
is always best to start a new session and let the old one timeout get
terminated.
This commit is contained in:
Wilken Rivera 2020-10-27 16:05:02 -04:00
parent dff9cde775
commit 646b973bd3
1 changed files with 26 additions and 45 deletions

View File

@ -6,7 +6,6 @@ import (
"fmt"
"log"
"os/exec"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
@ -36,7 +35,6 @@ type SSMDriver struct {
sessionParams ssm.StartSessionInput
pluginCmdFunc func(context.Context) error
retryConnection chan bool
retryAfterTermination chan bool
}
@ -57,7 +55,6 @@ func (d *SSMDriver) StartSession(ctx context.Context, input ssm.StartSessionInpu
return nil, fmt.Errorf("error encountered in starting session for instance %q: %s", aws.StringValue(input.Target), err)
}
d.retryConnection = make(chan bool, 1)
d.retryAfterTermination = make(chan bool, 1)
// Starts go routine that will keep listening to channels and retry the session creation/connection when needed.
// The log polling process will add data to the channels whenever a retryable error happens to the session or if it's terminated.
@ -71,24 +68,11 @@ func (d *SSMDriver) StartSession(ctx context.Context, input ssm.StartSessionInpu
return
}
if r {
log.Printf("[DEBUG] Restablishing SSM connection")
log.Printf("[DEBUG] Attempting to restablishing SSM connection")
_, _ = driver.StartSession(ctx, input)
// End this routine. Another routine will start.
return
}
case r, ok := <-driver.retryConnection:
if !ok {
return
}
// Tunnel is still open an we want to try to reconnect
if r {
log.Printf("[DEBUG] Retrying to establish SSM connection")
_, err := driver.StartSessionWithContext(ctx, input)
if err != nil {
return
}
}
}
}
}(ctx, d, input)
@ -156,34 +140,40 @@ func (d *SSMDriver) openTunnelForSession(ctx context.Context) error {
select {
case <-ctx.Done():
return
case output, ok := <-stderrCh:
case errorOut, ok := <-stderrCh:
if !ok {
stderrCh = nil
break
}
if output != "" {
if isRetryableError(output) {
log.Printf("[ERROR] Retryable error - %s: %s", prefix, output)
d.retryConnection <- true
continue
}
log.Printf("[ERROR] %s: %s", prefix, output)
if errorOut == "" {
continue
}
log.Printf("[ERROR] %s: %s", prefix, errorOut)
case output, ok := <-stdoutCh:
if !ok {
stdoutCh = nil
break
}
if output != "" {
log.Printf("[DEBUG] %s: %s", prefix, output)
if output == "" {
continue
}
log.Printf("[DEBUG] %s: %s", prefix, output)
}
if stdoutCh == nil && stderrCh == nil {
// It's possible that a remote SSM session was terminated prematurely due to a system reboot, or a termination
// on the AWS console, or that the SSMAgent service was stopped on the instance. This will try to reconnect
// until the build eventually fails or hits the ssh_timeout threshold.
log.Printf("[DEBUG] %s: %s", prefix, "active session has been terminated; stopping all log polling processes.")
d.retryAfterTermination <- true
// A call to StopSession will nullify any active sessions so only retry if StopSession has not be called,
// since StopSession will also close the retryAfterTermination channel
if d.session != nil {
d.retryAfterTermination <- true
}
return
}
}
@ -202,18 +192,6 @@ func (d *SSMDriver) openTunnelForSession(ctx context.Context) error {
return nil
}
func isRetryableError(output string) bool {
retryableError := []string{
"connection refused",
}
for _, err := range retryableError {
if strings.Contains(output, err) {
return true
}
}
return false
}
// StopSession terminates an active Session Manager session
func (d *SSMDriver) StopSession() error {
if d.session == nil || d.session.SessionId == nil {
@ -221,13 +199,16 @@ func (d *SSMDriver) StopSession() error {
aws.StringValue(d.sessionParams.Target))
}
_, err := d.SvcClient.TerminateSession(&ssm.TerminateSessionInput{SessionId: d.session.SessionId})
if err != nil {
err = fmt.Errorf("Error terminating SSM Session %q. Please terminate the session manually: %s", aws.StringValue(d.session.SessionId), err)
}
// Store session id before we nullify any active sessions
sid := aws.StringValue(d.session.SessionId)
// Stop retry polling process to avoid unwanted retries at this point
d.session = nil
close(d.retryAfterTermination)
close(d.retryConnection)
_, err := d.SvcClient.TerminateSession(&ssm.TerminateSessionInput{SessionId: aws.String(sid)})
if err != nil {
err = fmt.Errorf("Error terminating SSM Session %q. Please terminate the session manually: %s", sid, err)
}
return err
}