From 646b973bd370c501fd075e9b1c208c34deb8600d Mon Sep 17 00:00:00 2001 From: Wilken Rivera Date: Tue, 27 Oct 2020 16:05:02 -0400 Subject: [PATCH] Remove logic to retry a connection that reuses an existing SSM Session After testing it was found that once an session is terminated via an instance restart, console termination, or SSM agent restart. Any active session will essentially be terminated and unusable. So knowing that it is always best to start a new session and let the old one timeout get terminated. --- builder/amazon/common/ssm_driver.go | 71 +++++++++++------------------ 1 file changed, 26 insertions(+), 45 deletions(-) diff --git a/builder/amazon/common/ssm_driver.go b/builder/amazon/common/ssm_driver.go index 2314cf3f9..9b95b6fe3 100644 --- a/builder/amazon/common/ssm_driver.go +++ b/builder/amazon/common/ssm_driver.go @@ -6,7 +6,6 @@ import ( "fmt" "log" "os/exec" - "strings" "time" "github.com/aws/aws-sdk-go/aws" @@ -36,7 +35,6 @@ type SSMDriver struct { sessionParams ssm.StartSessionInput pluginCmdFunc func(context.Context) error - retryConnection chan bool retryAfterTermination chan bool } @@ -57,7 +55,6 @@ func (d *SSMDriver) StartSession(ctx context.Context, input ssm.StartSessionInpu return nil, fmt.Errorf("error encountered in starting session for instance %q: %s", aws.StringValue(input.Target), err) } - d.retryConnection = make(chan bool, 1) d.retryAfterTermination = make(chan bool, 1) // Starts go routine that will keep listening to channels and retry the session creation/connection when needed. // The log polling process will add data to the channels whenever a retryable error happens to the session or if it's terminated. @@ -71,24 +68,11 @@ func (d *SSMDriver) StartSession(ctx context.Context, input ssm.StartSessionInpu return } if r { - log.Printf("[DEBUG] Restablishing SSM connection") + log.Printf("[DEBUG] Attempting to restablishing SSM connection") _, _ = driver.StartSession(ctx, input) // End this routine. Another routine will start. return } - case r, ok := <-driver.retryConnection: - if !ok { - return - } - - // Tunnel is still open an we want to try to reconnect - if r { - log.Printf("[DEBUG] Retrying to establish SSM connection") - _, err := driver.StartSessionWithContext(ctx, input) - if err != nil { - return - } - } } } }(ctx, d, input) @@ -156,34 +140,40 @@ func (d *SSMDriver) openTunnelForSession(ctx context.Context) error { select { case <-ctx.Done(): return - case output, ok := <-stderrCh: + case errorOut, ok := <-stderrCh: if !ok { stderrCh = nil break } - if output != "" { - if isRetryableError(output) { - log.Printf("[ERROR] Retryable error - %s: %s", prefix, output) - d.retryConnection <- true - continue - } - log.Printf("[ERROR] %s: %s", prefix, output) + if errorOut == "" { + continue } + + log.Printf("[ERROR] %s: %s", prefix, errorOut) case output, ok := <-stdoutCh: if !ok { stdoutCh = nil break } - if output != "" { - log.Printf("[DEBUG] %s: %s", prefix, output) + if output == "" { + continue } + log.Printf("[DEBUG] %s: %s", prefix, output) } if stdoutCh == nil && stderrCh == nil { + // It's possible that a remote SSM session was terminated prematurely due to a system reboot, or a termination + // on the AWS console, or that the SSMAgent service was stopped on the instance. This will try to reconnect + // until the build eventually fails or hits the ssh_timeout threshold. log.Printf("[DEBUG] %s: %s", prefix, "active session has been terminated; stopping all log polling processes.") - d.retryAfterTermination <- true + + // A call to StopSession will nullify any active sessions so only retry if StopSession has not be called, + // since StopSession will also close the retryAfterTermination channel + if d.session != nil { + d.retryAfterTermination <- true + } return } } @@ -202,18 +192,6 @@ func (d *SSMDriver) openTunnelForSession(ctx context.Context) error { return nil } -func isRetryableError(output string) bool { - retryableError := []string{ - "connection refused", - } - for _, err := range retryableError { - if strings.Contains(output, err) { - return true - } - } - return false -} - // StopSession terminates an active Session Manager session func (d *SSMDriver) StopSession() error { if d.session == nil || d.session.SessionId == nil { @@ -221,13 +199,16 @@ func (d *SSMDriver) StopSession() error { aws.StringValue(d.sessionParams.Target)) } - _, err := d.SvcClient.TerminateSession(&ssm.TerminateSessionInput{SessionId: d.session.SessionId}) - if err != nil { - err = fmt.Errorf("Error terminating SSM Session %q. Please terminate the session manually: %s", aws.StringValue(d.session.SessionId), err) - } + // Store session id before we nullify any active sessions + sid := aws.StringValue(d.session.SessionId) + // Stop retry polling process to avoid unwanted retries at this point + d.session = nil close(d.retryAfterTermination) - close(d.retryConnection) + _, err := d.SvcClient.TerminateSession(&ssm.TerminateSessionInput{SessionId: aws.String(sid)}) + if err != nil { + err = fmt.Errorf("Error terminating SSM Session %q. Please terminate the session manually: %s", sid, err) + } return err }