From c16c467e63c0e1e500c4dcdb20b3c7751a58fda3 Mon Sep 17 00:00:00 2001 From: Mitchell Hashimoto Date: Tue, 27 Aug 2013 22:12:21 -0700 Subject: [PATCH] communicator/ssh: heartbeat the SSH connection to detect drops [GH-200] --- CHANGELOG.md | 2 ++ communicator/ssh/communicator.go | 54 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9823ee4..cd8c99263 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ BUG FIXES: * core: Fixed a couple cases where a double ctrl-C could panic. * core: Template validation fails if an override is specified for a non-existent builder. [GH-336] +* core: The SSH connection is heartbeated so that drops can be + detected. [GH-200] * builder/amazon/instance: Remove check for ec2-ami-tools because it didn't allow absolute paths to work properly. [GH-330] * builder/digitalocean: Send a soft shutdown request so that files diff --git a/communicator/ssh/communicator.go b/communicator/ssh/communicator.go index 5b8450365..ed5af22d7 100644 --- a/communicator/ssh/communicator.go +++ b/communicator/ssh/communicator.go @@ -12,6 +12,8 @@ import ( "net" "os" "path/filepath" + "sync" + "time" ) type comm struct { @@ -80,10 +82,16 @@ func (c *comm) Start(cmd *packer.RemoteCmd) (err error) { return } + // A channel to keep track of our done state + doneCh := make(chan struct{}) + sessionLock := new(sync.Mutex) + timedOut := false + // Start a goroutine to wait for the session to end and set the // exit boolean and status. go func() { defer session.Close() + err := session.Wait() exitStatus := 0 if err != nil { @@ -93,8 +101,54 @@ func (c *comm) Start(cmd *packer.RemoteCmd) (err error) { } } + sessionLock.Lock() + defer sessionLock.Unlock() + + if timedOut { + // We timed out, so set the exit status to -1 + exitStatus = -1 + } + log.Printf("remote command exited with '%d': %s", exitStatus, cmd.Command) cmd.SetExited(exitStatus) + close(doneCh) + }() + + go func() { + failures := 0 + for { + dummy, err := c.config.Connection() + if err == nil { + dummy.Close() + } + + select { + case <-doneCh: + return + default: + } + + if err != nil { + log.Printf("background SSH connection checker failure: %s", err) + failures += 1 + } + + if failures < 5 { + time.Sleep(5 * time.Second) + continue + } + + // Acquire a lock in order to modify session state + sessionLock.Lock() + defer sessionLock.Unlock() + + // Kill the connection and mark that we timed out. + log.Printf("Too many SSH connection failures. Killing it!") + c.conn.Close() + timedOut = true + + return + } }() return