builder/digitalocean: Reattempt SSH handshake a few times

I ran into a few cases where the droplet was active and a TCP connection
could be made, but SSH wasn't running yet and the handshake failed: a
race condition with the machine boot. This will retry the SSH handshake
a few times.

/cc @pearkes
This commit is contained in:
Mitchell Hashimoto 2013-06-21 23:02:13 -07:00
parent 1ecfe4b274
commit 1ced19c3ce
1 changed file with 26 additions and 16 deletions

View File

@ -42,17 +42,19 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
}
// Start trying to connect to SSH
connected := make(chan bool, 1)
connected := make(chan error, 1)
connectQuit := make(chan bool, 1)
defer func() {
connectQuit <- true
}()
var comm packer.Communicator
go func() {
var err error
ui.Say("Connecting to the droplet via SSH...")
attempts := 0
handshakeAttempts := 0
for {
select {
case <-connectQuit:
@ -69,7 +71,20 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
fmt.Sprintf("%s:%d", ipAddress, config.SSHPort),
10*time.Second)
if err == nil {
break
log.Println("TCP connection made. Attempting SSH handshake.")
comm, err = ssh.New(s.conn, sshConfig)
if err == nil {
log.Println("Connected to SSH!")
break
}
handshakeAttempts += 1
log.Printf("SSH handshake error: %s", err)
if handshakeAttempts > 5 {
connected <- err
return
}
}
// A brief sleep so we're not being overly zealous attempting
@ -77,7 +92,7 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
time.Sleep(500 * time.Millisecond)
}
connected <- true
connected <- nil
}()
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
@ -86,7 +101,14 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
ConnectWaitLoop:
for {
select {
case <-connected:
case err := <-connected:
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// We connected. Just break the loop.
break ConnectWaitLoop
case <-timeout:
@ -102,18 +124,6 @@ ConnectWaitLoop:
}
}
var comm packer.Communicator
if err == nil {
comm, err = ssh.New(s.conn, sshConfig)
}
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// Set the communicator on the state bag so it can be used later
state["communicator"] = comm