builder/digitalocean: Reattempt SSH handshake a few times

I ran into a few cases where the droplet was active and a TCP connection
could be made, but SSH wasn't running yet and the handshake failed. This
is a race condition with the machine boot. This change retries the SSH
handshake a few times before giving up.

/cc @pearkes
This commit is contained in:
Mitchell Hashimoto 2013-06-21 23:02:13 -07:00
parent 1ecfe4b274
commit 1ced19c3ce
1 changed file with 26 additions and 16 deletions

View File

@ -42,17 +42,19 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
} }
// Start trying to connect to SSH // Start trying to connect to SSH
connected := make(chan bool, 1) connected := make(chan error, 1)
connectQuit := make(chan bool, 1) connectQuit := make(chan bool, 1)
defer func() { defer func() {
connectQuit <- true connectQuit <- true
}() }()
var comm packer.Communicator
go func() { go func() {
var err error var err error
ui.Say("Connecting to the droplet via SSH...") ui.Say("Connecting to the droplet via SSH...")
attempts := 0 attempts := 0
handshakeAttempts := 0
for { for {
select { select {
case <-connectQuit: case <-connectQuit:
@ -69,7 +71,20 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
fmt.Sprintf("%s:%d", ipAddress, config.SSHPort), fmt.Sprintf("%s:%d", ipAddress, config.SSHPort),
10*time.Second) 10*time.Second)
if err == nil { if err == nil {
break log.Println("TCP connection made. Attempting SSH handshake.")
comm, err = ssh.New(s.conn, sshConfig)
if err == nil {
log.Println("Connected to SSH!")
break
}
handshakeAttempts += 1
log.Printf("SSH handshake error: %s", err)
if handshakeAttempts > 5 {
connected <- err
return
}
} }
// A brief sleep so we're not being overly zealous attempting // A brief sleep so we're not being overly zealous attempting
@ -77,7 +92,7 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
time.Sleep(500 * time.Millisecond) time.Sleep(500 * time.Millisecond)
} }
connected <- true connected <- nil
}() }()
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout) log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
@ -86,7 +101,14 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
ConnectWaitLoop: ConnectWaitLoop:
for { for {
select { select {
case <-connected: case err := <-connected:
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// We connected. Just break the loop. // We connected. Just break the loop.
break ConnectWaitLoop break ConnectWaitLoop
case <-timeout: case <-timeout:
@ -102,18 +124,6 @@ ConnectWaitLoop:
} }
} }
var comm packer.Communicator
if err == nil {
comm, err = ssh.New(s.conn, sshConfig)
}
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// Set the communicator on the state bag so it can be used later // Set the communicator on the state bag so it can be used later
state["communicator"] = comm state["communicator"] = comm