builder/digitalocean: Reattempt SSH handshake a few times
I ran into a few cases where the droplet was active and a TCP connection could be made, but SSH wasn't running yet, so the handshake failed: a race condition with the machine boot. This change retries the SSH handshake a few times, giving up after the sixth failed attempt. /cc @pearkes
This commit is contained in:
parent
1ecfe4b274
commit
1ced19c3ce
|
@ -42,17 +42,19 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start trying to connect to SSH
|
// Start trying to connect to SSH
|
||||||
connected := make(chan bool, 1)
|
connected := make(chan error, 1)
|
||||||
connectQuit := make(chan bool, 1)
|
connectQuit := make(chan bool, 1)
|
||||||
defer func() {
|
defer func() {
|
||||||
connectQuit <- true
|
connectQuit <- true
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
var comm packer.Communicator
|
||||||
go func() {
|
go func() {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
ui.Say("Connecting to the droplet via SSH...")
|
ui.Say("Connecting to the droplet via SSH...")
|
||||||
attempts := 0
|
attempts := 0
|
||||||
|
handshakeAttempts := 0
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-connectQuit:
|
case <-connectQuit:
|
||||||
|
@ -69,15 +71,28 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
|
||||||
fmt.Sprintf("%s:%d", ipAddress, config.SSHPort),
|
fmt.Sprintf("%s:%d", ipAddress, config.SSHPort),
|
||||||
10*time.Second)
|
10*time.Second)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
log.Println("TCP connection made. Attempting SSH handshake.")
|
||||||
|
comm, err = ssh.New(s.conn, sshConfig)
|
||||||
|
if err == nil {
|
||||||
|
log.Println("Connected to SSH!")
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
|
handshakeAttempts += 1
|
||||||
|
log.Printf("SSH handshake error: %s", err)
|
||||||
|
|
||||||
|
if handshakeAttempts > 5 {
|
||||||
|
connected <- err
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// A brief sleep so we're not being overly zealous attempting
|
// A brief sleep so we're not being overly zealous attempting
|
||||||
// to connect to the instance.
|
// to connect to the instance.
|
||||||
time.Sleep(500 * time.Millisecond)
|
time.Sleep(500 * time.Millisecond)
|
||||||
}
|
}
|
||||||
|
|
||||||
connected <- true
|
connected <- nil
|
||||||
}()
|
}()
|
||||||
|
|
||||||
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
|
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
|
||||||
|
@ -86,7 +101,14 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
|
||||||
ConnectWaitLoop:
|
ConnectWaitLoop:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-connected:
|
case err := <-connected:
|
||||||
|
if err != nil {
|
||||||
|
err := fmt.Errorf("Error connecting to SSH: %s", err)
|
||||||
|
state["error"] = err
|
||||||
|
ui.Error(err.Error())
|
||||||
|
return multistep.ActionHalt
|
||||||
|
}
|
||||||
|
|
||||||
// We connected. Just break the loop.
|
// We connected. Just break the loop.
|
||||||
break ConnectWaitLoop
|
break ConnectWaitLoop
|
||||||
case <-timeout:
|
case <-timeout:
|
||||||
|
@ -102,18 +124,6 @@ ConnectWaitLoop:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var comm packer.Communicator
|
|
||||||
if err == nil {
|
|
||||||
comm, err = ssh.New(s.conn, sshConfig)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
err := fmt.Errorf("Error connecting to SSH: %s", err)
|
|
||||||
state["error"] = err
|
|
||||||
ui.Error(err.Error())
|
|
||||||
return multistep.ActionHalt
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the communicator on the state bag so it can be used later
|
// Set the communicator on the state bag so it can be used later
|
||||||
state["communicator"] = comm
|
state["communicator"] = comm
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue