builder/amazonebs: retry SSH handshakes [GH-130]
This commit is contained in:
parent
019ab13f53
commit
3e8678f76d
|
@ -22,6 +22,7 @@ BUG FIXES:
|
|||
|
||||
* core: Non-200 response codes on downloads now show proper errors.
|
||||
[GH-141]
|
||||
* amazon-ebs: SSH handshake is retried. [GH-130]
|
||||
* vagrant: The `BuildName` template property works properly in
|
||||
the output path.
|
||||
* vagrant: Properly configure the provider-specific post-processors so
|
||||
|
|
|
@ -14,10 +14,64 @@ import (
|
|||
)
|
||||
|
||||
// stepConnectSSH is the build step that waits for SSH to become available
// on the instance and establishes the communicator.
type stepConnectSSH struct {
	// cancel is set by Run to tell the waitForSSH retry loop to stop.
	cancel bool

	// conn is the network connection backing the communicator; it is
	// closed in Cleanup.
	conn net.Conn
}
|
||||
|
||||
func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction {
|
||||
config := state["config"].(config)
|
||||
ui := state["ui"].(packer.Ui)
|
||||
|
||||
var comm packer.Communicator
|
||||
var err error
|
||||
|
||||
waitDone := make(chan bool, 1)
|
||||
go func() {
|
||||
comm, err = s.waitForSSH(state)
|
||||
waitDone <- true
|
||||
}()
|
||||
|
||||
log.Printf("Waiting for SSH, up to timeout: %s", config.SSHTimeout.String())
|
||||
|
||||
timeout := time.After(config.SSHTimeout)
|
||||
WaitLoop:
|
||||
for {
|
||||
// Wait for either SSH to become available, a timeout to occur,
|
||||
// or an interrupt to come through.
|
||||
select {
|
||||
case <-waitDone:
|
||||
if err != nil {
|
||||
ui.Error(fmt.Sprintf("Error waiting for SSH: %s", err))
|
||||
return multistep.ActionHalt
|
||||
}
|
||||
|
||||
state["communicator"] = comm
|
||||
break WaitLoop
|
||||
case <-timeout:
|
||||
ui.Error("Timeout waiting for SSH.")
|
||||
s.cancel = true
|
||||
return multistep.ActionHalt
|
||||
case <-time.After(1 * time.Second):
|
||||
if _, ok := state[multistep.StateCancelled]; ok {
|
||||
log.Println("Interrupt detected, quitting waiting for SSH.")
|
||||
return multistep.ActionHalt
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return multistep.ActionContinue
|
||||
}
|
||||
|
||||
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
|
||||
if s.conn != nil {
|
||||
s.conn.Close()
|
||||
s.conn = nil
|
||||
}
|
||||
}
|
||||
|
||||
// This blocks until SSH becomes available, and returns the communicator
|
||||
// (or an error if the wait is cancelled or the SSH config is invalid).
|
||||
func (s *stepConnectSSH) waitForSSH(state map[string]interface{}) (packer.Communicator, error) {
|
||||
config := state["config"].(config)
|
||||
instance := state["instance"].(*ec2.Instance)
|
||||
privateKey := state["privateKey"].(string)
|
||||
|
@ -28,98 +82,70 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
|
|||
keyring := &ssh.SimpleKeychain{}
|
||||
err := keyring.AddPEMKey(privateKey)
|
||||
if err != nil {
|
||||
err := fmt.Errorf("Error setting up SSH config: %s", err)
|
||||
state["error"] = err
|
||||
ui.Error(err.Error())
|
||||
return multistep.ActionHalt
|
||||
}
|
||||
|
||||
// Build the actual SSH client configuration
|
||||
sshConfig := &gossh.ClientConfig{
|
||||
User: config.SSHUsername,
|
||||
Auth: []gossh.ClientAuth{
|
||||
gossh.ClientAuthKeyring(keyring),
|
||||
},
|
||||
}
|
||||
|
||||
// Start trying to connect to SSH
|
||||
connected := make(chan bool, 1)
|
||||
connectQuit := make(chan bool, 1)
|
||||
defer func() {
|
||||
connectQuit <- true
|
||||
}()
|
||||
|
||||
go func() {
|
||||
var err error
|
||||
|
||||
ui.Say("Connecting to the instance via SSH...")
|
||||
attempts := 0
|
||||
for {
|
||||
select {
|
||||
case <-connectQuit:
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
attempts += 1
|
||||
log.Printf(
|
||||
"Opening TCP conn for SSH to %s:%d (attempt %d)",
|
||||
instance.DNSName, config.SSHPort, attempts)
|
||||
s.conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
|
||||
// A brief sleep so we're not being overly zealous attempting
|
||||
// to connect to the instance.
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
}
|
||||
|
||||
connected <- true
|
||||
}()
|
||||
|
||||
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
|
||||
timeout := time.After(config.SSHTimeout)
|
||||
|
||||
ConnectWaitLoop:
|
||||
for {
|
||||
select {
|
||||
case <-connected:
|
||||
// We connected. Just break the loop.
|
||||
break ConnectWaitLoop
|
||||
case <-timeout:
|
||||
err := errors.New("Timeout waiting for SSH to become available.")
|
||||
state["error"] = err
|
||||
ui.Error(err.Error())
|
||||
return multistep.ActionHalt
|
||||
case <-time.After(1 * time.Second):
|
||||
if _, ok := state[multistep.StateCancelled]; ok {
|
||||
log.Println("Interrupt detected, quitting waiting for SSH.")
|
||||
return multistep.ActionHalt
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("Error setting up SSH config: %s", err)
|
||||
}
|
||||
|
||||
ui.Say("Waiting for SSH to become available...")
|
||||
var comm packer.Communicator
|
||||
if err == nil {
|
||||
comm, err = ssh.New(s.conn, sshConfig)
|
||||
var nc net.Conn
|
||||
for {
|
||||
if nc != nil {
|
||||
nc.Close()
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
|
||||
if s.cancel {
|
||||
log.Println("SSH wait cancelled. Exiting loop.")
|
||||
return nil, errors.New("SSH wait cancelled")
|
||||
}
|
||||
|
||||
// Attempt to connect to SSH port
|
||||
log.Printf(
|
||||
"Opening TCP conn for SSH to %s:%d",
|
||||
instance.DNSName, config.SSHPort)
|
||||
nc, err := net.Dial("tcp",
|
||||
fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
|
||||
if err != nil {
|
||||
log.Printf("TCP connection to SSH ip/port failed: %s", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Build the actual SSH client configuration
|
||||
sshConfig := &gossh.ClientConfig{
|
||||
User: config.SSHUsername,
|
||||
Auth: []gossh.ClientAuth{
|
||||
gossh.ClientAuthKeyring(keyring),
|
||||
},
|
||||
}
|
||||
|
||||
sshConnectSuccess := make(chan bool, 1)
|
||||
go func() {
|
||||
comm, err = ssh.New(nc, sshConfig)
|
||||
if err != nil {
|
||||
log.Printf("SSH connection fail: %s", err)
|
||||
sshConnectSuccess <- false
|
||||
return
|
||||
}
|
||||
|
||||
sshConnectSuccess <- true
|
||||
}()
|
||||
|
||||
select {
|
||||
case success := <-sshConnectSuccess:
|
||||
if !success {
|
||||
continue
|
||||
}
|
||||
case <-time.After(5 * time.Second):
|
||||
log.Printf("SSH handshake timeout. Trying again.")
|
||||
continue
|
||||
}
|
||||
|
||||
ui.Say("Connected via SSH!")
|
||||
break
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
err := fmt.Errorf("Error connecting to SSH: %s", err)
|
||||
state["error"] = err
|
||||
ui.Error(err.Error())
|
||||
return multistep.ActionHalt
|
||||
}
|
||||
|
||||
// Set the communicator on the state bag so it can be used later
|
||||
state["communicator"] = comm
|
||||
|
||||
return multistep.ActionContinue
|
||||
}
|
||||
|
||||
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
|
||||
if s.conn != nil {
|
||||
s.conn.Close()
|
||||
}
|
||||
// Store the connection so we can close it later
|
||||
s.conn = nc
|
||||
return comm, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue