builder/amazonebs: retry SSH handshakes [GH-130]

This commit is contained in:
Mitchell Hashimoto 2013-07-07 20:37:43 -07:00
parent 019ab13f53
commit 3e8678f76d
2 changed files with 117 additions and 90 deletions

View File

@ -22,6 +22,7 @@ BUG FIXES:
* core: Non-200 response codes on downloads now show proper errors.
[GH-141]
* amazon-ebs: SSH handshake is retried. [GH-130]
* vagrant: The `BuildName` template propery works properly in
the output path.
* vagrant: Properly configure the provider-specific post-processors so

View File

@ -14,10 +14,64 @@ import (
)
type stepConnectSSH struct {
conn net.Conn
cancel bool
conn net.Conn
}
func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction {
config := state["config"].(config)
ui := state["ui"].(packer.Ui)
var comm packer.Communicator
var err error
waitDone := make(chan bool, 1)
go func() {
comm, err = s.waitForSSH(state)
waitDone <- true
}()
log.Printf("Waiting for SSH, up to timeout: %s", config.SSHTimeout.String())
timeout := time.After(config.SSHTimeout)
WaitLoop:
for {
// Wait for either SSH to become available, a timeout to occur,
// or an interrupt to come through.
select {
case <-waitDone:
if err != nil {
ui.Error(fmt.Sprintf("Error waiting for SSH: %s", err))
return multistep.ActionHalt
}
state["communicator"] = comm
break WaitLoop
case <-timeout:
ui.Error("Timeout waiting for SSH.")
s.cancel = true
return multistep.ActionHalt
case <-time.After(1 * time.Second):
if _, ok := state[multistep.StateCancelled]; ok {
log.Println("Interrupt detected, quitting waiting for SSH.")
return multistep.ActionHalt
}
}
}
return multistep.ActionContinue
}
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
if s.conn != nil {
s.conn.Close()
s.conn = nil
}
}
// This blocks until SSH becomes available, and sends the communicator
// on the given channel.
func (s *stepConnectSSH) waitForSSH(state map[string]interface{}) (packer.Communicator, error) {
config := state["config"].(config)
instance := state["instance"].(*ec2.Instance)
privateKey := state["privateKey"].(string)
@ -28,98 +82,70 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction
keyring := &ssh.SimpleKeychain{}
err := keyring.AddPEMKey(privateKey)
if err != nil {
err := fmt.Errorf("Error setting up SSH config: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// Build the actual SSH client configuration
sshConfig := &gossh.ClientConfig{
User: config.SSHUsername,
Auth: []gossh.ClientAuth{
gossh.ClientAuthKeyring(keyring),
},
}
// Start trying to connect to SSH
connected := make(chan bool, 1)
connectQuit := make(chan bool, 1)
defer func() {
connectQuit <- true
}()
go func() {
var err error
ui.Say("Connecting to the instance via SSH...")
attempts := 0
for {
select {
case <-connectQuit:
return
default:
}
attempts += 1
log.Printf(
"Opening TCP conn for SSH to %s:%d (attempt %d)",
instance.DNSName, config.SSHPort, attempts)
s.conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
if err == nil {
break
}
// A brief sleep so we're not being overly zealous attempting
// to connect to the instance.
time.Sleep(500 * time.Millisecond)
}
connected <- true
}()
log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout)
timeout := time.After(config.SSHTimeout)
ConnectWaitLoop:
for {
select {
case <-connected:
// We connected. Just break the loop.
break ConnectWaitLoop
case <-timeout:
err := errors.New("Timeout waiting for SSH to become available.")
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
case <-time.After(1 * time.Second):
if _, ok := state[multistep.StateCancelled]; ok {
log.Println("Interrupt detected, quitting waiting for SSH.")
return multistep.ActionHalt
}
}
return nil, fmt.Errorf("Error setting up SSH config: %s", err)
}
ui.Say("Waiting for SSH to become available...")
var comm packer.Communicator
if err == nil {
comm, err = ssh.New(s.conn, sshConfig)
var nc net.Conn
for {
if nc != nil {
nc.Close()
}
time.Sleep(5 * time.Second)
if s.cancel {
log.Println("SSH wait cancelled. Exiting loop.")
return nil, errors.New("SSH wait cancelled")
}
// Attempt to connect to SSH port
log.Printf(
"Opening TCP conn for SSH to %s:%d",
instance.DNSName, config.SSHPort)
nc, err := net.Dial("tcp",
fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort))
if err != nil {
log.Printf("TCP connection to SSH ip/port failed: %s", err)
continue
}
// Build the actual SSH client configuration
sshConfig := &gossh.ClientConfig{
User: config.SSHUsername,
Auth: []gossh.ClientAuth{
gossh.ClientAuthKeyring(keyring),
},
}
sshConnectSuccess := make(chan bool, 1)
go func() {
comm, err = ssh.New(nc, sshConfig)
if err != nil {
log.Printf("SSH connection fail: %s", err)
sshConnectSuccess <- false
return
}
sshConnectSuccess <- true
}()
select {
case success := <-sshConnectSuccess:
if !success {
continue
}
case <-time.After(5 * time.Second):
log.Printf("SSH handshake timeout. Trying again.")
continue
}
ui.Say("Connected via SSH!")
break
}
if err != nil {
err := fmt.Errorf("Error connecting to SSH: %s", err)
state["error"] = err
ui.Error(err.Error())
return multistep.ActionHalt
}
// Set the communicator on the state bag so it can be used later
state["communicator"] = comm
return multistep.ActionContinue
}
func (s *stepConnectSSH) Cleanup(map[string]interface{}) {
if s.conn != nil {
s.conn.Close()
}
// Store the connection so we can close it later
s.conn = nc
return comm, nil
}