diff --git a/CHANGELOG.md b/CHANGELOG.md index 9e8a6f269..8f73e247e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ BUG FIXES: * core: Non-200 response codes on downloads now show proper errors. [GH-141] +* amazon-ebs: SSH handshake is retried. [GH-130] * vagrant: The `BuildName` template propery works properly in the output path. * vagrant: Properly configure the provider-specific post-processors so diff --git a/builder/amazonebs/step_connect_ssh.go b/builder/amazonebs/step_connect_ssh.go index 770ec27c2..903439341 100644 --- a/builder/amazonebs/step_connect_ssh.go +++ b/builder/amazonebs/step_connect_ssh.go @@ -14,10 +14,70 @@ import ( ) type stepConnectSSH struct { - conn net.Conn + // cancel is set by Run to make the waitForSSH retry loop exit. + cancel bool + conn net.Conn } +// Run waits for SSH on the instance in a background goroutine and halts on error, on +// config.SSHTimeout expiring, or on interrupt; on success it stores the communicator in state. func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction { + config := state["config"].(config) + ui := state["ui"].(packer.Ui) + + var comm packer.Communicator + var err error + + waitDone := make(chan bool, 1) + go func() { + comm, err = s.waitForSSH(state) + waitDone <- true + }() + + log.Printf("Waiting for SSH, up to timeout: %s", config.SSHTimeout.String()) + + timeout := time.After(config.SSHTimeout) +WaitLoop: + for { + // Wait for either SSH to become available, a timeout to occur, + // or an interrupt to come through.
+ select { + case <-waitDone: + if err != nil { + ui.Error(fmt.Sprintf("Error waiting for SSH: %s", err)) + return multistep.ActionHalt + } + + state["communicator"] = comm + break WaitLoop + case <-timeout: + ui.Error("Timeout waiting for SSH.") + s.cancel = true + return multistep.ActionHalt + case <-time.After(1 * time.Second): + if _, ok := state[multistep.StateCancelled]; ok { + // Stop the waitForSSH goroutine too; otherwise it keeps retrying after we halt. + s.cancel = true + log.Println("Interrupt detected, quitting waiting for SSH.") + return multistep.ActionHalt + } + } + } + + return multistep.ActionContinue +} + +// Cleanup closes the TCP connection stored by waitForSSH, if there is one. +func (s *stepConnectSSH) Cleanup(map[string]interface{}) { + if s.conn != nil { + s.conn.Close() + s.conn = nil + } +} + +// waitForSSH retries the TCP connection and SSH handshake until one succeeds or s.cancel +// is set, then returns the resulting communicator (or an error). +func (s *stepConnectSSH) waitForSSH(state map[string]interface{}) (packer.Communicator, error) { config := state["config"].(config) instance := state["instance"].(*ec2.Instance) privateKey := state["privateKey"].(string) @@ -28,98 +88,72 @@ func (s *stepConnectSSH) Run(state map[string]interface{}) multistep.StepAction keyring := &ssh.SimpleKeychain{} err := keyring.AddPEMKey(privateKey) if err != nil { - err := fmt.Errorf("Error setting up SSH config: %s", err) - state["error"] = err - ui.Error(err.Error()) - return multistep.ActionHalt - } - - // Build the actual SSH client configuration - sshConfig := &gossh.ClientConfig{ - User: config.SSHUsername, - Auth: []gossh.ClientAuth{ - gossh.ClientAuthKeyring(keyring), - }, - } - - // Start trying to connect to SSH - connected := make(chan bool, 1) - connectQuit := make(chan bool, 1) - defer func() { - connectQuit <- true - }() - - go func() { - var err error - - ui.Say("Connecting to the instance via SSH...") - attempts := 0 - for { - select { - case <-connectQuit: - return - default: - } - - attempts += 1 - log.Printf( - "Opening TCP conn for SSH to %s:%d (attempt %d)", - instance.DNSName, config.SSHPort, attempts) - s.conn, err = net.Dial("tcp", fmt.Sprintf("%s:%d",
instance.DNSName, config.SSHPort)) - if err == nil { - break - } - - // A brief sleep so we're not being overly zealous attempting - // to connect to the instance. - time.Sleep(500 * time.Millisecond) - } - - connected <- true - }() - - log.Printf("Waiting up to %s for SSH connection", config.SSHTimeout) - timeout := time.After(config.SSHTimeout) - -ConnectWaitLoop: - for { - select { - case <-connected: - // We connected. Just break the loop. - break ConnectWaitLoop - case <-timeout: - err := errors.New("Timeout waiting for SSH to become available.") - state["error"] = err - ui.Error(err.Error()) - return multistep.ActionHalt - case <-time.After(1 * time.Second): - if _, ok := state[multistep.StateCancelled]; ok { - log.Println("Interrupt detected, quitting waiting for SSH.") - return multistep.ActionHalt - } - } + return nil, fmt.Errorf("Error setting up SSH config: %s", err) } + ui.Say("Waiting for SSH to become available...") var comm packer.Communicator - if err == nil { - comm, err = ssh.New(s.conn, sshConfig) + var nc net.Conn + for { + if nc != nil { + nc.Close() + } + + time.Sleep(5 * time.Second) + + if s.cancel { + log.Println("SSH wait cancelled.
Exiting loop.") + return nil, errors.New("SSH wait cancelled") + } + + // Attempt to connect to SSH port + log.Printf( + "Opening TCP conn for SSH to %s:%d", + instance.DNSName, config.SSHPort) + // Plain assignment (not :=) so the outer nc is updated: that is what the top of the + // loop closes before a retry and what is stored in s.conn on success. + nc, err = net.Dial("tcp", + fmt.Sprintf("%s:%d", instance.DNSName, config.SSHPort)) + if err != nil { + log.Printf("TCP connection to SSH ip/port failed: %s", err) + continue + } + + // Build the actual SSH client configuration + sshConfig := &gossh.ClientConfig{ + User: config.SSHUsername, + Auth: []gossh.ClientAuth{ + gossh.ClientAuthKeyring(keyring), + }, + } + + sshConnectSuccess := make(chan bool, 1) + go func() { + comm, err = ssh.New(nc, sshConfig) + if err != nil { + log.Printf("SSH connection fail: %s", err) + sshConnectSuccess <- false + return + } + + sshConnectSuccess <- true + }() + + select { + case success := <-sshConnectSuccess: + if !success { + continue + } + case <-time.After(5 * time.Second): + log.Printf("SSH handshake timeout. Trying again.") + continue + } + + ui.Say("Connected via SSH!") + break } - if err != nil { - err := fmt.Errorf("Error connecting to SSH: %s", err) - state["error"] = err - ui.Error(err.Error()) - return multistep.ActionHalt - } - - // Set the communicator on the state bag so it can be used later - state["communicator"] = comm - - return multistep.ActionContinue -} - -func (s *stepConnectSSH) Cleanup(map[string]interface{}) { - if s.conn != nil { - s.conn.Close() - } + // Store the connection so we can close it later + s.conn = nc + return comm, nil }