retry spot instance creation when an "Invalid IAM Instance Profile name" error pops up (#9810)
PutRolePolicy and AddRoleToInstanceProfile are eventually consistent, but it is not currently possible to wait for them to finish here: 0785c2f6fc/builder/amazon/common/step_iam_instance_profile.go (L117-L134),
which was causing the `CreateFleet` call to fail (100% of the time for me). So for now we retry a bit later. Waiting 5 seconds after the linked code also fixed this.
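
For context, the "wait for them to be done" alternative would look roughly like the sketch below. It is not part of this change; the client wiring and the profile/role names are made up for illustration. Even when IAM reports the role attached, EC2 can still briefly reject the profile, which is why retrying `CreateFleet` itself is the more reliable fix for now.

```go
package main

// Sketch only: poll IAM until the temporary instance profile has its role
// attached. The profile/role names and the standalone main() are hypothetical.
import (
	"fmt"
	"time"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/iam"
)

func waitForInstanceProfile(iamConn *iam.IAM, profileName, roleName string) error {
	for attempt := 0; attempt < 10; attempt++ {
		out, err := iamConn.GetInstanceProfile(&iam.GetInstanceProfileInput{
			InstanceProfileName: aws.String(profileName),
		})
		if err == nil {
			for _, role := range out.InstanceProfile.Roles {
				if aws.StringValue(role.RoleName) == roleName {
					return nil // IAM sees the role; EC2 may still lag slightly
				}
			}
		}
		time.Sleep(2 * time.Second)
	}
	return fmt.Errorf("instance profile %q never became ready", profileName)
}

func main() {
	sess := session.Must(session.NewSession())
	err := waitForInstanceProfile(iam.New(sess), "packer-temp-profile", "packer-temp-role")
	fmt.Println("wait result:", err)
}
```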
Test file:
```json
{
  "builders": [
    {
      "type": "amazon-ebs",
      "region": "eu-west-1",
      "ami_name": "ubuntu-16.04 test {{timestamp}}",
      "ami_description": "Ubuntu 16.04 LTS - expand root partition",
      "source_ami_filter": {
        "filters": {
          "virtualization-type": "hvm",
          "name": "ubuntu/images/*/ubuntu-xenial-16.04-amd64-server-*",
          "root-device-type": "ebs"
        },
        "owners": ["099720109477"],
        "most_recent": true
      },
      "spot_price": "0.03",
      "spot_instance_types": ["t2.small"],
      "encrypt_boot": true,
      "ssh_username": "ubuntu",
      "ssh_interface": "session_manager",
      "temporary_iam_instance_profile_policy_document": {
        "Version": "2012-10-17",
        "Statement": [
          {
            "Effect": "Allow",
            "Action": ["*"],
            "Resource": "*"
          }
        ]
      },
      "communicator": "ssh"
    }
  ]
}
```
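
Saved under any name (say `spot-ssm.json`, a made-up filename for this example), the template can be exercised with `packer build spot-ssm.json`. The `temporary_iam_instance_profile_policy_document` block is what makes Packer create the temporary role and instance profile, and that freshly created profile is what used to make `CreateFleet` fail with "Invalid IAM Instance Profile name".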
commit a0c09e85df (parent 1252658848)
```diff
@@ -6,9 +6,11 @@ import (
 	"fmt"
 	"io/ioutil"
 	"log"
+	"strings"
 	"time"
 
 	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/aws/request"
 	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/hashicorp/packer/common/random"
 	"github.com/hashicorp/packer/common/retry"
@@ -278,23 +280,39 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag)
 		Type: aws.String("instant"),
 	}
 
-	// Create the request for the spot instance.
-	req, createOutput := ec2conn.CreateFleetRequest(createFleetInput)
-	ui.Message(fmt.Sprintf("Sending spot request (%s)...", req.RequestID))
-	// Actually send the spot connection request.
-	err = req.Send()
+	var createOutput *ec2.CreateFleetOutput
+
+	err = retry.Config{
+		Tries: 11,
+		ShouldRetry: func(err error) bool {
+			if strings.Contains(err.Error(), "Invalid IAM Instance Profile name") {
+				// eventual consistency of the profile. PutRolePolicy &
+				// AddRoleToInstanceProfile are eventually consistent and once
+				// we can wait on those operations, this can be removed.
+				return true
+			}
+			return request.IsErrorRetryable(err)
+		},
+		RetryDelay: (&retry.Backoff{InitialBackoff: 500 * time.Millisecond, MaxBackoff: 30 * time.Second, Multiplier: 2}).Linear,
+	}.Run(ctx, func(ctx context.Context) error {
+		createOutput, err = ec2conn.CreateFleet(createFleetInput)
+
+		if err == nil && createOutput.Errors != nil {
+			err = fmt.Errorf("errors: %v", createOutput.Errors)
+		}
+		if err != nil {
+			log.Printf("create request failed %v", err)
+		}
+		return err
+	})
+
 	if err != nil {
 		if createOutput.FleetId != nil {
 			err = fmt.Errorf("Error waiting for fleet request (%s): %s", *createOutput.FleetId, err)
 		} else {
 			err = fmt.Errorf("Error waiting for fleet request: %s", err)
 		}
 		state.Put("error", err)
 		ui.Error(err.Error())
 		return multistep.ActionHalt
 	}
 
 	if len(createOutput.Instances) == 0 {
 		// We can end up with errors because one of the allowed availability
 		// zones doesn't have one of the allowed instance types; as long as
 		// an instance is launched, these errors aren't important.
@@ -308,6 +326,9 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag)
 			ui.Error(err.Error())
 			return multistep.ActionHalt
 		}
+		state.Put("error", err)
+		ui.Error(err.Error())
+		return multistep.ActionHalt
 	}
 
 	instanceId = *createOutput.Instances[0].InstanceIds[0]
```
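
For readers unfamiliar with Packer's `common/retry` helper, the shape of the retry above can be illustrated with the minimal, self-contained sketch below. It is not the actual `retry` package (the real `Backoff{...}.Linear` arithmetic may differ); it only shows the idea: call the operation, retry only the errors the predicate opts into, and wait a little longer before each new attempt.

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"strings"
	"time"
)

// retryLinear is a simplified stand-in for retry.Config{...}.Run: it keeps
// calling op until it succeeds, the error is not retryable, or tries run out,
// sleeping a linearly growing (capped) delay between attempts.
func retryLinear(ctx context.Context, tries int, initial, max time.Duration,
	shouldRetry func(error) bool, op func(context.Context) error) error {

	var err error
	for attempt := 1; attempt <= tries; attempt++ {
		if err = op(ctx); err == nil || !shouldRetry(err) {
			return err
		}
		delay := time.Duration(attempt) * initial
		if delay > max {
			delay = max
		}
		select {
		case <-time.After(delay):
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return err
}

func main() {
	calls := 0
	err := retryLinear(context.Background(), 11, 500*time.Millisecond, 30*time.Second,
		func(err error) bool {
			// Same predicate as the diff: retry the eventual-consistency error.
			return strings.Contains(err.Error(), "Invalid IAM Instance Profile name")
		},
		func(ctx context.Context) error {
			calls++
			if calls < 3 {
				// Simulate the profile not having propagated yet.
				return errors.New("Invalid IAM Instance Profile name")
			}
			return nil // pretend CreateFleet succeeded once IAM caught up
		})
	fmt.Printf("attempts: %d, err: %v\n", calls, err)
}
```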