retry spot instance creation when an "Invalid IAM Instance Profile name" error pops up (#9810)

PutRolePolicy & AddRoleToInstanceProfile are eventually consistent but it is not possible to wait for them to be done here: 0785c2f6fc/builder/amazon/common/step_iam_instance_profile.go (L117-L134) which was causing the `CreateFleet` to fail (100% for me). So for now we retry a bit later. Waiting 5 seconds after the previously linked code also fixed this.

Test file:

```json
{
	"builders": [
		{
			"type": "amazon-ebs",
			"region": "eu-west-1",
			"ami_name": "ubuntu-16.04 test {{timestamp}}",
			"ami_description": "Ubuntu 16.04 LTS - expand root partition",
			"source_ami_filter": {
				"filters": {
					"virtualization-type": "hvm",
					"name": "ubuntu/images/*/ubuntu-xenial-16.04-amd64-server-*",
					"root-device-type": "ebs"
				},
				"owners": [
					"099720109477"
				],
				"most_recent": true
			},
			"spot_price": "0.03",
			"spot_instance_types": [
				"t2.small"
			],
			"encrypt_boot": true,
			"ssh_username": "ubuntu",
			"ssh_interface": "session_manager",
			"temporary_iam_instance_profile_policy_document": {
				"Version": "2012-10-17",
				"Statement": [
					{
						"Effect": "Allow",
						"Action": [
							"*"
						],
						"Resource": "*"
					}
				]
			},
			"communicator": "ssh"
		}
]}
```
This commit is contained in:
Adrien Delorme 2020-08-25 10:10:32 +02:00 committed by GitHub
parent 1252658848
commit a0c09e85df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 31 additions and 10 deletions

View File

@ -6,9 +6,11 @@ import (
"fmt"
"io/ioutil"
"log"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/hashicorp/packer/common/random"
"github.com/hashicorp/packer/common/retry"
@ -278,23 +280,39 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag)
Type: aws.String("instant"),
}
var createOutput *ec2.CreateFleetOutput
err = retry.Config{
Tries: 11,
ShouldRetry: func(err error) bool {
if strings.Contains(err.Error(), "Invalid IAM Instance Profile name") {
// eventual consistency of the profile. PutRolePolicy &
// AddRoleToInstanceProfile are eventually consistent and once
// we can wait on those operations, this can be removed.
return true
}
return request.IsErrorRetryable(err)
},
RetryDelay: (&retry.Backoff{InitialBackoff: 500 * time.Millisecond, MaxBackoff: 30 * time.Second, Multiplier: 2}).Linear,
}.Run(ctx, func(ctx context.Context) error {
createOutput, err = ec2conn.CreateFleet(createFleetInput)
if err == nil && createOutput.Errors != nil {
err = fmt.Errorf("errors: %v", createOutput.Errors)
}
if err != nil {
log.Printf("create request failed %v", err)
}
return err
})
// Create the request for the spot instance.
req, createOutput := ec2conn.CreateFleetRequest(createFleetInput)
ui.Message(fmt.Sprintf("Sending spot request (%s)...", req.RequestID))
// Actually send the spot connection request.
err = req.Send()
if err != nil {
if createOutput.FleetId != nil {
err = fmt.Errorf("Error waiting for fleet request (%s): %s", *createOutput.FleetId, err)
} else {
err = fmt.Errorf("Error waiting for fleet request: %s", err)
}
state.Put("error", err)
ui.Error(err.Error())
return multistep.ActionHalt
}
if len(createOutput.Instances) == 0 {
// We can end up with errors because one of the allowed availability
// zones doesn't have one of the allowed instance types; as long as
// an instance is launched, these errors aren't important.
@ -308,6 +326,9 @@ func (s *StepRunSpotInstance) Run(ctx context.Context, state multistep.StateBag)
ui.Error(err.Error())
return multistep.ActionHalt
}
state.Put("error", err)
ui.Error(err.Error())
return multistep.ActionHalt
}
instanceId = *createOutput.Instances[0].InstanceIds[0]