mirror of
https://github.com/awslabs/amazon-bedrock-agentcore-samples.git
synced 2025-09-08 20:50:46 +00:00
* Add multi-region support for SRE-Agent - Add AWS region configuration parameter to agent_config.yaml - Update gateway main.py to validate region matches endpoint URL - Modify SRE agent to read region from config and pass through function chain - Update memory client and LLM creation to use configurable region - Fixes hardcoded us-east-1 region dependencies Closes #245 * Move architecture file to docs/ and improve setup instructions - Move sre_agent_architecture.md to docs/ folder for better organization - Update graph export code to generate architecture file in docs/ folder - Add automatic docs directory creation if it doesn't exist - Improve README setup instructions: - Fix .env.example copy path to use sre_agent folder - Add note that Amazon Bedrock users don't need to modify .env - Add START_API_BACKEND variable to conditionally start backend servers - Useful for workshop environments where backends are already running * Improve gateway configuration documentation and setup instructions - Update config.yaml.example to use REGION placeholder instead of hardcoded us-east-1 - Add gateway configuration step to README setup instructions - Document .cognito_config file in auth.md automated setup section - Remove duplicate credential_provider_name from config.yaml.example - Update configuration.md to include .cognito_config in files overview - Add clear instructions to copy and edit gateway/config.yaml before creating gateway * Improve IAM role guidance and region handling - Add clear guidance about IAM role options in gateway/config.yaml.example - Explain that testing can use current EC2/notebook role - Recommend dedicated role for production deployments - Add aws sts get-caller-identity command to help users find their role - Update deployment scripts to use AWS_REGION env var as fallback - Scripts now follow: CLI arg -> AWS_REGION env var -> us-east-1 default * Remove unnecessary individual Cognito ID files - Remove creation of .cognito_user_pool_id file - Remove 
creation of .cognito_client_id file - Keep only .cognito_config as the single source of truth - Simplifies configuration management * Implement region fallback logic for SRE Agent - Added region fallback chain: agent_config.yaml -> AWS_REGION env -> us-east-1 - Modified agent_config.yaml to comment out region parameter to enable fallback - Updated multi_agent_langgraph.py with comprehensive fallback implementation - Added logging to show which region source is being used - Ensures flexible region configuration without breaking existing deployments - Maintains backward compatibility while adding multi-region support
307 lines
11 KiB
Python
307 lines
11 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import boto3
|
|
from botocore.exceptions import ClientError
|
|
from dotenv import load_dotenv
|
|
|
|
# Tunable settings
DELETION_WAIT_TIME = 150  # seconds to pause after deleting a runtime before recreating it

# Root logger setup: INFO and above, one comma-separated record per line carrying
# the timestamp, process id, source file and line, level name, and message.
logging.basicConfig(
    format="%(asctime)s,p%(process)s,{%(filename)s:%(lineno)d},%(levelname)s,%(message)s",
    level=logging.INFO,
)
|
|
|
|
|
|
def _write_agent_arn_to_file(agent_arn: str, output_dir: str = None) -> None:
|
|
"""Write agent ARN to .agent_arn file."""
|
|
if output_dir is None:
|
|
output_dir = Path(__file__).parent
|
|
else:
|
|
output_dir = Path(output_dir)
|
|
|
|
arn_file = output_dir / ".agent_arn"
|
|
|
|
try:
|
|
with open(arn_file, "w") as f:
|
|
f.write(agent_arn)
|
|
logging.info(f"💾 Agent Runtime ARN saved to {arn_file}")
|
|
except Exception as e:
|
|
logging.error(f"Failed to write agent ARN to file: {e}")
|
|
|
|
|
|
def _get_agent_runtime_id_by_name(client: boto3.client, runtime_name: str) -> str:
    """Look up the ID of the agent runtime whose name equals ``runtime_name``.

    Every page returned by ``list_agent_runtimes`` is searched, so a runtime
    is found even when the account holds more runtimes than fit in a single
    response.

    Args:
        client: Client object exposing ``list_agent_runtimes``.
        runtime_name: Exact runtime name to match.

    Returns:
        The ``agentRuntimeId`` of the first match, or ``None`` when no runtime
        has that name or the listing call fails with ``ClientError`` (the
        error is logged).
    """
    request_kwargs = {}

    try:
        while True:
            page = client.list_agent_runtimes(**request_kwargs)

            for runtime in page.get("agentRuntimes", []):
                if runtime["agentRuntimeName"] == runtime_name:
                    return runtime["agentRuntimeId"]

            # A missing or empty token means the last page has been read.
            next_token = page.get("nextToken")
            if not next_token:
                return None
            request_kwargs["nextToken"] = next_token

    except ClientError as e:
        logging.error(f"Failed to get agent runtime ID: {e}")
        return None
|
|
|
|
|
|
def _delete_agent_runtime(client: boto3.client, runtime_id: str) -> bool:
    """Request deletion of the agent runtime identified by ``runtime_id``.

    Args:
        client: Client object exposing ``delete_agent_runtime``.
        runtime_id: ID of the runtime to delete.

    Returns:
        ``True`` when the delete call returns without error, ``False`` when it
        raises ``ClientError`` (the error is logged).
    """
    logging.info(f"Deleting agent runtime with ID: {runtime_id}")

    try:
        client.delete_agent_runtime(agentRuntimeId=runtime_id)
    except ClientError as err:
        logging.error(f"Failed to delete agent runtime: {err}")
        return False

    logging.info("Agent runtime deleted successfully")
    return True
|
|
|
|
|
|
def _list_existing_agent_runtimes(client: boto3.client) -> None:
    """Log every existing agent runtime as pretty-printed JSON.

    All pages returned by ``list_agent_runtimes`` are collected before
    logging, so the output is complete even when the results span several
    responses.

    Args:
        client: Client object exposing ``list_agent_runtimes``.

    Returns:
        None. A ``ClientError`` from the listing call is logged, not raised.
    """
    agent_runtimes = []
    request_kwargs = {}

    try:
        while True:
            page = client.list_agent_runtimes(**request_kwargs)
            agent_runtimes.extend(page.get("agentRuntimes", []))

            # A missing or empty token means the last page has been read.
            next_token = page.get("nextToken")
            if not next_token:
                break
            request_kwargs["nextToken"] = next_token
    except ClientError as e:
        logging.error(f"Failed to list agent runtimes: {e}")
        return

    if not agent_runtimes:
        logging.info("No existing agent runtimes found.")
        return

    logging.info("Existing agent runtimes:")
    for runtime in agent_runtimes:
        # default=str renders values json cannot serialize natively.
        logging.info(json.dumps(runtime, indent=2, default=str))
|
|
|
|
|
|
def _mask_secret(value: str) -> str:
    """Return a log-safe form of a secret: 20 asterisks, then its last 8 characters ('***' if it has 8 or fewer)."""
    visible_tail = value[-8:] if len(value) > 8 else "***"
    return f"{'*' * 20}...{visible_tail}"


def _submit_create_request(
    client: boto3.client,
    runtime_name: str,
    container_uri: str,
    role_arn: str,
    env_vars: dict,
) -> dict:
    """Issue one ``create_agent_runtime`` call (PUBLIC network mode) and return its response."""
    return client.create_agent_runtime(
        agentRuntimeName=runtime_name,
        agentRuntimeArtifact={
            "containerConfiguration": {"containerUri": container_uri}
        },
        networkConfiguration={"networkMode": "PUBLIC"},
        roleArn=role_arn,
        environmentVariables=env_vars,
    )


def _report_created_runtime(response: dict, headline: str) -> None:
    """Log the headline, ARN and status of a created runtime, then save its ARN to disk."""
    logging.info(headline)
    logging.info(f"Agent Runtime ARN: {response['agentRuntimeArn']}")
    logging.info(f"Status: {response['status']}")
    _write_agent_arn_to_file(response["agentRuntimeArn"])


def _create_agent_runtime(
    client: boto3.client,
    runtime_name: str,
    container_uri: str,
    role_arn: str,
    anthropic_api_key: str,
    gateway_access_token: str,
    llm_provider: str = "bedrock",
    force_recreate: bool = False,
) -> None:
    """Create an agent runtime, optionally replacing one with the same name.

    Args:
        client: Client object exposing the agent-runtime create/list/delete
            calls.
        runtime_name: Name for the new runtime.
        container_uri: Container image URI used as the runtime artifact.
        role_arn: IAM role ARN passed to the runtime.
        anthropic_api_key: Forwarded to the runtime as ``ANTHROPIC_API_KEY``
            only when truthy.
        gateway_access_token: Forwarded to the runtime as
            ``GATEWAY_ACCESS_TOKEN``.
        llm_provider: Forwarded to the runtime as ``LLM_PROVIDER``.
        force_recreate: When the name is already taken, delete the existing
            runtime, wait ``DELETION_WAIT_TIME`` seconds and create it again.

    Returns:
        None. The function also returns, after logging, without creating
        anything when the name is taken and ``force_recreate`` is false, or
        when the existing runtime cannot be found or deleted.

    Raises:
        ClientError: When the first create call fails for any reason other
            than a name conflict, or when the recreate call fails.
    """
    # Build environment variables
    env_vars = {
        "GATEWAY_ACCESS_TOKEN": gateway_access_token,
        "LLM_PROVIDER": llm_provider,
    }

    # Only add ANTHROPIC_API_KEY if it exists
    if anthropic_api_key:
        env_vars["ANTHROPIC_API_KEY"] = anthropic_api_key

    # Propagate debug mode when the deploying shell has DEBUG switched on
    debug_mode = os.getenv("DEBUG", "false")
    if debug_mode.lower() in ("true", "1", "yes"):
        env_vars["DEBUG"] = "true"
        logging.info("Debug mode enabled for agent runtime")

    # Log environment variables being passed to AgentCore (mask sensitive data)
    logging.info("🚀 Environment variables being passed to AgentCore Runtime:")
    for key, value in env_vars.items():
        if key in ("ANTHROPIC_API_KEY", "GATEWAY_ACCESS_TOKEN"):
            logging.info(f" {key}: {_mask_secret(value)}")
        else:
            logging.info(f" {key}: {value}")

    try:
        response = _submit_create_request(
            client, runtime_name, container_uri, role_arn, env_vars
        )
    except ClientError as e:
        error_code = e.response.get("Error", {}).get("Code", "")

        # Handle non-conflict errors immediately
        if error_code != "ConflictException":
            logging.error(f"Failed to create agent runtime: {e}")
            raise
        # A conflict falls through to the handling below.
    else:
        _report_created_runtime(response, "Agent Runtime created successfully!")
        return

    # Handle conflict - runtime already exists
    logging.error(f"Agent runtime '{runtime_name}' already exists.")
    logging.info("Listing existing agent runtimes:")
    _list_existing_agent_runtimes(client)

    # If not forcing recreate, provide guidance and exit
    if not force_recreate:
        logging.info(
            "Please retry with a new agent name using the --runtime-name parameter, or use --force-recreate to delete and recreate."
        )
        return

    # Handle force recreate scenario
    logging.info("Force recreate requested, attempting to delete existing runtime...")
    runtime_id = _get_agent_runtime_id_by_name(client, runtime_name)

    if not runtime_id:
        logging.error(f"Could not find runtime ID for '{runtime_name}'")
        return

    if not _delete_agent_runtime(client, runtime_id):
        logging.error("Failed to delete existing runtime")
        return

    # The delete call returns before the name is free again, so pause before
    # reusing it.
    logging.info(f"Waiting {DELETION_WAIT_TIME} seconds for deletion to complete...")
    time.sleep(DELETION_WAIT_TIME)

    # Recreate the runtime after successful deletion
    logging.info("Attempting to recreate agent runtime...")
    try:
        response = _submit_create_request(
            client, runtime_name, container_uri, role_arn, env_vars
        )
    except ClientError as e:
        # Same defensive error-code lookup as the first attempt.
        error_code = e.response.get("Error", {}).get("Code", "")

        if error_code == "ConflictException":
            logging.error("\n" + "=" * 70)
            logging.error("⚠️ AGENT NAME CONFLICT - AWS CLEANUP STILL IN PROGRESS")
            logging.error("=" * 70)
            logging.error(
                f"Even after waiting {DELETION_WAIT_TIME} seconds, the agent name"
            )
            logging.error(f"'{runtime_name}' is still not available.")
            logging.error("")
            logging.error("This is an AWS internal cleanup delay. Please try one of:")
            logging.error("1. Wait 1-2 more minutes and run the script again")
            logging.error("2. Use a different agent name (e.g., add a timestamp)")
            logging.error(f" ./deployment/build_and_deploy.sh {runtime_name}_v2")
            logging.error("=" * 70)
            print("\n⚠️ Please wait 1-2 minutes for AWS to complete agent deletion,")
            print(" then try running the deployment script again.")
        else:
            logging.error(f"Failed to recreate agent runtime: {e}")

        # Always propagate: there is no response to report after a failure.
        raise

    _report_created_runtime(response, "Agent Runtime recreated successfully!")
|
|
|
|
|
|
def main():
    """Parse CLI arguments, load ``.env`` and create the agent runtime.

    Reads ``ANTHROPIC_API_KEY``, ``GATEWAY_ACCESS_TOKEN`` and ``LLM_PROVIDER``
    from the environment after loading the ``.env`` file that sits next to
    this script, logs them with secrets masked, then calls
    ``_create_agent_runtime`` with a ``bedrock-agentcore-control`` client for
    the selected region.

    Raises:
        FileNotFoundError: When the ``.env`` file does not exist.
        ValueError: When ``GATEWAY_ACCESS_TOKEN`` is unset or empty.
    """

    def _masked(secret):
        # 20 asterisks, then the last 8 characters ('***' for short secrets).
        tail = secret[-8:] if len(secret) > 8 else "***"
        return f"{'*' * 20}...{tail}"

    cli = argparse.ArgumentParser(description="Deploy SRE Agent to AgentCore Runtime")
    cli.add_argument(
        "--runtime-name",
        default="sre-agent",
        help="Name for the agent runtime (default: sre-agent)",
    )
    cli.add_argument(
        "--container-uri",
        required=True,
        help="Container URI (e.g., account-id.dkr.ecr.us-west-2.amazonaws.com/my-agent:latest)",
    )
    cli.add_argument(
        "--role-arn",
        required=True,
        help="IAM role ARN for the agent runtime",
    )
    cli.add_argument(
        "--region",
        # Resolved when the parser is built, i.e. before .env is loaded.
        default=os.getenv("AWS_REGION", "us-east-1"),
        help="AWS region (default: AWS_REGION env var or us-east-1)",
    )
    cli.add_argument(
        "--force-recreate",
        action="store_true",
        help="Delete existing runtime if it exists and recreate it",
    )
    args = cli.parse_args()

    # The .env file is expected in the same directory as this script.
    env_file = Path(__file__).parent / ".env"
    if not env_file.exists():
        logging.error(f".env file not found at {env_file}")
        raise FileNotFoundError(
            f"Please create a .env file at {env_file} with GATEWAY_ACCESS_TOKEN and optionally ANTHROPIC_API_KEY"
        )
    load_dotenv(env_file)
    logging.info(f"Loaded environment variables from {env_file}")

    # Settings forwarded to the runtime
    anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
    gateway_access_token = os.getenv("GATEWAY_ACCESS_TOKEN")
    llm_provider = os.getenv("LLM_PROVIDER", "bedrock")

    # Report what was loaded, never printing a full secret
    logging.info("📋 Environment variables loaded:")
    logging.info(f" LLM_PROVIDER: {llm_provider}")

    if anthropic_api_key:
        logging.info(f" ANTHROPIC_API_KEY: {_masked(anthropic_api_key)}")
    else:
        logging.info(
            " ANTHROPIC_API_KEY: Not set - Amazon Bedrock will be used as the provider"
        )

    if gateway_access_token:
        logging.info(f" GATEWAY_ACCESS_TOKEN: {_masked(gateway_access_token)}")
    else:
        logging.error("GATEWAY_ACCESS_TOKEN not found in .env")
        raise ValueError("GATEWAY_ACCESS_TOKEN must be set in .env")

    control_client = boto3.client("bedrock-agentcore-control", region_name=args.region)

    _create_agent_runtime(
        client=control_client,
        runtime_name=args.runtime_name,
        container_uri=args.container_uri,
        role_arn=args.role_arn,
        anthropic_api_key=anthropic_api_key,
        gateway_access_token=gateway_access_token,
        llm_provider=llm_provider,
        force_recreate=args.force_recreate,
    )
|
|
|
|
|
|
# Script entry point: run the deployment only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()
|