amazon-bedrock-agentcore-sa.../02-use-cases/SRE-agent/sre_agent/graph_builder.py

#!/usr/bin/env python3

import logging
from typing import Any, Dict, List, Literal

from langchain_core.messages import HumanMessage
from langchain_core.tools import BaseTool
from langgraph.graph import END, StateGraph

from .agent_nodes import (
    create_kubernetes_agent,
    create_logs_agent,
    create_metrics_agent,
    create_runbooks_agent,
)
from .agent_state import AgentState
from .supervisor import SupervisorAgent

# Install the root logging configuration when this module is imported.
# NOTE: logging.basicConfig() does nothing if the root logger already has
# handlers, so an application that configured logging earlier keeps its setup.
logging.basicConfig(
    # Comma-separated fields: timestamp, process id, {file:line}, severity, message
    format="%(asctime)s,p%(process)s,{%(filename)s:%(lineno)d},%(levelname)s,%(message)s",
    # Emit records at INFO severity and above
    level=logging.INFO,
)

# Module-scoped logger shared by the functions in this file
logger = logging.getLogger(__name__)


def _should_continue(state: AgentState) -> Literal["supervisor", "FINISH"]:
    """Decide whether control returns to the supervisor or the run ends.

    Args:
        state: Current graph state. Only the ``next``, ``agents_invoked`` and
            ``requires_collaboration`` keys are read; each may be absent.

    Returns:
        ``"FINISH"`` when ``next`` is missing or equals ``"FINISH"``, or when
        the requested agent is already listed in ``agents_invoked`` and
        ``requires_collaboration`` is falsy. ``"supervisor"`` otherwise.
    """
    next_agent = state.get("next", "FINISH")
    if next_agent == "FINISH":
        return "FINISH"

    # Loop guard: do not hand off to an agent that has already run unless the
    # state explicitly asks for collaboration. ``or ()`` also covers the key
    # being present but set to None, which would otherwise make ``in`` raise.
    agents_invoked = state.get("agents_invoked") or ()
    already_ran = next_agent in agents_invoked
    if already_ran and not state.get("requires_collaboration", False):
        # Lazy %-style arguments: the message is only formatted if emitted.
        logger.warning(
            "Agent %s already invoked, finishing to avoid loop", next_agent
        )
        return "FINISH"

    return "supervisor"


def _route_supervisor(state: AgentState) -> str:
    """Translate the supervisor's ``next`` decision into a graph node name.

    Args:
        state: Current graph state; only the ``next`` key is read.

    Returns:
        The ``*_agent`` node name for a recognised agent key. ``"aggregate"``
        is returned when ``next`` is missing, equals ``"FINISH"``, or is not
        one of the known agent keys.
    """
    # Short agent keys written by the supervisor -> node names used in the graph
    node_for_agent = {
        "kubernetes": "kubernetes_agent",
        "logs": "logs_agent",
        "metrics": "metrics_agent",
        "runbooks": "runbooks_agent",
    }

    requested = state.get("next", "FINISH")
    return (
        "aggregate"
        if requested == "FINISH"
        else node_for_agent.get(requested, "aggregate")
    )


async def _prepare_initial_state(state: AgentState) -> Dict[str, Any]:
    """Build the state update applied before the supervisor first runs.

    Args:
        state: Incoming graph state; only the ``messages`` key is read.

    Returns:
        A dict that sets ``current_query`` to the content of the most recent
        ``HumanMessage`` (empty string when there is none) and resets
        ``agent_results``, ``agents_invoked``, ``requires_collaboration`` and
        ``metadata`` to empty/false values.
    """
    history = state.get("messages", [])

    # Scan newest-first and stop at the first human-authored message.
    latest_human_text = next(
        (entry.content for entry in reversed(history) if isinstance(entry, HumanMessage)),
        "",
    )

    return {
        "current_query": latest_human_text,
        "agent_results": {},
        "agents_invoked": [],
        "requires_collaboration": False,
        "metadata": {},
    }


def build_multi_agent_graph(
    tools: List[BaseTool], llm_provider: str = "bedrock", **llm_kwargs
) -> StateGraph:
    """Assemble and compile the supervisor-driven multi-agent graph.

    Topology: ``prepare`` -> ``supervisor``; the supervisor is routed by
    ``_route_supervisor`` to one of the four specialist agent nodes or to
    ``aggregate``; every specialist returns to ``supervisor``; ``aggregate``
    leads to ``END``.

    Args:
        tools: List of all available tools. The same list is handed to every
            agent factory (any per-agent filtering presumably happens inside
            the factories -- confirm in ``agent_nodes``).
        llm_provider: LLM provider to use.
        **llm_kwargs: Additional arguments forwarded to the supervisor and to
            each agent factory.

    Returns:
        The object returned by ``StateGraph.compile()``.
        NOTE(review): the annotation says ``StateGraph``, but the compiled
        graph is what is actually returned.
    """
    logger.info("Building multi-agent collaboration graph")

    graph = StateGraph(AgentState)
    coordinator = SupervisorAgent(llm_provider=llm_provider, **llm_kwargs)

    # Specialist factories keyed by graph node name. Dict order is the order
    # in which agents are created, registered and wired below.
    agent_factories = {
        "kubernetes_agent": create_kubernetes_agent,
        "logs_agent": create_logs_agent,
        "metrics_agent": create_metrics_agent,
        "runbooks_agent": create_runbooks_agent,
    }
    specialists = {
        node_name: factory(tools, llm_provider=llm_provider, **llm_kwargs)
        for node_name, factory in agent_factories.items()
    }

    # Register nodes: entry step, router, specialists, final aggregation.
    graph.add_node("prepare", _prepare_initial_state)
    graph.add_node("supervisor", coordinator.route)
    for node_name, agent_node in specialists.items():
        graph.add_node(node_name, agent_node)
    graph.add_node("aggregate", coordinator.aggregate_responses)

    # Execution starts at "prepare", which always hands off to the supervisor.
    graph.set_entry_point("prepare")
    graph.add_edge("prepare", "supervisor")

    # _route_supervisor returns a node name, so each route maps to itself.
    supervisor_targets = [*specialists, "aggregate"]
    graph.add_conditional_edges(
        "supervisor",
        _route_supervisor,
        {target: target for target in supervisor_targets},
    )

    # Every specialist reports back to the supervisor for the next decision.
    for node_name in specialists:
        graph.add_edge(node_name, "supervisor")

    # Aggregation is the terminal step.
    graph.add_edge("aggregate", END)

    compiled = graph.compile()

    logger.info("Multi-agent collaboration graph built successfully")
    return compiled
feat(agentcore): Adding tutorials, samples, and integrations 2025-07-16 14:07:30 -04:00			`#!/usr/bin/env python3`

			`import logging`
			`from typing import Any, Dict, List, Literal`

			`from langchain_core.messages import HumanMessage`
			`from langchain_core.tools import BaseTool`
			`from langgraph.graph import END, StateGraph`

			`from .agent_nodes import (`
			`create_kubernetes_agent,`
			`create_logs_agent,`
			`create_metrics_agent,`
			`create_runbooks_agent,`
			`)`
			`from .agent_state import AgentState`
			`from .supervisor import SupervisorAgent`

			`# Configure logging with basicConfig`
			`logging.basicConfig(`
			`level=logging.INFO, # Set the log level to INFO`
			`# Define log message format`
			`format="%(asctime)s,p%(process)s,{%(filename)s:%(lineno)d},%(levelname)s,%(message)s",`
			`)`

			`logger = logging.getLogger(__name__)`


			`def _should_continue(state: AgentState) -> Literal["supervisor", "FINISH"]:`
			`"""Determine if we should continue or finish."""`
			`next_agent = state.get("next", "FINISH")`

			`if next_agent == "FINISH":`
			`return "FINISH"`

			`# Check if we've already invoked this agent (avoid loops)`
			`agents_invoked = state.get("agents_invoked", [])`
			`if next_agent in agents_invoked and not state.get("requires_collaboration", False):`
			`logger.warning(f"Agent {next_agent} already invoked, finishing to avoid loop")`
			`return "FINISH"`

			`return "supervisor"`


			`def _route_supervisor(state: AgentState) -> str:`
			`"""Route from supervisor to the appropriate agent or finish."""`
			`next_agent = state.get("next", "FINISH")`

			`if next_agent == "FINISH":`
			`return "aggregate"`

			`# Map to actual node names`
			`agent_map = {`
			`"kubernetes": "kubernetes_agent",`
			`"logs": "logs_agent",`
			`"metrics": "metrics_agent",`
			`"runbooks": "runbooks_agent",`
			`}`

			`return agent_map.get(next_agent, "aggregate")`


			`async def _prepare_initial_state(state: AgentState) -> Dict[str, Any]:`
			`"""Prepare the initial state with the user's query."""`
			`messages = state.get("messages", [])`

			`# Extract the current query from the last human message`
			`current_query = ""`
			`for msg in reversed(messages):`
			`if isinstance(msg, HumanMessage):`
			`current_query = msg.content`
			`break`

			`return {`
			`"current_query": current_query,`
			`"agent_results": {},`
			`"agents_invoked": [],`
			`"requires_collaboration": False,`
			`"metadata": {},`
			`}`


			`def build_multi_agent_graph(`
fix(SRE Agent)- Deploy SRE Agent on Amazon Bedrock AgentCore Runtime with Enhanced Architecture (#158) * feat: Deploy SRE agent on Amazon Bedrock AgentCore Runtime - Add agent_runtime.py with FastAPI endpoints for AgentCore compatibility - Create Dockerfile for ARM64-based containerization - Add deployment scripts for automated ECR push and AgentCore deployment - Update backend API URLs from placeholders to actual endpoints - Update gateway configuration for production use - Add dependencies for AgentCore runtime support Implements #143 * chore: Add deployment artifacts to .gitignore - Add deployment/.sre_agent_uri, deployment/.env, and deployment/.agent_arn to .gitignore - Remove already tracked deployment artifacts from git * feat: Make ANTHROPIC_API_KEY optional in deployment - Update deploy_agent_runtime.py to conditionally include ANTHROPIC_API_KEY - Show info message when using Amazon Bedrock as provider - Update .env.example to clarify ANTHROPIC_API_KEY is optional - Only include ANTHROPIC_API_KEY in environment variables if it exists * fix: Use uv run python instead of python in build script - Update build_and_deploy.sh to use 'uv run python' for deployment - Change to parent directory to ensure uv environment is available - Fixes 'python: command not found' error during deployment * refactor: Improve deployment script structure and create .env symlink - Flatten nested if-else blocks in deploy_agent_runtime.py for better readability - Add 10-second sleep after deletion to ensure cleanup completes - Create symlink from deployment/.env to sre_agent/.env to avoid duplication - Move time import to top of file with other imports * feat: Add debug mode support and comprehensive deployment guide Add --debug command line flag and DEBUG environment variable support: - Created shared logging configuration module - Updated CLI and runtime to support --debug flag - Made debug traces conditional on DEBUG environment variable - Added debug mode for container and 
AgentCore deployments Enhanced build and deployment script: - Added command line argument for ECR repository name - Added help documentation and usage examples - Added support for local builds (x86_64) vs AgentCore builds (arm64) - Added environment variable pass-through for DEBUG, LLM_PROVIDER, ANTHROPIC_API_KEY Created comprehensive deployment guide: - Step-by-step instructions from local testing to production - Docker platform documentation (x86_64 vs arm64) - Environment variable configuration with .env file usage - Debug mode examples and troubleshooting guide - Provider configuration for Bedrock and Anthropic Updated README with AgentCore Runtime deployment section and documentation links. * docs: Update SRE Agent README with deployment flow diagram and fix directory reference - Fix reference from 04-SRE-agent to SRE-agent in README - Add comprehensive flowchart showing development to production deployment flow - Update overview to mention Amazon Bedrock AgentCore Runtime deployment - Remove emojis from documentation for professional appearance * docs: Replace mermaid diagram with ASCII step-by-step flow diagram - Change from block-style mermaid diagram to ASCII flow diagram - Show clear step-by-step progression from development to production - Improve readability with structured boxes and arrows - Minor text improvements for clarity * feat: Implement comprehensive prompt management system and enhance deployment guide - Create centralized prompt template system with external files in config/prompts/ - Add PromptLoader utility class with LRU caching and template variable substitution - Integrate PromptConfig into SREConstants for centralized configuration management - Update all agents (nodes, supervisor, output_formatter) to use prompt loader - Replace 150+ lines of hardcoded prompts with modular, maintainable template system - Enhance deployment guide with consistent naming (my_custom_sre_agent) throughout - Add quick-start copy-paste command sequence for 
streamlined deployment - Improve constants system with comprehensive model, AWS, timeout, and prompt configs - Add architectural assessment document to .gitignore for local analysis - Run black formatting across all updated Python files * docs: Consolidate deployment and security documentation - Rename deployment-and-security.md to security.md and remove redundant deployment content - Enhance security.md with comprehensive production security guidelines including: - Authentication and authorization best practices - Encryption and data protection requirements - Operational security monitoring and logging - Input validation and prompt security measures - Infrastructure security recommendations - Compliance and governance frameworks - Update README.md to reference new security.md file - Eliminate redundancy between deployment-guide.md and deployment-and-security.md - Improve documentation organization with clear separation of concerns * config: Replace hardcoded endpoints with placeholder domains - Update OpenAPI specifications to use placeholder domain 'your-backend-domain.com' - k8s_api.yaml: mcpgateway.ddns.net:8011 -> your-backend-domain.com:8011 - logs_api.yaml: mcpgateway.ddns.net:8012 -> your-backend-domain.com:8012 - metrics_api.yaml: mcpgateway.ddns.net:8013 -> your-backend-domain.com:8013 - runbooks_api.yaml: mcpgateway.ddns.net:8014 -> your-backend-domain.com:8014 - Update agent configuration to use placeholder AgentCore gateway endpoint - agent_config.yaml: Replace specific gateway ID with 'your-agentcore-gateway-endpoint' - Improve security by removing hardcoded production endpoints from repository - Enable template-based configuration that users can customize during setup - Align with existing documentation patterns for placeholder domain replacement 2025-07-27 15:05:03 -04:00			`tools: List[BaseTool], llm_provider: str = "bedrock", **llm_kwargs`
feat(agentcore): Adding tutorials, samples, and integrations 2025-07-16 14:07:30 -04:00			`) -> StateGraph:`
			`"""Build the multi-agent collaboration graph.`

			`Args:`
			`tools: List of all available tools`
			`llm_provider: LLM provider to use`
			`**llm_kwargs: Additional arguments for LLM`

			`Returns:`
			`Compiled StateGraph for multi-agent collaboration`
			`"""`
			`logger.info("Building multi-agent collaboration graph")`

			`# Create the state graph`
			`workflow = StateGraph(AgentState)`

			`# Create supervisor`
			`supervisor = SupervisorAgent(llm_provider=llm_provider, **llm_kwargs)`

			`# Create agent nodes with filtered tools`
			`kubernetes_agent = create_kubernetes_agent(`
			`tools, llm_provider=llm_provider, **llm_kwargs`
			`)`
			`logs_agent = create_logs_agent(tools, llm_provider=llm_provider, **llm_kwargs)`
			`metrics_agent = create_metrics_agent(tools, llm_provider=llm_provider, **llm_kwargs)`
			`runbooks_agent = create_runbooks_agent(`
			`tools, llm_provider=llm_provider, **llm_kwargs`
			`)`

			`# Add nodes to the graph`
			`workflow.add_node("prepare", _prepare_initial_state)`
			`workflow.add_node("supervisor", supervisor.route)`
			`workflow.add_node("kubernetes_agent", kubernetes_agent)`
			`workflow.add_node("logs_agent", logs_agent)`
			`workflow.add_node("metrics_agent", metrics_agent)`
			`workflow.add_node("runbooks_agent", runbooks_agent)`
			`workflow.add_node("aggregate", supervisor.aggregate_responses)`

			`# Set entry point`
			`workflow.set_entry_point("prepare")`

			`# Add edges from prepare to supervisor`
			`workflow.add_edge("prepare", "supervisor")`

			`# Add conditional edges from supervisor`
			`workflow.add_conditional_edges(`
			`"supervisor",`
			`_route_supervisor,`
			`{`
			`"kubernetes_agent": "kubernetes_agent",`
			`"logs_agent": "logs_agent",`
			`"metrics_agent": "metrics_agent",`
			`"runbooks_agent": "runbooks_agent",`
			`"aggregate": "aggregate",`
			`},`
			`)`

			`# Add edges from agents back to supervisor`
			`workflow.add_edge("kubernetes_agent", "supervisor")`
			`workflow.add_edge("logs_agent", "supervisor")`
			`workflow.add_edge("metrics_agent", "supervisor")`
			`workflow.add_edge("runbooks_agent", "supervisor")`

			`# Add edge from aggregate to END`
			`workflow.add_edge("aggregate", END)`

			`# Compile the graph`
			`compiled_graph = workflow.compile()`

			`logger.info("Multi-agent collaboration graph built successfully")`
			`return compiled_graph`