mirror of
https://github.com/awslabs/amazon-bedrock-agentcore-samples.git
synced 2025-09-08 20:50:46 +00:00
* Add missing credential_provider_name parameter to config.yaml.example * Fix get_config function to properly parse YAML values with inline comments * Enhanced get_config to prevent copy-paste whitespace errors in AWS identifiers * Improve LLM provider configuration and error handling with bedrock as default * Add OpenAPI templating system and fix hardcoded regions * Add backend template build to Readme * delete old yaml files * Fix Cognito setup with automation script and missing domain creation steps * docs: Add EC2 instance port configuration documentation - Document required inbound ports (443, 8011-8014) - Include SSL/TLS security requirements - Add AWS security group best practices - Provide port usage summary table * docs: Add hyperlinks to prerequisites in README - Link EC2 port configuration documentation - Link IAM role authentication setup - Improve navigation to detailed setup instructions * docs: Add BACKEND_API_KEY to configuration documentation - Document gateway environment variables section - Add BACKEND_API_KEY requirement for credential provider - Include example .env file format for gateway directory - Explain usage in create_gateway.sh script * docs: Add BACKEND_API_KEY to deployment guide environment variables - Include BACKEND_API_KEY in environment variables reference table - Mark as required for gateway setup - Provide quick reference alongside other required variables * docs: Add BedrockAgentCoreFullAccess policy and trust policy documentation - Document AWS managed policy BedrockAgentCoreFullAccess - Add trust policy requirements for bedrock-agentcore.amazonaws.com - Reorganize IAM permissions for better clarity - Remove duplicate trust policy section - Add IAM role requirement to deployment prerequisites * docs: Document role_name field in gateway config example - Explain that role_name is used to create and manage the gateway - Specify BedrockAgentCoreFullAccess policy requirement - Note trust policy requirement for 
bedrock-agentcore.amazonaws.com - Improve clarity for gateway configuration setup * docs: Add AWS IP address ranges for production security enhancement - Document AWS IP ranges JSON download for restricting access - Reference official AWS documentation for IP address ranges - Provide security alternatives to 0.0.0.0/0 for production - Include examples of restricted security group configurations - Enable egress filtering and region-specific access control * style: Format Python code with black - Reformat 14 Python files for consistent code style - Apply PEP 8 formatting standards - Improve code readability and maintainability * docs: Update SRE agent prerequisites and setup documentation - Convert prerequisites section to markdown table format - Add SSL certificate provider examples (no-ip.com, letsencrypt.org) - Add Identity Provider (IDP) requirement with setup_cognito.sh reference - Clarify that all prerequisites must be completed before setup - Add reference to domain name and cert paths needed for BACKEND_DOMAIN - Remove Managing OpenAPI Specifications section (covered in use-case setup) - Add Deployment Guide link to Development to Production section Addresses issues #171 and #174 * fix: Replace 'AWS Bedrock' with 'Amazon Bedrock' in SRE agent files - Updated error messages in llm_utils.py - Updated comments in both .env.example files - Ensures consistent naming convention across SRE agent codebase --------- Co-authored-by: dheerajoruganty <dheo@amazon.com> Co-authored-by: Amit Arora <aroraai@amazon.com>
235 lines
7.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
SRE Report Verification Tool
|
|
|
|
This tool compares SRE investigation reports against ground truth data to identify
|
|
hallucinations and verify the accuracy of claims made in the reports.
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import os
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
import anthropic
|
|
from dotenv import load_dotenv
|
|
|
|
# Configure logging: timestamp, process id, {file:line}, level, message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s,p%(process)s,{%(filename)s:%(lineno)d},%(levelname)s,%(message)s",
)

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Load environment variables (e.g. ANTHROPIC_API_KEY) from the SRE agent's
# .env file that lives next to this script.
load_dotenv(Path(__file__).parent / "sre_agent" / ".env")
|
|
def _get_anthropic_api_key() -> str:
|
|
"""Get Anthropic API key from environment variables."""
|
|
api_key = os.getenv("ANTHROPIC_API_KEY")
|
|
if not api_key:
|
|
raise ValueError(
|
|
"ANTHROPIC_API_KEY environment variable is required for verification"
|
|
)
|
|
return api_key
|
|
|
|
|
|
def _read_file(file_path: str) -> str:
|
|
"""Read content from a file."""
|
|
try:
|
|
with open(file_path, "r", encoding="utf-8") as f:
|
|
return f.read()
|
|
except FileNotFoundError:
|
|
logger.error(f"File not found: {file_path}")
|
|
sys.exit(1)
|
|
except Exception as e:
|
|
logger.error(f"Error reading file {file_path}: {e}")
|
|
sys.exit(1)
|
|
|
|
|
|
def _create_verification_prompt(report_content: str, ground_truth_content: str) -> str:
    """Build the verification prompt sent to Claude.

    Embeds the full report and ground truth data in XML-style tags and
    instructs the model to check every specific claim in the report,
    responding in a fixed markdown format for both the hallucination and
    the all-clear cases.
    """
    # NOTE: the template below is part of the tool's runtime behavior
    # (it is sent verbatim to the model) — edit with care.
    return f"""<task>
You are an expert SRE data verification specialist. Your task is to verify the accuracy of an SRE investigation report by comparing it against ground truth data.

<report>
{report_content}
</report>

<ground_truth_data>
{ground_truth_content}
</ground_truth_data>
</task>

<critical_context>
IMPORTANT: The ground truth data contains a comprehensive dataset representing the ENTIRE infrastructure state, including:
- Multiple services (some healthy, some with issues)
- Historical data across different time periods
- Various pod states (running, failed, crashed, etc.)
- Mixed performance metrics (good and bad)
- Different log patterns and error conditions

DO NOT expect every entity in the report to have problems in the ground truth. The ground truth shows the complete picture, so:
- Some services may be healthy while others have issues
- Some pods may be running fine while others are failing
- Performance metrics may show both good and bad patterns
- Only verify that the SPECIFIC claims in the report match what's actually in the data

Focus on accuracy of SPECIFIC claims made in the report, not whether the overall system appears healthy or unhealthy.
</critical_context>

<instructions>
Carefully analyze the SRE investigation report and compare ALL specific claims against the ground truth data. Focus on verifying:

1. **Pod Names** - Any pod names mentioned (e.g., api-service-xyz, database-pod-abc)
2. **Application Names** - Service names referenced
3. **Timestamps** - Specific times mentioned in logs or metrics
4. **Log Entries** - Exact log messages quoted
5. **Metrics Values** - Performance numbers, response times, error rates
6. **Resource Usage** - CPU, memory percentages
7. **Error Counts** - Number of errors or occurrences
8. **Status Information** - Pod states, service health

For each entity mentioned in the report:
- Check if it exists in the ground truth data
- Verify if the details (timestamps, values, status) match exactly
- Identify any fabricated or hallucinated information
- Remember: The absence of problems for a service in the ground truth does NOT invalidate the report unless the report specifically claims that service has issues

<output_format>
If you find hallucinations, respond with:

# ❌ HALLUCINATIONS DETECTED

## Fabricated Claims:
- **[Entity Type]**: [Specific claim]
- **Report Claims**: [What the report states]
- **Ground Truth**: [What the data actually shows or "NOT FOUND"]
- **Verification**: FABRICATED/INACCURATE

## Additional Issues:
[Any other accuracy problems found]

---

If NO hallucinations are found, respond with:

# ✅ REPORT VERIFIED ACCURATE

## Important Entities Found:
- **[Entity Type]**: [Entity name/value]
- **Ground Truth Reference**: Line [X]: "[exact text from ground truth]"
- **Report Context**: [How it was used in the report]

## Verification Summary:
All claims in the report have been verified against the ground truth data. No fabricated information detected.
</output_format>

Be extremely thorough and precise. SRE operations require absolute accuracy - even small discrepancies in timestamps, pod names, or metric values are critical to identify.
</instructions>"""
|
|
|
|
|
|
def _verify_report_with_claude(
    report_content: str, ground_truth_content: str, api_key: str
) -> str:
    """Ask Claude to cross-check the report against the ground truth.

    Builds the verification prompt, sends it to the Claude model, and
    returns the model's markdown verdict. Exits the process with status 1
    if the API call fails for any reason.
    """
    try:
        verification_prompt = _create_verification_prompt(
            report_content, ground_truth_content
        )
        claude = anthropic.Anthropic(api_key=api_key)

        logger.info("Sending verification request to Claude 4 Sonnet...")

        # Low temperature keeps the analysis consistent and factual.
        reply = claude.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4096,
            temperature=0.1,
            messages=[{"role": "user", "content": verification_prompt}],
        )

        return reply.content[0].text

    except Exception as e:
        logger.error(f"Error calling Claude API: {e}")
        sys.exit(1)
|
|
|
|
|
|
def main() -> None:
    """Entry point: parse CLI args, verify a report, and print/save results.

    Workflow: validate the input paths, load the API key, read the report
    and ground truth files, ask Claude to verify the report, print the
    verdict, and optionally save it to a markdown file.
    """
    # Local import: only needed for the saved-report timestamp below.
    from datetime import datetime

    parser = argparse.ArgumentParser(
        description="Verify SRE investigation reports against ground truth data"
    )
    parser.add_argument(
        "report_path", help="Path to the SRE investigation report (markdown file)"
    )
    parser.add_argument(
        "--data-path",
        default="backend/data/all_data_dump.txt",
        help="Path to the ground truth data file (default: backend/data/all_data_dump.txt)",
    )
    parser.add_argument(
        "--output", help="Optional output file to save verification results"
    )

    args = parser.parse_args()

    # Validate input files before doing any network work.
    if not os.path.exists(args.report_path):
        logger.error(f"Report file not found: {args.report_path}")
        sys.exit(1)

    if not os.path.exists(args.data_path):
        logger.error(f"Ground truth data file not found: {args.data_path}")
        sys.exit(1)

    # Get API key (fail fast with a clear message if it is missing).
    try:
        api_key = _get_anthropic_api_key()
    except ValueError as e:
        logger.error(f"API key error: {e}")
        sys.exit(1)

    # Read both input files (each exits the process on failure).
    logger.info(f"Reading report: {args.report_path}")
    report_content = _read_file(args.report_path)

    logger.info(f"Reading ground truth data: {args.data_path}")
    ground_truth_content = _read_file(args.data_path)

    # Verify report against the ground truth via Claude.
    logger.info("Starting verification process...")
    verification_result = _verify_report_with_claude(
        report_content, ground_truth_content, api_key
    )

    # Print results to stdout.
    print("\n" + "=" * 80)
    print("SRE REPORT VERIFICATION RESULTS")
    print("=" * 80)
    print(verification_result)
    print("=" * 80)

    # Save to output file if specified; a save failure is logged but does
    # not abort, since the results were already printed above.
    if args.output:
        try:
            with open(args.output, "w", encoding="utf-8") as f:
                # Fixed: was an f-string with no placeholders.
                f.write("# SRE Report Verification Results\n\n")
                f.write(f"**Report**: {args.report_path}\n")
                f.write(f"**Ground Truth**: {args.data_path}\n")
                # Fixed: the original wrote the current working directory
                # (Path().cwd()) under the "Verified on" label; record an
                # actual timestamp instead.
                f.write(f"**Verified on**: {datetime.now().isoformat()}\n\n")
                f.write("---\n\n")
                f.write(verification_result)
            logger.info(f"Verification results saved to: {args.output}")
        except Exception as e:
            logger.error(f"Error saving output file: {e}")

    logger.info("Verification complete!")
|
|
|
|
|
|
# Run the CLI when executed directly as a script.
if __name__ == "__main__":
    main()
|