discourse-ai/lib/ai_bot/tools/github_file_content.rb
Sam fb81307c59
FEATURE: web browsing tool (#548)
This pull request makes several improvements and additions to the GitHub-related tools and personas in the `discourse-ai` repository:

1. It adds the `WebBrowser` tool to the `Researcher` persona, allowing the AI to visit web pages, retrieve HTML content, extract the main content, and convert it to plain text.

2. It updates the `GithubFileContent`, `GithubPullRequestDiff`, and `GithubSearchCode` tools to handle HTTP responses more robustly (introducing size limits). 

3. It refactors the `send_http_request` method in the `Tool` class to follow redirects when specified, and to read the response body in chunks to avoid memory issues with large responses. (only for WebBrowser)

4. It updates the system prompt for the `Researcher` persona to provide more detailed guidance on when to use Google search vs web browsing, and how to optimize tool usage and reduce redundant requests.

5. It adds a new `web_browser_spec.rb` file with tests for the `WebBrowser` tool, covering various scenarios like handling different HTML structures and following redirects.
2024-03-28 16:01:58 +11:00

103 lines
2.9 KiB
Ruby

# frozen_string_literal: true
module DiscourseAi
  module AiBot
    module Tools
      # AI bot tool that retrieves the content of one or more files from a
      # GitHub repository and returns them to the LLM as a single text blob.
      class GithubFileContent < Tool
        # Schema describing this tool: its name, a description and the
        # parameters it accepts.
        #
        # @return [Hash]
        def self.signature
          {
            name: name,
            description: "Retrieves the content of specified GitHub files",
            parameters: [
              {
                name: "repo_name",
                description: "The name of the GitHub repository (e.g., 'discourse/discourse')",
                type: "string",
                required: true,
              },
              {
                name: "file_paths",
                description: "The paths of the files to retrieve within the repository",
                type: "array",
                item_type: "string",
                required: true,
              },
              {
                name: "branch",
                description:
                  "The branch or commit SHA to retrieve the files from (default: 'main')",
                type: "string",
                required: false,
              },
            ],
          }
        end

        # @return [String] the identifier of this tool
        def self.name
          "github_file_content"
        end

        # @return [String] the "owner/repo" value supplied in the parameters
        def repo_name
          parameters[:repo_name]
        end

        # Requested file paths, always as an Array: a missing value becomes
        # an empty list and a single bare string becomes a one-element list,
        # so callers can iterate/join without nil or type checks.
        #
        # @return [Array<String>]
        def file_paths
          Array(parameters[:file_paths])
        end

        # @return [String] the requested branch or commit SHA, "main" when none was given
        def branch
          parameters[:branch] || "main"
        end

        # @return [Hash] values describing this invocation (repo, joined paths, branch)
        def description_args
          { repo_name: repo_name, file_paths: file_paths.join(", "), branch: branch }
        end

        # Fetches every requested file and builds the tool result.
        #
        # @param _bot_user unused
        # @param llm passed through to +truncate+ when shortening the combined blob
        # @return [Hash] +:file_contents+ (String) when at least one file was
        #   retrieved, +:missing_files+ (Array<String>) when at least one could
        #   not be retrieved, or +{ error: ... }+ when neither applies
        def invoke(_bot_user, llm)
          owner, repo = repo_name.split("/")
          file_contents = {}
          missing_files = []

          file_paths.each do |file_path|
            content = fetch_file_content(owner, repo, file_path)

            if content
              file_contents[file_path] = content
            else
              missing_files << file_path
            end
          end

          result = {}

          unless file_contents.empty?
            blob =
              file_contents.map { |path, content| "File Path: #{path}:\n#{content}" }.join("\n")
            result[:file_contents] = truncate(
              blob,
              max_length: 20_000,
              percent_length: 0.3,
              llm: llm,
            )
          end

          result[:missing_files] = missing_files unless missing_files.empty?
          result.empty? ? { error: "No files found or retrieved." } : result
        end

        private

        # Requests a single file from the GitHub contents endpoint.
        #
        # NOTE(review): owner, repo, path and branch are interpolated into the
        # URL as-is; confirm that send_http_request normalises/encodes the URL.
        #
        # @return [String, nil] the decoded file content, or nil when the
        #   response status is not "200" or the payload cannot be decoded
        def fetch_file_content(owner, repo, file_path)
          api_url =
            "https://api.github.com/repos/#{owner}/#{repo}/contents/#{file_path}?ref=#{branch}"

          # Stays at this sentinel if the block below is never yielded to.
          response_code = "-1 unknown"
          body = nil

          send_http_request(
            api_url,
            headers: {
              "Accept" => "application/vnd.github.v3+json",
            },
            authenticate_github: true,
          ) do |response|
            response_code = response.code
            body = read_response_body(response)
          end

          return nil unless response_code == "200"

          decode_file_content(body)
        end

        # Extracts and Base64-decodes the "content" field of a contents-API
        # response body.
        #
        # Returns nil instead of raising when the body is not a usable file
        # payload: invalid JSON (e.g. a body cut short by a response size
        # limit), a JSON array (returned by GitHub for directory paths) or an
        # object without a string "content" field. One bad path must not
        # abort the retrieval of the remaining files.
        #
        # @param body [String, nil] raw response body
        # @return [String, nil] UTF-8 file content, or nil when not decodable
        def decode_file_content(body)
          file_data = JSON.parse(body)
          encoded = file_data["content"] if file_data.is_a?(Hash)
          return nil unless encoded.is_a?(String)

          # decode64 yields a binary (ASCII-8BIT) string; re-tag it as UTF-8
          # and replace invalid bytes so it can be safely combined with the
          # UTF-8 file path and other text.
          Base64.decode64(encoded).force_encoding(Encoding::UTF_8).scrub
        rescue JSON::ParserError, TypeError
          # TypeError covers a nil body passed to JSON.parse.
          nil
        end
      end
    end
  end
end