mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-02-05 19:18:11 +00:00
fb81307c59
This pull request makes several improvements and additions to the GitHub-related tools and personas in the `discourse-ai` repository: 1. It adds the `WebBrowser` tool to the `Researcher` persona, allowing the AI to visit web pages, retrieve HTML content, extract the main content, and convert it to plain text. 2. It updates the `GithubFileContent`, `GithubPullRequestDiff`, and `GithubSearchCode` tools to handle HTTP responses more robustly (introducing size limits). 3. It refactors the `send_http_request` method in the `Tool` class to follow redirects when specified, and to read the response body in chunks to avoid memory issues with large responses (this applies only to `WebBrowser`). 4. It updates the system prompt for the `Researcher` persona to provide more detailed guidance on when to use Google search vs. web browsing, and how to optimize tool usage and reduce redundant requests. 5. It adds a new `web_browser_spec.rb` file with tests for the `WebBrowser` tool, covering various scenarios such as handling different HTML structures and following redirects.
103 lines
2.9 KiB
Ruby
103 lines
2.9 KiB
Ruby
# frozen_string_literal: true

module DiscourseAi
  module AiBot
    module Tools
      # AI bot tool that retrieves the content of one or more files from a
      # GitHub repository via the GitHub "contents" REST endpoint.
      #
      # NOTE(review): `parameters`, `send_http_request`, `read_response_body`
      # and `truncate` are not defined in this file; they are presumably
      # inherited from `Tool` — confirm there.
      class GithubFileContent < Tool
        # Describes the tool and its parameters.
        #
        # @return [Hash] tool name, description and parameter definitions
        def self.signature
          {
            name: name,
            description: "Retrieves the content of specified GitHub files",
            parameters: [
              {
                name: "repo_name",
                description: "The name of the GitHub repository (e.g., 'discourse/discourse')",
                type: "string",
                required: true,
              },
              {
                name: "file_paths",
                description: "The paths of the files to retrieve within the repository",
                type: "array",
                item_type: "string",
                required: true,
              },
              {
                name: "branch",
                description:
                  "The branch or commit SHA to retrieve the files from (default: 'main')",
                type: "string",
                required: false,
              },
            ],
          }
        end

        # @return [String] identifier of this tool
        def self.name
          "github_file_content"
        end

        # @return [String] repository in "owner/repo" form, as supplied by the caller
        def repo_name
          parameters[:repo_name]
        end

        # @return [Array<String>] repository-relative paths to fetch
        def file_paths
          parameters[:file_paths]
        end

        # @return [String] branch or commit SHA to read from; "main" when not supplied
        def branch
          parameters[:branch] || "main"
        end

        # @return [Hash] values describing this invocation (repo, comma-joined paths, branch)
        def description_args
          { repo_name: repo_name, file_paths: file_paths.join(", "), branch: branch }
        end

        # Fetches every requested file and reports what was found.
        #
        # @param _bot_user [Object] unused
        # @param llm [Object] forwarded to `truncate`
        # @return [Hash] may contain `:file_contents` (a single String with all
        #   retrieved files concatenated, passed through `truncate`) and/or
        #   `:missing_files` (Array of paths that could not be retrieved);
        #   `{ error: ... }` when neither is present
        def invoke(_bot_user, llm)
          owner, repo = repo_name.split("/")
          file_contents = {}
          missing_files = []

          file_paths.each do |file_path|
            content = fetch_file_content(owner, repo, file_path)

            if content
              file_contents[file_path] = content
            else
              missing_files << file_path
            end
          end

          result = {}

          unless file_contents.empty?
            blob =
              file_contents.map { |path, text| "File Path: #{path}:\n#{text}" }.join("\n")
            result[:file_contents] = truncate(
              blob,
              max_length: 20_000,
              percent_length: 0.3,
              llm: llm,
            )
          end

          result[:missing_files] = missing_files unless missing_files.empty?

          result.empty? ? { error: "No files found or retrieved." } : result
        end

        private

        # Requests a single file from the GitHub contents endpoint.
        #
        # @return [String, nil] decoded file text, or nil when the response is
        #   not a 200 or its body cannot be decoded
        def fetch_file_content(owner, repo, file_path)
          api_url =
            "https://api.github.com/repos/#{owner}/#{repo}/contents/#{file_path}?ref=#{branch}"

          # Sentinel that stays in place if the request block is never yielded to.
          response_code = "-1 unknown"
          body = nil

          send_http_request(
            api_url,
            headers: {
              "Accept" => "application/vnd.github.v3+json",
            },
            authenticate_github: true,
          ) do |response|
            response_code = response.code
            body = read_response_body(response)
          end

          return nil unless response_code == "200"

          decode_file_content(body)
        end

        # Extracts and Base64-decodes the "content" field of a contents-API payload.
        #
        # Returns nil instead of raising when the payload is not usable, so one
        # bad file is reported as missing rather than aborting the whole call:
        # - the body is not valid JSON (e.g. empty or cut short),
        # - the payload is not an object (the API answers with an array for
        #   directories),
        # - the object carries no string "content".
        #
        # @param body [String, nil] raw response body
        # @return [String, nil] UTF-8 text with invalid byte sequences replaced, or nil
        def decode_file_content(body)
          file_data = JSON.parse(body.to_s)
          return nil unless file_data.is_a?(Hash)

          encoded = file_data["content"]
          return nil unless encoded.is_a?(String)

          # decode64 yields a binary-encoded string; tag it as UTF-8 and scrub so
          # it can be safely combined with UTF-8 paths and serialized later.
          Base64.decode64(encoded).force_encoding(Encoding::UTF_8).scrub
        rescue JSON::ParserError
          nil
        end
      end
    end
  end
end