FEATURE: Add GitHub Helper AI Bot persona and tools (#513)

Introduces a new AI Bot persona called 'GitHub Helper' which is specialized in assisting with GitHub-related tasks and questions. It includes the following key changes:

- Implements the GitHub Helper persona class with its system prompt and available tools
   
- Adds three new AI Bot tools for GitHub interactions:
  - github_file_content: Retrieves content of files from a GitHub repository
  - github_pull_request_diff: Retrieves the diff for a GitHub pull request
  - github_search_code: Searches for code in a GitHub repository
    
- Updates the AI Bot dialects to support the new GitHub tools

- Implements support for multiple function calls in the standard tool dialect
This commit is contained in:
Sam 2024-03-08 06:37:23 +11:00 committed by GitHub
parent 176a4458f2
commit 2ad743d246
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 798 additions and 69 deletions

View File

@ -96,6 +96,7 @@ en:
ai_bot_allowed_groups: "When the GPT Bot has access to the PM, it will reply to members of these groups."
ai_bot_enabled_chat_bots: "Available models to act as an AI Bot"
ai_bot_add_to_header: "Display a button in the header to start a PM with a AI Bot"
ai_bot_github_access_token: "GitHub access token for use with GitHub AI tools (required for search support)"
ai_stability_api_key: "API key for the stability.ai API"
ai_stability_engine: "Image generation engine to use for the stability.ai API"
@ -154,6 +155,9 @@ en:
personas:
cannot_delete_system_persona: "System personas cannot be deleted, please disable it instead"
cannot_edit_system_persona: "System personas can only be renamed, you may not edit commands or system prompt, instead disable and make a copy"
github_helper:
name: "GitHub Helper"
description: "AI Bot specialized in assisting with GitHub-related tasks and questions"
general:
name: Forum Helper
description: "General purpose AI Bot capable of performing various tasks"
@ -190,6 +194,9 @@ en:
name: "Base Search Query"
description: "Base query to use when searching. Example: '#urgent' will prepend '#urgent' to the search query and only include topics with the urgent category or tag."
command_summary:
github_search_code: "GitHub code search"
github_file_content: "GitHub file content"
github_pull_request_diff: "GitHub pull request diff"
random_picker: "Random Picker"
categories: "List categories"
search: "Search"
@ -205,6 +212,9 @@ en:
dall_e: "Generate image"
search_meta_discourse: "Search Meta Discourse"
command_help:
github_search_code: "Search for code in a GitHub repository"
github_file_content: "Retrieve content of files from a GitHub repository"
github_pull_request_diff: "Retrieve a GitHub pull request diff"
random_picker: "Pick a random number or a random element of a list"
categories: "List all publicly visible categories on the forum"
search: "Search all public topics on the forum"
@ -220,6 +230,9 @@ en:
dall_e: "Generate image using DALL-E 3"
search_meta_discourse: "Search Meta Discourse"
command_description:
github_search_code: "Searched for '%{query}' in %{repo}"
github_pull_request_diff: "<a href='%{url}'>%{repo} %{pull_id}</a>"
github_file_content: "Retrieved content of %{file_paths} from %{repo_name}@%{branch}"
random_picker: "Picking from %{options}, picked: %{result}"
read: "Reading: <a href='%{url}'>%{title}</a>"
time: "Time in %{timezone} is %{time}"

View File

@ -318,3 +318,6 @@ discourse_ai:
ai_bot_add_to_header:
default: true
client: true
ai_bot_github_access_token:
default: ""
secret: true

View File

@ -2,6 +2,8 @@
module DiscourseAi
module AiBot
USER_AGENT = "Discourse AI Bot 1.0 (https://www.discourse.org)"
class EntryPoint
REQUIRE_TITLE_UPDATE = "discourse-ai-title-update"

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
module DiscourseAi
  module AiBot
    module Personas
      # Persona that equips the AI bot with the GitHub tool suite so it can
      # answer questions about repositories, pull requests and files.
      class GithubHelper < Persona
        # System prompt injected at the start of every conversation.
        def system_prompt
          <<~PROMPT
            You are a helpful GitHub assistant.
            You _understand_ and **generate** Discourse Flavored Markdown.
            You live in a Discourse Forum Message.
            Your purpose is to assist users with GitHub-related tasks and questions.
            When asked about a specific repository, pull request, or file, try to use the available tools to provide accurate and helpful information.
          PROMPT
        end

        # Tools this persona is allowed to invoke.
        def tools
          [
            Tools::GithubFileContent,
            Tools::GithubPullRequestDiff,
            Tools::GithubSearchCode,
          ]
        end
      end
    end
  end
end

View File

@ -15,6 +15,7 @@ module DiscourseAi
Personas::Creative => -6,
Personas::DallE3 => -7,
Personas::DiscourseHelper => -8,
Personas::GithubHelper => -9,
}
end
@ -64,8 +65,12 @@ module DiscourseAi
Tools::SettingContext,
Tools::RandomPicker,
Tools::DiscourseMetaSearch,
Tools::GithubFileContent,
Tools::GithubPullRequestDiff,
]
tools << Tools::GithubSearchCode if SiteSetting.ai_bot_github_access_token.present?
tools << Tools::ListTags if SiteSetting.tagging_enabled
tools << Tools::Image if SiteSetting.ai_stability_api_key.present?
@ -162,7 +167,7 @@ module DiscourseAi
tool_klass.new(
arguments,
tool_call_id: function_id,
tool_call_id: function_id || function_name,
persona_options: options[tool_klass].to_h,
)
end

View File

@ -276,6 +276,14 @@ module DiscourseAi
publish_final_update(reply_post) if stream_reply
end
# Usernames the playground should treat as bots: every persona-backed user
# plus the built-in bot accounts. Memoized for the lifetime of the object.
# NOTE(review): `pluck(:username)` relies on the unqualified column resolving
# to users.username through the join — confirm it is unambiguous in schema.
def available_bot_usernames
  @bot_usernames ||=
    AiPersona
      .joins(:user)
      .pluck(:username)
      .concat(DiscourseAi::AiBot::EntryPoint::BOTS.map(&:second))
end
private
def publish_final_update(reply_post)
@ -348,10 +356,6 @@ module DiscourseAi
max_backlog_age: 60,
)
end
def available_bot_usernames
@bot_usernames ||= DiscourseAi::AiBot::EntryPoint::BOTS.map(&:second)
end
end
end
end

View File

@ -0,0 +1,97 @@
# frozen_string_literal: true
module DiscourseAi
  module AiBot
    module Tools
      # Tool that fetches one or more files from a GitHub repository via the
      # REST "contents" API and returns their (possibly truncated) contents.
      class GithubFileContent < Tool
        # JSON-schema-like description of this tool for the LLM.
        def self.signature
          {
            name: name,
            description: "Retrieves the content of specified GitHub files",
            parameters: [
              {
                name: "repo_name",
                description: "The name of the GitHub repository (e.g., 'discourse/discourse')",
                type: "string",
                required: true,
              },
              {
                name: "file_paths",
                description: "The paths of the files to retrieve within the repository",
                type: "array",
                item_type: "string",
                required: true,
              },
              {
                name: "branch",
                description:
                  "The branch or commit SHA to retrieve the files from (default: 'main')",
                type: "string",
                required: false,
              },
            ],
          }
        end

        # Tool identifier used by the LLM when invoking this tool.
        def self.name
          "github_file_content"
        end

        def repo_name
          parameters[:repo_name]
        end

        def file_paths
          parameters[:file_paths]
        end

        # Falls back to 'main' when the model omits the optional branch param.
        def branch
          parameters[:branch] || "main"
        end

        def description_args
          { repo_name: repo_name, file_paths: file_paths.join(", "), branch: branch }
        end

        # Fetches every requested path; collects missing paths separately so a
        # single 404 does not discard the files that were found.
        def invoke(_bot_user, llm)
          owner, repo = repo_name.split("/")
          retrieved = {}
          not_found = []

          file_paths.each do |file_path|
            endpoint =
              "https://api.github.com/repos/#{owner}/#{repo}/contents/#{file_path}?ref=#{branch}"
            response =
              send_http_request(
                endpoint,
                headers: {
                  "Accept" => "application/vnd.github.v3+json",
                },
                authenticate_github: true,
              )

            if response.code == "200"
              payload = JSON.parse(response.body)
              # GitHub returns file bodies base64-encoded.
              retrieved[file_path] = Base64.decode64(payload["content"])
            else
              not_found << file_path
            end
          end

          result = {}

          if retrieved.any?
            blob = retrieved.map { |path, content| "File Path: #{path}:\n#{content}" }.join("\n")
            # Keep the combined blob within the model's prompt budget.
            result[:file_contents] = truncate(blob, max_length: 20_000, percent_length: 0.3, llm: llm)
          end

          result[:missing_files] = not_found if not_found.any?

          result.empty? ? { error: "No files found or retrieved." } : result
        end
      end
    end
  end
end

View File

@ -0,0 +1,72 @@
# frozen_string_literal: true
module DiscourseAi
  module AiBot
    module Tools
      # Tool that fetches the unified diff of a GitHub pull request using the
      # v3 diff media type.
      class GithubPullRequestDiff < Tool
        # JSON-schema-like description of this tool for the LLM.
        def self.signature
          {
            name: name,
            description: "Retrieves the diff for a GitHub pull request",
            parameters: [
              {
                name: "repo",
                description: "The repository name in the format 'owner/repo'",
                type: "string",
                required: true,
              },
              {
                name: "pull_id",
                description: "The ID of the pull request",
                type: "integer",
                required: true,
              },
            ],
          }
        end

        # Tool identifier used by the LLM when invoking this tool.
        def self.name
          "github_pull_request_diff"
        end

        # Web URL of the pull request; populated during #invoke, nil before.
        attr_reader :url

        def repo
          parameters[:repo]
        end

        def pull_id
          parameters[:pull_id]
        end

        def description_args
          { repo: repo, pull_id: pull_id, url: url }
        end

        def invoke(_bot_user, llm)
          api_url = "https://api.github.com/repos/#{repo}/pulls/#{pull_id}"
          @url = "https://github.com/#{repo}/pull/#{pull_id}"

          response =
            send_http_request(
              api_url,
              headers: {
                "Accept" => "application/vnd.github.v3.diff",
              },
              authenticate_github: true,
            )

          if response.code != "200"
            return { error: "Failed to retrieve the diff. Status code: #{response.code}" }
          end

          # Keep the diff within the model's prompt budget.
          { diff: truncate(response.body, max_length: 20_000, percent_length: 0.3, llm: llm) }
        end
      end
    end
  end
end

View File

@ -0,0 +1,72 @@
# frozen_string_literal: true
module DiscourseAi
  module AiBot
    module Tools
      # Tool that searches code inside a single GitHub repository via the
      # REST search API (text-match media type so fragments are returned).
      class GithubSearchCode < Tool
        # JSON-schema-like description of this tool for the LLM.
        def self.signature
          {
            name: name,
            description: "Searches for code in a GitHub repository",
            parameters: [
              {
                name: "repo",
                description: "The repository name in the format 'owner/repo'",
                type: "string",
                required: true,
              },
              {
                name: "query",
                description: "The search query (e.g., a function name, variable, or code snippet)",
                type: "string",
                required: true,
              },
            ],
          }
        end

        # Tool identifier used by the LLM when invoking this tool.
        def self.name
          "github_search_code"
        end

        def repo
          parameters[:repo]
        end

        def query
          parameters[:query]
        end

        def description_args
          { repo: repo, query: query }
        end

        def invoke(_bot_user, llm)
          # NOTE(review): query/repo are interpolated without URI escaping;
          # GitHub tolerates spaces, but confirm exotic characters (&, #, +)
          # survive the round-trip.
          api_url = "https://api.github.com/search/code?q=#{query}+repo:#{repo}"

          response =
            send_http_request(
              api_url,
              headers: {
                "Accept" => "application/vnd.github.v3.text-match+json",
              },
              authenticate_github: true,
            )

          if response.code != "200"
            return { error: "Failed to perform code search. Status code: #{response.code}" }
          end

          parsed = JSON.parse(response.body)
          fragments =
            parsed["items"]
              .map { |item| "#{item["path"]}:\n#{item["text_matches"][0]["fragment"]}" }
              .join("\n---\n")

          # Keep the combined fragments within the model's prompt budget.
          { search_results: truncate(fragments, max_length: 20_000, percent_length: 0.3, llm: llm) }
        end
      end
    end
  end
end

View File

@ -77,6 +77,35 @@ module DiscourseAi
protected
# Performs a GET request and returns the raw HTTP response. Goes through
# FinalDestination::HTTP so redirect/SSRF protections match core Discourse.
#
# @param url [String] absolute URL to fetch
# @param headers [Hash] extra request headers
# @param authenticate_github [Boolean] when true, sends the site's GitHub
#   access token as a Bearer Authorization header
def send_http_request(url, headers: {}, authenticate_github: false)
  uri = URI(url)
  request = FinalDestination::HTTP::Get.new(uri)
  request["User-Agent"] = DiscourseAi::AiBot::USER_AGENT
  headers.each { |k, v| request[k] = v }
  if authenticate_github
    request["Authorization"] = "Bearer #{SiteSetting.ai_bot_github_access_token}"
  end

  # Decide TLS from the URL scheme rather than `uri.port != 80`, which would
  # wrongly enable SSL for plain-http URLs served on non-80 ports.
  FinalDestination::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
    http.request(request)
  end
end
# Truncates +text+ to a token budget derived from the llm's maximum prompt
# size. At least one of percent_length / max_length must be supplied; when
# both are given the smaller resulting budget wins.
#
# @param text [String] text to truncate
# @param llm [Object] responds to #max_prompt_tokens and #tokenizer
# @param percent_length [Float, nil] fraction of the llm's prompt budget
# @param max_length [Integer, nil] hard cap on the token budget
# @raise [ArgumentError] when neither limit is provided
def truncate(text, llm:, percent_length: nil, max_length: nil)
  if !percent_length && !max_length
    raise ArgumentError, "You must provide either percent_length or max_length"
  end

  budget = llm.max_prompt_tokens
  budget = (budget * percent_length).to_i if percent_length
  budget = max_length if max_length && budget > max_length

  llm.tokenizer.truncate(text, budget)
end
# Persona options this tool accepts; subclasses override to expose settings.
def accepted_options
  []
end

View File

@ -36,7 +36,8 @@ module DiscourseAi
def tool_preamble
<<~TEXT
In this environment you have access to a set of tools you can use to answer the user's question.
You may call them like this. Only invoke one function at a time and wait for the results before invoking another function:
You may call them like this.
<function_calls>
<invoke>
<tool_name>$TOOL_NAME</tool_name>
@ -47,9 +48,12 @@ module DiscourseAi
</invoke>
</function_calls>
if a parameter type is an array, return a JSON array of values. For example:
If a parameter type is an array, return a JSON array of values. For example:
[1,"two",3.0]
Always wrap <invoke> calls in <function_calls> tags.
You may call multiple function via <invoke> in a single <function_calls> block.
Here are the tools available:
TEXT
end

View File

@ -27,8 +27,11 @@ module DiscourseAi
model_params
end
def default_options
{ model: model + "-20240229", max_tokens: 3_000, stop_sequences: ["</function_calls>"] }
def default_options(dialect)
options = { model: model + "-20240229", max_tokens: 3_000 }
options[:stop_sequences] = ["</function_calls>"] if dialect.prompt.has_tools?
options
end
def provider_id
@ -46,8 +49,8 @@ module DiscourseAi
@uri ||= URI("https://api.anthropic.com/v1/messages")
end
def prepare_payload(prompt, model_params, _dialect)
payload = default_options.merge(model_params).merge(messages: prompt.messages)
def prepare_payload(prompt, model_params, dialect)
payload = default_options(dialect).merge(model_params).merge(messages: prompt.messages)
payload[:system] = prompt.system_prompt if prompt.system_prompt.present?
payload[:stream] = true if @streaming_mode

View File

@ -64,6 +64,7 @@ module DiscourseAi
end
def perform_completion!(dialect, user, model_params = {})
allow_tools = dialect.prompt.has_tools?
model_params = normalize_model_params(model_params)
@streaming_mode = block_given?
@ -110,9 +111,11 @@ module DiscourseAi
response_data = extract_completion_from(response_raw)
partials_raw = response_data.to_s
if has_tool?(response_data)
if allow_tools && has_tool?(response_data)
function_buffer = build_buffer # Nokogiri document
function_buffer = add_to_buffer(function_buffer, "", response_data)
function_buffer = add_to_function_buffer(function_buffer, payload: response_data)
normalize_function_ids!(function_buffer)
response_data = +function_buffer.at("function_calls").to_s
response_data << "\n"
@ -156,6 +159,7 @@ module DiscourseAi
end
json_error = false
buffered_partials = []
raw_partials.each do |raw_partial|
json_error = false
@ -173,14 +177,30 @@ module DiscourseAi
# Stop streaming the response as soon as you find a tool.
# We'll buffer and yield it later.
has_tool = true if has_tool?(partials_raw)
has_tool = true if allow_tools && has_tool?(partials_raw)
if has_tool
function_buffer = add_to_buffer(function_buffer, partials_raw, partial)
if buffered_partials.present?
joined = buffered_partials.join
joined = joined.gsub(/<.+/, "")
yield joined, cancel if joined.present?
buffered_partials = []
end
function_buffer = add_to_function_buffer(function_buffer, partial: partial)
else
response_data << partial
yield partial, cancel if partial
if maybe_has_tool?(partials_raw)
buffered_partials << partial
else
if buffered_partials.present?
buffered_partials.each do |buffered_partial|
response_data << buffered_partial
yield buffered_partial, cancel
end
buffered_partials = []
end
response_data << partial
yield partial, cancel if partial
end
end
rescue JSON::ParserError
leftover = redo_chunk
@ -201,7 +221,11 @@ module DiscourseAi
# Once we have the full response, try to return the tool as a XML doc.
if has_tool
function_buffer = add_to_function_buffer(function_buffer, payload: partials_raw)
if function_buffer.at("tool_name").text.present?
normalize_function_ids!(function_buffer)
invocation = +function_buffer.at("function_calls").to_s
invocation << "\n"
@ -226,6 +250,21 @@ module DiscourseAi
end
end
# Ensures every <invoke> node in the buffer carries a non-blank <tool_id>,
# assigning sequential ids (tool_0, tool_1, ...) where the model omitted
# them or left them empty. Mutates the Nokogiri buffer in place.
def normalize_function_ids!(function_buffer)
  function_buffer.css("invoke").each_with_index do |invoke, index|
    id_node = invoke.at("tool_id")
    if id_node
      id_node.content = "tool_#{index}" if id_node.content.blank?
    else
      invoke.add_child("<tool_id>tool_#{index}</tool_id>\n")
    end
  end
end
def final_log_update(log)
# for people that need to override
end
@ -291,48 +330,36 @@ module DiscourseAi
(<<~TEXT).strip
<invoke>
<tool_name></tool_name>
<tool_id></tool_id>
<parameters>
</parameters>
<tool_id></tool_id>
</invoke>
TEXT
end
def has_tool?(response)
response.include?("<function")
response.include?("<function_calls>")
end
def add_to_buffer(function_buffer, response_data, partial)
raw_data = (response_data + partial)
def maybe_has_tool?(response)
# 16 is the length of function calls
substring = response[-16..-1] || response
split = substring.split("<")
# recover stop word potentially
raw_data =
raw_data.split("</invoke>").first + "</invoke>\n</function_calls>" if raw_data.split(
"</invoke>",
).length > 1
return function_buffer unless raw_data.include?("</invoke>")
read_function = Nokogiri::HTML5.fragment(raw_data)
if tool_name = read_function.at("tool_name")&.text
function_buffer.at("tool_name").inner_html = tool_name
function_buffer.at("tool_id").inner_html = tool_name
if split.length > 1
match = "<" + split.last
"<function_calls>".start_with?(match)
else
false
end
end
read_function
.at("parameters")
&.elements
.to_a
.each do |elem|
if parameter = function_buffer.at(elem.name)&.text
function_buffer.at(elem.name).inner_html = parameter
else
param_node = read_function.at(elem.name)
function_buffer.at("parameters").add_child(param_node)
function_buffer.at("parameters").add_child("\n")
end
end
def add_to_function_buffer(function_buffer, partial: nil, payload: nil)
if payload&.include?("</invoke>")
matches = payload.match(%r{<function_calls>.*</invoke>}m)
function_buffer =
Nokogiri::HTML5.fragment(matches[0] + "\n</function_calls>") if matches
end
function_buffer
end

View File

@ -104,12 +104,20 @@ module DiscourseAi
@has_function_call
end
def add_to_buffer(function_buffer, _response_data, partial)
if partial[:name].present?
function_buffer.at("tool_name").content = partial[:name]
function_buffer.at("tool_id").content = partial[:name]
def maybe_has_tool?(_partial_raw)
# we always get a full partial
false
end
def add_to_function_buffer(function_buffer, payload: nil, partial: nil)
if @streaming_mode
return function_buffer if !partial
else
partial = payload
end
function_buffer.at("tool_name").content = partial[:name] if partial[:name].present?
if partial[:args]
argument_fragments =
partial[:args].reduce(+"") do |memo, (arg_name, value)|

View File

@ -163,7 +163,18 @@ module DiscourseAi
@has_function_call
end
def add_to_buffer(function_buffer, _response_data, partial)
def maybe_has_tool?(_partial_raw)
# we always get a full partial
false
end
def add_to_function_buffer(function_buffer, partial: nil, payload: nil)
if @streaming_mode
return function_buffer if !partial
else
partial = payload
end
@args_buffer ||= +""
f_name = partial.dig(:function, :name)

View File

@ -49,6 +49,10 @@ module DiscourseAi
messages << new_message
end
# True when this prompt declares at least one tool the model may call.
def has_tools?
  tools.present?
end
private
def validate_message(message)

View File

@ -10,6 +10,36 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AnthropicMessages do
)
end
let(:echo_tool) do
{
name: "echo",
description: "echo something",
parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
}
end
let(:google_tool) do
{
name: "google",
description: "google something",
parameters: [
{ name: "query", type: "string", description: "text to google", required: true },
],
}
end
let(:prompt_with_echo_tool) do
prompt_with_tools = prompt
prompt.tools = [echo_tool]
prompt_with_tools
end
let(:prompt_with_google_tool) do
  prompt_with_tools = prompt
  # Fix copy/paste error: this helper must attach the google tool, not echo
  # (the sibling :google_tool let was otherwise unused).
  prompt.tools = [google_tool]
  prompt_with_tools
end
before { SiteSetting.ai_anthropic_api_key = "123" }
it "does not eat spaces with tool calls" do
@ -165,16 +195,18 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AnthropicMessages do
stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(status: 200, body: body)
result = +""
llm.generate(prompt, user: Discourse.system_user) { |partial| result << partial }
llm.generate(prompt_with_google_tool, user: Discourse.system_user) do |partial|
result << partial
end
expected = (<<~TEXT).strip
<function_calls>
<invoke>
<tool_name>google</tool_name>
<tool_id>google</tool_id>
<parameters>
<query>top 10 things to do in japan for tourists</query>
</parameters>
<tool_id>tool_0</tool_id>
</invoke>
</function_calls>
TEXT
@ -232,7 +264,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AnthropicMessages do
expected_body = {
model: "claude-3-opus-20240229",
max_tokens: 3000,
stop_sequences: ["</function_calls>"],
messages: [{ role: "user", content: "user1: hello" }],
system: "You are hello bot",
stream: true,
@ -245,6 +276,70 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AnthropicMessages do
expect(log.response_tokens).to eq(15)
end
it "can return multiple function calls" do
functions = <<~FUNCTIONS
<function_calls>
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>something</text>
</parameters>
</invoke>
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>something else</text>
</parameters>
</invoke>
FUNCTIONS
body = <<~STRING
{
"content": [
{
"text": "Hello!\n\n#{functions}\njunk",
"type": "text"
}
],
"id": "msg_013Zva2CMHLNnXjNJJKqJ2EF",
"model": "claude-3-opus-20240229",
"role": "assistant",
"stop_reason": "end_turn",
"stop_sequence": null,
"type": "message",
"usage": {
"input_tokens": 10,
"output_tokens": 25
}
}
STRING
stub_request(:post, "https://api.anthropic.com/v1/messages").to_return(status: 200, body: body)
result = llm.generate(prompt_with_echo_tool, user: Discourse.system_user)
expected = (<<~EXPECTED).strip
<function_calls>
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>something</text>
</parameters>
<tool_id>tool_0</tool_id>
</invoke>
<invoke>
<tool_name>echo</tool_name>
<parameters>
<text>something else</text>
</parameters>
<tool_id>tool_1</tool_id>
</invoke>
</function_calls>
EXPECTED
expect(result.strip).to eq(expected)
end
it "can operate in regular mode" do
body = <<~STRING
{
@ -287,7 +382,6 @@ RSpec.describe DiscourseAi::Completions::Endpoints::AnthropicMessages do
expected_body = {
model: "claude-3-opus-20240229",
max_tokens: 3000,
stop_sequences: ["</function_calls>"],
messages: [{ role: "user", content: "user1: hello" }],
system: "You are hello bot",
}

View File

@ -66,11 +66,11 @@ class EndpointMock
<function_calls>
<invoke>
<tool_name>get_weather</tool_name>
<tool_id>#{tool_id}</tool_id>
<parameters>
<location>Sydney</location>
<unit>c</unit>
</parameters>
<tool_id>tool_0</tool_id>
</invoke>
</function_calls>
TEXT

View File

@ -132,7 +132,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
fab!(:user)
let(:bedrock_mock) { GeminiMock.new(endpoint) }
let(:gemini_mock) { GeminiMock.new(endpoint) }
let(:compliance) do
EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::Gemini, user)
@ -142,13 +142,13 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
context "when using regular mode" do
context "with simple prompts" do
it "completes a trivial prompt and logs the response" do
compliance.regular_mode_simple_prompt(bedrock_mock)
compliance.regular_mode_simple_prompt(gemini_mock)
end
end
context "with tools" do
it "returns a function invocation" do
compliance.regular_mode_tools(bedrock_mock)
compliance.regular_mode_tools(gemini_mock)
end
end
end
@ -156,13 +156,13 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Gemini do
describe "when using streaming mode" do
context "with simple prompts" do
it "completes a trivial prompt and logs the response" do
compliance.streaming_mode_simple_prompt(bedrock_mock)
compliance.streaming_mode_simple_prompt(gemini_mock)
end
end
context "with tools" do
it "returns a function invocation" do
compliance.streaming_mode_tools(bedrock_mock)
compliance.streaming_mode_tools(gemini_mock)
end
end
end

View File

@ -102,7 +102,7 @@ class OpenAiMock < EndpointMock
end
def tool_id
"eujbuebfe"
"tool_0"
end
def tool_payload
@ -149,6 +149,16 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
fab!(:user)
let(:echo_tool) do
{
name: "echo",
description: "echo something",
parameters: [{ name: "text", type: "string", description: "text to echo", required: true }],
}
end
let(:tools) { [echo_tool] }
let(:open_ai_mock) { OpenAiMock.new(endpoint) }
let(:compliance) do
@ -260,23 +270,25 @@ TEXT
open_ai_mock.stub_raw(raw_data)
content = +""
endpoint.perform_completion!(compliance.dialect, user) { |partial| content << partial }
dialect = compliance.dialect(prompt: compliance.generic_prompt(tools: tools))
endpoint.perform_completion!(dialect, user) { |partial| content << partial }
expected = <<~TEXT
<function_calls>
<invoke>
<tool_name>search</tool_name>
<tool_id>call_3Gyr3HylFJwfrtKrL6NaIit1</tool_id>
<parameters>
<search_query>Discourse AI bot</search_query>
</parameters>
<tool_id>call_3Gyr3HylFJwfrtKrL6NaIit1</tool_id>
</invoke>
<invoke>
<tool_name>search</tool_name>
<tool_id>call_H7YkbgYurHpyJqzwUN4bghwN</tool_id>
<parameters>
<query>Discourse AI bot</query>
</parameters>
<tool_id>call_H7YkbgYurHpyJqzwUN4bghwN</tool_id>
</invoke>
</function_calls>
TEXT
@ -321,7 +333,8 @@ TEXT
open_ai_mock.stub_raw(chunks)
partials = []
endpoint.perform_completion!(compliance.dialect, user) { |partial| partials << partial }
dialect = compliance.dialect(prompt: compliance.generic_prompt(tools: tools))
endpoint.perform_completion!(dialect, user) { |partial| partials << partial }
expect(partials.length).to eq(1)
@ -329,10 +342,10 @@ TEXT
<function_calls>
<invoke>
<tool_name>google</tool_name>
<tool_id>func_id</tool_id>
<parameters>
<query>Adabas 9.1</query>
</parameters>
<tool_id>func_id</tool_id>
</invoke>
</function_calls>
TXT

View File

@ -153,6 +153,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::Artist,
DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
DiscourseAi::AiBot::Personas::GithubHelper,
DiscourseAi::AiBot::Personas::Researcher,
DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::SqlHelper,
@ -169,6 +170,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
DiscourseAi::AiBot::Personas::GithubHelper,
)
AiPersona.find(
@ -182,6 +184,7 @@ RSpec.describe DiscourseAi::AiBot::Personas::Persona do
DiscourseAi::AiBot::Personas::SettingsExplorer,
DiscourseAi::AiBot::Personas::Creative,
DiscourseAi::AiBot::Personas::DiscourseHelper,
DiscourseAi::AiBot::Personas::GithubHelper,
)
end
end

View File

@ -337,6 +337,15 @@ RSpec.describe DiscourseAi::AiBot::Playground do
end
end
describe "#available_bot_usernames" do
it "includes persona users" do
persona = Fabricate(:ai_persona)
persona.create_user!
expect(playground.available_bot_usernames).to include(persona.user.username)
end
end
describe "#conversation_context" do
context "with limited context" do
before do

View File

@ -0,0 +1,78 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for the github_file_content AI bot tool: a stubbed happy-path
# retrieval of two files, and the static .signature contract.
RSpec.describe DiscourseAi::AiBot::Tools::GithubFileContent do
let(:tool) do
described_class.new(
{
repo_name: "discourse/discourse-ai",
file_paths: %w[lib/database/connection.rb lib/ai_bot/tools/github_pull_request_diff.rb],
branch: "8b382d6098fde879d28bbee68d3cbe0a193e4ffc",
},
)
end
let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4") }
describe "#invoke" do
before do
# Stub the GitHub contents API for both requested paths; bodies are
# base64-encoded as the real API returns them.
stub_request(
:get,
"https://api.github.com/repos/discourse/discourse-ai/contents/lib/database/connection.rb?ref=8b382d6098fde879d28bbee68d3cbe0a193e4ffc",
).to_return(
status: 200,
body: { content: Base64.encode64("content of connection.rb") }.to_json,
)
stub_request(
:get,
"https://api.github.com/repos/discourse/discourse-ai/contents/lib/ai_bot/tools/github_pull_request_diff.rb?ref=8b382d6098fde879d28bbee68d3cbe0a193e4ffc",
).to_return(
status: 200,
body: { content: Base64.encode64("content of github_pull_request_diff.rb") }.to_json,
)
end
it "retrieves the content of the specified GitHub files" do
result = tool.invoke(nil, llm)
# Both files decoded and joined in the "File Path: ..." blob format.
expected = {
file_contents:
"File Path: lib/database/connection.rb:\ncontent of connection.rb\nFile Path: lib/ai_bot/tools/github_pull_request_diff.rb:\ncontent of github_pull_request_diff.rb",
}
expect(result).to eq(expected)
end
end
describe ".signature" do
it "returns the tool signature" do
signature = described_class.signature
expect(signature[:name]).to eq("github_file_content")
expect(signature[:description]).to eq("Retrieves the content of specified GitHub files")
expect(signature[:parameters]).to eq(
[
{
name: "repo_name",
description: "The name of the GitHub repository (e.g., 'discourse/discourse')",
type: "string",
required: true,
},
{
name: "file_paths",
description: "The paths of the files to retrieve within the repository",
type: "array",
item_type: "string",
required: true,
},
{
name: "branch",
description: "The branch or commit SHA to retrieve the files from (default: 'main')",
type: "string",
required: false,
},
],
)
end
end
end

View File

@ -0,0 +1,61 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for the github_pull_request_diff AI bot tool: diff retrieval with and
# without the site GitHub token, plus the 404 error path.
RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do
let(:tool) { described_class.new({ repo: repo, pull_id: pull_id }) }
let(:bot_user) { Fabricate(:user) }
let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4") }
context "with a valid pull request" do
let(:repo) { "discourse/discourse-automation" }
let(:pull_id) { 253 }
it "retrieves the diff for the pull request" do
stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
headers: {
"Accept" => "application/vnd.github.v3.diff",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
},
).to_return(status: 200, body: "sample diff")
result = tool.invoke(bot_user, llm)
expect(result[:diff]).to eq("sample diff")
expect(result[:error]).to be_nil
end
it "uses the github access token if present" do
SiteSetting.ai_bot_github_access_token = "ABC"
# The stub requires the Authorization header, proving the token is sent.
stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
headers: {
"Accept" => "application/vnd.github.v3.diff",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
"Authorization" => "Bearer ABC",
},
).to_return(status: 200, body: "sample diff")
result = tool.invoke(bot_user, llm)
expect(result[:diff]).to eq("sample diff")
expect(result[:error]).to be_nil
end
end
context "with an invalid pull request" do
let(:repo) { "invalid/repo" }
let(:pull_id) { 999 }
it "returns an error message" do
stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
headers: {
"Accept" => "application/vnd.github.v3.diff",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
},
).to_return(status: 404)
result = tool.invoke(bot_user, nil)
expect(result[:diff]).to be_nil
expect(result[:error]).to include("Failed to retrieve the diff")
end
end
end

View File

@ -0,0 +1,93 @@
# frozen_string_literal: true
require "rails_helper"
# Specs for the github_search_code AI bot tool: result formatting, empty
# results (with auth header), the error path, and #description_args.
RSpec.describe DiscourseAi::AiBot::Tools::GithubSearchCode do
let(:tool) { described_class.new({ repo: repo, query: query }) }
let(:bot_user) { Fabricate(:user) }
let(:llm) { DiscourseAi::Completions::Llm.proxy("open_ai:gpt-4") }
context "with valid search results" do
let(:repo) { "discourse/discourse" }
let(:query) { "def hello" }
it "searches for code in the repository" do
stub_request(
:get,
"https://api.github.com/search/code?q=def%20hello+repo:discourse/discourse",
).with(
headers: {
"Accept" => "application/vnd.github.v3.text-match+json",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
},
).to_return(
status: 200,
body: {
total_count: 1,
items: [
{
path: "test/hello.rb",
name: "hello.rb",
text_matches: [{ fragment: "def hello\n puts 'hello'\nend" }],
},
],
}.to_json,
)
result = tool.invoke(bot_user, llm)
# Results include both the matched path and the text-match fragment.
expect(result[:search_results]).to include("def hello\n puts 'hello'\nend")
expect(result[:search_results]).to include("test/hello.rb")
expect(result[:error]).to be_nil
end
end
context "with an empty search result" do
let(:repo) { "discourse/discourse" }
let(:query) { "nonexistent_method" }
describe "#description_args" do
it "returns the repo and query" do
expect(tool.description_args).to eq(repo: repo, query: query)
end
end
it "returns an empty result" do
SiteSetting.ai_bot_github_access_token = "ABC"
# The stub requires the Authorization header, proving the token is sent.
stub_request(
:get,
"https://api.github.com/search/code?q=nonexistent_method+repo:discourse/discourse",
).with(
headers: {
"Accept" => "application/vnd.github.v3.text-match+json",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
"Authorization" => "Bearer ABC",
},
).to_return(status: 200, body: { total_count: 0, items: [] }.to_json)
result = tool.invoke(bot_user, llm)
expect(result[:search_results]).to be_empty
expect(result[:error]).to be_nil
end
end
context "with an error response" do
let(:repo) { "discourse/discourse" }
let(:query) { "def hello" }
it "returns an error message" do
stub_request(
:get,
"https://api.github.com/search/code?q=def%20hello+repo:discourse/discourse",
).with(
headers: {
"Accept" => "application/vnd.github.v3.text-match+json",
"User-Agent" => DiscourseAi::AiBot::USER_AGENT,
},
).to_return(status: 403)
result = tool.invoke(bot_user, llm)
expect(result[:search_results]).to be_nil
expect(result[:error]).to include("Failed to perform code search")
end
end
end