Mirror of https://github.com/discourse/discourse-ai.git (synced 2025-03-06 17:30:20 +00:00)
FEATURE: implement thinking token support (#1155)
Adds support for "thinking tokens", a feature that exposes the model's reasoning process before the final response. Key improvements:

- Add a new Thinking class to handle thinking content from LLMs
- Modify endpoints (Claude, AWS Bedrock) to handle thinking output
- Update the AI bot to display thinking in a collapsible details section
- Fix SEARCH/REPLACE blocks to support empty replacement strings, plus general improvements to artifact editing
- Allow configurable temperature in triage and report automations
- Various bug fixes and improvements to diff parsing
This commit is contained in:
Parent: 3f20b24aa3
Commit: f6eedf3e0b
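Note on the API shape (a sketch inferred from the diff below, not authoritative usage documentation): callers opt in via the new output_thinking flag, and the streaming block then receives DiscourseAi::Completions::Thinking partials alongside regular text and tool calls.

# Minimal sketch, assuming `llm`, `prompt`, and `user` are already built
llm.generate(prompt, user: user, output_thinking: true) do |partial, cancel|
  if partial.is_a?(DiscourseAi::Completions::Thinking)
    # reasoning tokens; the AI bot wraps these in a collapsible details section
    print(partial.message) if partial.partial? && partial.message.present?
  elsif partial.is_a?(DiscourseAi::Completions::ToolCall)
    # the model asked to run a tool
  else
    print(partial) # ordinary response text
  end
end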
@@ -168,12 +168,14 @@ export default class BotSelector extends Component {
       .filter((bot) => !bot.is_persona)
       .filter(Boolean);

-    return availableBots.map((bot) => {
-      return {
-        id: bot.id,
-        name: bot.display_name,
-      };
-    });
+    return availableBots
+      .map((bot) => {
+        return {
+          id: bot.id,
+          name: bot.display_name,
+        };
+      })
+      .sort((a, b) => a.name.localeCompare(b.name));
   }

   <template>
@@ -85,10 +85,10 @@ en:
           description: "Prioritize content from this group in the report"
         temperature:
           label: "Temperature"
-          description: "Temperature to use for the LLM. Increase to increase randomness (0 to use model default)"
+          description: "Temperature to use for the LLM. Increase to increase randomness (leave empty to use model default)"
         top_p:
           label: "Top P"
-          description: "Top P to use for the LLM, increase to increase randomness (0 to use model default)"
+          description: "Top P to use for the LLM, increase to increase randomness (leave empty to use model default)"

       llm_triage:
         fields:
@@ -131,6 +131,9 @@ en:
           model:
             label: "Model"
             description: "Language model used for triage"
+          temperature:
+            label: "Temperature"
+            description: "Temperature to use for the LLM. Increase to increase randomness (leave empty to use model default)"

   discourse_ai:
     title: "AI"
@@ -403,7 +406,7 @@ en:
       open_ai-o1: "Open AI's most capable reasoning model"
       open_ai-o3-mini: "Advanced Cost-efficient reasoning model"
       samba_nova-Meta-Llama-3-1-8B-Instruct: "Efficient lightweight multilingual model"
-      samba_nova-Meta-Llama-3-1-70B-Instruct": "Powerful multipurpose model"
+      samba_nova-Meta-Llama-3-3-70B-Instruct": "Powerful multipurpose model"
       mistral-mistral-large-latest: "Mistral's most powerful model"
       mistral-pixtral-large-latest: "Mistral's most powerful vision capable model"
@@ -261,6 +261,7 @@ en:
     ai_bot:
       reply_error: "Sorry, it looks like our system encountered an unexpected issue while trying to reply.\n\n[details='Error details']\n%{details}\n[/details]"
       default_pm_prefix: "[Untitled AI bot PM]"
+      thinking: "Thinking..."
       personas:
         default_llm_required: "Default LLM model is required prior to enabling Chat"
         cannot_delete_system_persona: "System personas cannot be deleted, please disable it instead"
@@ -37,8 +37,8 @@ if defined?(DiscourseAutomation)

     field :allow_secure_categories, component: :boolean

-    field :top_p, component: :text, required: true, default_value: 0.1
-    field :temperature, component: :text, required: true, default_value: 0.2
+    field :top_p, component: :text
+    field :temperature, component: :text

     field :suppress_notifications, component: :boolean
     field :debug_mode, component: :boolean
@@ -64,12 +64,19 @@ if defined?(DiscourseAutomation)
     exclude_category_ids = fields.dig("exclude_categories", "value")
     exclude_tags = fields.dig("exclude_tags", "value")

-    # set defaults in code to support easy migration for old rules
-    top_p = 0.1
-    top_p = fields.dig("top_p", "value").to_f if fields.dig("top_p", "value")
+    top_p = fields.dig("top_p", "value")
+    if top_p == "" || top_p.nil?
+      top_p = nil
+    else
+      top_p = top_p.to_f
+    end

-    temperature = 0.2
-    temperature = fields.dig("temperature", "value").to_f if fields.dig("temperature", "value")
+    temperature = fields.dig("temperature", "value")
+    if temperature == "" || temperature.nil?
+      temperature = nil
+    else
+      temperature = temperature.to_f
+    end

     suppress_notifications = !!fields.dig("suppress_notifications", "value")
     DiscourseAi::Automation::ReportRunner.run!(
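The repeated empty-string/nil handling above boils down to one rule: a blank automation field now means "use the model default" (nil) rather than a hard-coded fallback. A hypothetical helper capturing that rule (not part of the commit, shown only to make the semantics explicit):

# Sketch: normalize an automation text field to a Float, or nil for "model default"
def normalize_llm_param(value)
  return nil if value.nil? || value == ""
  value.to_f
end

top_p = normalize_llm_param(fields.dig("top_p", "value"))
temperature = normalize_llm_param(fields.dig("temperature", "value"))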
@@ -24,6 +24,7 @@ if defined?(DiscourseAutomation)
     field :hide_topic, component: :boolean
     field :flag_post, component: :boolean
     field :include_personal_messages, component: :boolean
+    field :temperature, component: :text
     field :flag_type,
           component: :choices,
           required: false,
@@ -53,6 +54,12 @@ if defined?(DiscourseAutomation)
     flag_post = fields.dig("flag_post", "value")
     flag_type = fields.dig("flag_type", "value")
     max_post_tokens = fields.dig("max_post_tokens", "value").to_i
+    temperature = fields.dig("temperature", "value")
+    if temperature == "" || temperature.nil?
+      temperature = nil
+    else
+      temperature = temperature.to_f
+    end

     max_post_tokens = nil if max_post_tokens <= 0
@@ -93,6 +100,7 @@ if defined?(DiscourseAutomation)
         max_post_tokens: max_post_tokens,
         stop_sequences: stop_sequences,
         automation: self.automation,
+        temperature: temperature,
       )
     rescue => e
       Discourse.warn_exception(e, message: "llm_triage: skipped triage on post #{post.id}")
@@ -90,15 +90,45 @@ module DiscourseAi

       def extract_search_replace_blocks(content)
         return nil if content.blank? || content.to_s.strip.downcase.match?(/^\(?no changes?\)?$/m)
-        return [{ replace: content }] if !content.match?(/<<+\s*SEARCH/)
+        return [{ replace: content }] if !content.include?("<<< SEARCH")

         blocks = []
-        remaining = content
+        current_block = {}
+        state = :initial
+        search_lines = []
+        replace_lines = []

-        pattern = /<<+\s*SEARCH\s*\n(.*?)\n=+\s*\n(.*?)\n>>+\s*REPLACE/m
-        while remaining =~ pattern
-          blocks << { search: $1.strip, replace: $2.strip }
-          remaining = $'
+        content.each_line do |line|
+          line = line.chomp
+
+          case state
+          when :initial
+            state = :collecting_search if line.match?(/^<<<* SEARCH/)
+          when :collecting_search
+            if line.start_with?("===")
+              current_block[:search] = search_lines.join("\n").strip
+              search_lines = []
+              state = :collecting_replace
+            else
+              search_lines << line
+            end
+          when :collecting_replace
+            if line.match?(/>>>* REPLACE/)
+              current_block[:replace] = replace_lines.join("\n").strip
+              replace_lines = []
+              blocks << current_block
+              current_block = {}
+              state = :initial
+            else
+              replace_lines << line
+            end
+          end
+        end
+
+        # Handle any remaining block
+        if state == :collecting_replace && !replace_lines.empty?
+          current_block[:replace] = replace_lines.join("\n").strip
+          blocks << current_block
         end

         blocks.empty? ? nil : blocks
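To make the new parser's contract concrete, here is an illustrative call (hypothetical input; behavior follows the state machine above):

content = <<~BLOCKS
  <<<<<<< SEARCH
  .button { color: blue; }
  =======
  .button { color: red; }
  >>>>>>> REPLACE
BLOCKS

extract_search_replace_blocks(content)
# => [{ search: ".button { color: blue; }", replace: ".button { color: red; }" }]
# A terminated block whose replacement section is empty yields replace: "",
# which is what enables empty replacement strings (i.e. deletions).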
@@ -108,26 +138,50 @@ module DiscourseAi
         <<~PROMPT
           You are a web development expert generating precise search/replace changes for updating HTML, CSS, and JavaScript code.

-          Important rules:
+          CRITICAL RULES:

           1. Use EXACTLY this format for changes:
              <<<<<<< SEARCH
-             (first line of code to replace)
-             (other lines of code to avoid ambiguity)
-             (last line of code to replace)
+             (code to replace)
              =======
              (replacement code)
              >>>>>>> REPLACE
-          2. DO NOT modify the markers or add spaces around them
-          3. DO NOT add explanations or comments within sections
-          4. ONLY include [HTML], [CSS], and [JavaScript] sections if they have changes
-          5. HTML should not include <html>, <head>, or <body> tags, it is injected into a template
-          6. When specifying a SEARCH block, ALWAYS keep it 8 lines or less, you will be interrupted and a retry will be required if you exceed this limit
-          7. NEVER EVER ask followup questions, ALL changes must be performed in a single response, you are consumed via an API, there is no opportunity for humans in the loop
-          8. When performing a non-contiguous search, ALWAYS use ... to denote the skipped lines
-          9. Be mindful that ... non-contiguous search is not greedy, the following line will only match the first occurrence of the search block
-          10. Never mix a full section replacement with a search/replace block in the same section
-          11. ALWAYS skip sections you to not want to change, do not include them in the response
+
+          2. SEARCH blocks MUST be 8 lines or less. Break larger changes into multiple smaller search/replace blocks.
+
+          3. DO NOT modify the markers or add spaces around them.
+
+          4. DO NOT add explanations or comments within sections.
+
+          5. ONLY include [HTML], [CSS], and [JavaScript] sections if they have changes.
+
+          6. HTML should not include <html>, <head>, or <body> tags, it is injected into a template.
+
+          7. NEVER EVER ask followup questions, ALL changes must be performed in a single response.
+
+          8. When performing a non-contiguous search, ALWAYS use ... to denote the skipped lines.
+
+          9. Be mindful that ... non-contiguous search is not greedy, it will only match the first occurrence.
+
+          10. Never mix a full section replacement with a search/replace block in the same section.
+
+          11. ALWAYS skip sections you do not want to change, do not include them in the response.
+
+          HANDLING LARGE CHANGES:
+
+          - Break large HTML structures into multiple smaller search/replace blocks.
+          - Use strategic anchor points like unique IDs or class names to target specific elements.
+          - Consider replacing entire components rather than modifying complex internals.
+          - When elements contain dynamic content, use precise context markers or replace entire containers.
+
+          VALIDATION CHECKLIST:
+          - Each SEARCH block is 8 lines or less
+          - Every SEARCH has exactly one matching REPLACE
+          - All blocks are properly closed
+          - No SEARCH/REPLACE blocks are nested
+          - Each change is a complete, separate block with its own SEARCH/REPLACE markers
+
+          WARNING: Never nest search/replace blocks. Each change must be a complete sequence.

           JavaScript libraries must be sourced from the following CDNs, otherwise CSP will reject it:
           #{AiArtifact::ALLOWED_CDN_SOURCES.join("\n")}
@@ -143,7 +197,7 @@ module DiscourseAi
           (changes or empty if no changes or entire JavaScript)
           [/JavaScript]

-          Example - Multiple changes in one file:
+          EXAMPLE 1 - Multiple small changes in one file:

           [JavaScript]
           <<<<<<< SEARCH
@@ -158,39 +212,35 @@ module DiscourseAi
           >>>>>>> REPLACE
           [/JavaScript]

-          Example - CSS with multiple blocks:
+          EXAMPLE 2 - Breaking up large HTML changes:

-          [CSS]
+          [HTML]
           <<<<<<< SEARCH
-          .button { color: blue; }
+          <div class="header">
+            <div class="logo">
+              <img src="old-logo.png">
+            </div>
           =======
-          .button { color: red; }
+          <div class="header">
+            <div class="logo">
+              <img src="new-logo.png">
+            </div>
           >>>>>>> REPLACE

           <<<<<<< SEARCH
-          .text { font-size: 12px; }
+          <div class="navigation">
+            <ul>
+              <li>Home</li>
+              <li>Products</li>
           =======
-          .text { font-size: 16px; }
+          <div class="navigation">
+            <ul>
+              <li>Home</li>
+              <li>Services</li>
           >>>>>>> REPLACE
-          [/CSS]
+          [/HTML]

-          Example - Non contiguous search in CSS (replace most CSS with new CSS)
+          EXAMPLE 3 - Non-contiguous search in CSS:

-          Original CSS:
-
-          [CSS]
-          body {
-          color: red;
-          }
-          .button {
-          color: blue;
-          }
-          .alert {
-          background-color: green;
-          }
-          .alert2 {
-          background-color: green;
-          }
-          [/CSS]

           [CSS]
           <<<<<<< SEARCH
@@ -203,23 +253,13 @@ module DiscourseAi
           color: red;
           }
           >>>>>>> REPLACE

-          RESULT:
-
-          [CSS]
-          body {
-          color: red;
-          }
-          .alert2 {
-          background-color: green;
-          }
           [/CSS]

-          Example - full HTML replacement:
+          EXAMPLE 4 - Full HTML replacement:

           [HTML]
           <div>something old</div>
-          <div>another somethin old</div>
+          <div>another something old</div>
           [/HTML]

           output:
@@ -227,13 +267,6 @@ module DiscourseAi
           [HTML]
           <div>something new</div>
           [/HTML]

-          result:
-
-          [HTML]
-          <div>something new</div>
-          [/HTML]

         PROMPT
       end
@@ -6,8 +6,10 @@ module DiscourseAi
       attr_reader :model

       BOT_NOT_FOUND = Class.new(StandardError)
-      MAX_COMPLETIONS = 5
-      MAX_TOOLS = 5
+      # the future is agentic, allow for more turns
+      MAX_COMPLETIONS = 8
+      # limit is arbitrary, but 5 which was used in the past was too low
+      MAX_TOOLS = 20

       def self.as(bot_user, persona: DiscourseAi::AiBot::Personas::General.new, model: nil)
         new(bot_user, persona, model)
@@ -111,12 +113,14 @@ module DiscourseAi

       allow_partial_tool_calls = persona.allow_partial_tool_calls?
       existing_tools = Set.new
+      current_thinking = []

       result =
         llm.generate(
           prompt,
           feature_name: "bot",
           partial_tool_calls: allow_partial_tool_calls,
+          output_thinking: true,
           **llm_kwargs,
         ) do |partial, cancel|
           tool =
@@ -147,7 +151,17 @@ module DiscourseAi
             needs_newlines = false
           end

-          process_tool(tool, raw_context, llm, cancel, update_blk, prompt, context)
+          process_tool(
+            tool: tool,
+            raw_context: raw_context,
+            llm: llm,
+            cancel: cancel,
+            update_blk: update_blk,
+            prompt: prompt,
+            context: context,
+            current_thinking: current_thinking,
+          )

           tools_ran += 1
           ongoing_chain &&= tool.chain_next_response?
@@ -157,28 +171,80 @@ module DiscourseAi
            needs_newlines = true
            if partial.is_a?(DiscourseAi::Completions::ToolCall)
              Rails.logger.warn("DiscourseAi: Tool not found: #{partial.name}")
            else
-             update_blk.call(partial, cancel)
+             if partial.is_a?(DiscourseAi::Completions::Thinking)
+               if partial.partial? && partial.message.present?
+                 update_blk.call(partial.message, cancel, nil, :thinking)
+               end
+               if !partial.partial?
+                 # this will be dealt with later
+                 raw_context << partial
+                 current_thinking << partial
+               end
+             else
+               update_blk.call(partial, cancel)
+             end
            end
          end
        end

        if !tool_found
          ongoing_chain = false
-         raw_context << [result, bot_user.username]
+         text = result
+
+         # we must strip out thinking and other types of blocks
+         if result.is_a?(Array)
+           text = +""
+           result.each { |item| text << item if item.is_a?(String) }
+         end
+
+         raw_context << [text, bot_user.username]
        end

        total_completions += 1

        # do not allow tools when we are at the end of a chain (total_completions == MAX_COMPLETIONS)
        prompt.tools = [] if total_completions == MAX_COMPLETIONS
      end

-     raw_context
+     embed_thinking(raw_context)
    end

    private

-   def process_tool(tool, raw_context, llm, cancel, update_blk, prompt, context)
+   def embed_thinking(raw_context)
+     embedded_thinking = []
+     thinking_info = nil
+     raw_context.each do |context|
+       if context.is_a?(DiscourseAi::Completions::Thinking)
+         thinking_info ||= {}
+         if context.redacted
+           thinking_info[:redacted_thinking_signature] = context.signature
+         else
+           thinking_info[:thinking] = context.message
+           thinking_info[:thinking_signature] = context.signature
+         end
+       else
+         if thinking_info
+           context = context.dup
+           context[4] = thinking_info
+         end
+         embedded_thinking << context
+       end
+     end
+
+     embedded_thinking
+   end
+
+   def process_tool(
+     tool:,
+     raw_context:,
+     llm:,
+     cancel:,
+     update_blk:,
+     prompt:,
+     context:,
+     current_thinking:
+   )
      tool_call_id = tool.tool_call_id
      invocation_result_json = invoke_tool(tool, llm, cancel, context, &update_blk).to_json
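For orientation: raw context entries in this code are positional arrays, and embed_thinking stashes the accumulated thinking at index 4, where the prompt builder later reads it back (see the message[4] access in a later hunk). A sketch of the resulting shape (labels illustrative; only index 4 is added by this commit):

# [content, id, <other metadata>, name, thinking_info]
# ["the reply text", "gpt_bot", nil, nil,
#   { thinking: "reasoning...", thinking_signature: "sig",
#     redacted_thinking_signature: "sig2" }]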
@@ -189,6 +255,17 @@ module DiscourseAi
        name: tool.name,
      }

+     if current_thinking.present?
+       current_thinking.each do |thinking|
+         if thinking.redacted
+           tool_call_message[:redacted_thinking_signature] = thinking.signature
+         else
+           tool_call_message[:thinking] = thinking.message
+           tool_call_message[:thinking_signature] = thinking.signature
+         end
+       end
+     end
+
      tool_message = {
        type: :tool,
        id: tool_call_id,
|
@ -119,7 +119,7 @@ module DiscourseAi
|
|||||||
bot_user ||= User.find_by(id: mentioned[:user_id]) if mentioned
|
bot_user ||= User.find_by(id: mentioned[:user_id]) if mentioned
|
||||||
end
|
end
|
||||||
|
|
||||||
if bot_user && post.reply_to_post_number && !post.reply_to_post.user&.bot?
|
if !mentioned && bot_user && post.reply_to_post_number && !post.reply_to_post.user&.bot?
|
||||||
# replying to a non-bot user
|
# replying to a non-bot user
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
@@ -220,6 +220,9 @@ module DiscourseAi
          custom_context[:id] = message[1] if custom_context[:type] != :model
          custom_context[:name] = message[3] if message[3]

+         thinking = message[4]
+         custom_context[:thinking] = thinking if thinking
+
          builder.push(**custom_context)
        end
      end
@@ -473,8 +476,20 @@ module DiscourseAi

      post_streamer = PostStreamer.new(delay: Rails.env.test? ? 0 : 0.5) if stream_reply

+     started_thinking = false
+
      new_custom_prompts =
        bot.reply(context) do |partial, cancel, placeholder, type|
+         if type == :thinking && !started_thinking
+           reply << "<details><summary>#{I18n.t("discourse_ai.ai_bot.thinking")}</summary>"
+           started_thinking = true
+         end
+
+         if type != :thinking && started_thinking
+           reply << "</details>\n\n"
+           started_thinking = false
+         end
+
          reply << partial
          raw = reply.dup
          raw << "\n\n" << placeholder if placeholder.present?
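The net effect on the streamed reply: thinking partials are fenced inside an HTML details element before the visible answer begins. A sketch of how the raw accumulates (the summary text comes from the new discourse_ai.ai_bot.thinking locale string):

reply = +""
reply << "<details><summary>Thinking...</summary>" # on the first :thinking partial
reply << "I should check the user's question..."   # streamed thinking text
reply << "</details>\n\n"                          # on the first non-thinking partial
reply << "Here is the answer."                     # regular partials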
@@ -527,8 +542,10 @@ module DiscourseAi
        )
      end

-     # we do not need to add a custom prompt for a single reply
-     if new_custom_prompts.length > 1
+     # a bit messy internally, but this is how we tell
+     is_thinking = new_custom_prompts.any? { |prompt| prompt[4].present? }
+
+     if is_thinking || new_custom_prompts.length > 1
        reply_post.post_custom_prompt ||= reply_post.build_post_custom_prompt(custom_prompt: [])
        prompt = reply_post.post_custom_prompt.custom_prompt || []
        prompt.concat(new_custom_prompts)
|
@ -47,9 +47,22 @@ module DiscourseAi
|
|||||||
api_url = "https://api.github.com/repos/#{repo}/pulls/#{pull_id}"
|
api_url = "https://api.github.com/repos/#{repo}/pulls/#{pull_id}"
|
||||||
@url = "https://github.com/#{repo}/pull/#{pull_id}"
|
@url = "https://github.com/#{repo}/pull/#{pull_id}"
|
||||||
|
|
||||||
body = nil
|
pr_info = nil
|
||||||
|
diff_body = nil
|
||||||
response_code = "unknown error"
|
response_code = "unknown error"
|
||||||
|
|
||||||
|
send_http_request(
|
||||||
|
api_url,
|
||||||
|
headers: {
|
||||||
|
"Accept" => "application/json",
|
||||||
|
},
|
||||||
|
authenticate_github: true,
|
||||||
|
) do |response|
|
||||||
|
response_code = response.code
|
||||||
|
pr_info = JSON.parse(read_response_body(response)) if response_code == "200"
|
||||||
|
end
|
||||||
|
|
||||||
|
if response_code == "200"
|
||||||
send_http_request(
|
send_http_request(
|
||||||
api_url,
|
api_url,
|
||||||
headers: {
|
headers: {
|
||||||
@@ -58,16 +71,41 @@ module DiscourseAi
            authenticate_github: true,
          ) do |response|
            response_code = response.code
-           body = read_response_body(response)
+           diff_body = read_response_body(response)
+         end
        end

-       if response_code == "200"
-         diff = body
+       if response_code == "200" && pr_info && diff_body
+         diff = diff_body
          diff = self.class.sort_and_shorten_diff(diff)
          diff = truncate(diff, max_length: 20_000, percent_length: 0.3, llm: llm)
-         { diff: diff }
+
+         source_repo = pr_info.dig("head", "repo", "full_name")
+         source_branch = pr_info.dig("head", "ref")
+         source_sha = pr_info.dig("head", "sha")
+
+         {
+           diff: diff,
+           pr_info: {
+             title: pr_info["title"],
+             state: pr_info["state"],
+             source: {
+               repo: source_repo,
+               branch: source_branch,
+               sha: source_sha,
+               url: "https://github.com/#{source_repo}/tree/#{source_branch}",
+             },
+             target: {
+               repo: pr_info["base"]["repo"]["full_name"],
+               branch: pr_info["base"]["ref"],
+             },
+             author: pr_info["user"]["login"],
+             created_at: pr_info["created_at"],
+             updated_at: pr_info["updated_at"],
+           },
+         }
        else
-         { error: "Failed to retrieve the diff. Status code: #{response_code}" }
+         { error: "Failed to retrieve the PR information. Status code: #{response_code}" }
        end
      end
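With these changes the GitHub pull request tool returns PR metadata alongside the diff. An illustrative return value (all field values invented):

{
  diff: "<sorted, truncated diff text>",
  pr_info: {
    title: "Add feature X",
    state: "open",
    source: {
      repo: "alice/discourse-ai",
      branch: "feature-x",
      sha: "abc1234",
      url: "https://github.com/alice/discourse-ai/tree/feature-x",
    },
    target: { repo: "discourse/discourse-ai", branch: "main" },
    author: "alice",
    created_at: "2025-02-01T10:00:00Z",
    updated_at: "2025-02-02T12:00:00Z",
  },
}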
|
@ -17,7 +17,8 @@ module DiscourseAi
|
|||||||
flag_type: nil,
|
flag_type: nil,
|
||||||
automation: nil,
|
automation: nil,
|
||||||
max_post_tokens: nil,
|
max_post_tokens: nil,
|
||||||
stop_sequences: nil
|
stop_sequences: nil,
|
||||||
|
temperature: nil
|
||||||
)
|
)
|
||||||
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
|
if category_id.blank? && tags.blank? && canned_reply.blank? && hide_topic.blank? &&
|
||||||
flag_post.blank?
|
flag_post.blank?
|
||||||
@@ -40,7 +41,7 @@ module DiscourseAi
        result =
          llm.generate(
            prompt,
-           temperature: 0,
+           temperature: temperature,
            max_tokens: 700, # ~500 words
            user: Discourse.system_user,
            stop_sequences: stop_sequences,
|
@ -84,8 +84,8 @@ module DiscourseAi
|
|||||||
@top_p = top_p
|
@top_p = top_p
|
||||||
@temperature = temperature
|
@temperature = temperature
|
||||||
|
|
||||||
@top_p = nil if top_p <= 0
|
@top_p = nil if top_p.to_f < 0
|
||||||
@temperature = nil if temperature <= 0
|
@temperature = nil if temperature.to_f < 0
|
||||||
@suppress_notifications = suppress_notifications
|
@suppress_notifications = suppress_notifications
|
||||||
|
|
||||||
if !@topic_id && !@receivers.present? && !@email_receivers.present?
|
if !@topic_id && !@receivers.present? && !@email_receivers.present?
|
||||||
|
@ -44,13 +44,15 @@ class DiscourseAi::Completions::AnthropicMessageProcessor
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
attr_reader :tool_calls, :input_tokens, :output_tokens
|
attr_reader :tool_calls, :input_tokens, :output_tokens, :output_thinking
|
||||||
|
|
||||||
def initialize(streaming_mode:, partial_tool_calls: false)
|
def initialize(streaming_mode:, partial_tool_calls: false, output_thinking: false)
|
||||||
@streaming_mode = streaming_mode
|
@streaming_mode = streaming_mode
|
||||||
@tool_calls = []
|
@tool_calls = []
|
||||||
@current_tool_call = nil
|
@current_tool_call = nil
|
||||||
@partial_tool_calls = partial_tool_calls
|
@partial_tool_calls = partial_tool_calls
|
||||||
|
@output_thinking = output_thinking
|
||||||
|
@thinking = nil
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_tool_calls
|
def to_tool_calls
|
||||||
@@ -69,13 +71,48 @@ class DiscourseAi::Completions::AnthropicMessageProcessor
        tool_id,
        partial_tool_calls: @partial_tool_calls,
      ) if tool_name
+   elsif parsed[:type] == "content_block_start" && parsed.dig(:content_block, :type) == "thinking"
+     if @output_thinking
+       @thinking =
+         DiscourseAi::Completions::Thinking.new(
+           message: +parsed.dig(:content_block, :thinking).to_s,
+           signature: +"",
+           partial: true,
+         )
+       result = @thinking.dup
+     end
+   elsif parsed[:type] == "content_block_delta" && parsed.dig(:delta, :type) == "thinking_delta"
+     if @output_thinking
+       delta = parsed.dig(:delta, :thinking)
+       @thinking.message << delta if @thinking
+       result = DiscourseAi::Completions::Thinking.new(message: delta, partial: true)
+     end
+   elsif parsed[:type] == "content_block_delta" && parsed.dig(:delta, :type) == "signature_delta"
+     if @output_thinking
+       @thinking.signature << parsed.dig(:delta, :signature) if @thinking
+     end
+   elsif parsed[:type] == "content_block_stop" && @thinking
+     @thinking.partial = false
+     result = @thinking
+     @thinking = nil
    elsif parsed[:type] == "content_block_start" || parsed[:type] == "content_block_delta"
      if @current_tool_call
        tool_delta = parsed.dig(:delta, :partial_json).to_s
        @current_tool_call.append(tool_delta)
        result = @current_tool_call.partial_tool_call if @current_tool_call.has_partial?
+     elsif parsed.dig(:content_block, :type) == "redacted_thinking"
+       if @output_thinking
+         result =
+           DiscourseAi::Completions::Thinking.new(
+             message: nil,
+             signature: parsed.dig(:content_block, :data),
+             redacted: true,
+           )
+       end
      else
        result = parsed.dig(:delta, :text).to_s
+       # no need to return empty strings for streaming, no value
+       result = nil if result == ""
      end
    elsif parsed[:type] == "content_block_stop"
      if @current_tool_call
@@ -105,15 +142,32 @@ class DiscourseAi::Completions::AnthropicMessageProcessor
      content = parsed.dig(:content)
      if content.is_a?(Array)
        result =
-         content.map do |data|
+         content
+           .map do |data|
              if data[:type] == "tool_use"
                call = AnthropicToolCall.new(data[:name], data[:id])
                call.append(data[:input].to_json)
                call.to_tool_call
+             elsif data[:type] == "thinking"
+               if @output_thinking
+                 DiscourseAi::Completions::Thinking.new(
+                   message: data[:thinking],
+                   signature: data[:signature],
+                 )
+               end
+             elsif data[:type] == "redacted_thinking"
+               if @output_thinking
+                 DiscourseAi::Completions::Thinking.new(
+                   message: nil,
+                   signature: data[:data],
+                   redacted: true,
+                 )
+               end
              else
                data[:text]
              end
            end
+           .compact
      end

      @input_tokens = parsed.dig(:usage, :input_tokens)
|
@ -87,8 +87,31 @@ module DiscourseAi
|
|||||||
end
|
end
|
||||||
|
|
||||||
def model_msg(msg)
|
def model_msg(msg)
|
||||||
|
if msg[:thinking] || msg[:redacted_thinking_signature]
|
||||||
|
content_array = []
|
||||||
|
|
||||||
|
if msg[:thinking]
|
||||||
|
content_array << {
|
||||||
|
type: "thinking",
|
||||||
|
thinking: msg[:thinking],
|
||||||
|
signature: msg[:thinking_signature],
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
if msg[:redacted_thinking_signature]
|
||||||
|
content_array << {
|
||||||
|
type: "redacted_thinking",
|
||||||
|
data: msg[:redacted_thinking_signature],
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
content_array << { type: "text", text: msg[:content] }
|
||||||
|
|
||||||
|
{ role: "assistant", content: content_array }
|
||||||
|
else
|
||||||
{ role: "assistant", content: msg[:content] }
|
{ role: "assistant", content: msg[:content] }
|
||||||
end
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def system_msg(msg)
|
def system_msg(msg)
|
||||||
msg = { role: "system", content: msg[:content] }
|
msg = { role: "system", content: msg[:content] }
|
||||||
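The assistant message replayed to Anthropic therefore becomes a content array whenever thinking is present. A sketch of the payload produced by model_msg above (values illustrative; the ordering of thinking, then redacted_thinking, then text matches the code and the endpoint spec later in this commit):

{
  role: "assistant",
  content: [
    { type: "thinking", thinking: "reasoning text", signature: "sig" },
    { type: "redacted_thinking", data: "redacted-sig" },
    { type: "text", text: "the visible reply" },
  ],
}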
|
@ -45,15 +45,35 @@ module DiscourseAi
|
|||||||
|
|
||||||
def from_raw_tool_call(raw_message)
|
def from_raw_tool_call(raw_message)
|
||||||
call_details = JSON.parse(raw_message[:content], symbolize_names: true)
|
call_details = JSON.parse(raw_message[:content], symbolize_names: true)
|
||||||
|
result = []
|
||||||
|
|
||||||
|
if raw_message[:thinking] || raw_message[:redacted_thinking_signature]
|
||||||
|
if raw_message[:thinking]
|
||||||
|
result << {
|
||||||
|
type: "thinking",
|
||||||
|
thinking: raw_message[:thinking],
|
||||||
|
signature: raw_message[:thinking_signature],
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
if raw_message[:redacted_thinking_signature]
|
||||||
|
result << {
|
||||||
|
type: "redacted_thinking",
|
||||||
|
data: raw_message[:redacted_thinking_signature],
|
||||||
|
}
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
tool_call_id = raw_message[:id]
|
tool_call_id = raw_message[:id]
|
||||||
[
|
|
||||||
{
|
result << {
|
||||||
type: "tool_use",
|
type: "tool_use",
|
||||||
id: tool_call_id,
|
id: tool_call_id,
|
||||||
name: raw_message[:name],
|
name: raw_message[:name],
|
||||||
input: call_details[:arguments],
|
input: call_details[:arguments],
|
||||||
},
|
}
|
||||||
]
|
|
||||||
|
result
|
||||||
end
|
end
|
||||||
|
|
||||||
def from_raw_tool(raw_message)
|
def from_raw_tool(raw_message)
|
||||||
|
@@ -34,13 +34,15 @@ module DiscourseAi

        # Note: Anthropic requires this param
        max_tokens = 4096
-       max_tokens = 8192 if mapped_model.match?(/3.5/)
+       # 3.5 and 3.7 models have a higher token limit
+       max_tokens = 8192 if mapped_model.match?(/3.[57]/)

        options = { model: mapped_model, max_tokens: max_tokens }

+       # reasoning has even higher token limits
        if llm_model.lookup_custom_param("enable_reasoning")
          reasoning_tokens =
-           llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+           llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)

          # this allows for lots of tokens beyond reasoning
          options[:max_tokens] = reasoning_tokens + 30_000
@@ -123,6 +125,7 @@ module DiscourseAi
          DiscourseAi::Completions::AnthropicMessageProcessor.new(
            streaming_mode: @streaming_mode,
            partial_tool_calls: partial_tool_calls,
+           output_thinking: output_thinking,
          )
        end
@@ -24,12 +24,14 @@ module DiscourseAi
          options =
            if dialect.is_a?(DiscourseAi::Completions::Dialects::Claude)
              max_tokens = 4096
-             max_tokens = 8192 if bedrock_model_id.match?(/3.5/)
+             max_tokens = 8192 if bedrock_model_id.match?(/3.[57]/)

              result = { anthropic_version: "bedrock-2023-05-31" }
              if llm_model.lookup_custom_param("enable_reasoning")
+               # we require special headers to go over 64k output tokens, lets
+               # wait for feature requests before enabling this
                reasoning_tokens =
-                 llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(100, 65_536)
+                 llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)

                # this allows for ample tokens beyond reasoning
                max_tokens = reasoning_tokens + 30_000
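Both the Anthropic and Bedrock endpoints now size the output budget the same way; condensed, the calculation is (bounds as in the two hunks above):

reasoning_tokens = llm_model.lookup_custom_param("reasoning_tokens").to_i.clamp(1024, 32_768)
max_tokens = reasoning_tokens + 30_000 # leaves ample room for the visible answer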
@@ -4,7 +4,7 @@ module DiscourseAi
  module Completions
    module Endpoints
      class Base
-       attr_reader :partial_tool_calls
+       attr_reader :partial_tool_calls, :output_thinking

        CompletionFailed = Class.new(StandardError)
        # 6 minutes
@@ -67,12 +67,15 @@ module DiscourseAi
        feature_name: nil,
        feature_context: nil,
        partial_tool_calls: false,
+       output_thinking: false,
        &blk
      )
        LlmQuota.check_quotas!(@llm_model, user)
        start_time = Time.now

        @partial_tool_calls = partial_tool_calls
+       @output_thinking = output_thinking
+
        model_params = normalize_model_params(model_params)
        orig_blk = blk
@@ -85,6 +88,7 @@ module DiscourseAi
            feature_name: feature_name,
            feature_context: feature_context,
            partial_tool_calls: partial_tool_calls,
+           output_thinking: output_thinking,
          )

        wrapped = result
|
@ -29,7 +29,8 @@ module DiscourseAi
|
|||||||
model_params,
|
model_params,
|
||||||
feature_name: nil,
|
feature_name: nil,
|
||||||
feature_context: nil,
|
feature_context: nil,
|
||||||
partial_tool_calls: false
|
partial_tool_calls: false,
|
||||||
|
output_thinking: false
|
||||||
)
|
)
|
||||||
@dialect = dialect
|
@dialect = dialect
|
||||||
@model_params = model_params
|
@model_params = model_params
|
||||||
@@ -51,6 +52,8 @@ module DiscourseAi
        as_array.each do |response|
          if is_tool?(response)
            yield(response, cancel_fn)
+         elsif is_thinking?(response)
+           yield(response, cancel_fn)
          else
            response.each_char do |char|
              break if cancelled
@@ -70,6 +73,10 @@ module DiscourseAi

        private

+       def is_thinking?(response)
+         response.is_a?(DiscourseAi::Completions::Thinking)
+       end
+
        def is_tool?(response)
          response.is_a?(DiscourseAi::Completions::ToolCall)
        end
|
@ -121,7 +121,8 @@ module DiscourseAi
|
|||||||
model_params = {},
|
model_params = {},
|
||||||
feature_name: nil,
|
feature_name: nil,
|
||||||
feature_context: nil,
|
feature_context: nil,
|
||||||
partial_tool_calls: false
|
partial_tool_calls: false,
|
||||||
|
output_thinking: false
|
||||||
)
|
)
|
||||||
last_call = { dialect: dialect, user: user, model_params: model_params }
|
last_call = { dialect: dialect, user: user, model_params: model_params }
|
||||||
self.class.last_call = last_call
|
self.class.last_call = last_call
|
||||||
|
@@ -42,6 +42,7 @@ module DiscourseAi
        feature_name: nil,
        feature_context: nil,
        partial_tool_calls: false,
+       output_thinking: false,
        &blk
      )
        @disable_native_tools = dialect.disable_native_tools?
@@ -172,8 +172,9 @@ module DiscourseAi
        @canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)
        @canned_llm = llm
        @prompts = []
+       @prompt_options = []

-       yield(@canned_response, llm, @prompts)
+       yield(@canned_response, llm, @prompts, @prompt_options)
      ensure
        # Don't leak prepared response if there's an exception.
        @canned_response = nil
@@ -181,8 +182,13 @@ module DiscourseAi
        @prompts = nil
      end

-     def record_prompt(prompt)
+     def record_prompt(prompt, options)
        @prompts << prompt.dup if @prompts
+       @prompt_options << options if @prompt_options
+     end
+
+     def prompt_options
+       @prompt_options
      end

      def prompts
@@ -234,6 +240,7 @@ module DiscourseAi
      # @param feature_name { String - Optional } - The feature name to use for the completion.
      # @param feature_context { Hash - Optional } - The feature context to use for the completion.
      # @param partial_tool_calls { Boolean - Optional } - If true, the completion will return partial tool calls.
+     # @param output_thinking { Boolean - Optional } - If true, the completion will return the thinking output for thinking models.
      #
      # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
      #
@@ -250,9 +257,23 @@ module DiscourseAi
        feature_name: nil,
        feature_context: nil,
        partial_tool_calls: false,
+       output_thinking: false,
        &partial_read_blk
      )
-       self.class.record_prompt(prompt)
+       self.class.record_prompt(
+         prompt,
+         {
+           temperature: temperature,
+           top_p: top_p,
+           max_tokens: max_tokens,
+           stop_sequences: stop_sequences,
+           user: user,
+           feature_name: feature_name,
+           feature_context: feature_context,
+           partial_tool_calls: partial_tool_calls,
+           output_thinking: output_thinking,
+         },
+       )

        model_params = { max_tokens: max_tokens, stop_sequences: stop_sequences }
@@ -285,6 +306,7 @@ module DiscourseAi
          feature_name: feature_name,
          feature_context: feature_context,
          partial_tool_calls: partial_tool_calls,
+         output_thinking: output_thinking,
          &partial_read_blk
        )
      end
|
@ -41,12 +41,26 @@ module DiscourseAi
|
|||||||
@tool_choice = tool_choice
|
@tool_choice = tool_choice
|
||||||
end
|
end
|
||||||
|
|
||||||
def push(type:, content:, id: nil, name: nil, upload_ids: nil)
|
def push(
|
||||||
|
type:,
|
||||||
|
content:,
|
||||||
|
id: nil,
|
||||||
|
name: nil,
|
||||||
|
upload_ids: nil,
|
||||||
|
thinking: nil,
|
||||||
|
thinking_signature: nil,
|
||||||
|
redacted_thinking_signature: nil
|
||||||
|
)
|
||||||
return if type == :system
|
return if type == :system
|
||||||
new_message = { type: type, content: content }
|
new_message = { type: type, content: content }
|
||||||
new_message[:name] = name.to_s if name
|
new_message[:name] = name.to_s if name
|
||||||
new_message[:id] = id.to_s if id
|
new_message[:id] = id.to_s if id
|
||||||
new_message[:upload_ids] = upload_ids if upload_ids
|
new_message[:upload_ids] = upload_ids if upload_ids
|
||||||
|
new_message[:thinking] = thinking if thinking
|
||||||
|
new_message[:thinking_signature] = thinking_signature if thinking_signature
|
||||||
|
new_message[
|
||||||
|
:redacted_thinking_signature
|
||||||
|
] = redacted_thinking_signature if redacted_thinking_signature
|
||||||
|
|
||||||
validate_message(new_message)
|
validate_message(new_message)
|
||||||
validate_turn(messages.last, new_message)
|
validate_turn(messages.last, new_message)
|
||||||
@@ -73,7 +87,16 @@ module DiscourseAi
          raise ArgumentError, "message type must be one of #{valid_types}"
        end

-       valid_keys = %i[type content id name upload_ids]
+       valid_keys = %i[
+         type
+         content
+         id
+         name
+         upload_ids
+         thinking
+         thinking_signature
+         redacted_thinking_signature
+       ]
        if (invalid_keys = message.keys - valid_keys).any?
          raise ArgumentError, "message contains invalid keys: #{invalid_keys}"
        end
@@ -102,7 +102,7 @@ module DiscourseAi
        end
      end

-     def push(type:, content:, name: nil, upload_ids: nil, id: nil)
+     def push(type:, content:, name: nil, upload_ids: nil, id: nil, thinking: nil)
        if !%i[user model tool tool_call system].include?(type)
          raise ArgumentError, "type must be either :user, :model, :tool, :tool_call or :system"
        end
@@ -112,6 +112,15 @@ module DiscourseAi
        message[:name] = name.to_s if name
        message[:upload_ids] = upload_ids if upload_ids
        message[:id] = id.to_s if id
+       if thinking
+         message[:thinking] = thinking["thinking"] if thinking["thinking"]
+         message[:thinking_signature] = thinking["thinking_signature"] if thinking[
+           "thinking_signature"
+         ]
+         message[:redacted_thinking_signature] = thinking[
+           "redacted_thinking_signature"
+         ] if thinking["redacted_thinking_signature"]
+       end

        @raw_messages << message
      end
lib/completions/thinking.rb (new file, 38 lines)
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    class Thinking
+      attr_accessor :message, :signature, :redacted, :partial
+
+      def initialize(message:, signature: nil, redacted: false, partial: false)
+        @message = message
+        @signature = signature
+        @redacted = redacted
+        @partial = partial
+      end
+
+      def partial?
+        !!@partial
+      end
+
+      def ==(other)
+        message == other.message && signature == other.signature && redacted == other.redacted &&
+          partial == other.partial
+      end
+
+      def dup
+        Thinking.new(
+          message: message.dup,
+          signature: signature.dup,
+          redacted: redacted,
+          partial: partial,
+        )
+      end
+
+      def to_s
+        "#{message} - #{signature} - #{redacted} - #{partial}"
+      end
+    end
+  end
+end
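A brief usage sketch of the new value object (illustrative values only):

thinking = DiscourseAi::Completions::Thinking.new(message: +"step 1", signature: +"", partial: true)
thinking.message << ", then step 2"
thinking.partial = false
thinking.partial?        # => false
thinking == thinking.dup # => true, equality is by value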
spec/lib/completions/anthropic_message_processor_spec.rb (new file, 73 lines)
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+describe DiscourseAi::Completions::AnthropicMessageProcessor do
+  it "correctly handles and combines partial thinking chunks into complete thinking objects" do
+    processor =
+      DiscourseAi::Completions::AnthropicMessageProcessor.new(
+        streaming_mode: true,
+        partial_tool_calls: false,
+        output_thinking: true,
+      )
+
+    # Simulate streaming thinking output in multiple chunks
+    result1 =
+      processor.process_streamed_message(
+        { type: "content_block_start", content_block: { type: "thinking", thinking: "" } },
+      )
+
+    result2 =
+      processor.process_streamed_message(
+        {
+          type: "content_block_delta",
+          delta: {
+            type: "thinking_delta",
+            thinking: "First part of thinking",
+          },
+        },
+      )
+
+    result3 =
+      processor.process_streamed_message(
+        {
+          type: "content_block_delta",
+          delta: {
+            type: "thinking_delta",
+            thinking: " and second part",
+          },
+        },
+      )
+
+    _result4 =
+      processor.process_streamed_message(
+        {
+          type: "content_block_delta",
+          delta: {
+            type: "signature_delta",
+            signature: "thinking-sig-123",
+          },
+        },
+      )
+
+    # Finish the thinking block
+    final_result = processor.process_streamed_message({ type: "content_block_stop" })
+
+    # Verify the partial thinking chunks
+    expect(result1).to be_a(DiscourseAi::Completions::Thinking)
+    expect(result1.message).to eq("")
+    expect(result1.partial?).to eq(true)
+
+    expect(result2).to be_a(DiscourseAi::Completions::Thinking)
+    expect(result2.message).to eq("First part of thinking")
+    expect(result2.partial?).to eq(true)
+
+    expect(result3).to be_a(DiscourseAi::Completions::Thinking)
+    expect(result3.message).to eq(" and second part")
+    expect(result3.partial?).to eq(true)
+
+    # Verify the final complete thinking object
+    expect(final_result).to be_a(DiscourseAi::Completions::Thinking)
+    expect(final_result.message).to eq("First part of thinking and second part")
+    expect(final_result.signature).to eq("thinking-sig-123")
+    expect(final_result.partial?).to eq(false)
+  end
+end
@ -449,4 +449,219 @@ RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
|
|||||||
expect(log.request_tokens).to eq(10)
|
expect(log.request_tokens).to eq(10)
|
||||||
expect(log.response_tokens).to eq(25)
|
expect(log.response_tokens).to eq(25)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "can send through thinking tokens via a completion prompt" do
|
||||||
|
body = {
|
||||||
|
id: "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
|
||||||
|
type: "message",
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "text", text: "world" }],
|
||||||
|
model: "claude-3-7-sonnet-20250219",
|
||||||
|
stop_reason: "end_turn",
|
||||||
|
usage: {
|
||||||
|
input_tokens: 25,
|
||||||
|
output_tokens: 40,
|
||||||
|
},
|
||||||
|
}.to_json
|
||||||
|
|
||||||
|
parsed_body = nil
|
||||||
|
stub_request(:post, url).with(
|
||||||
|
body: ->(req_body) { parsed_body = JSON.parse(req_body) },
|
||||||
|
headers: {
|
||||||
|
"Content-Type" => "application/json",
|
||||||
|
"X-Api-Key" => "123",
|
||||||
|
"Anthropic-Version" => "2023-06-01",
|
||||||
|
},
|
||||||
|
).to_return(status: 200, body: body)
|
||||||
|
|
||||||
|
prompt = DiscourseAi::Completions::Prompt.new("system prompt")
|
||||||
|
prompt.push(type: :user, content: "hello")
|
||||||
|
prompt.push(
|
||||||
|
type: :model,
|
||||||
|
id: "user1",
|
||||||
|
content: "hello",
|
||||||
|
thinking: "I am thinking",
|
||||||
|
thinking_signature: "signature",
|
||||||
|
redacted_thinking_signature: "redacted_signature",
|
||||||
|
)
|
||||||
|
|
||||||
|
result = llm.generate(prompt, user: Discourse.system_user)
|
||||||
|
expect(result).to eq("world")
|
||||||
|
|
||||||
|
expected_body = {
|
||||||
|
"model" => "claude-3-opus-20240229",
|
||||||
|
"max_tokens" => 4096,
|
||||||
|
"messages" => [
|
||||||
|
{ "role" => "user", "content" => "hello" },
|
||||||
|
{
|
||||||
|
"role" => "assistant",
|
||||||
|
"content" => [
|
||||||
|
{ "type" => "thinking", "thinking" => "I am thinking", "signature" => "signature" },
|
||||||
|
{ "type" => "redacted_thinking", "data" => "redacted_signature" },
|
||||||
|
{ "type" => "text", "text" => "hello" },
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
"system" => "system prompt",
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(parsed_body).to eq(expected_body)
|
||||||
|
end
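The expected_body assertion implies a mapping rule worth spelling out: an assistant turn that carries thinking context is serialized as an ordered array of content blocks — thinking first, then any redacted block, then the visible text. A rough sketch of that mapping, assuming a message hash shaped like the one pushed onto the prompt above; the method name is an illustration, not the plugin's dialect API:

# Illustrative mapping only; the real translation lives in the dialect layer.
def assistant_content_blocks(message)
  blocks = []
  if message[:thinking]
    blocks << {
      "type" => "thinking",
      "thinking" => message[:thinking],
      "signature" => message[:thinking_signature],
    }
  end
  if message[:redacted_thinking_signature]
    # redacted thinking is replayed as opaque data, not text
    blocks << { "type" => "redacted_thinking", "data" => message[:redacted_thinking_signature] }
  end
  blocks << { "type" => "text", "text" => message[:content] }
  blocks
end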

    it "can handle a response with thinking blocks in non-streaming mode" do
      body = {
        id: "msg_1nZdL29xx5MUA1yADyHTEsnR8uuvGzszyY",
        type: "message",
        role: "assistant",
        content: [
          {
            type: "thinking",
            thinking: "This is my thinking process about prime numbers...",
            signature: "abc123signature",
          },
          { type: "redacted_thinking", data: "abd456signature" },
          { type: "text", text: "Yes, there are infinitely many prime numbers where n mod 4 = 3." },
        ],
        model: "claude-3-7-sonnet-20250219",
        stop_reason: "end_turn",
        usage: {
          input_tokens: 25,
          output_tokens: 40,
        },
      }.to_json

      stub_request(:post, url).with(
        headers: {
          "Content-Type" => "application/json",
          "X-Api-Key" => "123",
          "Anthropic-Version" => "2023-06-01",
        },
      ).to_return(status: 200, body: body)

      result =
        llm.generate(
          "hello",
          user: Discourse.system_user,
          feature_name: "testing",
          output_thinking: true,
        )

      # Result should be an array with both thinking and text content
      expect(result).to be_an(Array)
      expect(result.length).to eq(3)

      # First item should be a Thinking object
      expect(result[0]).to be_a(DiscourseAi::Completions::Thinking)
      expect(result[0].message).to eq("This is my thinking process about prime numbers...")
      expect(result[0].signature).to eq("abc123signature")

      # Second item should be the redacted thinking block
      expect(result[1]).to be_a(DiscourseAi::Completions::Thinking)
      expect(result[1].signature).to eq("abd456signature")
      expect(result[1].redacted).to eq(true)

      # Third item should be the text response
      expect(result[2]).to eq("Yes, there are infinitely many prime numbers where n mod 4 = 3.")

      # Verify audit log
      log = AiApiAuditLog.order(:id).last
      expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
      expect(log.feature_name).to eq("testing")
      expect(log.response_tokens).to eq(40)
    end
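Because output_thinking changes the return shape from a bare string to a mixed array, callers have to partition the result. A hypothetical consumer-side snippet (not from the plugin) that splits reasoning from the visible answer:

# Hypothetical consumer code, assuming result came from
# llm.generate(..., output_thinking: true)
parts = Array(result)
thinking, text = parts.partition { |p| p.is_a?(DiscourseAi::Completions::Thinking) }
reasoning = thinking.reject(&:redacted).map(&:message).join
answer = text.join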

    it "can stream a response with thinking blocks" do
      body = (<<~STRING).strip
        event: message_start
        data: {"type": "message_start", "message": {"id": "msg_01...", "type": "message", "role": "assistant", "content": [], "model": "claude-3-opus-20240229", "stop_reason": null, "stop_sequence": null, "usage": {"input_tokens": 25}}}

        event: content_block_start
        data: {"type": "content_block_start", "index": 0, "content_block": {"type": "thinking", "thinking": ""}}

        event: content_block_delta
        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "Let me solve this step by step:\\n\\n1. First break down 27 * 453"}}

        event: content_block_delta
        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "thinking_delta", "thinking": "\\n2. 453 = 400 + 50 + 3"}}

        event: content_block_delta
        data: {"type": "content_block_delta", "index": 0, "delta": {"type": "signature_delta", "signature": "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds..."}}

        event: content_block_stop
        data: {"type": "content_block_stop", "index": 0}

        event: content_block_start
        data: {"type":"content_block_start","index":0,"content_block":{"type":"redacted_thinking","data":"AAA=="} }

        event: ping
        data: {"type": "ping"}

        event: content_block_stop
        data: {"type":"content_block_stop","index":0 }

        event: content_block_start
        data: {"type": "content_block_start", "index": 1, "content_block": {"type": "text", "text": ""}}

        event: content_block_delta
        data: {"type": "content_block_delta", "index": 1, "delta": {"type": "text_delta", "text": "27 * 453 = 12,231"}}

        event: content_block_stop
        data: {"type": "content_block_stop", "index": 1}

        event: message_delta
        data: {"type": "message_delta", "delta": {"stop_reason": "end_turn", "stop_sequence": null, "usage": {"output_tokens": 30}}}

        event: message_stop
        data: {"type": "message_stop"}
      STRING

      parsed_body = nil

      stub_request(:post, url).with(
        headers: {
          "Content-Type" => "application/json",
          "X-Api-Key" => "123",
          "Anthropic-Version" => "2023-06-01",
        },
      ).to_return(status: 200, body: body)

      thinking_chunks = []
      text_chunks = []

      llm.generate(
        "hello there",
        user: Discourse.system_user,
        feature_name: "testing",
        output_thinking: true,
      ) do |partial, cancel|
        if partial.is_a?(DiscourseAi::Completions::Thinking)
          thinking_chunks << partial
        else
          text_chunks << partial
        end
      end

      expected_thinking = [
        DiscourseAi::Completions::Thinking.new(message: "", signature: "", partial: true),
        DiscourseAi::Completions::Thinking.new(
          message: "Let me solve this step by step:\n\n1. First break down 27 * 453",
          partial: true,
        ),
        DiscourseAi::Completions::Thinking.new(message: "\n2. 453 = 400 + 50 + 3", partial: true),
        DiscourseAi::Completions::Thinking.new(
          message:
            "Let me solve this step by step:\n\n1. First break down 27 * 453\n2. 453 = 400 + 50 + 3",
          signature: "EqQBCgIYAhIM1gbcDa9GJwZA2b3hGgxBdjrkzLoky3dl1pkiMOYds...",
          partial: false,
        ),
        DiscourseAi::Completions::Thinking.new(message: nil, signature: "AAA==", redacted: true),
      ]

      expect(thinking_chunks).to eq(expected_thinking)
      expect(text_chunks).to eq(["27 * 453 = 12,231"])

      log = AiApiAuditLog.order(:id).last
      expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
      expect(log.feature_name).to eq("testing")
      expect(log.response_tokens).to eq(30)
    end
  end
end

@@ -94,13 +94,20 @@ describe DiscourseAi::Automation::LlmTriage do
      # PM
      reply_user.update!(admin: true)
      add_automation_field("include_personal_messages", true, type: :boolean)
      add_automation_field("temperature", "0.2")
      post = Fabricate(:post, topic: personal_message)

      prompt_options = nil
      DiscourseAi::Completions::Llm.with_prepared_responses(
        ["bad"],
      ) do |_resp, _llm, _prompts, _prompt_options|
        automation.running_in_background!
        automation.trigger!({ "post" => post })
        prompt_options = _prompt_options.first
      end

      expect(prompt_options[:temperature]).to eq(0.2)

      last_post = post.topic.reload.posts.order(:post_number).last
      expect(last_post.raw).to eq(canned_reply_text)
    end

@@ -108,6 +108,34 @@ RSpec.describe DiscourseAi::AiBot::ArtifactUpdateStrategies::Diff do
      expect(artifact.versions.last.css).to eq("body {\n  color: red;\n}")
    end

    it "can handle removal with blank blocks" do
      original_css = <<~CSS
        body {
          color: red;
        }
        .button {
          color: blue;
        }
      CSS

      artifact.update!(css: original_css)

      response = <<~RESPONSE
        [CSS]
        <<<<<<< SEARCH
        body {
          color: red;
        }
        =======
        >>>>>>> REPLACE
        [/CSS]
      RESPONSE

      DiscourseAi::Completions::Llm.with_prepared_responses([response]) { strategy.apply }

      expect(artifact.versions.last.css.strip).to eq(".button {\n  color: blue;\n}")
    end

    it "tracks failed searches" do
      original_css = ".button { color: blue; }"
      artifact.update!(css: original_css)
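The blank-block test only passes if the diff parser accepts an empty replacement between ======= and >>>>>>> REPLACE, turning a SEARCH/REPLACE block into a deletion. A minimal standalone sketch of that parse-and-apply step; the regex and method are illustrative, not the plugin's parser:

# Note the non-greedy replace capture may legitimately match "",
# which is what makes blank-block deletion work.
SEARCH_REPLACE = /<{7} SEARCH\n(?<search>.*?)\n={7}\n(?<replace>.*?)>{7} REPLACE/m

def apply_search_replace(source, block_text)
  m = SEARCH_REPLACE.match(block_text)
  raise ArgumentError, "malformed SEARCH/REPLACE block" unless m
  # String patterns are matched literally; the block form avoids
  # backreference expansion in the replacement.
  source.sub(m[:search]) { m[:replace].chomp }
end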

@@ -828,6 +828,61 @@ RSpec.describe DiscourseAi::AiBot::Playground do
  end

  describe "#reply_to" do
    it "preserves thinking context between replies and correctly renders" do
      thinking_progress =
        DiscourseAi::Completions::Thinking.new(message: "I should say hello", partial: true)
      thinking =
        DiscourseAi::Completions::Thinking.new(
          message: "I should say hello",
          signature: "thinking-signature-123",
          partial: false,
        )

      thinking_redacted =
        DiscourseAi::Completions::Thinking.new(
          message: nil,
          signature: "thinking-redacted-signature-123",
          partial: false,
          redacted: true,
        )

      first_responses = [[thinking_progress, thinking, thinking_redacted, "Hello Sam"]]

      DiscourseAi::Completions::Llm.with_prepared_responses(first_responses) do
        playground.reply_to(third_post)
      end

      new_post = third_post.topic.reload.posts.order(:post_number).last
      # confirm the message is there
      expect(new_post.raw).to include("Hello Sam")
      # confirm the thinking is there
      expect(new_post.raw).to include("I should say hello")

      post = Fabricate(:post, topic: third_post.topic, user: user, raw: "Say Cat")

      prompt_detail = nil
      # Capture the prompt to verify thinking context was included
      DiscourseAi::Completions::Llm.with_prepared_responses(["Cat"]) do |_, _, prompts|
        playground.reply_to(post)
        prompt_detail = prompts.first
      end

      last_messages = prompt_detail.messages.last(2)

      expect(last_messages).to eq(
        [
          {
            type: :model,
            content: "Hello Sam",
            thinking: "I should say hello",
            thinking_signature: "thinking-signature-123",
            redacted_thinking_signature: "thinking-redacted-signature-123",
          },
          { type: :user, content: "Say Cat", id: user.username },
        ],
      )
    end

    it "streams the bot reply through MB and create a new post in the PM with a cooked responses" do
      expected_bot_response =
        "Hello this is a bot and what you just said is an interesting question"
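The round trip asserted above can be summarized: once a reply finishes, the completed Thinking objects are folded into the persisted assistant turn so the next request can replay them. A hedged sketch of that folding, where the hash keys match the prompt message asserted in the test but the method itself is illustrative:

# Illustrative: collect completed thinking into the saved assistant turn.
def model_message(text, parts)
  message = { type: :model, content: text }
  parts.grep(DiscourseAi::Completions::Thinking).reject(&:partial?).each do |t|
    if t.redacted
      message[:redacted_thinking_signature] = t.signature
    else
      message[:thinking] = t.message
      message[:thinking_signature] = t.signature
    end
  end
  message
end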

@@ -57,7 +57,39 @@ RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do
      -module DiscourseAutomation
    DIFF

  let(:pr_info) do
    {
      "title" => "Test PR",
      "state" => "open",
      "user" => {
        "login" => "test-user",
      },
      "created_at" => "2023-01-01T00:00:00Z",
      "updated_at" => "2023-01-02T00:00:00Z",
      "head" => {
        "repo" => {
          "full_name" => "test/repo",
        },
        "ref" => "feature-branch",
        "sha" => "abc123",
      },
      "base" => {
        "repo" => {
          "full_name" => "main/repo",
        },
        "ref" => "main",
      },
    }
  end

  it "retrieves both PR info and diff" do
    stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
      headers: {
        "Accept" => "application/json",
        "User-Agent" => DiscourseAi::AiBot::USER_AGENT,
      },
    ).to_return(status: 200, body: pr_info.to_json)

    stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
      headers: {
        "Accept" => "application/vnd.github.v3.diff",

@@ -67,12 +99,21 @@ RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do
    result = tool.invoke
    expect(result[:diff]).to eq(diff)
    expect(result[:pr_info]).to include(title: "Test PR", state: "open", author: "test-user")
    expect(result[:error]).to be_nil
  end

  it "uses the github access token if present" do
    SiteSetting.ai_bot_github_access_token = "ABC"

    stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
      headers: {
        "Accept" => "application/json",
        "User-Agent" => DiscourseAi::AiBot::USER_AGENT,
        "Authorization" => "Bearer ABC",
      },
    ).to_return(status: 200, body: pr_info.to_json)

    stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
      headers: {
        "Accept" => "application/vnd.github.v3.diff",

@@ -94,14 +135,14 @@ RSpec.describe DiscourseAi::AiBot::Tools::GithubPullRequestDiff do
    it "returns an error message" do
      stub_request(:get, "https://api.github.com/repos/#{repo}/pulls/#{pull_id}").with(
        headers: {
          "Accept" => "application/json",
          "User-Agent" => DiscourseAi::AiBot::USER_AGENT,
        },
      ).to_return(status: 404)

      result = tool.invoke
      expect(result[:diff]).to be_nil
      expect(result[:error]).to include("Failed to retrieve the PR information")
    end
  end
end
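The reworked tool makes two GET requests to the same pulls endpoint, distinguished only by the Accept header: application/json for PR metadata and application/vnd.github.v3.diff for the raw diff, failing fast if the first request errors. A minimal Net::HTTP sketch of that flow; the URL construction mirrors the stubs above, the error strings come from the assertions, and everything else is an assumption:

require "json"
require "net/http"

# Sketch only: repo, pull_id and the user agent would come from tool
# arguments and plugin constants in the real implementation.
def fetch_pr(repo, pull_id, token: nil)
  uri = URI("https://api.github.com/repos/#{repo}/pulls/#{pull_id}")
  headers = { "User-Agent" => "discourse-ai" }
  headers["Authorization"] = "Bearer #{token}" if token

  info = Net::HTTP.get_response(uri, headers.merge("Accept" => "application/json"))
  return { error: "Failed to retrieve the PR information" } unless info.is_a?(Net::HTTPSuccess)

  diff = Net::HTTP.get_response(uri, headers.merge("Accept" => "application/vnd.github.v3.diff"))
  return { error: "Failed to retrieve the diff" } unless diff.is_a?(Net::HTTPSuccess)

  { pr_info: JSON.parse(info.body), diff: diff.body }
end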