discourse-ai/lib/ai_bot/tools/summarize.rb

# frozen_string_literal: true

module DiscourseAi
  module AiBot
    module Tools
      # AI bot tool that summarizes a Discourse topic with the supplied LLM.
      #
      # A sample of the topic's posts is rendered to text, cut into sections
      # sized with the LLM's tokenizer, and every section is summarized on its
      # own. When more than one section summary is produced, one extra LLM call
      # merges them into a single narrative.
      class Summarize < Tool
        # Words appended to the current section between tokenizer checks.
        WORDS_PER_SLICE = 20
        # A section is closed once it tokenizes to more than this many tokens.
        # NOTE(review): the original comment only said larger values "get closer
        # to limits" - presumably the model context window; confirm.
        SECTION_TOKEN_LIMIT = 2500
        # When a topic yields too many sections only the first LEADING_SECTIONS
        # and the last TRAILING_SECTIONS are summarized.
        LEADING_SECTIONS = 4
        TRAILING_SECTIONS = 3

        # Tool definition hash (name, description, parameters).
        # NOTE(review): presumably advertised to the LLM by the Tool framework.
        #
        # @return [Hash]
        def self.signature
          {
            name: name,
            description: "Will summarize a topic attempting to answer question in guidance",
            parameters: [
              {
                name: "topic_id",
                description: "The discourse topic id to summarize",
                type: "integer",
                required: true,
              },
              {
                name: "guidance",
                description: "Special guidance on how to summarize the topic",
                type: "string",
              },
            ],
          }
        end

        # @return [String] the tool name used in the signature
        def self.name
          "summary"
        end

        # @return [Integer] the requested topic id (0 when missing or non-numeric)
        def topic_id
          parameters[:topic_id].to_i
        end

        # @return [String, nil] optional guidance passed through to the prompt
        def guidance
          parameters[:guidance]
        end

        # The three predicates below are fixed flags; their meaning is defined
        # by the Tool base class / bot framework, which is not visible here.
        def chain_next_response?
          false
        end

        def standalone?
          true
        end

        def low_cost?
          true
        end

        # @return [String] the summary from the last #invoke, or the localized
        #   "topic not found" message when no summary was produced
        def custom_raw
          @last_summary || I18n.t("discourse_ai.ai_bot.topic_not_found")
        end

        # Summarizes the topic identified by the `topic_id` parameter.
        #
        # The summary itself is stored for #custom_raw; the return value is only
        # a short status string.
        #
        # @param bot_user passed to `llm.generate` as `user:`
        # @param llm object responding to `tokenizer` and `generate`
        # @yield [String] progress text, re-sent as it grows (block is optional)
        # @return [String] "Topic summarized" or "Say: No topic found!"
        def invoke(bot_user, llm, &progress_blk)
          @last_summary = nil

          topic = visible_topic
          if topic
            # Both values are read back by #description_args.
            @last_topic_id = topic.id
            @last_topic_title = topic.title

            data = sampled_post_data(topic)
            @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
          end

          if @last_summary
            "Topic summarized"
          else
            "Say: No topic found!"
          end
        end

        protected

        # Link and title of the last summarized topic.
        # NOTE(review): presumably interpolated into the tool's localized
        # description by the Tool base class - confirm.
        #
        # @return [Hash] with :url and :title
        def description_args
          { url: "#{Discourse.base_path}/t/-/#{@last_topic_id}", title: @last_topic_title || "" }
        end

        private

        # Looks up the requested topic, returning nil when the id is not
        # positive, the topic does not exist, or a Guardian built without a user
        # cannot see it.
        def visible_topic
          return nil unless topic_id > 0

          topic = Topic.find_by(id: topic_id)
          topic if topic && Guardian.new.can_see?(topic)
        end

        # Picks the posts to summarize: the first 5, the 50 highest scoring and
        # the last 5 non-hidden regular / small-action posts of the topic.
        #
        # @return [Array<Array>] rows of [id, post_number, raw, username],
        #   ordered by post_number
        def sampled_post_data(topic)
          posts =
            Post
              .where(topic_id: topic.id)
              .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
              .where("not hidden")
              .order(:post_number)

          post_numbers = posts.limit(5).pluck(:post_number)
          post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
          post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

          Post
            .where(topic_id: topic.id)
            .joins(:user)
            # The three samples can overlap; de-duplicate before building the IN list.
            .where("post_number in (?)", post_numbers.uniq)
            .order(:post_number)
            .pluck("posts.id", :post_number, :raw, :username)
        end

        # Summarizes the sampled posts section by section and merges the result.
        #
        # @param data [Array<Array>] rows of [id, post_number, raw, username]
        # @return [String, nil] the summary, or nil when there was nothing to
        #   summarize
        def summarize(data, topic, guidance, bot_user, llm, &progress_blk)
          text = +""
          data.each do |_id, post_number, raw, username|
            # Close the paren and add a separator so consecutive posts do not
            # run into each other.
            text << "(#{post_number} #{username} said: #{raw}) "
          end

          sections = split_into_sections(text, llm)

          # Trim only when the head and tail slices cannot overlap, otherwise a
          # middle section would be summarized twice.
          if sections.length > LEADING_SECTIONS + TRAILING_SECTIONS
            sections = sections.first(LEADING_SECTIONS) + sections.last(TRAILING_SECTIONS)
          end

          progress = +I18n.t("discourse_ai.ai_bot.summarizing")
          progress_blk&.call(progress)

          summaries =
            sections.map do |section|
              progress << "."
              progress_blk&.call(progress)

              prompt = section_prompt(topic, section, guidance)
              llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
            end

          # Zero or one section: nothing to merge (nil when there were no sections).
          return summaries.first if summaries.length <= 1

          progress << "."
          progress_blk&.call(progress)

          concatenation_prompt = {
            insts: "You are a helpful bot",
            input:
              "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
          }

          llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
        end

        # Cuts text into sections, growing each one WORDS_PER_SLICE words at a
        # time until it tokenizes to more than SECTION_TOKEN_LIMIT tokens.
        #
        # @return [Array<String>] sections in original order
        def split_into_sections(text, llm)
          sections = []
          current = +""

          text
            .split(/\s+/)
            .each_slice(WORDS_PER_SLICE) do |words|
              current << " "
              current << words.join(" ")

              if llm.tokenizer.tokenize(current).length > SECTION_TOKEN_LIMIT
                sections << current
                current = +""
              end
            end

          sections << current if current.present?
          sections
        end

        # Builds the prompt used to summarize one section of the topic.
        #
        # @return [Hash] with :insts (system instructions) and :input
        def section_prompt(topic, text, guidance)
          insts = <<~TEXT
          You are a summarization bot.
          You effectively summarise any text.
          You condense it into a shorter version.
          You understand and generate Discourse forum markdown.
          Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
          TEXT

          { insts: insts, input: <<~TEXT }
            Guidance: #{guidance}
            You are summarizing the topic: #{topic.title}
            Summarize the following in 400 words:

            #{text}
            TEXT
        end
      end
    end
  end
end