discourse-ai/lib/ai_bot/tools/summarize.rb

187 lines
5.2 KiB
Ruby

#frozen_string_literal: true
module DiscourseAi
module AiBot
module Tools
class Summarize < Tool
# Returns the definition of this tool: its name, a description, and the list
# of parameters it accepts.
#
# @return [Hash] with the keys +:name+, +:description+ and +:parameters+
def self.signature
  topic_id_param = {
    name: "topic_id",
    description: "The discourse topic id to summarize",
    type: "integer",
    required: true,
  }

  # No :required key is declared for this parameter.
  guidance_param = {
    name: "guidance",
    description: "Special guidance on how to summarize the topic",
    type: "string",
  }

  {
    name: name,
    description: "Will summarize a topic attempting to answer question in guidance",
    parameters: [topic_id_param, guidance_param],
  }
end
# Name of this tool; also used as the +:name+ entry of the hash built by
# .signature.
#
# @return [String]
def self.name
  "summary"
end
# The +:topic_id+ tool parameter coerced with +to_i+, so a missing (nil) or
# non-numeric value becomes 0.
#
# @return [Integer]
def topic_id
  raw_id = parameters[:topic_id]
  raw_id.to_i
end
# The +:guidance+ tool parameter, returned exactly as stored in +parameters+.
def guidance
  parameters[:guidance]
end
# Always false for this tool.
# NOTE(review): presumably tells the bot not to generate a follow-up LLM
# response from the tool result — confirm against the Tool base class.
def chain_next_response?
  false
end
# Always true for this tool.
# NOTE(review): the meaning of "standalone" is defined by the Tool base
# class, which is not visible here — confirm before relying on it.
def standalone?
  true
end
# Always true for this tool.
# NOTE(review): how callers use this flag is not visible here — confirm
# against the Tool base class.
def low_cost?
  true
end
# Text of the summary produced by the last #invoke, or the translated
# "topic not found" message when no summary is stored.
#
# @return [String]
def custom_raw
  return @last_summary if @last_summary

  I18n.t("discourse_ai.ai_bot.topic_not_found")
end
# Summarizes the topic named by the +topic_id+ parameter and stores the result
# in @last_summary (read by #custom_raw). The topic's id and title are kept in
# @last_topic_id / @last_topic_title for #description_args.
#
# @param bot_user [Object] forwarded to #summarize
# @param llm [Object] forwarded to #summarize
# @param progress_blk [Proc, nil] forwarded to #summarize
# @return [String] "Topic summarized" when a summary was produced, otherwise
#   "Say: No topic found!"
def invoke(bot_user, llm, &progress_blk)
  topic = Topic.find_by(id: topic_id) if topic_id > 0
  # Guardian is built without a user here.
  # NOTE(review): presumably this restricts the tool to topics visible to
  # anonymous visitors — confirm that this is intended.
  topic = nil unless topic && Guardian.new.can_see?(topic)

  @last_summary = nil

  if topic
    # description_args builds its URL from @last_topic_id, so it has to be
    # recorded together with the title.
    @last_topic_id = topic.id
    @last_topic_title = topic.title

    posts =
      Post
        .where(topic_id: topic.id)
        .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
        .where("not hidden")
        .order(:post_number)

    # Sample of the topic: the first 5 posts, the 50 highest scored posts and
    # the last 5 posts. Duplicates are harmless inside the IN (...) below.
    post_numbers = posts.limit(5).pluck(:post_number)
    post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
    post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

    data =
      Post
        .where(topic_id: topic.id)
        .joins(:user)
        .where("post_number in (?)", post_numbers)
        .order(:post_number)
        .pluck("posts.id", :post_number, :raw, :username)

    @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  end

  @last_summary ? "Topic summarized" : "Say: No topic found!"
end
protected
# Values describing the last summarized topic: a link built from
# Discourse.base_path and @last_topic_id, and the stored title (empty string
# when no title is stored).
#
# @return [Hash] with the keys +:url+ and +:title+
def description_args
  title = @last_topic_title || ""
  url = "#{Discourse.base_path}/t/-/#{@last_topic_id}"

  { url: url, title: title }
end
private
# Summarizes the given posts with +llm+.
#
# The posts are flattened into one transcript and cut into sections; each
# section is summarized by its own LLM call, and when more than one partial
# summary exists a final LLM call merges them.
#
# @param data [Array<Array>] rows of [post id, post_number, raw, username]
# @param topic [Object] passed through to #section_prompt
# @param guidance [String, nil] passed through to #section_prompt
# @param bot_user [Object] passed as +user:+ to +llm.generate+
# @param llm [#tokenizer, #generate] model wrapper used to count tokens and
#   to generate text
# @param progress_blk [Proc, nil] called with the growing progress string
#   before each LLM call; may be omitted
# @return [String, nil] the summary; nil when +data+ yields nothing to
#   summarize
def summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  sections = sections_to_summarize(transcript_for(data), llm)

  progress = +I18n.t("discourse_ai.ai_bot.summarizing")
  progress_blk&.call(progress)

  summaries =
    sections.map do |section|
      progress << "."
      progress_blk&.call(progress)

      prompt = section_prompt(topic, section, guidance)
      llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
    end

  return summaries.first if summaries.length <= 1

  progress << "."
  progress_blk&.call(progress)

  # Built as a Prompt object, exactly like the one section_prompt returns,
  # because both are handed to the same llm.generate.
  concatenation_prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful bot",
      messages: [
        {
          type: :user,
          content:
            "Concatenate the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
        },
      ],
    )

  llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
end

# Renders every row as "(<post_number> <username> said: <raw>)", one post per
# line, so consecutive posts stay delimited.
def transcript_for(data)
  data
    .map { |_id, post_number, raw, username| "(#{post_number} #{username} said: #{raw})" }
    .join("\n")
end

# Packs the transcript, 20 words at a time, into sections. A section is closed
# as soon as it exceeds 2500 tokens; at most 7 sections are returned.
def sections_to_summarize(text, llm)
  sections = []
  current_section = +""

  text
    .split(/\s+/)
    .each_slice(20) do |words|
      current_section << " " << words.join(" ")

      # Original author's note: larger sections get too close to the limits.
      if llm.tokenizer.tokenize(current_section).length > 2500
        sections << current_section
        current_section = +""
      end
    end
  sections << current_section if current_section.present?

  # Cap the number of LLM calls by keeping the first 4 and the last 3
  # sections. Trimming only above 7 sections guarantees the two slices never
  # overlap, so no section is summarized twice.
  sections = sections.first(4) + sections.last(3) if sections.length > 7
  sections
end
# Builds the prompt that asks the LLM to summarize one section of a topic.
#
# @param topic [#url, #title] supplies the link prefix and the title quoted in
#   the prompt
# @param text [String] the section to summarize
# @param guidance [String, nil] interpolated after "Guidance:"
# @return [DiscourseAi::Completions::Prompt] system instructions plus a single
#   user message
def section_prompt(topic, text, guidance)
  instructions = <<~TEXT
    You are a summarization bot.
    You effectively summarise any text.
    You condense it into a shorter version.
    You understand and generate Discourse forum markdown.
    Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
  TEXT

  request = <<~TEXT
    Guidance: #{guidance}
    You are summarizing the topic: #{topic.title}
    Summarize the following in 400 words:
    #{text}
  TEXT

  DiscourseAi::Completions::Prompt.new(
    instructions,
    messages: [{ type: :user, content: request }],
  )
end
end
end
end
end