2024-01-04 10:44:07 -03:00
|
|
|
#frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module AiBot
|
|
|
|
module Tools
|
|
|
|
class Summarize < Tool
|
|
|
|
# Describes this tool as a hash: its name, a description, and the two
# parameters it accepts (a required integer "topic_id" and a "guidance"
# string that carries no `required` flag).
def self.signature
  topic_id_param = {
    name: "topic_id",
    description: "The discourse topic id to summarize",
    type: "integer",
    required: true,
  }

  guidance_param = {
    name: "guidance",
    description: "Special guidance on how to summarize the topic",
    type: "string",
  }

  {
    name: name,
    description: "Will summarize a topic attempting to answer question in guidance",
    parameters: [topic_id_param, guidance_param],
  }
end
|
|
|
|
|
|
|
|
# Identifier of this tool; `signature` embeds it as the :name entry.
def self.name
  "summary"
end
|
|
|
|
|
|
|
|
# The :topic_id parameter coerced with #to_i, so a missing (nil) or
# non-numeric value becomes 0.
def topic_id
  raw_id = parameters[:topic_id]
  raw_id.to_i
end
|
|
|
|
|
|
|
|
# The :guidance parameter exactly as supplied (nil when it was not given).
def guidance
  parameters[:guidance]
end
|
|
|
|
|
|
|
|
# Always false.
# NOTE(review): presumably tells the bot not to ask the LLM for a follow-up
# reply after this tool has run — confirm against the Tool base class.
def chain_next_response?
  false
end
|
|
|
|
|
|
|
|
# Always true.
# NOTE(review): the exact meaning of "standalone" is defined by the Tool
# base class, which is not visible here — confirm.
def standalone?
  true
end
|
|
|
|
|
|
|
|
# Always true.
# NOTE(review): presumably a hint that a cheaper model may be used for this
# tool — confirm against the callers of low_cost?.
def low_cost?
  true
end
|
|
|
|
|
|
|
|
# Text produced by the most recent `invoke`: the stored summary when there
# is one, otherwise the translated "topic not found" message.
def custom_raw
  return @last_summary if @last_summary

  I18n.t("discourse_ai.ai_bot.topic_not_found")
end
|
|
|
|
|
|
|
|
# Runs the tool: looks up the requested topic, samples its posts, and stores
# the LLM-generated summary in @last_summary.
#
# Also records @last_topic_id and @last_topic_title, which `description_args`
# reads. All three are cleared at the start of every call so that a failed
# lookup never reports data from an earlier invocation.
#
# @param bot_user passed through to `summarize` (used as the `user:` of the
#   LLM calls)
# @param llm passed through to `summarize`
# @param progress_blk optional block, passed through to `summarize`
# @return [String] "Topic summarized" when a summary was produced, otherwise
#   "Say: No topic found!"
def invoke(bot_user, llm, &progress_blk)
  @last_summary = nil
  @last_topic_id = nil
  @last_topic_title = nil

  topic = Topic.find_by(id: topic_id) if topic_id > 0
  # Guardian is built without a user here — presumably an anonymous guardian,
  # so only topics visible to everyone qualify (confirm against Guardian).
  topic = nil if topic && !Guardian.new.can_see?(topic)

  if topic
    @last_topic_id = topic.id
    @last_topic_title = topic.title

    candidate_posts =
      Post
        .where(topic_id: topic.id)
        .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
        .where("not hidden")
        .order(:post_number)

    # Sample: the first 5 posts, the 50 highest scoring posts and the last 5.
    post_numbers = candidate_posts.limit(5).pluck(:post_number)
    post_numbers += candidate_posts.reorder("posts.score desc").limit(50).pluck(:post_number)
    post_numbers += candidate_posts.reorder("post_number desc").limit(5).pluck(:post_number)

    data =
      Post
        .where(topic_id: topic.id)
        .joins(:user)
        # The three samples can overlap, so drop repeated post numbers.
        .where("post_number in (?)", post_numbers.uniq)
        .order(:post_number)
        .pluck("posts.id", :post_number, :raw, :username)

    @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  end

  @last_summary ? "Topic summarized" : "Say: No topic found!"
end
|
|
|
|
|
|
|
|
protected
|
|
|
|
|
|
|
|
# Builds a hash with the :url and :title of the topic this tool worked on.
# NOTE(review): presumably consumed by the Tool base class when rendering a
# description of the invocation — confirm.
#
# The URL uses @last_topic_id when it has been recorded and otherwise falls
# back to the requested `topic_id`, so the link never ends in a bare "/t/-/".
# The title defaults to an empty string when none was recorded.
def description_args
  linked_topic_id = @last_topic_id || topic_id

  { url: "#{Discourse.base_path}/t/-/#{linked_topic_id}", title: @last_topic_title || "" }
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Summarizes the sampled posts with the LLM.
#
# The posts are flattened into one text, cut into sections of roughly 2500
# tokens, each section is summarized separately, and when there is more than
# one section summary a final LLM call merges them into one narrative.
#
# @param data [Array<Array>] rows of [id, post_number, raw, username]
# @param topic passed to `section_prompt`
# @param guidance passed to `section_prompt`
# @param bot_user passed as `user:` to every `llm.generate` call
# @param llm must respond to `tokenizer.tokenize(String)` and
#   `generate(prompt, temperature:, max_tokens:, user:)`
# @param progress_blk optional block called with the progress string (the
#   translated "summarizing" text plus one "." per LLM call)
# @return the merged summary, the single section summary, or nil when there
#   was no text to summarize
def summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  text = +""
  data.each do |_id, post_number, raw, username|
    # Close the parenthesis and end with whitespace so that consecutive posts
    # do not fuse into a single word when the text is split below.
    text << "(#{post_number} #{username} said: #{raw})\n"
  end

  sections = []
  current_section = +""

  text
    .split(/\s+/)
    .each_slice(20) do |slice|
      current_section << " "
      current_section << slice.join(" ")

      # somehow any more will get closer to limits
      if llm.tokenizer.tokenize(current_section).length > 2500
        sections << current_section
        current_section = +""
      end
    end

  sections << current_section if current_section.present?

  # Keep at most the first 4 and the last 3 sections. Trimming only when
  # there are more than 7 guarantees the two slices never overlap (with 6
  # sections the old `> 5` check summarized one section twice).
  sections = sections.first(4) + sections.last(3) if sections.length > 7

  progress = +I18n.t("discourse_ai.ai_bot.summarizing")
  progress_blk&.call(progress)

  summaries =
    sections.map do |section|
      progress << "."
      progress_blk&.call(progress)

      prompt = section_prompt(topic, section, guidance)
      llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
    end

  return summaries.first if summaries.length <= 1

  progress << "."
  progress_blk&.call(progress)

  # Same prompt type that section_prompt hands to llm.generate.
  concatenation_prompt =
    DiscourseAi::Completions::Prompt.new(
      "You are a helpful bot",
      messages: [
        {
          type: :user,
          content:
            "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
        },
      ],
    )

  llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
end
|
|
|
|
|
|
|
|
# Builds the prompt used to summarize one section of a topic.
#
# The system message describes the summarization bot and shows the link
# format based on `topic.url`; the single user message carries the guidance,
# the topic title and the section text.
#
# @param topic must respond to `url` and `title`
# @param text [String] the section to summarize
# @param guidance the caller-supplied guidance (interpolated as-is)
# @return [DiscourseAi::Completions::Prompt]
def section_prompt(topic, text, guidance)
  instructions = <<~TEXT
    You are a summarization bot.
    You effectively summarise any text.
    You condense it into a shorter version.
    You understand and generate Discourse forum markdown.
    Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
  TEXT

  request = <<~TEXT
    Guidance: #{guidance}
    You are summarizing the topic: #{topic.title}
    Summarize the following in 400 words:

    #{text}
  TEXT

  DiscourseAi::Completions::Prompt.new(instructions, messages: [{ type: :user, content: request }])
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|