discourse-ai/lib/ai_bot/tools/summarize.rb
Roman Rizzi f9d7d7f5f0
DEV: AI bot migration to the Llm pattern. (#343)
* DEV: AI bot migration to the Llm pattern.

We added tool and conversation context support to the Llm service in discourse-ai#366, meaning we met all the conditions to migrate this module.

This PR migrates to the new pattern, meaning adding a new bot now requires minimal effort as long as the service supports it. On top of this, we introduce the concept of a "Playground" to separate the PM-specific bits from the completion, allowing us to use the bot in other contexts like chat in the future. Commands are called tools, and we simplified all the placeholder logic to perform updates in a single place, making the flow more one-wayish.

* Followup fixes based on testing

* Cleanup unused inference code

* FIX: text-based tools could be in the middle of a sentence

* GPT-4-turbo support

* Use new LLM API
2024-01-04 10:44:07 -03:00

184 lines
5.1 KiB
Ruby

#frozen_string_literal: true
module DiscourseAi
module AiBot
module Tools
class Summarize < Tool
# Describes this tool: its name, a human-readable description and the
# parameters it accepts (a required integer topic_id and an optional
# string guidance).
#
# Returns a Hash with :name, :description and :parameters keys.
def self.signature
  topic_id_param = {
    name: "topic_id",
    description: "The discourse topic id to summarize",
    type: "integer",
    required: true,
  }

  guidance_param = {
    name: "guidance",
    description: "Special guidance on how to summarize the topic",
    type: "string",
  }

  {
    name: name,
    description: "Will summarize a topic attempting to answer question in guidance",
    parameters: [topic_id_param, guidance_param],
  }
end
# Identifier of this tool. It is also the value of the :name entry in the
# Hash returned by .signature.
def self.name
"summary"
end
# The :topic_id tool parameter coerced with #to_i, so nil or a non-numeric
# string becomes 0. #invoke only looks a topic up when this value is
# greater than zero.
def topic_id
parameters[:topic_id].to_i
end
# The :guidance tool parameter, returned as-is. The signature does not mark
# it as required; presumably nil when the caller omitted it (assumes
# `parameters` is Hash-like - confirm against Tool).
def guidance
parameters[:guidance]
end
# Always false for this tool.
# NOTE(review): the meaning of this flag is defined by the Tool base class,
# which is not visible here; presumably it tells the bot not to hand the
# tool result back to the LLM for a follow-up reply - confirm.
def chain_next_response?
false
end
# Always true for this tool.
# NOTE(review): the meaning of this flag is defined by the Tool base class,
# which is not visible here - confirm how callers use it.
def standalone?
true
end
# Always true for this tool.
# NOTE(review): the meaning of this flag is defined by the Tool base class,
# which is not visible here - confirm how callers use it.
def low_cost?
true
end
# Text for this tool's output: the summary stored by the last #invoke, or
# the translated "topic not found" message when no summary is stored.
def custom_raw
  return @last_summary if @last_summary

  I18n.t("discourse_ai.ai_bot.topic_not_found")
end
# Runs the tool: looks up the requested topic, samples its posts and stores
# an LLM-generated summary in @last_summary.
#
# A topic is only used when topic_id is positive, the record exists and
# Guardian.new (constructed with no arguments - presumably an anonymous
# guardian, confirm) reports that it can be seen.
#
# Posts considered are regular and small_action posts that are not hidden.
# From those, the first 5 and last 5 by post_number plus the 50 with the
# highest score are sent to #summarize.
#
# bot_user     - forwarded to #summarize
# llm          - forwarded to #summarize
# progress_blk - forwarded to #summarize
#
# Returns "Topic summarized" when a summary was produced, otherwise
# "Say: No topic found!".
def invoke(bot_user, llm, &progress_blk)
  topic = nil
  if topic_id > 0
    topic = Topic.find_by(id: topic_id)
    topic = nil if !topic || !Guardian.new.can_see?(topic)
  end

  @last_summary = nil

  if topic
    # description_args builds the topic URL from @last_topic_id, so it has to
    # be recorded here together with the title.
    @last_topic_id = topic.id
    @last_topic_title = topic.title

    posts =
      Post
        .where(topic_id: topic.id)
        .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
        .where("not hidden")
        .order(:post_number)

    columns = ["posts.id", :post_number, :raw, :username]

    current_post_numbers = posts.limit(5).pluck(:post_number)
    current_post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
    current_post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)
    # The three samples usually overlap; no need to send duplicates to SQL.
    current_post_numbers = current_post_numbers.uniq

    data =
      Post
        .where(topic_id: topic.id)
        .joins(:user)
        .where("post_number in (?)", current_post_numbers)
        .order(:post_number)
        .pluck(*columns)

    @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  end

  if !@last_summary
    "Say: No topic found!"
  else
    "Topic summarized"
  end
end
protected
# Arguments for rendering this tool's description.
#
# Returns a Hash with :url (site base path followed by "/t/-/" and the
# value of @last_topic_id) and :title (@last_topic_title, or an empty
# string when it is not set).
def description_args
  topic_path = "#{Discourse.base_path}/t/-/#{@last_topic_id}"
  topic_title = @last_topic_title || ""

  { url: topic_path, title: topic_title }
end
private
# Summarizes the given post rows with the LLM.
#
# The posts are flattened into one text and cut into sections that are
# closed as soon as they exceed 2500 tokens. Each section is summarized on
# its own; when that yields more than one summary a final LLM call merges
# them into a single narrative.
#
# data         - rows of [id, post_number, raw, username]
# topic        - topic being summarized; handed to #section_prompt
# guidance     - extra instructions; handed to #section_prompt
# bot_user     - passed to llm.generate as the `user:` option
# llm          - object responding to #tokenizer and #generate
# progress_blk - optional block, called with the growing progress string
#                before every LLM call
#
# Returns the summary, or nil when data produced no text to summarize.
def summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  text = +""
  data.each do |_id, post_number, raw, username|
    # Close the parenthesis and end with a newline so that the last word of
    # one post does not fuse with the header of the next one.
    text << "(#{post_number} #{username} said: #{raw})\n"
  end

  sections = []
  current_section = +""

  text
    .split(/\s+/)
    .each_slice(20) do |slice|
      current_section << " "
      current_section << slice.join(" ")

      # somehow any more will get closer to limits
      if llm.tokenizer.tokenize(current_section).length > 2500
        sections << current_section
        current_section = +""
      end
    end

  sections << current_section if current_section.present?

  # Keep the first four and the last three sections. Trimming only makes
  # sense above seven sections: below that the two slices overlap and a
  # section would be summarized twice.
  sections = sections[0..3] + sections[-3..-1] if sections.length > 7

  progress = +I18n.t("discourse_ai.ai_bot.summarizing")
  progress_blk&.call(progress)

  summaries =
    sections.map do |section|
      progress << "."
      progress_blk&.call(progress)

      prompt = section_prompt(topic, section, guidance)
      llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
    end

  # Nothing to merge for zero or one summary (first is nil when empty).
  return summaries.first if summaries.length <= 1

  progress << "."
  progress_blk&.call(progress)

  concatenation_prompt = {
    insts: "You are a helpful bot",
    input:
      "Concatenate the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
  }

  llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
end
# Builds the LLM prompt used to summarize one section of a topic.
#
# topic    - object responding to #url and #title
# text     - the section text to summarize
# guidance - extra instructions, interpolated into the "Guidance:" line
#
# Returns a Hash with :insts (system-style instructions) and :input (the
# request containing guidance, topic title and the section text).
def section_prompt(topic, text, guidance)
  topic_url = topic.url

  instructions = <<~TEXT
    You are a summarization bot.
    You effectively summarise any text.
    You condense it into a shorter version.
    You understand and generate Discourse forum markdown.
    Try generating links as well the format is #{topic_url}/POST_NUMBER. eg: [ref](#{topic_url}/77)
  TEXT

  request = <<~TEXT
    Guidance: #{guidance}
    You are summarizing the topic: #{topic.title}
    Summarize the following in 400 words:
    #{text}
  TEXT

  { insts: instructions, input: request }
end
end
end
end
end