mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-09 11:48:47 +00:00
* DEV: AI bot migration to the Llm pattern. We added tool and conversation context support to the Llm service in discourse-ai#366, meaning we met all the conditions to migrate this module. This PR migrates to the new pattern, meaning adding a new bot now requires minimal effort as long as the service supports it. On top of this, we introduce the concept of a "Playground" to separate the PM-specific bits from the completion, allowing us to use the bot in other contexts like chat in the future. Commands are called tools, and we simplified all the placeholder logic to perform updates in a single place, making the flow more one-wayish. * Followup fixes based on testing * Cleanup unused inference code * FIX: text-based tools could be in the middle of a sentence * GPT-4-turbo support * Use new LLM API
184 lines
5.1 KiB
Ruby
184 lines
5.1 KiB
Ruby
#frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module AiBot
|
|
module Tools
|
|
class Summarize < Tool
|
|
# Returns the signature hash for this tool: its name, a description and the
# list of parameters it accepts.
#
# @return [Hash] hash with +:name+, +:description+ and +:parameters+ keys;
#   +topic_id+ is flagged as required, +guidance+ is not
def self.signature
  {
    name: name,
    description: "Will summarize a topic attempting to answer question in guidance",
    parameters: [
      {
        name: "topic_id",
        description: "The discourse topic id to summarize",
        type: "integer",
        required: true,
      },
      {
        name: "guidance",
        description: "Special guidance on how to summarize the topic",
        type: "string",
      },
    ],
  }
end
|
|
|
|
# Identifier of this tool; also used as the +:name+ entry of the signature.
#
# @return [String] always "summary"
def self.name
  "summary"
end
|
|
|
|
# The +:topic_id+ parameter coerced with +to_i+, so a missing or
# non-numeric value yields 0.
#
# @return [Integer]
def topic_id
  parameters[:topic_id].to_i
end
|
|
|
|
# The raw +:guidance+ parameter, or nil when none was supplied.
#
# @return [String, nil]
def guidance
  parameters[:guidance]
end
|
|
|
|
# NOTE(review): presumably tells the caller not to request a follow-up LLM
# reply after this tool has run — confirm against the Tool base class.
#
# @return [Boolean] always false
def chain_next_response?
  false
end
|
|
|
|
# NOTE(review): presumably marks the tool's output as a self-contained
# reply — confirm against the Tool base class.
#
# @return [Boolean] always true
def standalone?
  true
end
|
|
|
|
# NOTE(review): presumably a hint that a cheaper model may be used for this
# tool — confirm against the Tool base class.
#
# @return [Boolean] always true
def low_cost?
  true
end
|
|
|
|
# Text to show for this tool run: the summary produced by the last +invoke+,
# or the translated "topic not found" message when there is none.
#
# @return [String]
def custom_raw
  @last_summary || I18n.t("discourse_ai.ai_bot.topic_not_found")
end
|
|
|
|
# Looks up the requested topic, collects a sample of its posts and asks the
# LLM to summarize them.
#
# The sample is the first 5 posts, the 50 highest scoring posts and the last
# 5 posts (regular and small-action posts only, hidden posts excluded),
# re-read in post_number order.
#
# @param bot_user [Object] passed through to +summarize+ / the LLM call
# @param llm [Object] LLM service, passed through to +summarize+
# @param progress_blk [Proc] receives progress text while summarizing
# @return [String] "Topic summarized" when a summary was produced,
#   "Say: No topic found!" otherwise
def invoke(bot_user, llm, &progress_blk)
  # Reset all per-run state up front so a failed lookup never leaves the
  # title/id of a previous run behind.
  @last_summary = nil
  @last_topic_id = nil
  @last_topic_title = nil

  topic = Topic.find_by(id: topic_id) if topic_id > 0
  # Guardian is built without a user, so only topics visible to that
  # guardian are summarized.
  topic = nil if topic && !Guardian.new.can_see?(topic)

  if topic
    # Recorded for description_args, which builds the topic link from it.
    @last_topic_id = topic.id
    @last_topic_title = topic.title

    posts =
      Post
        .where(topic_id: topic.id)
        .where("post_type in (?)", [Post.types[:regular], Post.types[:small_action]])
        .where("not hidden")
        .order(:post_number)

    columns = ["posts.id", :post_number, :raw, :username]

    current_post_numbers = posts.limit(5).pluck(:post_number)
    current_post_numbers += posts.reorder("posts.score desc").limit(50).pluck(:post_number)
    current_post_numbers += posts.reorder("post_number desc").limit(5).pluck(:post_number)

    data =
      Post
        .where(topic_id: topic.id)
        .joins(:user)
        # The three samples overlap on short topics; no need to send duplicates.
        .where("post_number in (?)", current_post_numbers.uniq)
        .order(:post_number)
        .pluck(*columns)

    @last_summary = summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  end

  @last_summary ? "Topic summarized" : "Say: No topic found!"
end
|
|
|
|
protected
|
|
|
|
# Arguments for the tool's description text: a link to the topic and its
# title.
#
# @return [Hash] +:url+ and +:title+ (empty string when no title is known)
def description_args
  # Fall back to the requested id: @last_topic_id is only present once a run
  # has recorded it, and without a fallback the link ends in a bare "/t/-/".
  {
    url: "#{Discourse.base_path}/t/-/#{@last_topic_id || topic_id}",
    title: @last_topic_title || "",
  }
end
|
|
|
|
private
|
|
|
|
# Summarizes the given posts with the LLM.
#
# The posts are flattened into one text, cut into sections of just over
# 2500 tokens, each section is summarized on its own, and when there is more
# than one section summary the LLM is asked to merge them.
#
# @param data [Array<Array>] rows of (id, post_number, raw, username)
# @param topic [Object] topic being summarized, forwarded to +section_prompt+
# @param guidance [String, nil] forwarded to +section_prompt+
# @param bot_user [Object] passed to the LLM as +user:+
# @param llm [Object] must respond to +tokenizer+ (with +tokenize+) and +generate+
# @param progress_blk [Proc, nil] called with the growing progress string
# @return [String, nil] the summary, or nil when +data+ yields no text
def summarize(data, topic, guidance, bot_user, llm, &progress_blk)
  text = +""
  data.each do |_id, post_number, raw, username|
    # Close the paren and add a separator so the last word of one post does
    # not fuse with the "(N" marker of the next one.
    text << "(#{post_number} #{username} said: #{raw}) "
  end

  sections = []
  current_section = +""

  text
    .split(/\s+/)
    .each_slice(20) do |slice|
      current_section << " " << slice.join(" ")

      # somehow any more will get closer to limits
      if llm.tokenizer.tokenize(current_section).length > 2500
        sections << current_section
        current_section = +""
      end
    end

  sections << current_section if current_section.present?

  # Keep at most 7 sections (first 4 + last 3). The threshold must be 7:
  # with anything lower the two slices overlap and a section is sent twice.
  sections = sections.first(4) + sections.last(3) if sections.length > 7

  progress = +I18n.t("discourse_ai.ai_bot.summarizing")
  progress_blk&.call(progress)

  summaries =
    sections.map do |section|
      progress << "."
      progress_blk&.call(progress)

      prompt = section_prompt(topic, section, guidance)
      llm.generate(prompt, temperature: 0.6, max_tokens: 400, user: bot_user)
    end

  # Zero or one section: nothing to merge.
  return summaries.first if summaries.length <= 1

  progress << "."
  progress_blk&.call(progress)

  concatenation_prompt = {
    insts: "You are a helpful bot",
    input:
      "concatenated the disjoint summaries, creating a cohesive narrative:\n#{summaries.join("\n")}",
  }

  llm.generate(concatenation_prompt, temperature: 0.6, max_tokens: 500, user: bot_user)
end
|
|
|
|
# Builds the prompt used to summarize one section of a topic.
#
# @param topic [Object] must respond to +url+ and +title+
# @param text [String] the section text to summarize
# @param guidance [String, nil] interpolated after "Guidance: "
# @return [Hash] +:insts+ (system instructions) and +:input+ (the request)
def section_prompt(topic, text, guidance)
  insts = <<~TEXT
    You are a summarization bot.
    You effectively summarise any text.
    You condense it into a shorter version.
    You understand and generate Discourse forum markdown.
    Try generating links as well the format is #{topic.url}/POST_NUMBER. eg: [ref](#{topic.url}/77)
  TEXT

  input = <<~TEXT
    Guidance: #{guidance}
    You are summarizing the topic: #{topic.title}
    Summarize the following in 400 words:

    #{text}
  TEXT

  { insts: insts, input: input }
end
|
|
end
|
|
end
|
|
end
|
|
end
|