mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-06-30 11:32:18 +00:00
This commit introduces a new Forum Researcher persona specialized in deep forum content analysis along with comprehensive improvements to our AI infrastructure. Key additions: New Forum Researcher persona with advanced filtering and analysis capabilities Robust filtering system supporting tags, categories, dates, users, and keywords LLM formatter to efficiently process and chunk research results Infrastructure improvements: Implemented CancelManager class to centrally manage AI completion cancellations Replaced callback-based cancellation with a more robust pattern Added systematic cancellation monitoring with callbacks Other improvements: Added configurable default_enabled flag to control which personas are enabled by default Updated translation strings for the new researcher functionality Added comprehensive specs for the new components Renames Researcher -> Web Researcher This change makes our AI platform more stable while adding powerful research capabilities that can analyze forum trends and surface relevant content.
526 lines
17 KiB
Ruby
526 lines
17 KiB
Ruby
# frozen_string_literal: true
|
|
#
|
|
module DiscourseAi
|
|
module Completions
|
|
class PromptMessagesBuilder
|
|
MAX_CHAT_UPLOADS = 5
|
|
MAX_TOPIC_UPLOADS = 5
|
|
attr_reader :chat_context_posts
|
|
attr_accessor :topic
|
|
|
|
def self.messages_from_chat(
|
|
message,
|
|
channel:,
|
|
context_post_ids:,
|
|
max_messages:,
|
|
include_uploads:,
|
|
bot_user_ids:,
|
|
instruction_message: nil
|
|
)
|
|
include_thread_titles = !channel.direct_message_channel? && !message.thread_id
|
|
|
|
current_id = message.id
|
|
messages = nil
|
|
|
|
if !message.thread_id && channel.direct_message_channel?
|
|
messages = [message]
|
|
elsif !channel.direct_message_channel? && !message.thread_id
|
|
messages =
|
|
Chat::Message
|
|
.joins("left join chat_threads on chat_threads.id = chat_messages.thread_id")
|
|
.where(chat_channel_id: channel.id)
|
|
.where(
|
|
"chat_messages.thread_id IS NULL OR chat_threads.original_message_id = chat_messages.id",
|
|
)
|
|
.order(id: :desc)
|
|
.limit(max_messages)
|
|
.to_a
|
|
.reverse
|
|
end
|
|
|
|
messages ||=
|
|
ChatSDK::Thread.last_messages(
|
|
thread_id: message.thread_id,
|
|
guardian: Discourse.system_user.guardian,
|
|
page_size: max_messages,
|
|
)
|
|
|
|
builder = new
|
|
|
|
guardian = Guardian.new(message.user)
|
|
if context_post_ids
|
|
builder.set_chat_context_posts(
|
|
context_post_ids,
|
|
guardian,
|
|
include_uploads: include_uploads,
|
|
)
|
|
end
|
|
|
|
messages.each do |m|
|
|
# restore stripped message
|
|
m.message = instruction_message if m.id == current_id && instruction_message
|
|
|
|
if bot_user_ids.include?(m.user_id)
|
|
builder.push(type: :model, content: m.message)
|
|
else
|
|
upload_ids = nil
|
|
upload_ids = m.uploads.map(&:id) if include_uploads && m.uploads.present?
|
|
mapped_message = m.message
|
|
|
|
thread_title = nil
|
|
thread_title = m.thread&.title if include_thread_titles && m.thread_id
|
|
mapped_message = "(#{thread_title})\n#{m.message}" if thread_title
|
|
|
|
if m.uploads.present?
|
|
mapped_message =
|
|
"#{mapped_message} -- uploaded(#{m.uploads.map(&:short_url).join(", ")})"
|
|
end
|
|
|
|
builder.push(
|
|
type: :user,
|
|
content: mapped_message,
|
|
id: m.user.username,
|
|
upload_ids: upload_ids,
|
|
)
|
|
end
|
|
end
|
|
|
|
builder.to_a(
|
|
limit: max_messages,
|
|
style: channel.direct_message_channel? ? :chat_with_context : :chat,
|
|
)
|
|
end
|
|
|
|
def self.messages_from_post(post, style: nil, max_posts:, bot_usernames:, include_uploads:)
|
|
# Pay attention to the `post_number <= ?` here.
|
|
# We want to inject the last post as context because they are translated differently.
|
|
|
|
post_types = [Post.types[:regular]]
|
|
post_types << Post.types[:whisper] if post.post_type == Post.types[:whisper]
|
|
|
|
context =
|
|
post
|
|
.topic
|
|
.posts
|
|
.joins(:user)
|
|
.joins("LEFT JOIN post_custom_prompts ON post_custom_prompts.post_id = posts.id")
|
|
.where("post_number <= ?", post.post_number)
|
|
.order("post_number desc")
|
|
.where("post_type in (?)", post_types)
|
|
.limit(max_posts)
|
|
.pluck(
|
|
"posts.raw",
|
|
"users.username",
|
|
"post_custom_prompts.custom_prompt",
|
|
"(
|
|
SELECT array_agg(ref.upload_id)
|
|
FROM upload_references ref
|
|
WHERE ref.target_type = 'Post' AND ref.target_id = posts.id
|
|
) as upload_ids",
|
|
"posts.created_at",
|
|
)
|
|
|
|
builder = new
|
|
builder.topic = post.topic
|
|
|
|
context.reverse_each do |raw, username, custom_prompt, upload_ids, created_at|
|
|
custom_prompt_translation =
|
|
Proc.new do |message|
|
|
# We can't keep backwards-compatibility for stored functions.
|
|
# Tool syntax requires a tool_call_id which we don't have.
|
|
if message[2] != "function"
|
|
custom_context = {
|
|
content: message[0],
|
|
type: message[2].present? ? message[2].to_sym : :model,
|
|
}
|
|
|
|
custom_context[:id] = message[1] if custom_context[:type] != :model
|
|
custom_context[:name] = message[3] if message[3]
|
|
|
|
thinking = message[4]
|
|
custom_context[:thinking] = thinking if thinking
|
|
custom_context[:created_at] = created_at
|
|
|
|
builder.push(**custom_context)
|
|
end
|
|
end
|
|
|
|
if custom_prompt.present?
|
|
custom_prompt.each(&custom_prompt_translation)
|
|
else
|
|
context = { content: raw, type: (bot_usernames.include?(username) ? :model : :user) }
|
|
|
|
context[:id] = username if context[:type] == :user
|
|
|
|
if upload_ids.present? && context[:type] == :user && include_uploads
|
|
context[:upload_ids] = upload_ids.compact
|
|
end
|
|
context[:created_at] = created_at
|
|
|
|
builder.push(**context)
|
|
end
|
|
end
|
|
|
|
builder.to_a(style: style || (post.topic.private_message? ? :bot : :topic))
|
|
end
|
|
|
|
def initialize
|
|
@raw_messages = []
|
|
@timestamps = {}
|
|
end
|
|
|
|
def set_chat_context_posts(post_ids, guardian, include_uploads:)
|
|
posts = []
|
|
Post
|
|
.where(id: post_ids)
|
|
.order("id asc")
|
|
.each do |post|
|
|
next if !guardian.can_see?(post)
|
|
posts << post
|
|
end
|
|
if posts.present?
|
|
posts_context = []
|
|
posts_context << "\nThis chat is in the context of the Discourse topic '#{posts[0].topic.title}':\n\n"
|
|
posts_context << "{{{\n"
|
|
posts.each do |post|
|
|
posts_context << "url: #{post.url}\n"
|
|
posts_context << "#{post.username}: #{post.raw}\n\n"
|
|
if include_uploads
|
|
post.uploads.each { |upload| posts_context << { upload_id: upload.id } }
|
|
end
|
|
end
|
|
posts_context << "}}}"
|
|
@chat_context_posts = posts_context
|
|
end
|
|
end
|
|
|
|
def to_a(limit: nil, style: nil)
|
|
# topic and chat array are special, they are single messages that contain all history
|
|
return chat_array(limit: limit) if style == :chat
|
|
return topic_array if style == :topic
|
|
|
|
# the rest of the styles can include multiple messages
|
|
result = valid_messages_array(@raw_messages)
|
|
prepend_chat_post_context(result) if style == :chat_with_context
|
|
|
|
if limit
|
|
result[0..limit]
|
|
else
|
|
result
|
|
end
|
|
end
|
|
|
|
def push(type:, content:, name: nil, upload_ids: nil, id: nil, thinking: nil, created_at: nil)
|
|
if !%i[user model tool tool_call system].include?(type)
|
|
raise ArgumentError, "type must be either :user, :model, :tool, :tool_call or :system"
|
|
end
|
|
raise ArgumentError, "upload_ids must be an array" if upload_ids && !upload_ids.is_a?(Array)
|
|
|
|
content = [content, *upload_ids.map { |upload_id| { upload_id: upload_id } }] if upload_ids
|
|
message = { type: type, content: content }
|
|
message[:name] = name.to_s if name
|
|
message[:id] = id.to_s if id
|
|
if thinking
|
|
message[:thinking] = thinking["thinking"] if thinking["thinking"]
|
|
message[:thinking_signature] = thinking["thinking_signature"] if thinking[
|
|
"thinking_signature"
|
|
]
|
|
message[:redacted_thinking_signature] = thinking[
|
|
"redacted_thinking_signature"
|
|
] if thinking["redacted_thinking_signature"]
|
|
end
|
|
|
|
@raw_messages << message
|
|
@timestamps[message] = created_at if created_at
|
|
|
|
message
|
|
end
|
|
|
|
private
|
|
|
|
def valid_messages_array(messages)
|
|
result = []
|
|
|
|
# this will create a "valid" messages array
|
|
# 1. ensures we always start with a user message
|
|
# 2. ensures we always end with a user message
|
|
# 3. ensures we always interleave user and model messages
|
|
last_type = nil
|
|
messages.each do |message|
|
|
if message[:type] == :model && !message[:content]
|
|
message[:content] = "Reply cancelled by user."
|
|
end
|
|
|
|
next if !last_type && message[:type] != :user
|
|
|
|
if last_type == :tool_call && message[:type] != :tool
|
|
result.pop
|
|
last_type = result.length > 0 ? result[-1][:type] : nil
|
|
end
|
|
|
|
next if message[:type] == :tool && last_type != :tool_call
|
|
|
|
if message[:type] == last_type
|
|
# merge the message for :user message
|
|
# replace the message for other messages
|
|
last_message = result[-1]
|
|
|
|
if message[:type] == :user
|
|
old_name = last_message.delete(:id)
|
|
last_message[:content] = ["#{old_name}: ", last_message[:content]].flatten if old_name
|
|
|
|
new_content = message[:content]
|
|
new_content = ["#{message[:id]}: ", new_content].flatten if message[:id]
|
|
|
|
if !last_message[:content].is_a?(Array)
|
|
last_message[:content] = [last_message[:content]]
|
|
end
|
|
last_message[:content].concat(["\n", new_content].flatten)
|
|
|
|
compressed =
|
|
compress_messages_buffer(last_message[:content], max_uploads: MAX_TOPIC_UPLOADS)
|
|
last_message[:content] = compressed
|
|
else
|
|
last_message[:content] = message[:content]
|
|
end
|
|
else
|
|
result << message
|
|
end
|
|
|
|
last_type = message[:type]
|
|
end
|
|
|
|
result
|
|
end
|
|
|
|
def prepend_chat_post_context(messages)
|
|
return if @chat_context_posts.blank?
|
|
|
|
old_content = messages[0][:content]
|
|
old_content = [old_content] if !old_content.is_a?(Array)
|
|
|
|
new_content = []
|
|
new_content << "You are replying inside a Discourse chat.\n"
|
|
new_content.concat(@chat_context_posts)
|
|
new_content << "\n"
|
|
new_content << "Your instructions are:\n"
|
|
new_content.concat(old_content)
|
|
|
|
compressed = compress_messages_buffer(new_content.flatten, max_uploads: MAX_CHAT_UPLOADS)
|
|
|
|
messages[0][:content] = compressed
|
|
end
|
|
|
|
def format_user_info(user)
|
|
info = []
|
|
info << user_role(user)
|
|
info << "Trust level #{user.trust_level}" if user.trust_level > 0
|
|
info << "#{account_age(user)}"
|
|
info << "#{user.user_stat.post_count} posts" if user.user_stat.post_count.to_i > 0
|
|
"#{user.username} (#{user.name}): #{info.compact.join(", ")}"
|
|
end
|
|
|
|
def format_timestamp(timestamp)
|
|
return nil unless timestamp
|
|
|
|
time_diff = Time.now - timestamp
|
|
|
|
if time_diff < 1.minute
|
|
"just now"
|
|
elsif time_diff < 1.hour
|
|
mins = (time_diff / 1.minute).round
|
|
"#{mins} #{mins == 1 ? "minute" : "minutes"} ago"
|
|
elsif time_diff < 1.day
|
|
hours = (time_diff / 1.hour).round
|
|
"#{hours} #{hours == 1 ? "hour" : "hours"} ago"
|
|
elsif time_diff < 7.days
|
|
days = (time_diff / 1.day).round
|
|
"#{days} #{days == 1 ? "day" : "days"} ago"
|
|
elsif time_diff < 30.days
|
|
weeks = (time_diff / 7.days).round
|
|
"#{weeks} #{weeks == 1 ? "week" : "weeks"} ago"
|
|
elsif time_diff < 365.days
|
|
months = (time_diff / 30.days).round
|
|
"#{months} #{months == 1 ? "month" : "months"} ago"
|
|
else
|
|
years = (time_diff / 365.days).round
|
|
"#{years} #{years == 1 ? "year" : "years"} ago"
|
|
end
|
|
end
|
|
|
|
def user_role(user)
|
|
return "moderator" if user.moderator?
|
|
return "admin" if user.admin?
|
|
nil
|
|
end
|
|
|
|
def account_age(user)
|
|
years = ((Time.now - user.created_at) / 1.year).round
|
|
months = ((Time.now - user.created_at) / 1.month).round % 12
|
|
|
|
output = []
|
|
if years > 0
|
|
output << years.to_s
|
|
output << "year" if years == 1
|
|
output << "years" if years > 1
|
|
end
|
|
if months > 0
|
|
output << months.to_s
|
|
output << "month" if months == 1
|
|
output << "months" if months > 1
|
|
end
|
|
|
|
if output.empty?
|
|
"new account"
|
|
else
|
|
"account age: " + output.join(" ")
|
|
end
|
|
end
|
|
|
|
def format_topic_info(topic)
|
|
content_array = []
|
|
|
|
if topic.private_message?
|
|
content_array << "Private message info.\n"
|
|
else
|
|
content_array << "Topic information:\n"
|
|
end
|
|
|
|
content_array << "- URL: #{topic.url}\n"
|
|
content_array << "- Title: #{topic.title}\n"
|
|
if SiteSetting.tagging_enabled
|
|
tags = topic.tags.pluck(:name)
|
|
tags -= DiscourseTagging.hidden_tag_names if tags.present?
|
|
content_array << "- Tags: #{tags.join(", ")}\n" if tags.present?
|
|
end
|
|
if !topic.private_message?
|
|
content_array << "- Category: #{topic.category.name}\n" if topic.category
|
|
end
|
|
content_array << "- Number of replies: #{topic.posts_count - 1}\n\n"
|
|
|
|
content_array.join
|
|
end
|
|
|
|
def format_user_infos(usernames)
|
|
content_array = []
|
|
|
|
if usernames.present?
|
|
users_details =
|
|
User
|
|
.where(username: usernames)
|
|
.includes(:user_stat)
|
|
.map { |user| format_user_info(user) }
|
|
.compact
|
|
content_array << "User information:\n"
|
|
content_array << "- #{users_details.join("\n- ")}\n\n" if users_details.present?
|
|
end
|
|
content_array.join
|
|
end
|
|
|
|
def topic_array
|
|
raw_messages = @raw_messages.dup
|
|
content_array = []
|
|
content_array << "You are operating in a Discourse forum.\n\n"
|
|
content_array << format_topic_info(@topic) if @topic
|
|
|
|
if raw_messages.present?
|
|
usernames =
|
|
raw_messages.filter { |message| message[:type] == :user }.map { |message| message[:id] }
|
|
|
|
content_array << format_user_infos(usernames) if usernames.present?
|
|
end
|
|
|
|
last_user_message = raw_messages.pop
|
|
|
|
if raw_messages.present?
|
|
content_array << "Here is the conversation so far:\n"
|
|
raw_messages.each do |message|
|
|
content_array << "#{message[:id] || "User"}: "
|
|
timestamp = @timestamps[message]
|
|
content_array << "(#{format_timestamp(timestamp)}) " if timestamp
|
|
content_array << message[:content]
|
|
content_array << "\n\n"
|
|
end
|
|
end
|
|
|
|
if last_user_message
|
|
content_array << "Latest post is by #{last_user_message[:id] || "User"} who just posted:\n"
|
|
content_array << last_user_message[:content]
|
|
end
|
|
|
|
content_array =
|
|
compress_messages_buffer(content_array.flatten, max_uploads: MAX_TOPIC_UPLOADS)
|
|
|
|
user_message = { type: :user, content: content_array }
|
|
|
|
[user_message]
|
|
end
|
|
|
|
def chat_array(limit:)
|
|
if @raw_messages.length > 1
|
|
buffer = [
|
|
+"You are replying inside a Discourse chat channel. Here is a summary of the conversation so far:\n{{{",
|
|
]
|
|
|
|
@raw_messages[0..-2].each do |message|
|
|
buffer << "\n"
|
|
|
|
if message[:type] == :user
|
|
buffer << "#{message[:id] || "User"}: "
|
|
else
|
|
buffer << "Bot: "
|
|
end
|
|
|
|
buffer << message[:content]
|
|
end
|
|
|
|
buffer << "\n}}}"
|
|
buffer << "\n\n"
|
|
buffer << "Your instructions:"
|
|
buffer << "\n"
|
|
end
|
|
|
|
last_message = @raw_messages[-1]
|
|
buffer << "#{last_message[:id] || "User"}: "
|
|
buffer << last_message[:content]
|
|
|
|
buffer = compress_messages_buffer(buffer.flatten, max_uploads: MAX_CHAT_UPLOADS)
|
|
|
|
message = { type: :user, content: buffer }
|
|
[message]
|
|
end
|
|
|
|
# caps uploads to maximum uploads allowed in message stream
|
|
# and concats string elements
|
|
def compress_messages_buffer(buffer, max_uploads:)
|
|
compressed = []
|
|
current_text = +""
|
|
upload_count = 0
|
|
|
|
buffer.each do |item|
|
|
if item.is_a?(String)
|
|
current_text << item
|
|
elsif item.is_a?(Hash)
|
|
compressed << current_text if current_text.present?
|
|
compressed << item
|
|
current_text = +""
|
|
upload_count += 1
|
|
end
|
|
end
|
|
|
|
compressed << current_text if current_text.present?
|
|
|
|
if upload_count > max_uploads
|
|
to_remove = upload_count - max_uploads
|
|
removed = 0
|
|
compressed.delete_if { |item| item.is_a?(Hash) && (removed += 1) <= to_remove }
|
|
end
|
|
|
|
compressed = compressed[0] if compressed.length == 1 && compressed[0].is_a?(String)
|
|
|
|
compressed
|
|
end
|
|
end
|
|
end
|
|
end
|