discourse-ai/app/models/ai_persona.rb
Roman Rizzi 1f1c94e5c6
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads with an AI persona. We split each upload into fragments and generate embeddings from them. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.

For now, we'll only allow plain-text files, but this will change in the future.

Commits:

* FEATURE: RAG embeddings for the AI Bot

This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings for. In a later commit, we'll use those embeddings to give the bot additional
information during conversations.

* Basic asymmetric similarity search to provide guidance in system prompt

* Fix tests and lint

* Apply reranker to fragments

* Uploads filter, css adjustments and file validations

* Add placeholder for rag fragments

* Update annotations
2024-04-01 13:43:34 -03:00

363 lines
9.4 KiB
Ruby

# frozen_string_literal: true
class AiPersona < ActiveRecord::Base
# places a hard limit, so per site we cache a maximum of 500 classes
MAX_PERSONAS_PER_SITE = 500

validates :name, presence: true, uniqueness: true, length: { maximum: 100 }
validates :description, presence: true, length: { maximum: 2000 }
validates :system_prompt, presence: true, length: { maximum: 10_000_000 }
# system personas may be toggled but not edited; see system_persona_unchangeable
validate :system_persona_unchangeable, on: :update, if: :system
validates :max_context_posts, numericality: { greater_than: 0 }, allow_nil: true
# leaves some room for growth but sets a maximum to avoid memory issues
# we may want to revisit this in the future
#
# The upper bound has to be spelled `less_than_or_equal_to`: `maximum` is a
# length-validator option and is not among the numericality validator's
# options, so the previous `maximum: 4_000_000` never enforced the cap.
validates :vision_max_pixels, numericality: { greater_than: 0, less_than_or_equal_to: 4_000_000 }

belongs_to :created_by, class_name: "User"
belongs_to :user

has_many :upload_references, as: :target, dependent: :destroy
has_many :uploads, through: :upload_references

# NOTE(review): the singular `rag_document_fragment` is the association that
# actually carries `dependent: :destroy`. The plural one below goes through
# `ai_persona_rag_document_fragments`, which is not declared anywhere in this
# class as far as this file shows - confirm it exists (or drop the `through:`)
# before relying on `rag_document_fragments`.
has_many :rag_document_fragment, dependent: :destroy
has_many :rag_document_fragments, through: :ai_persona_rag_document_fragments

# refuses to delete system personas; see ensure_not_system
before_destroy :ensure_not_system
# In-memory key/value store with one independent bucket per multisite database.
#
# Reads and writes always target the bucket of the database reported by
# RailsMultisite::ConnectionManagement.current_db. Flushing empties the local
# bucket and publishes the database name on a MessageBus channel; every
# instance subscribed to that channel resets the bucket named in the message
# (presumably so other processes drop their copies too - confirm with the
# MessageBus setup).
class MultisiteHash
  # @param id [String] suffix used to build this cache's MessageBus channel name
  def initialize(id)
    @id = id
    # Buckets are created lazily the first time a database name is looked up.
    @hash = Hash.new { |buckets, db| buckets[db] = {} }
    MessageBus.subscribe(channel_name) { |message| @hash[message.data] = {} }
  end

  # @return [String] MessageBus channel used to announce flushes
  def channel_name
    "/multisite-hash-#{@id}"
  end

  # @return [String] name of the database the current connection points at
  def current_db
    RailsMultisite::ConnectionManagement.current_db
  end

  # Reads +key+ from the current database's bucket (nil when absent).
  def [](key)
    site_bucket[key]
  end

  # Stores +val+ under +key+ in the current database's bucket.
  def []=(key, val)
    site_bucket[key] = val
  end

  # Empties the current database's bucket and announces it on the channel.
  def flush!
    db = current_db
    @hash[db] = {}
    MessageBus.publish(channel_name, db)
  end

  private

  # Bucket belonging to the current database, created on first access.
  def site_bucket
    @hash[current_db]
  end
end
# Returns the class-wide persona cache, building it on first use.
#
# @return [MultisiteHash] the cache created with the "persona_cache" id
def self.persona_cache
  return @persona_cache if @persona_cache

  @persona_cache = MultisiteHash.new("persona_cache")
end
# Sorts by the priority column descending, then by lower-cased name ascending.
scope :ordered, -> { order("priority DESC, lower(name) ASC") }
# Returns the persona classes for every enabled persona, in `ordered` order,
# capped at MAX_PERSONAS_PER_SITE. The list is stored in the persona cache
# under :value and reused until that cache is flushed.
#
# @return [Array] one `class_instance` result per enabled persona record
def self.all_personas
  cache = persona_cache
  cached = cache[:value]
  return cached if cached

  enabled_records = AiPersona.ordered.where(enabled: true).limit(MAX_PERSONAS_PER_SITE)
  cache[:value] = enabled_records.map(&:class_instance)
end
# Lists the enabled, mentionable personas that have an associated user.
#
# The unfiltered list is stored in the persona cache under
# :mentionable_usernames and reused until that cache is flushed.
#
# @param user [#in_any_groups?, nil] when given, only personas whose
#   allowed_group_ids the user belongs to are returned
# @return [Array<Hash>] hashes with :id, :user_id, :username,
#   :allowed_group_ids and :default_llm keys
def self.mentionables(user: nil)
  rows =
    (
      persona_cache[:mentionable_usernames] ||= begin
        fields = %i[id user_id username allowed_group_ids default_llm]
        AiPersona
          .where(mentionable: true, enabled: true)
          .joins(:user)
          .pluck("ai_personas.id, users.id, users.username_lower, allowed_group_ids, default_llm")
          .map { |values| fields.zip(values).to_h }
      end
    )

  return rows unless user

  rows.select { |row| user.in_any_groups?(row[:allowed_group_ids]) }
end
# Every committed change to a persona invalidates the cached persona data.
after_commit :bump_cache

# Flushes the class-wide persona cache for the current site.
def bump_cache
  cache = self.class.persona_cache
  cache.flush!
end
# Returns the persona class that represents this record at runtime.
#
# When the record's id is registered in
# DiscourseAi::AiBot::Personas::Persona.system_personas_by_id, that registered
# class is returned after its class-level readers have been redefined to
# return this record's values. Otherwise an anonymous subclass of
# DiscourseAi::AiBot::Personas::Persona is built from the record.
#
# @return [Class] the decorated system persona class or a new custom subclass
def class_instance
  # Capture plain values up front: the generated readers close over these
  # values rather than over the record itself.
  class_level_values = {
    allowed_group_ids: self.allowed_group_ids,
    id: self.id,
    system: self.system,
    user_id: self.user_id,
    mentionable: self.mentionable,
    default_llm: self.default_llm,
    max_context_posts: self.max_context_posts,
    vision_enabled: self.vision_enabled,
    vision_max_pixels: self.vision_max_pixels,
  }

  system_class = DiscourseAi::AiBot::Personas::Persona.system_personas_by_id[self.id]
  if system_class
    class_level_values.each do |reader, value|
      system_class.define_singleton_method(reader) { value }
    end
    return system_class
  end

  custom_values = class_level_values.merge(name: self.name, description: self.description)
  record_id = self.id
  tool_options = {}
  configured_tools = self.respond_to?(:commands) ? self.commands : self.tools

  tool_classes =
    configured_tools.filter_map do |entry|
      # An entry is either a tool name or a [name, options] pair.
      tool_name, entry_options = entry.is_a?(Array) ? entry.first(2) : [entry, nil]

      # Won't migrate data yet. Let's rewrite to the tool name.
      tool_name = tool_name.gsub("Command", "")
      tool_name = "List#{tool_name}" if %w[Categories Tags].include?(tool_name)

      begin
        tool_class = "DiscourseAi::AiBot::Tools::#{tool_name}".constantize
        tool_options[tool_class] = entry_options if entry_options
        tool_class
      rescue StandardError
        # Names that do not resolve to a tool class are skipped.
        nil
      end
    end

  Class.new(DiscourseAi::AiBot::Personas::Persona) do
    custom_values.each do |reader, value|
      define_singleton_method(reader) { value }
    end

    %i[to_s inspect].each do |printer|
      define_singleton_method(printer) do
        "#<DiscourseAi::AiBot::Personas::Persona::Custom @name=#{self.name} @allowed_group_ids=#{self.allowed_group_ids.join(",")}>"
      end
    end

    # Each instance reloads the record, so per-instance readers below reflect
    # the row as it is when the persona is instantiated (nil-safe if deleted).
    define_method(:initialize) do |*args, **kwargs|
      @ai_persona = AiPersona.find_by(id: record_id)
      super(*args, **kwargs)
    end

    define_method(:persona_id) { @ai_persona&.id }
    define_method(:tools) { tool_classes }
    define_method(:options) { tool_options }
    define_method(:temperature) { @ai_persona&.temperature }
    define_method(:top_p) { @ai_persona&.top_p }
    define_method(:system_prompt) { @ai_persona&.system_prompt || "You are a helpful bot." }
    define_method(:uploads) { @ai_persona&.uploads }
  end
end
# Persona bot users get ids starting here and counting downwards.
FIRST_PERSONA_USER_ID = -1200

# Creates the user account that acts on behalf of this persona and links it
# to the record through user_id.
#
# @return [User] the newly saved user
# @raise [RuntimeError] when user_id already points at an existing user
def create_user!
  raise "User already exists" if user_id && User.exists?(user_id)

  # Pick the free id closest to FIRST_PERSONA_USER_ID within the 200 ids
  # below it (the series runs from the constant down to constant - 200).
  free_id = DB.query_single(<<~SQL, FIRST_PERSONA_USER_ID, FIRST_PERSONA_USER_ID - 200).first
    WITH seq AS (
    SELECT generate_series(?, ?, -1) AS id
    )
    SELECT seq.id FROM seq
    LEFT JOIN users ON users.id = seq.id
    WHERE users.id IS NULL
    ORDER BY seq.id DESC
  SQL

  # Whole range taken: fall back to one below the smallest existing user id.
  if free_id.nil?
    free_id = DB.query_single(<<~SQL).first
      SELECT min(id) - 1 FROM users
    SQL
  end

  # note .invalid is a reserved TLD which will route nowhere
  bot_user =
    User.new(
      email: "#{SecureRandom.hex}@does-not-exist.invalid",
      name: name.titleize,
      username: UserNameSuggester.suggest("#{name}_bot"),
      active: true,
      approved: true,
      trust_level: TrustLevel[4],
      id: free_id,
    )

  # The user is saved with validations skipped.
  bot_user.save!(validate: false)
  update!(user_id: bot_user.id)

  bot_user
end
private
# Validation: adds a base error when any protected attribute has changed.
# It is registered for updates of records whose `system` flag is set.
def system_persona_unchangeable
  protected_attributes = %i[top_p temperature system_prompt commands name description]
  return if protected_attributes.none? { |attribute| public_send("#{attribute}_changed?") }

  errors.add(:base, I18n.t("discourse_ai.ai_bot.personas.cannot_edit_system_persona"))
end
# before_destroy guard: for records whose `system` flag is set, adds a base
# error and throws :abort so the callback chain stops.
def ensure_not_system
  return unless system

  errors.add(:base, I18n.t("discourse_ai.ai_bot.personas.cannot_delete_system_persona"))
  throw :abort
end
end
# == Schema Information
#
# Table name: ai_personas
#
# id :bigint not null, primary key
# name :string(100) not null
# description :string(2000) not null
# commands :json not null
# system_prompt :string(10000000) not null
# allowed_group_ids :integer default([]), not null, is an Array
# created_by_id :integer
# enabled :boolean default(TRUE), not null
# created_at :datetime not null
# updated_at :datetime not null
# system :boolean default(FALSE), not null
# priority :boolean default(FALSE), not null
# temperature :float
# top_p :float
# user_id :integer
# mentionable :boolean default(FALSE), not null
# default_llm :text
# max_context_posts :integer
# vision_enabled :boolean default(FALSE), not null
# vision_max_pixels :integer default(1048576), not null
#
# Indexes
#
# index_ai_personas_on_name (name) UNIQUE
#