mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-06 06:22:19 +00:00
FEATURE: Use personas for generating hypothetical posts (#1482)
* FEATURE: Use personas for generating hypothetical posts * Update prompt
This commit is contained in:
parent
40fa527633
commit
75fb37144f
@ -222,6 +222,10 @@ en:
|
||||
name: "Search"
|
||||
description: "Enhances search experience by providing AI-generated answers to queries"
|
||||
discoveries: "Discoveries"
|
||||
embeddings:
|
||||
name: "Embeddings"
|
||||
description: "Powers features like Related Topics and AI Search by generating semantic representations of text"
|
||||
hyde: "HyDE"
|
||||
discord:
|
||||
name: "Discord integration"
|
||||
description: "Adds the ability to search Discord channels"
|
||||
|
@ -394,6 +394,9 @@ en:
|
||||
spam_detector:
|
||||
name: "Spam detector"
|
||||
description: "Default persona powering our Spam detection feature"
|
||||
content_creator:
|
||||
name: "Content creator"
|
||||
description: "Default persona powering HyDE search"
|
||||
|
||||
topic_not_found: "Summary unavailable, topic not found!"
|
||||
summarizing: "Summarizing topic"
|
||||
|
@ -222,21 +222,30 @@ discourse_ai:
|
||||
default: false
|
||||
client: true
|
||||
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_selected_model:
|
||||
type: enum
|
||||
default: ""
|
||||
allow_any: false
|
||||
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
|
||||
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_per_post_enabled:
|
||||
default: false
|
||||
hidden: true
|
||||
ai_embeddings_generate_for_pms: false
|
||||
ai_embeddings_generate_for_pms:
|
||||
default: false
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_related_topics_enabled:
|
||||
default: false
|
||||
client: true
|
||||
ai_embeddings_semantic_related_topics: 5
|
||||
ai_embeddings_semantic_related_include_closed_topics: true
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_related_topics:
|
||||
default: 5
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_related_include_closed_topics:
|
||||
default: true
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_backfill_batch_size:
|
||||
default: 250
|
||||
hidden: true
|
||||
@ -244,12 +253,14 @@ discourse_ai:
|
||||
default: false
|
||||
client: true
|
||||
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_search_hyde_model:
|
||||
default: ""
|
||||
type: enum
|
||||
allow_any: false
|
||||
enum: "DiscourseAi::Configuration::LlmEnumerator"
|
||||
validator: "DiscourseAi::Configuration::LlmValidator"
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
|
||||
default: ""
|
||||
hidden: true
|
||||
@ -259,6 +270,12 @@ discourse_ai:
|
||||
default: false
|
||||
client: true
|
||||
hidden: true
|
||||
area: "ai-features/embeddings"
|
||||
ai_embeddings_semantic_search_hyde_persona:
|
||||
default: "-32"
|
||||
type: enum
|
||||
enum: "DiscourseAi::Configuration::PersonaEnumerator"
|
||||
area: "ai-features/embeddings"
|
||||
|
||||
ai_embeddings_discourse_service_api_endpoint:
|
||||
default: ""
|
||||
|
@ -36,6 +36,8 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
|
||||
setting_name = "ai_helper_custom_prompts_allowed_groups"
|
||||
default_groups = [Group::AUTO_GROUPS[:staff]]
|
||||
persona.allowed_group_ids = from_setting(setting_name) || default_groups
|
||||
elsif persona_class == DiscourseAi::Personas::ContentCreator
|
||||
persona.allowed_group_ids = [Group::AUTO_GROUPS[:everyone]]
|
||||
else
|
||||
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
|
||||
end
|
||||
|
@ -144,6 +144,17 @@ module DiscourseAi
|
||||
]
|
||||
end
|
||||
|
||||
def embeddings_features
|
||||
feature_cache[:embeddings] ||= [
|
||||
new(
|
||||
"hyde",
|
||||
"ai_embeddings_semantic_search_hyde_persona",
|
||||
DiscourseAi::Configuration::Module::EMBEDDINGS_ID,
|
||||
DiscourseAi::Configuration::Module::EMBEDDINGS,
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
def lookup_bot_persona_ids
|
||||
AiPersona
|
||||
.where(enabled: true)
|
||||
@ -196,6 +207,7 @@ module DiscourseAi
|
||||
translation_features,
|
||||
bot_features,
|
||||
spam_features,
|
||||
embeddings_features,
|
||||
].flatten
|
||||
end
|
||||
|
||||
@ -241,6 +253,8 @@ module DiscourseAi
|
||||
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
|
||||
when DiscourseAi::Configuration::Module::TRANSLATION
|
||||
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
|
||||
when DiscourseAi::Configuration::Module::EMBEDDINGS
|
||||
DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
|
||||
end
|
||||
|
||||
if llm_model.blank? && persona.default_llm_id
|
||||
|
@ -11,8 +11,19 @@ module DiscourseAi
|
||||
TRANSLATION = "translation"
|
||||
BOT = "bot"
|
||||
SPAM = "spam"
|
||||
EMBEDDINGS = "embeddings"
|
||||
|
||||
NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION, BOT, SPAM].freeze
|
||||
NAMES = [
|
||||
SUMMARIZATION,
|
||||
SEARCH,
|
||||
DISCORD,
|
||||
INFERENCE,
|
||||
AI_HELPER,
|
||||
TRANSLATION,
|
||||
BOT,
|
||||
SPAM,
|
||||
EMBEDDINGS,
|
||||
].freeze
|
||||
|
||||
SUMMARIZATION_ID = 1
|
||||
SEARCH_ID = 2
|
||||
@ -22,6 +33,7 @@ module DiscourseAi
|
||||
TRANSLATION_ID = 6
|
||||
BOT_ID = 7
|
||||
SPAM_ID = 8
|
||||
EMBEDDINGS_ID = 9
|
||||
|
||||
class << self
|
||||
def all
|
||||
@ -75,6 +87,13 @@ module DiscourseAi
|
||||
enabled_by_setting: "ai_spam_detection_enabled",
|
||||
features: DiscourseAi::Configuration::Feature.spam_features,
|
||||
),
|
||||
new(
|
||||
EMBEDDINGS_ID,
|
||||
EMBEDDINGS,
|
||||
enabled_by_setting: "ai_embeddings_enabled",
|
||||
features: DiscourseAi::Configuration::Feature.embeddings_features,
|
||||
extra_check: -> { SiteSetting.ai_embeddings_semantic_search_enabled },
|
||||
),
|
||||
]
|
||||
end
|
||||
|
||||
|
@ -78,7 +78,9 @@ module DiscourseAi
|
||||
return Post.none
|
||||
end
|
||||
|
||||
search_embedding = hyde ? hyde_embedding(search_term) : embedding(search_term)
|
||||
search_embedding = nil
|
||||
search_embedding = hyde_embedding(search_term) if hyde
|
||||
search_embedding = embedding(search_term) if search_embedding.blank?
|
||||
|
||||
over_selection_limit = limit * OVER_SELECTION_FACTOR
|
||||
|
||||
@ -176,26 +178,47 @@ module DiscourseAi
|
||||
end
|
||||
|
||||
def hypothetical_post_from(search_term)
|
||||
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
|
||||
You are a content creator for a forum. The forum description is as follows:
|
||||
#{SiteSetting.title}
|
||||
#{SiteSetting.site_description}
|
||||
context =
|
||||
DiscourseAi::Personas::BotContext.new(
|
||||
user: @guardian.user,
|
||||
skip_tool_details: true,
|
||||
feature_name: "semantic_search_hyde",
|
||||
messages: [{ type: :user, content: search_term }],
|
||||
)
|
||||
|
||||
Put the forum post between <ai></ai> tags.
|
||||
TEXT
|
||||
bot = build_bot(@guardian.user)
|
||||
return nil if bot.nil?
|
||||
|
||||
prompt.push(type: :user, content: <<~TEXT.strip)
|
||||
Using this description, write a forum post about the subject inside the <input></input> XML tags:
|
||||
structured_output = nil
|
||||
raw_response = +""
|
||||
hyde_schema_key = bot.persona.response_format&.first.to_h
|
||||
|
||||
<input>#{search_term}</input>
|
||||
TEXT
|
||||
buffer_blk =
|
||||
Proc.new do |partial, _, type|
|
||||
if type == :structured_output
|
||||
structured_output = partial
|
||||
elsif type.blank?
|
||||
# Assume response is a regular completion.
|
||||
raw_response << partial
|
||||
end
|
||||
end
|
||||
|
||||
llm_response =
|
||||
DiscourseAi::Completions::Llm.proxy(
|
||||
SiteSetting.ai_embeddings_semantic_search_hyde_model,
|
||||
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
|
||||
bot.reply(context, &buffer_blk)
|
||||
|
||||
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text.presence || llm_response
|
||||
structured_output&.read_buffered_property(hyde_schema_key["key"]&.to_sym) || raw_response
|
||||
end
|
||||
|
||||
# Priorities are:
|
||||
# 1. Persona's default LLM
|
||||
# 2. `ai_embeddings_semantic_search_hyde_model` setting.
|
||||
def find_ai_hyde_model(persona_klass)
|
||||
model_id =
|
||||
persona_klass.default_llm_id ||
|
||||
SiteSetting.ai_embeddings_semantic_search_hyde_model&.split(":")&.last
|
||||
|
||||
return if model_id.blank?
|
||||
|
||||
LlmModel.find_by(id: model_id)
|
||||
end
|
||||
|
||||
private
|
||||
@ -209,6 +232,18 @@ module DiscourseAi
|
||||
def build_embedding_key(digest, hyde_model, embedding_model)
|
||||
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
|
||||
end
|
||||
|
||||
def build_bot(user)
|
||||
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona
|
||||
|
||||
persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
|
||||
return if persona_klass.nil?
|
||||
|
||||
llm_model = find_ai_hyde_model(persona_klass)
|
||||
return if llm_model.nil?
|
||||
|
||||
DiscourseAi::Personas::Bot.as(user, persona: persona_klass.new, model: llm_model)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -171,7 +171,7 @@ module DiscourseAi
|
||||
text = +""
|
||||
result.each { |item| text << item if item.is_a?(String) }
|
||||
end
|
||||
raw_context << [text, bot_user.username]
|
||||
raw_context << [text, bot_user&.username]
|
||||
end
|
||||
|
||||
total_completions += 1
|
||||
|
33
lib/personas/content_creator.rb
Normal file
33
lib/personas/content_creator.rb
Normal file
@ -0,0 +1,33 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module DiscourseAi
|
||||
module Personas
|
||||
class ContentCreator < Persona
|
||||
def self.default_enabled
|
||||
false
|
||||
end
|
||||
|
||||
def system_prompt
|
||||
<<~PROMPT.strip
|
||||
You are a content creator for a forum. The forum title and description is as follows:
|
||||
* Title: {site_title}
|
||||
* Description: {site_description}
|
||||
|
||||
You will receive a couple of keywords and must create a post about the keywords, keeping the previous information in mind.
|
||||
|
||||
Format your response as a JSON object with a single key named "output", which has the created content.
|
||||
Your output should be in the following format:
|
||||
<output>
|
||||
{"output": "xx"}
|
||||
</output>
|
||||
|
||||
Where "xx" is replaced by the content.
|
||||
PROMPT
|
||||
end
|
||||
|
||||
def response_format
|
||||
[{ "key" => "output", "type" => "string" }]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
@ -69,6 +69,7 @@ module DiscourseAi
|
||||
TopicTitleTranslator => -29,
|
||||
ShortTextTranslator => -30,
|
||||
SpamDetector => -31,
|
||||
ContentCreator => -32,
|
||||
}
|
||||
end
|
||||
|
||||
|
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
|
||||
end
|
||||
|
||||
def trigger_search(query)
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
|
||||
subject.search_for_topics(query)
|
||||
end
|
||||
end
|
||||
@ -123,9 +123,9 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
|
||||
context "while searching as anon" do
|
||||
it "returns an empty list" do
|
||||
posts =
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
||||
["<ai>#{hypothetical_post}</ai>"],
|
||||
) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
|
||||
described_class.new(Guardian.new(nil)).search_for_topics(query)
|
||||
end
|
||||
|
||||
expect(posts).to be_empty
|
||||
end
|
||||
|
@ -125,7 +125,7 @@ RSpec.describe DiscourseAi::Personas::Tools::Search do
|
||||
DiscourseAi::Embeddings::Schema.for(Topic).store(post1.topic, hyde_embedding, "digest")
|
||||
|
||||
results =
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||
search.invoke(&progress_blk)
|
||||
end
|
||||
|
||||
@ -144,7 +144,7 @@ RSpec.describe DiscourseAi::Personas::Tools::Search do
|
||||
|
||||
# results will be expanded by semantic search, but it will find nothing
|
||||
results =
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||
search.invoke(&progress_blk)
|
||||
end
|
||||
|
||||
|
@ -154,7 +154,7 @@ RSpec.describe DiscourseAi::Utils::Search do
|
||||
|
||||
# Using a completely different search query, should still find via semantic search
|
||||
results =
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
||||
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||
described_class.perform_search(
|
||||
search_query: "totally different query",
|
||||
current_user: admin,
|
||||
|
@ -19,7 +19,7 @@ RSpec.describe DiscourseAi::Admin::AiFeaturesController do
|
||||
get "/admin/plugins/discourse-ai/ai-features.json"
|
||||
|
||||
expect(response.status).to eq(200)
|
||||
expect(response.parsed_body["ai_features"].count).to eq(8)
|
||||
expect(response.parsed_body["ai_features"].count).to eq(9)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -28,7 +28,7 @@ RSpec.describe "Admin AI features configuration", type: :system, js: true do
|
||||
ai_features_page.toggle_unconfigured
|
||||
|
||||
# this changes as we add more AI features
|
||||
expect(ai_features_page).to have_listed_modules(7)
|
||||
expect(ai_features_page).to have_listed_modules(8)
|
||||
end
|
||||
|
||||
it "lists the persona used for the corresponding AI feature" do
|
||||
|
Loading…
x
Reference in New Issue
Block a user