mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-06 14:32:14 +00:00
FEATURE: Use personas for generating hypothetical posts (#1482)
* FEATURE: Use personas for generating hypothetical posts * Update prompt
This commit is contained in:
parent
40fa527633
commit
75fb37144f
@ -222,6 +222,10 @@ en:
|
|||||||
name: "Search"
|
name: "Search"
|
||||||
description: "Enhances search experience by providing AI-generated answers to queries"
|
description: "Enhances search experience by providing AI-generated answers to queries"
|
||||||
discoveries: "Discoveries"
|
discoveries: "Discoveries"
|
||||||
|
embeddings:
|
||||||
|
name: "Embeddings"
|
||||||
|
description: "Powers features like Related Topics and AI Search by generating semantic representations of text"
|
||||||
|
hyde: "HyDE"
|
||||||
discord:
|
discord:
|
||||||
name: "Discord integration"
|
name: "Discord integration"
|
||||||
description: "Adds the ability to search Discord channels"
|
description: "Adds the ability to search Discord channels"
|
||||||
|
@ -394,6 +394,9 @@ en:
|
|||||||
spam_detector:
|
spam_detector:
|
||||||
name: "Spam detector"
|
name: "Spam detector"
|
||||||
description: "Default persona powering our Spam detection feature"
|
description: "Default persona powering our Spam detection feature"
|
||||||
|
content_creator:
|
||||||
|
name: "Content creator"
|
||||||
|
description: "Default persona powering HyDE search"
|
||||||
|
|
||||||
topic_not_found: "Summary unavailable, topic not found!"
|
topic_not_found: "Summary unavailable, topic not found!"
|
||||||
summarizing: "Summarizing topic"
|
summarizing: "Summarizing topic"
|
||||||
|
@ -222,21 +222,30 @@ discourse_ai:
|
|||||||
default: false
|
default: false
|
||||||
client: true
|
client: true
|
||||||
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
|
validator: "DiscourseAi::Configuration::EmbeddingsModuleValidator"
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_selected_model:
|
ai_embeddings_selected_model:
|
||||||
type: enum
|
type: enum
|
||||||
default: ""
|
default: ""
|
||||||
allow_any: false
|
allow_any: false
|
||||||
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
|
enum: "DiscourseAi::Configuration::EmbeddingDefsEnumerator"
|
||||||
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
|
validator: "DiscourseAi::Configuration::EmbeddingDefsValidator"
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_per_post_enabled:
|
ai_embeddings_per_post_enabled:
|
||||||
default: false
|
default: false
|
||||||
hidden: true
|
hidden: true
|
||||||
ai_embeddings_generate_for_pms: false
|
ai_embeddings_generate_for_pms:
|
||||||
|
default: false
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_semantic_related_topics_enabled:
|
ai_embeddings_semantic_related_topics_enabled:
|
||||||
default: false
|
default: false
|
||||||
client: true
|
client: true
|
||||||
ai_embeddings_semantic_related_topics: 5
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_semantic_related_include_closed_topics: true
|
ai_embeddings_semantic_related_topics:
|
||||||
|
default: 5
|
||||||
|
area: "ai-features/embeddings"
|
||||||
|
ai_embeddings_semantic_related_include_closed_topics:
|
||||||
|
default: true
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_backfill_batch_size:
|
ai_embeddings_backfill_batch_size:
|
||||||
default: 250
|
default: 250
|
||||||
hidden: true
|
hidden: true
|
||||||
@ -244,12 +253,14 @@ discourse_ai:
|
|||||||
default: false
|
default: false
|
||||||
client: true
|
client: true
|
||||||
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
|
validator: "DiscourseAi::Configuration::LlmDependencyValidator"
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_semantic_search_hyde_model:
|
ai_embeddings_semantic_search_hyde_model:
|
||||||
default: ""
|
default: ""
|
||||||
type: enum
|
type: enum
|
||||||
allow_any: false
|
allow_any: false
|
||||||
enum: "DiscourseAi::Configuration::LlmEnumerator"
|
enum: "DiscourseAi::Configuration::LlmEnumerator"
|
||||||
validator: "DiscourseAi::Configuration::LlmValidator"
|
validator: "DiscourseAi::Configuration::LlmValidator"
|
||||||
|
area: "ai-features/embeddings"
|
||||||
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
|
ai_embeddings_semantic_search_hyde_model_allowed_seeded_models:
|
||||||
default: ""
|
default: ""
|
||||||
hidden: true
|
hidden: true
|
||||||
@ -259,6 +270,12 @@ discourse_ai:
|
|||||||
default: false
|
default: false
|
||||||
client: true
|
client: true
|
||||||
hidden: true
|
hidden: true
|
||||||
|
area: "ai-features/embeddings"
|
||||||
|
ai_embeddings_semantic_search_hyde_persona:
|
||||||
|
default: "-32"
|
||||||
|
type: enum
|
||||||
|
enum: "DiscourseAi::Configuration::PersonaEnumerator"
|
||||||
|
area: "ai-features/embeddings"
|
||||||
|
|
||||||
ai_embeddings_discourse_service_api_endpoint:
|
ai_embeddings_discourse_service_api_endpoint:
|
||||||
default: ""
|
default: ""
|
||||||
|
@ -36,6 +36,8 @@ DiscourseAi::Personas::Persona.system_personas.each do |persona_class, id|
|
|||||||
setting_name = "ai_helper_custom_prompts_allowed_groups"
|
setting_name = "ai_helper_custom_prompts_allowed_groups"
|
||||||
default_groups = [Group::AUTO_GROUPS[:staff]]
|
default_groups = [Group::AUTO_GROUPS[:staff]]
|
||||||
persona.allowed_group_ids = from_setting(setting_name) || default_groups
|
persona.allowed_group_ids = from_setting(setting_name) || default_groups
|
||||||
|
elsif persona_class == DiscourseAi::Personas::ContentCreator
|
||||||
|
persona.allowed_group_ids = [Group::AUTO_GROUPS[:everyone]]
|
||||||
else
|
else
|
||||||
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
|
persona.allowed_group_ids = [Group::AUTO_GROUPS[:trust_level_0]]
|
||||||
end
|
end
|
||||||
|
@ -144,6 +144,17 @@ module DiscourseAi
|
|||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def embeddings_features
|
||||||
|
feature_cache[:embeddings] ||= [
|
||||||
|
new(
|
||||||
|
"hyde",
|
||||||
|
"ai_embeddings_semantic_search_hyde_persona",
|
||||||
|
DiscourseAi::Configuration::Module::EMBEDDINGS_ID,
|
||||||
|
DiscourseAi::Configuration::Module::EMBEDDINGS,
|
||||||
|
),
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
def lookup_bot_persona_ids
|
def lookup_bot_persona_ids
|
||||||
AiPersona
|
AiPersona
|
||||||
.where(enabled: true)
|
.where(enabled: true)
|
||||||
@ -196,6 +207,7 @@ module DiscourseAi
|
|||||||
translation_features,
|
translation_features,
|
||||||
bot_features,
|
bot_features,
|
||||||
spam_features,
|
spam_features,
|
||||||
|
embeddings_features,
|
||||||
].flatten
|
].flatten
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -241,6 +253,8 @@ module DiscourseAi
|
|||||||
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
|
DiscourseAi::AiHelper::Assistant.find_ai_helper_model(name, persona_klass)
|
||||||
when DiscourseAi::Configuration::Module::TRANSLATION
|
when DiscourseAi::Configuration::Module::TRANSLATION
|
||||||
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
|
DiscourseAi::Translation::BaseTranslator.preferred_llm_model(persona_klass)
|
||||||
|
when DiscourseAi::Configuration::Module::EMBEDDINGS
|
||||||
|
DiscourseAi::Embeddings::SemanticSearch.new(nil).find_ai_hyde_model(persona_klass)
|
||||||
end
|
end
|
||||||
|
|
||||||
if llm_model.blank? && persona.default_llm_id
|
if llm_model.blank? && persona.default_llm_id
|
||||||
|
@ -11,8 +11,19 @@ module DiscourseAi
|
|||||||
TRANSLATION = "translation"
|
TRANSLATION = "translation"
|
||||||
BOT = "bot"
|
BOT = "bot"
|
||||||
SPAM = "spam"
|
SPAM = "spam"
|
||||||
|
EMBEDDINGS = "embeddings"
|
||||||
|
|
||||||
NAMES = [SUMMARIZATION, SEARCH, DISCORD, INFERENCE, AI_HELPER, TRANSLATION, BOT, SPAM].freeze
|
NAMES = [
|
||||||
|
SUMMARIZATION,
|
||||||
|
SEARCH,
|
||||||
|
DISCORD,
|
||||||
|
INFERENCE,
|
||||||
|
AI_HELPER,
|
||||||
|
TRANSLATION,
|
||||||
|
BOT,
|
||||||
|
SPAM,
|
||||||
|
EMBEDDINGS,
|
||||||
|
].freeze
|
||||||
|
|
||||||
SUMMARIZATION_ID = 1
|
SUMMARIZATION_ID = 1
|
||||||
SEARCH_ID = 2
|
SEARCH_ID = 2
|
||||||
@ -22,6 +33,7 @@ module DiscourseAi
|
|||||||
TRANSLATION_ID = 6
|
TRANSLATION_ID = 6
|
||||||
BOT_ID = 7
|
BOT_ID = 7
|
||||||
SPAM_ID = 8
|
SPAM_ID = 8
|
||||||
|
EMBEDDINGS_ID = 9
|
||||||
|
|
||||||
class << self
|
class << self
|
||||||
def all
|
def all
|
||||||
@ -75,6 +87,13 @@ module DiscourseAi
|
|||||||
enabled_by_setting: "ai_spam_detection_enabled",
|
enabled_by_setting: "ai_spam_detection_enabled",
|
||||||
features: DiscourseAi::Configuration::Feature.spam_features,
|
features: DiscourseAi::Configuration::Feature.spam_features,
|
||||||
),
|
),
|
||||||
|
new(
|
||||||
|
EMBEDDINGS_ID,
|
||||||
|
EMBEDDINGS,
|
||||||
|
enabled_by_setting: "ai_embeddings_enabled",
|
||||||
|
features: DiscourseAi::Configuration::Feature.embeddings_features,
|
||||||
|
extra_check: -> { SiteSetting.ai_embeddings_semantic_search_enabled },
|
||||||
|
),
|
||||||
]
|
]
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -78,7 +78,9 @@ module DiscourseAi
|
|||||||
return Post.none
|
return Post.none
|
||||||
end
|
end
|
||||||
|
|
||||||
search_embedding = hyde ? hyde_embedding(search_term) : embedding(search_term)
|
search_embedding = nil
|
||||||
|
search_embedding = hyde_embedding(search_term) if hyde
|
||||||
|
search_embedding = embedding(search_term) if search_embedding.blank?
|
||||||
|
|
||||||
over_selection_limit = limit * OVER_SELECTION_FACTOR
|
over_selection_limit = limit * OVER_SELECTION_FACTOR
|
||||||
|
|
||||||
@ -176,26 +178,47 @@ module DiscourseAi
|
|||||||
end
|
end
|
||||||
|
|
||||||
def hypothetical_post_from(search_term)
|
def hypothetical_post_from(search_term)
|
||||||
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
|
context =
|
||||||
You are a content creator for a forum. The forum description is as follows:
|
DiscourseAi::Personas::BotContext.new(
|
||||||
#{SiteSetting.title}
|
user: @guardian.user,
|
||||||
#{SiteSetting.site_description}
|
skip_tool_details: true,
|
||||||
|
feature_name: "semantic_search_hyde",
|
||||||
|
messages: [{ type: :user, content: search_term }],
|
||||||
|
)
|
||||||
|
|
||||||
Put the forum post between <ai></ai> tags.
|
bot = build_bot(@guardian.user)
|
||||||
TEXT
|
return nil if bot.nil?
|
||||||
|
|
||||||
prompt.push(type: :user, content: <<~TEXT.strip)
|
structured_output = nil
|
||||||
Using this description, write a forum post about the subject inside the <input></input> XML tags:
|
raw_response = +""
|
||||||
|
hyde_schema_key = bot.persona.response_format&.first.to_h
|
||||||
|
|
||||||
<input>#{search_term}</input>
|
buffer_blk =
|
||||||
TEXT
|
Proc.new do |partial, _, type|
|
||||||
|
if type == :structured_output
|
||||||
|
structured_output = partial
|
||||||
|
elsif type.blank?
|
||||||
|
# Assume response is a regular completion.
|
||||||
|
raw_response << partial
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
llm_response =
|
bot.reply(context, &buffer_blk)
|
||||||
DiscourseAi::Completions::Llm.proxy(
|
|
||||||
SiteSetting.ai_embeddings_semantic_search_hyde_model,
|
|
||||||
).generate(prompt, user: @guardian.user, feature_name: "semantic_search_hyde")
|
|
||||||
|
|
||||||
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text.presence || llm_response
|
structured_output&.read_buffered_property(hyde_schema_key["key"]&.to_sym) || raw_response
|
||||||
|
end
|
||||||
|
|
||||||
|
# Priorities are:
|
||||||
|
# 1. Persona's default LLM
|
||||||
|
# 2. `ai_embeddings_semantic_search_hyde_model` setting.
|
||||||
|
def find_ai_hyde_model(persona_klass)
|
||||||
|
model_id =
|
||||||
|
persona_klass.default_llm_id ||
|
||||||
|
SiteSetting.ai_embeddings_semantic_search_hyde_model&.split(":")&.last
|
||||||
|
|
||||||
|
return if model_id.blank?
|
||||||
|
|
||||||
|
LlmModel.find_by(id: model_id)
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
@ -209,6 +232,18 @@ module DiscourseAi
|
|||||||
def build_embedding_key(digest, hyde_model, embedding_model)
|
def build_embedding_key(digest, hyde_model, embedding_model)
|
||||||
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
|
"#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def build_bot(user)
|
||||||
|
persona_id = SiteSetting.ai_embeddings_semantic_search_hyde_persona
|
||||||
|
|
||||||
|
persona_klass = AiPersona.find_by(id: persona_id)&.class_instance
|
||||||
|
return if persona_klass.nil?
|
||||||
|
|
||||||
|
llm_model = find_ai_hyde_model(persona_klass)
|
||||||
|
return if llm_model.nil?
|
||||||
|
|
||||||
|
DiscourseAi::Personas::Bot.as(user, persona: persona_klass.new, model: llm_model)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -171,7 +171,7 @@ module DiscourseAi
|
|||||||
text = +""
|
text = +""
|
||||||
result.each { |item| text << item if item.is_a?(String) }
|
result.each { |item| text << item if item.is_a?(String) }
|
||||||
end
|
end
|
||||||
raw_context << [text, bot_user.username]
|
raw_context << [text, bot_user&.username]
|
||||||
end
|
end
|
||||||
|
|
||||||
total_completions += 1
|
total_completions += 1
|
||||||
|
33
lib/personas/content_creator.rb
Normal file
33
lib/personas/content_creator.rb
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module DiscourseAi
|
||||||
|
module Personas
|
||||||
|
class ContentCreator < Persona
|
||||||
|
def self.default_enabled
|
||||||
|
false
|
||||||
|
end
|
||||||
|
|
||||||
|
def system_prompt
|
||||||
|
<<~PROMPT.strip
|
||||||
|
You are a content creator for a forum. The forum title and description is as follows:
|
||||||
|
* Title: {site_title}
|
||||||
|
* Description: {site_description}
|
||||||
|
|
||||||
|
You will receive a couple of keywords and must create a post about the keywords, keeping the previous information in mind.
|
||||||
|
|
||||||
|
Format your response as a JSON object with a single key named "output", which has the created content.
|
||||||
|
Your output should be in the following format:
|
||||||
|
<output>
|
||||||
|
{"output": "xx"}
|
||||||
|
</output>
|
||||||
|
|
||||||
|
Where "xx" is replaced by the content.
|
||||||
|
PROMPT
|
||||||
|
end
|
||||||
|
|
||||||
|
def response_format
|
||||||
|
[{ "key" => "output", "type" => "string" }]
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
@ -69,6 +69,7 @@ module DiscourseAi
|
|||||||
TopicTitleTranslator => -29,
|
TopicTitleTranslator => -29,
|
||||||
ShortTextTranslator => -30,
|
ShortTextTranslator => -30,
|
||||||
SpamDetector => -31,
|
SpamDetector => -31,
|
||||||
|
ContentCreator => -32,
|
||||||
}
|
}
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
|
|||||||
end
|
end
|
||||||
|
|
||||||
def trigger_search(query)
|
def trigger_search(query)
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{hypothetical_post}</ai>"]) do
|
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
|
||||||
subject.search_for_topics(query)
|
subject.search_for_topics(query)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@ -123,9 +123,9 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
|
|||||||
context "while searching as anon" do
|
context "while searching as anon" do
|
||||||
it "returns an empty list" do
|
it "returns an empty list" do
|
||||||
posts =
|
posts =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(
|
DiscourseAi::Completions::Llm.with_prepared_responses([hypothetical_post]) do
|
||||||
["<ai>#{hypothetical_post}</ai>"],
|
described_class.new(Guardian.new(nil)).search_for_topics(query)
|
||||||
) { described_class.new(Guardian.new(nil)).search_for_topics(query) }
|
end
|
||||||
|
|
||||||
expect(posts).to be_empty
|
expect(posts).to be_empty
|
||||||
end
|
end
|
||||||
|
@ -125,7 +125,7 @@ RSpec.describe DiscourseAi::Personas::Tools::Search do
|
|||||||
DiscourseAi::Embeddings::Schema.for(Topic).store(post1.topic, hyde_embedding, "digest")
|
DiscourseAi::Embeddings::Schema.for(Topic).store(post1.topic, hyde_embedding, "digest")
|
||||||
|
|
||||||
results =
|
results =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||||
search.invoke(&progress_blk)
|
search.invoke(&progress_blk)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -144,7 +144,7 @@ RSpec.describe DiscourseAi::Personas::Tools::Search do
|
|||||||
|
|
||||||
# results will be expanded by semantic search, but it will find nothing
|
# results will be expanded by semantic search, but it will find nothing
|
||||||
results =
|
results =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||||
search.invoke(&progress_blk)
|
search.invoke(&progress_blk)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -154,7 +154,7 @@ RSpec.describe DiscourseAi::Utils::Search do
|
|||||||
|
|
||||||
# Using a completely different search query, should still find via semantic search
|
# Using a completely different search query, should still find via semantic search
|
||||||
results =
|
results =
|
||||||
DiscourseAi::Completions::Llm.with_prepared_responses(["<ai>#{query}</ai>"]) do
|
DiscourseAi::Completions::Llm.with_prepared_responses([query]) do
|
||||||
described_class.perform_search(
|
described_class.perform_search(
|
||||||
search_query: "totally different query",
|
search_query: "totally different query",
|
||||||
current_user: admin,
|
current_user: admin,
|
||||||
|
@ -19,7 +19,7 @@ RSpec.describe DiscourseAi::Admin::AiFeaturesController do
|
|||||||
get "/admin/plugins/discourse-ai/ai-features.json"
|
get "/admin/plugins/discourse-ai/ai-features.json"
|
||||||
|
|
||||||
expect(response.status).to eq(200)
|
expect(response.status).to eq(200)
|
||||||
expect(response.parsed_body["ai_features"].count).to eq(8)
|
expect(response.parsed_body["ai_features"].count).to eq(9)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -28,7 +28,7 @@ RSpec.describe "Admin AI features configuration", type: :system, js: true do
|
|||||||
ai_features_page.toggle_unconfigured
|
ai_features_page.toggle_unconfigured
|
||||||
|
|
||||||
# this changes as we add more AI features
|
# this changes as we add more AI features
|
||||||
expect(ai_features_page).to have_listed_modules(7)
|
expect(ai_features_page).to have_listed_modules(8)
|
||||||
end
|
end
|
||||||
|
|
||||||
it "lists the persona used for the corresponding AI feature" do
|
it "lists the persona used for the corresponding AI feature" do
|
||||||
|
Loading…
x
Reference in New Issue
Block a user