From 4b42c0981486aa09cd1f92bfaa9df4a7a0f0c650 Mon Sep 17 00:00:00 2001 From: Rafael dos Santos Silva Date: Tue, 5 Sep 2023 16:11:07 -0300 Subject: [PATCH] FEATURE: Tweak HyDE prompts for better grounding in forum subject and limit response size (#200) * FEATURE: Tweak HyDE prompts for better grounding in forum subject and limit response size * fix test * lint --- lib/modules/embeddings/hyde_generators/anthropic.rb | 8 +++++--- lib/modules/embeddings/hyde_generators/base.rb | 11 +++++++++++ lib/modules/embeddings/hyde_generators/llama2.rb | 7 ++++--- lib/modules/embeddings/hyde_generators/llama2_ftos.rb | 7 ++++--- lib/modules/embeddings/hyde_generators/openai.rb | 5 +++-- spec/lib/modules/embeddings/semantic_search_spec.rb | 8 +++++++- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/lib/modules/embeddings/hyde_generators/anthropic.rb b/lib/modules/embeddings/hyde_generators/anthropic.rb index 693ea0dc..72d36dac 100644 --- a/lib/modules/embeddings/hyde_generators/anthropic.rb +++ b/lib/modules/embeddings/hyde_generators/anthropic.rb @@ -6,10 +6,11 @@ module DiscourseAi class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base def prompt(search_term) <<~TEXT - Given a search term given between tags, generate a forum post about the search term. - Respond with the generated post between tags. - + Given a search term given between tags, generate a forum post about a given subject. + #{basic_prompt_instruction} #{search_term} + + Respond with the generated post between tags. TEXT end @@ -22,6 +23,7 @@ module DiscourseAi ::DiscourseAi::Inference::AnthropicCompletions.perform!( prompt(query), SiteSetting.ai_embeddings_semantic_search_hyde_model, + max_tokens: 400, ).dig(:completion) Nokogiri::HTML5.fragment(response).at("ai").text diff --git a/lib/modules/embeddings/hyde_generators/base.rb b/lib/modules/embeddings/hyde_generators/base.rb index 8514b414..be291b03 100644 --- a/lib/modules/embeddings/hyde_generators/base.rb +++ b/lib/modules/embeddings/hyde_generators/base.rb @@ -11,6 +11,17 @@ module DiscourseAi ) end end + + def basic_prompt_instruction + <<~TEXT + Act as a content writer for a forum. + The forum description is as follows: + #{SiteSetting.title} + #{SiteSetting.site_description} + + Given the forum description write a forum post about the following subject: + TEXT + end end end end diff --git a/lib/modules/embeddings/hyde_generators/llama2.rb b/lib/modules/embeddings/hyde_generators/llama2.rb index 6a72bb8c..86ca977a 100644 --- a/lib/modules/embeddings/hyde_generators/llama2.rb +++ b/lib/modules/embeddings/hyde_generators/llama2.rb @@ -8,12 +8,13 @@ module DiscourseAi <<~TEXT [INST] <> You are a helpful bot - You create forum posts about a given topic + You create forum posts about a given subject <> - Topic: #{search_term} + #{basic_prompt_instruction} + #{search_term} [/INST] - Here is a forum post about the above topic: + Here is a forum post about the above subject: TEXT end diff --git a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb b/lib/modules/embeddings/hyde_generators/llama2_ftos.rb index fd4245ba..e5222e78 100644 --- a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb +++ b/lib/modules/embeddings/hyde_generators/llama2_ftos.rb @@ -8,13 +8,14 @@ module DiscourseAi <<~TEXT ### System: You are a helpful bot - You create forum posts about a given topic + You create forum posts about a given subject ### User: - Topic: #{search_term} + #{basic_prompt_instruction} + #{search_term} ### Assistant: - Here is a forum post about the above topic: + Here is a forum post about the above subject: TEXT end diff --git a/lib/modules/embeddings/hyde_generators/openai.rb b/lib/modules/embeddings/hyde_generators/openai.rb index f44ca8fe..75ba2919 100644 --- a/lib/modules/embeddings/hyde_generators/openai.rb +++ b/lib/modules/embeddings/hyde_generators/openai.rb @@ -8,9 +8,9 @@ module DiscourseAi [ { role: "system", - content: "You are a helpful bot. You create forum posts about a given topic.", + content: "You are a helpful bot. You create forum posts about a given subject.", }, - { role: "user", content: "Create a forum post about the topic: #{search_term}" }, + { role: "user", content: "#{basic_prompt_instruction}\n#{search_term}" }, ] end @@ -22,6 +22,7 @@ module DiscourseAi ::DiscourseAi::Inference::OpenAiCompletions.perform!( prompt(query), SiteSetting.ai_embeddings_semantic_search_hyde_model, + max_tokens: 400, ).dig(:choices, 0, :message, :content) end end diff --git a/spec/lib/modules/embeddings/semantic_search_spec.rb b/spec/lib/modules/embeddings/semantic_search_spec.rb index 49826dd0..fa05bda7 100644 --- a/spec/lib/modules/embeddings/semantic_search_spec.rb +++ b/spec/lib/modules/embeddings/semantic_search_spec.rb @@ -17,7 +17,13 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com" prompt = DiscourseAi::Embeddings::HydeGenerators::OpenAi.new.prompt(query) - OpenAiCompletionsInferenceStubs.stub_response(prompt, hypothetical_post) + OpenAiCompletionsInferenceStubs.stub_response( + prompt, + hypothetical_post, + req_opts: { + max_tokens: 400, + }, + ) hyde_embedding = [0.049382, 0.9999] EmbeddingsGenerationStubs.discourse_service(