From e8b2a200c1125e903fb038e9b7ec74f4007f0dd0 Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 20 Mar 2024 16:33:05 +1100 Subject: [PATCH] FIX: prompt engineering for summary prompt (#539) Prompt was steering incorrectly into the wrong language. New prompt attempts to be more concise and clear and provides better guidance about size of summary and how to format it. --- lib/automation/report_runner.rb | 5 +-- lib/summarization/strategies/fold_content.rb | 40 +++++++++----------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/lib/automation/report_runner.rb b/lib/automation/report_runner.rb index fe53adf1..466f9f7c 100644 --- a/lib/automation/report_runner.rb +++ b/lib/automation/report_runner.rb @@ -140,10 +140,7 @@ Follow the provided writing composition instructions carefully and precisely ste prompt = DiscourseAi::Completions::Prompt.new( system_prompt, - messages: [ - { type: :user, content: input }, - { type: :model, content: "Here is the report I generated for you" }, - ], + messages: [{ type: :user, content: input }], ) result = +"" diff --git a/lib/summarization/strategies/fold_content.rb b/lib/summarization/strategies/fold_content.rb index 47f7b62b..d064c325 100644 --- a/lib/summarization/strategies/fold_content.rb +++ b/lib/summarization/strategies/fold_content.rb @@ -133,20 +133,23 @@ module DiscourseAi end def summarization_prompt(input, opts) - insts = <<~TEXT - You are a summarization bot that effectively summarize any text - Your reply MUST BE a summarized version of the posts I provided, using the first language you detect. - I'm NOT interested in anything other than the summary, don't include additional text or comments. - You understand and generate Discourse forum Markdown. - You format the response, including links, using Markdown. - Your summaries are always a cohesive narrative in the form of one or multiple paragraphs. + insts = +<<~TEXT + You are an advanced summarization bot that generates concise, coherent summaries of provided text. + + - Only include the summary, without any additional commentary. + - You understand and generate Discourse forum Markdown; including links, _italics_, **bold**. + - Maintain the original language of the text being summarized. + - Aim for summaries to be 400 words or less. TEXT - insts += <<~TEXT if opts[:resource_path] - Each post is formatted as ") " - Try generating links as well the format is #{opts[:resource_path]}/ - For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3) + insts << <<~TEXT if opts[:resource_path] + - Each post is formatted as ") " + - Cite specific noteworthy posts using the format [NAME](#{opts[:resource_path]}/POST_NUMBER) + - Example: link to the 3rd post by sam: [sam](#{opts[:resource_path]}/3) + - Example: link to the 6th post by jane: [agreed with](#{opts[:resource_path]}/6) + - Example: link to the 13th post by joe: [#13](#{opts[:resource_path]}/13) + - When formatting usernames either use @USERNMAE OR [USERNAME](#{opts[:resource_path]}/POST_NUMBER) TEXT prompt = DiscourseAi::Completions::Prompt.new(insts.strip) @@ -154,23 +157,14 @@ module DiscourseAi if opts[:resource_path] prompt.push( type: :user, - content: "1) user1 said: I love Mondays 2) user2 said: I hate Mondays", + content: + "Here are the posts inside XML tags:\n\n1) user1 said: I love Mondays 2) user2 said: I hate Mondays\n\nGenerate a concise, coherent summary of the text above maintaining the original language.", ) prompt.push( type: :model, content: "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) hates them, while [user2](#{opts[:resource_path]}/2) loves them.", ) - - prompt.push( - type: :user, - content: "3) usuario1: Amo los lunes 6) usuario2: Odio los lunes", - ) - prompt.push( - type: :model, - content: - "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/2) los odia.", - ) end prompt.push(type: :user, content: <<~TEXT.strip) @@ -180,6 +174,8 @@ module DiscourseAi #{input} + + Generate a concise, coherent summary of the text above maintaining the original language. TEXT prompt