diff --git a/lib/summarization/fold_content.rb b/lib/summarization/fold_content.rb index 23cf225f..9df6d608 100644 --- a/lib/summarization/fold_content.rb +++ b/lib/summarization/fold_content.rb @@ -121,9 +121,9 @@ module DiscourseAi prompt = ( if summary.blank? - strategy.first_summary_prompt(iteration_content) + strategy.first_summary_prompt(iteration_content, tokenizer) else - strategy.summary_extension_prompt(summary, iteration_content) + strategy.summary_extension_prompt(summary, iteration_content, tokenizer) end ) diff --git a/lib/summarization/strategies/base.rb b/lib/summarization/strategies/base.rb index f9a5e182..57dfad1d 100644 --- a/lib/summarization/strategies/base.rb +++ b/lib/summarization/strategies/base.rb @@ -34,12 +34,12 @@ module DiscourseAi end # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary. - def summary_extension_prompt(_summary, _texts_to_summarize) + def summary_extension_prompt(_summary, _texts_to_summarize, _tokenizer) raise NotImplementedError end # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content. - def first_summary_prompt(_input) + def first_summary_prompt(_input, _tokenizer) raise NotImplementedError end diff --git a/lib/summarization/strategies/chat_messages.rb b/lib/summarization/strategies/chat_messages.rb index 1f3aad6d..a50fb108 100644 --- a/lib/summarization/strategies/chat_messages.rb +++ b/lib/summarization/strategies/chat_messages.rb @@ -23,7 +23,7 @@ module DiscourseAi .map { { id: _1, poster: _2, text: _3 } } end - def summary_extension_prompt(summary, contents) + def summary_extension_prompt(summary, contents, _tokenizer) input = contents .map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " } @@ -63,7 +63,7 @@ module DiscourseAi prompt end - def first_summary_prompt(contents) + def first_summary_prompt(contents, _tokenizer) content_title = target.name input = contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join diff --git a/lib/summarization/strategies/hot_topic_gists.rb b/lib/summarization/strategies/hot_topic_gists.rb index e0271be6..69e0269f 100644 --- a/lib/summarization/strategies/hot_topic_gists.rb +++ b/lib/summarization/strategies/hot_topic_gists.rb @@ -57,7 +57,7 @@ module DiscourseAi end end - def summary_extension_prompt(summary, contents) + def summary_extension_prompt(summary, contents, _tokenizer) statements = contents .to_a @@ -98,11 +98,22 @@ module DiscourseAi prompt end - def first_summary_prompt(contents) + def first_summary_prompt(contents, tokenizer) content_title = target.title statements = contents.to_a.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " } + op_statement = statements.shift.to_s + split_1, split_2 = + [op_statement[0, op_statement.size / 2], op_statement[(op_statement.size / 2)..-1]] + + truncation_length = 500 + + op_statement = [ + tokenizer.truncate(split_1, truncation_length), + tokenizer.truncate(split_2.reverse, truncation_length).reverse, + ].join(" ") + prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) You are an advanced summarization bot. Analyze a given conversation and produce a concise, single-sentence summary that conveys the main topic and current developments to someone with no prior context. @@ -127,7 +138,7 @@ module DiscourseAi The conversation began with the following statement: - #{statements.shift}\n + #{op_statement}\n TEXT if statements.present? diff --git a/lib/summarization/strategies/topic_summary.rb b/lib/summarization/strategies/topic_summary.rb index 2b126383..1cac7ee4 100644 --- a/lib/summarization/strategies/topic_summary.rb +++ b/lib/summarization/strategies/topic_summary.rb @@ -27,7 +27,7 @@ module DiscourseAi end end - def summary_extension_prompt(summary, contents) + def summary_extension_prompt(summary, contents, _tokenizer) resource_path = "#{Discourse.base_path}/t/-/#{target.id}" content_title = target.title input = @@ -70,7 +70,7 @@ module DiscourseAi prompt end - def first_summary_prompt(contents) + def first_summary_prompt(contents, _tokenizer) resource_path = "#{Discourse.base_path}/t/-/#{target.id}" content_title = target.title input =