FIX: Truncate OP for gists to help the model focus on the latest posts (#883)

This commit is contained in:
Roman Rizzi 2024-10-31 10:54:56 -03:00 committed by GitHub
parent 32fb023357
commit e8eed710e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 22 additions and 11 deletions

View File

@@ -121,9 +121,9 @@ module DiscourseAi
prompt = prompt =
( (
if summary.blank? if summary.blank?
strategy.first_summary_prompt(iteration_content) strategy.first_summary_prompt(iteration_content, tokenizer)
else else
strategy.summary_extension_prompt(summary, iteration_content) strategy.summary_extension_prompt(summary, iteration_content, tokenizer)
end end
) )

View File

@@ -34,12 +34,12 @@ module DiscourseAi
end end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary. # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM when extending an existing summary.
def summary_extension_prompt(_summary, _texts_to_summarize) def summary_extension_prompt(_summary, _texts_to_summarize, _tokenizer)
raise NotImplementedError raise NotImplementedError
end end
# @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content. # @returns { DiscourseAi::Completions::Prompt } - Prompt passed to the LLM for summarizing a single chunk of content.
def first_summary_prompt(_input) def first_summary_prompt(_input, _tokenizer)
raise NotImplementedError raise NotImplementedError
end end

View File

@@ -23,7 +23,7 @@ module DiscourseAi
.map { { id: _1, poster: _2, text: _3 } } .map { { id: _1, poster: _2, text: _3 } }
end end
def summary_extension_prompt(summary, contents) def summary_extension_prompt(summary, contents, _tokenizer)
input = input =
contents contents
.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " } .map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
@@ -63,7 +63,7 @@ module DiscourseAi
prompt prompt
end end
def first_summary_prompt(contents) def first_summary_prompt(contents, _tokenizer)
content_title = target.name content_title = target.name
input = input =
contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join contents.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }.join

View File

@@ -57,7 +57,7 @@ module DiscourseAi
end end
end end
def summary_extension_prompt(summary, contents) def summary_extension_prompt(summary, contents, _tokenizer)
statements = statements =
contents contents
.to_a .to_a
@@ -98,11 +98,22 @@ module DiscourseAi
prompt prompt
end end
def first_summary_prompt(contents) def first_summary_prompt(contents, tokenizer)
content_title = target.title content_title = target.title
statements = statements =
contents.to_a.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " } contents.to_a.map { |item| "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
op_statement = statements.shift.to_s
split_1, split_2 =
[op_statement[0, op_statement.size / 2], op_statement[(op_statement.size / 2)..-1]]
truncation_length = 500
op_statement = [
tokenizer.truncate(split_1, truncation_length),
tokenizer.truncate(split_2.reverse, truncation_length).reverse,
].join(" ")
prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip) prompt = DiscourseAi::Completions::Prompt.new(<<~TEXT.strip)
You are an advanced summarization bot. Analyze a given conversation and produce a concise, You are an advanced summarization bot. Analyze a given conversation and produce a concise,
single-sentence summary that conveys the main topic and current developments to someone with no prior context. single-sentence summary that conveys the main topic and current developments to someone with no prior context.
@@ -127,7 +138,7 @@ module DiscourseAi
The conversation began with the following statement: The conversation began with the following statement:
#{statements.shift}\n #{op_statement}\n
TEXT TEXT
if statements.present? if statements.present?

View File

@@ -27,7 +27,7 @@ module DiscourseAi
end end
end end
def summary_extension_prompt(summary, contents) def summary_extension_prompt(summary, contents, _tokenizer)
resource_path = "#{Discourse.base_path}/t/-/#{target.id}" resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
content_title = target.title content_title = target.title
input = input =
@@ -70,7 +70,7 @@ module DiscourseAi
prompt prompt
end end
def first_summary_prompt(contents) def first_summary_prompt(contents, _tokenizer)
resource_path = "#{Discourse.base_path}/t/-/#{target.id}" resource_path = "#{Discourse.base_path}/t/-/#{target.id}"
content_title = target.title content_title = target.title
input = input =