129 lines
4.1 KiB
Ruby
129 lines
4.1 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
|
|
module Summarization
|
|
module Models
|
|
class Anthropic < Base
|
|
# Human-readable label for this summarization model: the fixed prefix
# "Anthropic's" followed by whatever `model` returns.
# NOTE(review): `model` is not defined in this class — presumably provided by Base.
def display_name
  vendor_prefix = "Anthropic's"
  "#{vendor_prefix} #{model}"
end
|
|
|
|
# Whether the `ai_anthropic_api_key` site setting holds a non-blank value.
def correctly_configured?
  api_key = SiteSetting.ai_anthropic_api_key
  api_key.present?
end
|
|
|
|
# Translated hint naming the single site setting (`ai_anthropic_api_key`)
# this model depends on.
def configuration_hint
  translation_key = "discourse_ai.summarization.configuration_hint"
  I18n.t(translation_key, count: 1, setting: "ai_anthropic_api_key")
end
|
|
|
|
# Asks the model to merge several independent summaries into one narrative.
#
# Each summary is wrapped in its own <input> tag (one per line) after the
# instructions, and the prompt ends with the "Assistant:" turn marker.
#
# @param summaries [Enumerable] summaries to merge; each element is interpolated
#   into the prompt as-is.
# @param on_partial_blk [Proc, nil] optional block handed through to `completion`.
# @return whatever `completion` returns for the assembled prompt.
def concatenate_summaries(summaries, &on_partial_blk)
  instructions = <<~TEXT
    Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
    Include only the summary inside <ai> tags.
  TEXT

  # map + join builds the tagged inputs in one pass, instead of re-allocating
  # an accumulator string on every iteration.
  tagged_inputs = summaries.map { |summary| "<input>#{summary}</input>\n" }.join

  completion("#{instructions}#{tagged_inputs}Assistant:\n", &on_partial_blk)
end
|
|
|
|
# Summarizes `contents` in a single request, cutting the text down to fit.
#
# Every item is rendered with `format_content_item`, the results are joined,
# and the joined text is truncated by the tokenizer to `available_tokens`
# before being wrapped in an <input> tag and sent to `completion`.
# NOTE(review): `format_content_item` and `available_tokens` are not defined in
# this class — presumably inherited from Base.
#
# @param contents [Enumerable] items to summarize.
# @param opts [Hash] prompt options consumed by `build_base_prompt`.
# @param on_partial_blk [Proc, nil] optional block handed through to `completion`.
# @return whatever `completion` returns for the assembled prompt.
def summarize_with_truncation(contents, opts, &on_partial_blk)
  base_prompt = build_base_prompt(opts)

  joined_text = contents.map { |item| format_content_item(item) }.join
  fitted_text = tokenizer.truncate(joined_text, available_tokens)

  prompt = "#{base_prompt}<input>#{fitted_text}</input>\nAssistant:\n"
  completion(prompt, &on_partial_blk)
end
|
|
|
|
# Summarizes text that fits in one chunk: same as `summarize_chunk`, but with
# `single_chunk: true` merged into a copy of the options.
#
# @param chunk_text [String] text to summarize.
# @param opts [Hash] prompt options; not mutated.
# @param on_partial_blk [Proc, nil] optional block handed through to `summarize_chunk`.
def summarize_single(chunk_text, opts, &on_partial_blk)
  single_chunk_opts = opts.merge(single_chunk: true)
  summarize_chunk(chunk_text, single_chunk_opts, &on_partial_blk)
end
|
|
|
|
private
|
|
|
|
# Summarizes one chunk: base prompt, then the chunk wrapped in an <input> tag,
# then the "Assistant:" turn marker, all sent to `completion`.
#
# @param chunk_text [String] text to summarize.
# @param opts [Hash] prompt options consumed by `build_base_prompt`.
# @param on_partial_blk [Proc, nil] optional block handed through to `completion`.
def summarize_chunk(chunk_text, opts, &on_partial_blk)
  base_prompt = build_base_prompt(opts)
  prompt = "#{base_prompt}<input>#{chunk_text}</input>\nAssistant:\n"

  completion(prompt, &on_partial_blk)
end
|
|
|
|
# Assembles the instruction part of the summarization prompt, one
# newline-terminated line at a time.
#
# Options read from `opts`:
#   :single_chunk   - when truthy, asks for "a cohesive narrative" and omits the
#                     400-word limit.
#   :resource_path  - when present, adds two lines explaining how to build post links.
#   :content_title  - when present, adds a line stating the discussion title.
#
# @param opts [Hash] prompt options (see above).
# @return [String] the prompt text, ending in a newline.
def build_base_prompt(opts)
  single_chunk = opts[:single_chunk]
  resource_path = opts[:resource_path]
  content_title = opts[:content_title]

  opening_instruction =
    if single_chunk
      "Summarize the following forum discussion inside the given <input> tag, creating a cohesive narrative."
    else
      "Summarize the following forum discussion inside the given <input> tag."
    end

  prompt_lines = [
    "Human: #{opening_instruction}\n",
    "Try to keep the summary in the same language as the forum discussion.\n",
    "Format the response, including links, using markdown.\n",
  ]

  if resource_path
    prompt_lines << "Try generating links as well the format is #{resource_path}/POST_ID\n"
    prompt_lines << "For example, a link to the 3rd post in the topic would be [post 3](#{resource_path}/3)\n"
  end

  prompt_lines << "Wrap the whole the summary inside <ai> tags.\n"
  prompt_lines << "The discussion title is: #{content_title}.\n" if content_title
  prompt_lines << "Don't use more than 400 words.\n" unless single_chunk

  prompt_lines.join
end
|
|
|
|
# Sends `prompt` to AnthropicCompletions.perform! and extracts the summary.
#
# The prompt asks the model to wrap the summary in <ai> tags, but instructing
# it to reply only with the summary seems impossible, so any text that comes
# before the opening tag has to be discarded.
#
# @param prompt [String] the full prompt to send.
# @param on_partial_blk [Proc, nil] optional callback. When given, the request
#   is made with a partial-response handler and the callback receives every
#   piece of text that arrives after the opening <ai> tag.
# @return the text of the <ai> element in the response, or the response itself
#   when no such element is found or its text is blank.
def completion(prompt, &on_partial_blk)
  opening_tag = "<ai>"

  if on_partial_blk
    pre_tag_partial = +""
    tag_seen = false

    # Kept as a plain Proc rather than a lambda so arity is not enforced.
    # NOTE(review): confirm what perform! yields before tightening this.
    on_partial_read =
      Proc.new do |partial|
        if tag_seen
          # NOTE(review): the closing </ai> tag is forwarded unchanged.
          on_partial_blk.call(partial[:completion])
        else
          # `to_s` keeps a partial without a :completion value from raising.
          pre_tag_partial << partial[:completion].to_s
          _preamble, tag, summary_start = pre_tag_partial.partition(opening_tag)

          unless tag.empty?
            tag_seen = true
            # The partial that completes the tag may already carry the start of
            # the summary; forward it instead of dropping it.
            on_partial_blk.call(summary_start) unless summary_start.empty?
          end
        end
      end

    response =
      ::DiscourseAi::Inference::AnthropicCompletions.perform!(
        prompt,
        model,
        &on_partial_read
      )
  else
    response =
      ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
        :completion,
      )
  end

  Nokogiri::HTML5.fragment(response).at("ai")&.text.presence || response
end
|
|
|
|
# Tokenizer class used by this model; `summarize_with_truncation` calls
# `truncate` on it to fit the text into the available tokens.
def tokenizer
  DiscourseAi::Tokenizer::AnthropicTokenizer
end
|
|
|
|
# Private reader for @max_tokens (it sits below the `private` marker).
# NOTE(review): @max_tokens is never assigned in this class — presumably set by Base; confirm.
attr_reader :max_tokens
|
|
end
|
|
end
|
|
end
|
|
end
|