2023-04-04 10:24:09 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
  module Summarization
    # Produces a text summary of a Post, a Topic or a chat channel by sending
    # its content to the backend that serves the model named in
    # SiteSetting.ai_summarization_model.
    class SummaryGenerator
      # Character budget applied when the configured model has no entry in
      # MAX_LENGTHS, so an unlisted model is truncated instead of crashing.
      DEFAULT_MAX_LENGTH = 8192

      # Maximum number of characters of content sent to each known model.
      MAX_LENGTHS = {
        "bart-large-cnn-samsum" => 8192,
        "flan-t5-base-samsum" => 8192,
        "long-t5-tglobal-base-16384-book-summary" => 8192,
        "gpt-3.5-turbo" => 8192,
        "gpt-4" => 8192,
        "claude-v1" => 8192,
      }.freeze

      private_constant :DEFAULT_MAX_LENGTH, :MAX_LENGTHS

      # @param target [Post, Topic, ::Chat::Channel] the record to summarize
      # @param user the user handed to TopicView when +target+ is a Topic
      def initialize(target, user)
        @target = target
        @user = user
      end

      # Gathers the target's text, truncates it to the model's character
      # budget and forwards it to the matching provider.
      #
      # @param content_since [Numeric] look-back window in hours; only read
      #   when the target is a chat channel
      # @return [String, nil] the summary, or nil when the backend response
      #   does not contain one
      # @raise [RuntimeError] when the target is not a supported type
      def summarize!(content_since)
        content = get_content(content_since)

        # The method-name prefix comes from the fixed list returned by
        # #summarization_provider, never from caller input.
        send("#{summarization_provider}_summarization", content[0, max_length])
      end

      private

      attr_reader :target, :user

      # Maps the configured model to the prefix of the private
      # *_summarization method that serves it. Any model that is not an
      # OpenAI or Anthropic one is routed to the Discourse service.
      def summarization_provider
        case model
        in "gpt-3.5-turbo" | "gpt-4"
          "openai"
        in "claude-v1"
          "anthropic"
        else
          "discourse"
        end
      end

      # Builds the plain-text content to summarize for the current target.
      #
      # @param content_since [Numeric] hours to look back (chat channels only)
      # @return [String]
      # @raise [RuntimeError] when the target type is not supported
      def get_content(content_since)
        case target
        in Post
          target.raw
        in Topic
          topic_view_options = {
            filter: "summary",
            exclude_deleted_users: true,
            exclude_hidden: true,
            show_deleted: false,
          }

          TopicView.new(target, user, topic_view_options).posts.pluck(:raw).join("\n")
        in ::Chat::Channel
          # One "username: message" line per chat message, oldest first.
          target
            .chat_messages
            .where("chat_messages.created_at > ?", content_since.hours.ago)
            .includes(:user)
            .order(created_at: :asc)
            .pluck(:username_lower, :message)
            .map { |username, message| "#{username}: #{message}" }
            .join("\n")
        else
          raise "Can't find content to summarize"
        end
      end

      # Summarizes through the Discourse classification service.
      #
      # @param content [String]
      # @return [String, nil] the :summary_text entry of the response
      def discourse_summarization(content)
        endpoint = "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify"

        ::DiscourseAi::Inference::DiscourseClassifier.perform!(
          endpoint,
          model,
          content,
          SiteSetting.ai_summarization_discourse_service_api_key,
        ).dig(:summary_text)
      end

      # Summarizes through the OpenAI completions client.
      #
      # @param content [String]
      # @return [String, nil] the content of the first returned choice
      def openai_summarization(content)
        messages = [{ role: "system", content: <<~TEXT }]
          Summarize the following article:\n\n#{content}
        TEXT

        ::DiscourseAi::Inference::OpenAiCompletions.perform!(messages, model).dig(
          :choices,
          0,
          :message,
          :content,
        )
      end

      # Summarizes through the Anthropic completions client. The model is
      # asked to wrap its answer in <ai> tags, which are then extracted.
      #
      # @param content [String]
      # @return [String, nil] the text inside the <ai> element, or nil when
      #   the completion does not contain one
      def anthropic_summarization(content)
        prompt = <<~TEXT
          Human: Summarize the following article that is inside <input> tags.
          Please include only the summary inside <ai> tags.

          <input>#{content}</input>


          Assistant:
        TEXT

        response =
          ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt).dig(:completion)

        # `at` yields nil when no <ai> element is present; don't crash on it.
        Nokogiri::HTML5.fragment(response).at("ai")&.text
      end

      # @return [String] the model name configured in the site settings
      def model
        SiteSetting.ai_summarization_model
      end

      # @return [Integer] how many characters of content may be sent to the
      #   configured model
      def max_length
        MAX_LENGTHS.fetch(model, DEFAULT_MAX_LENGTH)
      end
    end
  end
end
|