DEV: Remove non-LLM-based summarization strategies. (#589)
We removed these services from our hosting two weeks ago. It's safe to assume everyone has moved to other LLM-based options.
parent 66804bc13c
commit 0c4069ab3f
```diff
@@ -61,16 +61,6 @@ module DiscourseAi
       foldable_models.each do |model|
         plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
       end
-
-      truncatable_models = [
-        Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
-        Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
-        Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
-      ]
-
-      truncatable_models.each do |model|
-        plugin.register_summarization_strategy(Strategies::TruncateContent.new(model))
-      end
     end
   end
 end
```
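The retained context lines above show the only registration path that survives: each remaining model is paired with the LLM-based FoldContent strategy. A minimal sketch of that loop, with a hypothetical `foldable_models` entry standing in for the real list, which is defined earlier in the file and not visible in this hunk:

```ruby
# Sketch only. Models::ExampleLlm and its arguments are placeholders; the
# real foldable_models list sits above this hunk and is not part of the diff.
foldable_models = [
  Models::ExampleLlm.new("example-llm-model", max_tokens: 8_192), # hypothetical
]

foldable_models.each do |model|
  # Same call as the retained context lines: every surviving model is
  # registered with the LLM-based FoldContent strategy.
  plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
end
```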
```diff
@@ -1,33 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Summarization
-    module Models
-      class Discourse < Base
-        def display_name
-          "Discourse AI's #{model}"
-        end
-
-        def correctly_configured?
-          SiteSetting.ai_summarization_discourse_service_api_endpoint.present? &&
-            SiteSetting.ai_summarization_discourse_service_api_key.present?
-        end
-
-        def configuration_hint
-          I18n.t(
-            "discourse_ai.summarization.configuration_hint",
-            count: 2,
-            settings:
-              "ai_summarization_discourse_service_api_endpoint, ai_summarization_discourse_service_api_key",
-          )
-        end
-
-        private
-
-        def reserved_tokens
-          0
-        end
-      end
-    end
-  end
-end
```
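The hunk above deletes the `Models::Discourse` wrapper for the hosted classification service. Other `Models::*` classes are still expected to expose the same small interface (`display_name`, `correctly_configured?`, `configuration_hint`). A hypothetical sketch of that interface, with an invented class name and site setting that are not part of this diff:

```ruby
# frozen_string_literal: true

# Hypothetical illustration of the interface the removed class implemented.
# ExampleLlm and ai_example_llm_api_key are invented names.
module DiscourseAi
  module Summarization
    module Models
      class ExampleLlm < Base
        def display_name
          "Example LLM (#{model})"
        end

        def correctly_configured?
          # Gate the model on its own site settings, as Models::Discourse did.
          SiteSetting.ai_example_llm_api_key.present?
        end

        def configuration_hint
          I18n.t(
            "discourse_ai.summarization.configuration_hint",
            count: 1,
            settings: "ai_example_llm_api_key",
          )
        end
      end
    end
  end
end
```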
```diff
@@ -1,68 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Summarization
-    module Strategies
-      class TruncateContent < ::Summarization::Base
-        def initialize(completion_model)
-          @completion_model = completion_model
-        end
-
-        attr_reader :completion_model
-
-        delegate :correctly_configured?,
-                 :display_name,
-                 :configuration_hint,
-                 :model,
-                 to: :completion_model
-
-        def summarize(content, _user, &on_partial_blk)
-          opts = content.except(:contents)
-
-          {
-            summary: summarize_with_truncation(content[:contents], opts, &on_partial_blk),
-            chunks: [],
-          }
-        end
-
-        private
-
-        def format_content_item(item)
-          "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-        end
-
-        def summarize_with_truncation(contents, opts)
-          text_to_summarize = contents.map { |c| format_content_item(c) }.join
-          truncated_content =
-            ::DiscourseAi::Tokenizer::BertTokenizer.truncate(
-              text_to_summarize,
-              completion_model.available_tokens,
-            )
-
-          completion(truncated_content)
-        end
-
-        def completion(prompt)
-          ::DiscourseAi::Inference::DiscourseClassifier.perform!(
-            "#{endpoint}/api/v1/classify",
-            completion_model.model,
-            prompt,
-            SiteSetting.ai_summarization_discourse_service_api_key,
-          ).dig(:summary_text)
-        end
-
-        def endpoint
-          if SiteSetting.ai_summarization_discourse_service_api_endpoint_srv.present?
-            service =
-              DiscourseAi::Utils::DnsSrv.lookup(
-                SiteSetting.ai_summarization_discourse_service_api_endpoint_srv,
-              )
-            "https://#{service.target}:#{service.port}"
-          else
-            SiteSetting.ai_summarization_discourse_service_api_endpoint
-          end
-        end
-      end
-    end
-  end
-end
```
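The hunk above removes the TruncateContent strategy itself: it flattened the posts into one string, truncated that string to the model's token budget with BertTokenizer, and POSTed the result to the service's /api/v1/classify endpoint. A self-contained sketch of that flow, using Net::HTTP in place of DiscourseAi::Inference::DiscourseClassifier and a crude character cap in place of the tokenizer; the X-API-KEY header name and the chars-per-token ratio are assumptions, not taken from the diff:

```ruby
# Sketch of the removed truncate-then-classify flow. The real code used
# DiscourseAi::Tokenizer::BertTokenizer and
# DiscourseAi::Inference::DiscourseClassifier.perform!, not the stand-ins below.
require "json"
require "net/http"
require "uri"

def summarize_by_truncation(contents, endpoint:, api_key:, model:, token_budget:)
  # 1. Flatten the posts, mirroring the removed format_content_item.
  text = contents.map { |c| "(#{c[:id]} #{c[:poster]} said: #{c[:text]} " }.join

  # 2. Truncate to the model's budget. A ~4 chars-per-token cap stands in for
  #    BertTokenizer.truncate so the sketch needs no tokenizer dependency.
  truncated = text[0, token_budget * 4]

  # 3. POST to the classify endpoint. The body shape matches the request the
  #    deleted spec further down stubs; the auth header name is an assumption.
  response =
    Net::HTTP.post(
      URI("#{endpoint}/api/v1/classify"),
      JSON.dump(model: model, content: truncated),
      "X-API-KEY" => api_key,
      "Content-Type" => "application/json",
    )

  JSON.parse(response.body, symbolize_names: true)[:summary_text]
end
```

It would be called with the same shape of contents the strategy received, e.g. `summarize_by_truncation([{ id: 1, poster: "asd", text: "This is a text" }], endpoint: "https://test.com", api_key: "secret", model: "flan-t5-base-samsum", token_budget: 512)`.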
```diff
@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
-  subject(:strategy) { described_class.new(model) }
-
-  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
-
-  let(:summarize_text) { "This is a text" }
-  let(:full_text) { "(1 asd said: #{summarize_text} " }
-  let(:model_tokens) { ::DiscourseAi::Tokenizer::BertTokenizer.size(full_text) - 5 }
-
-  let(:model) do
-    DiscourseAi::Summarization::Models::Discourse.new(
-      "flan-t5-base-samsum",
-      max_tokens: model_tokens,
-    )
-  end
-
-  let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
-
-  let(:summarized_text) { "this is a single summary" }
-
-  let(:user) { User.new }
-
-  describe "#summary" do
-    it "truncates the content and requests a summary" do
-      truncated =
-        ::DiscourseAi::Tokenizer::BertTokenizer.truncate(
-          "(1 asd said: This is a text ",
-          model_tokens,
-        )
-
-      WebMock
-        .stub_request(
-          :post,
-          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-        )
-        .with(body: JSON.dump(model: model.model, content: truncated))
-        .to_return(status: 200, body: JSON.dump({ summary_text: summarized_text }))
-
-      summary = strategy.summarize(content, user).dig(:summary)
-
-      expect(summary).to eq(summarized_text)
-    end
-  end
-end
```
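The deleted spec above forces truncation by giving the model a budget five tokens smaller than the formatted text needs, then asserts that the stubbed /api/v1/classify request carries the truncated body. The arithmetic, with an illustrative token count since BertTokenizer.size is not actually evaluated here:

```ruby
# Illustrative numbers only; the real value comes from BertTokenizer.size(full_text).
full_text_tokens = 12                    # suppose "(1 asd said: This is a text " is 12 tokens
model_tokens     = full_text_tokens - 5  # => 7, so the content cannot fit as-is
# BertTokenizer.truncate(full_text, model_tokens) must therefore drop tokens,
# and the WebMock stub only matches a request whose body contains that
# truncated content.
```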