DEV: Remove non-LLM-based summarization strategies. (#589)

We removed these services from our hosting two weeks ago. It's safe to assume everyone has moved to other LLM-based options.
This commit is contained in:
Roman Rizzi 2024-04-23 12:11:04 -03:00 committed by GitHub
parent 66804bc13c
commit 0c4069ab3f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 0 additions and 157 deletions

View File

@ -61,16 +61,6 @@ module DiscourseAi
foldable_models.each do |model|
plugin.register_summarization_strategy(Strategies::FoldContent.new(model))
end
truncatable_models = [
Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
]
truncatable_models.each do |model|
plugin.register_summarization_strategy(Strategies::TruncateContent.new(model))
end
end
end
end

View File

@ -1,33 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
  module Summarization
    module Models
      # Model descriptor for the Discourse-hosted summarization service.
      #
      # Availability is driven by two site settings:
      # `ai_summarization_discourse_service_api_endpoint` and
      # `ai_summarization_discourse_service_api_key`.
      class Discourse < Base
        # Human-readable label for this model.
        # NOTE(review): `model` is not defined here; presumably provided by Base.
        #
        # @return [String]
        def display_name
          "Discourse AI's #{model}"
        end

        # Whether both the service endpoint and the API key settings are present.
        #
        # @return [Boolean]
        def correctly_configured?
          service_endpoint = SiteSetting.ai_summarization_discourse_service_api_endpoint
          return false unless service_endpoint.present?

          SiteSetting.ai_summarization_discourse_service_api_key.present?
        end

        # Translated hint naming the two settings that must be filled in.
        #
        # @return [String]
        def configuration_hint
          required_settings =
            "ai_summarization_discourse_service_api_endpoint, ai_summarization_discourse_service_api_key"

          I18n.t(
            "discourse_ai.summarization.configuration_hint",
            count: 2,
            settings: required_settings,
          )
        end

        private

        # Always zero for this model.
        # NOTE(review): presumably consumed by Base when computing the token
        # budget — confirm against Base.
        def reserved_tokens
          0
        end
      end
    end
  end
end

View File

@ -1,68 +0,0 @@
# frozen_string_literal: true
module DiscourseAi
  module Summarization
    module Strategies
      # Summarization strategy that concatenates every content item into a
      # single string, truncates it with the BERT tokenizer to the completion
      # model's `available_tokens`, and sends it to the Discourse
      # classification service in one request.
      class TruncateContent < ::Summarization::Base
        attr_reader :completion_model

        delegate :correctly_configured?,
                 :display_name,
                 :configuration_hint,
                 :model,
                 to: :completion_model

        # @param completion_model the model descriptor this strategy wraps;
        #   it must respond to the delegated methods above plus
        #   `available_tokens`.
        def initialize(completion_model)
          @completion_model = completion_model
        end

        # Summarizes the given content.
        #
        # @param content [Hash] must contain `:contents`, a list of items
        #   read via `:id`, `:poster` and `:text`; the remaining keys are
        #   forwarded as options.
        # @param _user unused by this strategy.
        # @return [Hash] `{ summary: <service response text>, chunks: [] }`
        def summarize(content, _user, &on_partial_blk)
          extra_opts = content.except(:contents)
          summary_text =
            summarize_with_truncation(content[:contents], extra_opts, &on_partial_blk)

          { summary: summary_text, chunks: [] }
        end

        private

        # Renders one content item as text for the prompt.
        # NOTE(review): the opening parenthesis is never closed; the spec
        # depends on this exact output.
        def format_content_item(item)
          "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
        end

        # Joins all formatted items, truncates the result to the model's
        # available tokens, and requests a completion for it.
        # `opts` is accepted but not used here.
        def summarize_with_truncation(contents, opts)
          joined_items = contents.map(&method(:format_content_item)).join
          token_budget = completion_model.available_tokens

          completion(
            ::DiscourseAi::Tokenizer::BertTokenizer.truncate(joined_items, token_budget),
          )
        end

        # Posts the prompt to the service's classify route and returns the
        # `:summary_text` entry of the response.
        def completion(prompt)
          classify_url = "#{endpoint}/api/v1/classify"

          response =
            ::DiscourseAi::Inference::DiscourseClassifier.perform!(
              classify_url,
              completion_model.model,
              prompt,
              SiteSetting.ai_summarization_discourse_service_api_key,
            )

          response.dig(:summary_text)
        end

        # Base URL of the service. When the SRV setting is present, the URL
        # is built from the DNS SRV lookup result; otherwise the plain
        # endpoint setting is returned.
        def endpoint
          srv_record = SiteSetting.ai_summarization_discourse_service_api_endpoint_srv
          return SiteSetting.ai_summarization_discourse_service_api_endpoint if srv_record.blank?

          service = DiscourseAi::Utils::DnsSrv.lookup(srv_record)
          "https://#{service.target}:#{service.port}"
        end
      end
    end
  end
end

View File

@ -1,46 +0,0 @@
# frozen_string_literal: true
RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
  subject(:strategy) { described_class.new(model) }

  # Point the strategy at a stubbable endpoint.
  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }

  let(:summarize_text) { "This is a text" }

  # The string the strategy builds for `content` below (one item with
  # id 1 and poster "asd"). The opening parenthesis is intentionally left
  # unclosed to match the strategy's formatter.
  let(:full_text) { "(1 asd said: #{summarize_text} " }

  # Five tokens fewer than the formatted text occupies, passed to the model
  # as `max_tokens`.
  let(:model_tokens) { ::DiscourseAi::Tokenizer::BertTokenizer.size(full_text) - 5 }

  let(:model) do
    DiscourseAi::Summarization::Models::Discourse.new(
      "flan-t5-base-samsum",
      max_tokens: model_tokens,
    )
  end

  let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }

  let(:summarized_text) { "this is a single summary" }

  let(:user) { User.new }

  # The label names the method actually exercised below (`#summarize`).
  describe "#summarize" do
    it "truncates the content and requests a summary" do
      # Reuse `full_text` so the expected request body cannot drift from
      # the text used to compute `model_tokens`.
      truncated = ::DiscourseAi::Tokenizer::BertTokenizer.truncate(full_text, model_tokens)

      # The stub only matches when the request body carries the truncated text.
      WebMock
        .stub_request(
          :post,
          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
        )
        .with(body: JSON.dump(model: model.model, content: truncated))
        .to_return(status: 200, body: JSON.dump({ summary_text: summarized_text }))

      summary = strategy.summarize(content, user).dig(:summary)

      expect(summary).to eq(summarized_text)
    end
  end
end