FIX: Truncate content for sentiment/toxicity classification (#431)

This commit is contained in:
Rafael dos Santos Silva 2024-01-17 15:17:58 -03:00 committed by GitHub
parent 5bdf3dc1f4
commit c70f43f130
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 19 additions and 11 deletions

View File

@@ -48,11 +48,14 @@ module DiscourseAi
end end
def content_of(target_to_classify) def content_of(target_to_classify)
content =
if target_to_classify.post_number == 1 if target_to_classify.post_number == 1
"#{target_to_classify.topic.title}\n#{target_to_classify.raw}" "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
else else
target_to_classify.raw target_to_classify.raw
end end
Tokenizer::BertTokenizer.truncate(content, 512)
end end
def endpoint def endpoint

View File

@@ -59,8 +59,10 @@ module DiscourseAi
end end
def content_of(target_to_classify) def content_of(target_to_classify)
return target_to_classify.message if target_to_classify.is_a?(Chat::Message) content =
if target_to_classify.is_a?(Chat::Message)
target_to_classify.message
else
if target_to_classify.post_number == 1 if target_to_classify.post_number == 1
"#{target_to_classify.topic.title}\n#{target_to_classify.raw}" "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
else else
@@ -68,6 +70,9 @@ module DiscourseAi
end end
end end
Tokenizer::BertTokenizer.truncate(content, 512)
end
def endpoint def endpoint
if SiteSetting.ai_toxicity_inference_service_api_endpoint_srv.present? if SiteSetting.ai_toxicity_inference_service_api_endpoint_srv.present?
service = service =