FIX: Truncate content for sentiment/toxicity classification (#431)

Rafael dos Santos Silva 2024-01-17 15:17:58 -03:00 committed by GitHub
parent 5bdf3dc1f4
commit c70f43f130
2 changed files with 19 additions and 11 deletions


@@ -48,11 +48,14 @@ module DiscourseAi
       end

       def content_of(target_to_classify)
-        if target_to_classify.post_number == 1
-          "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
-        else
-          target_to_classify.raw
-        end
+        content =
+          if target_to_classify.post_number == 1
+            "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
+          else
+            target_to_classify.raw
+          end
+
+        Tokenizer::BertTokenizer.truncate(content, 512)
       end

       def endpoint
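For context, 512 is the maximum sequence length of the BERT-family models behind these classifiers, so the new code caps the text before it is sent to the classification endpoint. A minimal sketch of the call as it now happens for the first post of a topic; `post` is a hypothetical Post record, and the fully qualified constant name is assumed from the `module DiscourseAi` context shown in the hunk header:

    # Title and body are concatenated for the first post of a topic,
    # then cut down to at most 512 BERT tokens before classification.
    content = "#{post.topic.title}\n#{post.raw}"
    payload = DiscourseAi::Tokenizer::BertTokenizer.truncate(content, 512)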


@@ -59,13 +59,18 @@ module DiscourseAi
       end

       def content_of(target_to_classify)
-        return target_to_classify.message if target_to_classify.is_a?(Chat::Message)
+        content =
+          if target_to_classify.is_a?(Chat::Message)
+            target_to_classify.message
+          else
+            if target_to_classify.post_number == 1
+              "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
+            else
+              target_to_classify.raw
+            end
+          end

-        if target_to_classify.post_number == 1
-          "#{target_to_classify.topic.title}\n#{target_to_classify.raw}"
-        else
-          target_to_classify.raw
-        end
+        Tokenizer::BertTokenizer.truncate(content, 512)
       end

       def endpoint
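The toxicity path additionally covers chat messages, so after this change all three content sources (chat message, first post with topic title, and later posts) flow through the same token-level truncation. As a rough conceptual illustration only, not the discourse-ai implementation, token-aware truncation encodes the text, keeps the first 512 model tokens, and decodes back to a string; `tokenizer` below is a hypothetical stand-in for whatever BERT tokenizer the plugin wraps:

    # Conceptual sketch (hypothetical helper, not the plugin's code):
    # keep at most `limit` model tokens so the classifier never sees
    # an over-length input.
    def truncate_to_tokens(tokenizer, text, limit = 512)
      ids = tokenizer.encode(text)        # text -> token ids
      return text if ids.length <= limit  # already short enough
      tokenizer.decode(ids.first(limit))  # decode only the first `limit` tokens
    end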