FIX: Handle truncation in HyDE search (#342)

This commit is contained in:
Rafael dos Santos Silva 2023-12-07 10:36:56 -03:00 committed by GitHub
parent 450ec915d8
commit 381b0d74ca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 6 additions and 4 deletions

View File

@@ -114,7 +114,7 @@ module DiscourseAi
SiteSetting.ai_embeddings_semantic_search_hyde_model,
).completion!(prompt, @guardian.user)
Nokogiri::HTML5.fragment(llm_response).at("ai").text.presence || llm_response
Nokogiri::HTML5.fragment(llm_response).at("ai")&.text&.presence || llm_response
end
end
end

View File

@@ -11,7 +11,8 @@ module DiscourseAi
.dig(:result, :data)
.first
elsif SiteSetting.ai_hugging_face_tei_endpoint.present?
DiscourseAi::Inference::HuggingFaceTextEmbeddings.perform!(text).first
truncated_text = tokenizer.truncate(text, max_sequence_length - 2)
DiscourseAi::Inference::HuggingFaceTextEmbeddings.perform!(truncated_text).first
elsif SiteSetting.ai_embeddings_discourse_service_api_endpoint.present?
DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_embeddings_discourse_service_api_endpoint}/api/v1/classify",

View File

@@ -6,7 +6,8 @@ module DiscourseAi
class MultilingualE5Large < Base
def vector_from(text)
if SiteSetting.ai_hugging_face_tei_endpoint.present?
DiscourseAi::Inference::HuggingFaceTextEmbeddings.perform!(text).first
truncated_text = tokenizer.truncate(text, max_sequence_length - 2)
DiscourseAi::Inference::HuggingFaceTextEmbeddings.perform!(truncated_text).first
elsif SiteSetting.ai_embeddings_discourse_service_api_endpoint.present?
DiscourseAi::Inference::DiscourseClassifier.perform!(
"#{SiteSetting.ai_embeddings_discourse_service_api_endpoint}/api/v1/classify",

View File

@@ -5,7 +5,7 @@ module ::DiscourseAi
class HuggingFaceTextEmbeddings
def self.perform!(content)
headers = { "Referer" => Discourse.base_url, "Content-Type" => "application/json" }
body = { inputs: content }.to_json
body = { inputs: content, truncate: true }.to_json
api_endpoint = SiteSetting.ai_hugging_face_tei_endpoint