diff --git a/lib/completions/endpoints/hugging_face.rb b/lib/completions/endpoints/hugging_face.rb
index 48c1f607..d5d10cea 100644
--- a/lib/completions/endpoints/hugging_face.rb
+++ b/lib/completions/endpoints/hugging_face.rb
@@ -33,6 +33,7 @@ module DiscourseAi
           token_limit = SiteSetting.ai_hugging_face_token_limit || 4_000
 
           payload[:parameters][:max_new_tokens] = token_limit - prompt_size(prompt)
+          payload[:parameters][:return_full_text] = false
 
           payload[:stream] = true if @streaming_mode
         end
diff --git a/lib/inference/hugging_face_text_generation.rb b/lib/inference/hugging_face_text_generation.rb
index 1ea35d3f..049bba81 100644
--- a/lib/inference/hugging_face_text_generation.rb
+++ b/lib/inference/hugging_face_text_generation.rb
@@ -40,6 +40,7 @@ module ::DiscourseAi
         parameters[:max_new_tokens] = token_limit - prompt_size
         parameters[:temperature] = temperature if temperature
         parameters[:repetition_penalty] = repetition_penalty if repetition_penalty
+        parameters[:return_full_text] = false
 
         payload[:stream] = true if block_given?
diff --git a/spec/lib/completions/endpoints/hugging_face_spec.rb b/spec/lib/completions/endpoints/hugging_face_spec.rb
index b11413ce..ba29893a 100644
--- a/spec/lib/completions/endpoints/hugging_face_spec.rb
+++ b/spec/lib/completions/endpoints/hugging_face_spec.rb
@@ -18,6 +18,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::HuggingFace do
       .tap do |payload|
         payload[:parameters][:max_new_tokens] =
           (SiteSetting.ai_hugging_face_token_limit || 4_000) - model.prompt_size(prompt)
+        payload[:parameters][:return_full_text] = false
       end
       .to_json
   end
@@ -29,6 +30,7 @@ RSpec.describe DiscourseAi::Completions::Endpoints::HuggingFace do
         payload[:parameters][:max_new_tokens] =
           (SiteSetting.ai_hugging_face_token_limit || 4_000) - model.prompt_size(prompt)
         payload[:stream] = true
+        payload[:parameters][:return_full_text] = false
       end
       .to_json
   end