From 9551b1a4d1b195fdfb58d9238b64f8311e0cb84a Mon Sep 17 00:00:00 2001 From: Sam Date: Wed, 13 Nov 2024 07:12:00 +1100 Subject: [PATCH] FIX: do not strip empty string during stream processing (#911) Fixes an issue where the OpenAI provider dropped whitespace-only chunks (such as newlines and spaces) from streamed output. --- lib/completions/open_ai_message_processor.rb | 3 ++- .../lib/completions/endpoints/open_ai_spec.rb | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/lib/completions/open_ai_message_processor.rb b/lib/completions/open_ai_message_processor.rb index 02369bec..2890083b 100644 --- a/lib/completions/open_ai_message_processor.rb +++ b/lib/completions/open_ai_message_processor.rb @@ -65,7 +65,8 @@ module DiscourseAi::Completions @tool.parameters = parsed_args rval = @tool @tool = nil - elsif content.present? + elsif !content.to_s.empty? + # we don't want to strip empty content like "\n", do not use present? rval = content end diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb index c4d7758a..a1c6702a 100644 --- a/spec/lib/completions/endpoints/open_ai_spec.rb +++ b/spec/lib/completions/endpoints/open_ai_spec.rb @@ -519,6 +519,28 @@ TEXT expect(response).to eq(tool_calls) end + it "properly handles newlines" do + response = <<~TEXT.strip + data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":":\\n\\n"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"```"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: 
{"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"ruby"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"\\n"},"logprobs":null,"finish_reason":null}],"usage":null} + + data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"def"},"logprobs":null,"finish_reason":null}],"usage":null} + TEXT + + open_ai_mock.stub_raw(response) + partials = [] + + dialect = compliance.dialect(prompt: compliance.generic_prompt) + endpoint.perform_completion!(dialect, user) { |partial| partials << partial } + + expect(partials).to eq([":\n\n", "```", "ruby", "\n", "def"]) + end + it "uses proper token accounting" do response = <<~TEXT.strip data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null}|