From 9551b1a4d1b195fdfb58d9238b64f8311e0cb84a Mon Sep 17 00:00:00 2001
From: Sam <sam.saffron@gmail.com>
Date: Wed, 13 Nov 2024 07:12:00 +1100
Subject: [PATCH] FIX: do not strip empty string during stream processing
 (#911)

Fixes an issue where the OpenAI provider dropped whitespace-only chunks (such as newlines and spaces) from streamed responses
---
 lib/completions/open_ai_message_processor.rb  |  3 ++-
 .../lib/completions/endpoints/open_ai_spec.rb | 22 +++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/lib/completions/open_ai_message_processor.rb b/lib/completions/open_ai_message_processor.rb
index 02369bec..2890083b 100644
--- a/lib/completions/open_ai_message_processor.rb
+++ b/lib/completions/open_ai_message_processor.rb
@@ -65,7 +65,8 @@ module DiscourseAi::Completions
         @tool.parameters = parsed_args
         rval = @tool
         @tool = nil
-      elsif content.present?
+      elsif !content.to_s.empty?
+        # keep whitespace-only content such as "\n"; present? treats it as blank and would drop it
         rval = content
       end
 
diff --git a/spec/lib/completions/endpoints/open_ai_spec.rb b/spec/lib/completions/endpoints/open_ai_spec.rb
index c4d7758a..a1c6702a 100644
--- a/spec/lib/completions/endpoints/open_ai_spec.rb
+++ b/spec/lib/completions/endpoints/open_ai_spec.rb
@@ -519,6 +519,28 @@ TEXT
           expect(response).to eq(tool_calls)
         end
 
+        it "properly handles newlines" do
+          response = <<~TEXT.strip
+            data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":":\\n\\n"},"logprobs":null,"finish_reason":null}],"usage":null}
+
+            data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"```"},"logprobs":null,"finish_reason":null}],"usage":null}
+
+            data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"ruby"},"logprobs":null,"finish_reason":null}],"usage":null}
+
+            data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"\\n"},"logprobs":null,"finish_reason":null}],"usage":null}
+
+            data: {"id":"chatcmpl-ASngi346UA9k006bF6GBRV66tEJfQ","object":"chat.completion.chunk","created":1731427548,"model":"gpt-4o-2024-08-06","system_fingerprint":"fp_159d8341cc","choices":[{"index":0,"delta":{"content":"def"},"logprobs":null,"finish_reason":null}],"usage":null}
+         TEXT
+
+          open_ai_mock.stub_raw(response)
+          partials = []
+
+          dialect = compliance.dialect(prompt: compliance.generic_prompt)
+          endpoint.perform_completion!(dialect, user) { |partial| partials << partial }
+
+          expect(partials).to eq([":\n\n", "```", "ruby", "\n", "def"])
+        end
+
         it "uses proper token accounting" do
           response = <<~TEXT.strip
             data: {"id":"chatcmpl-9OZidiHncpBhhNMcqCus9XiJ3TkqR","object":"chat.completion.chunk","created":1715644203,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_729ea513f7","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}],"usage":null}|