FEATURE: add the ability to disable streaming on an OpenAI LLM

Disabling streaming is required for models such as o1 that do not support streaming yet.

It is worth keeping this feature around in case various APIs drop support for streaming endpoints; Discourse AI can then continue to work just as it did before.
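A minimal sketch of the intended behaviour (the model id and prompt are illustrative; proxy and generate are the existing completion entry points):

# With disable_streaming set on the model, a block-style call is served by a
# single non-streaming request whose result is replayed to the caller's block.
llm = DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")
llm.generate("Say hello", user: Discourse.system_user) do |partial|
  print partial # receives the buffered response instead of live chunks
end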

Also: fixes an issue where shared artifacts were missing a viewport meta tag, leading to tiny artifacts on mobile.
Sam 2025-01-13 17:01:01 +11:00 committed by GitHub
parent 7e9c0dc076
commit 20612fde52
6 changed files with 71 additions and 0 deletions

View File

@ -57,6 +57,7 @@ module DiscourseAi
      <head>
        <meta charset="UTF-8">
        <title>#{ERB::Util.html_escape(name)}</title>
        <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, user-scalable=yes, viewport-fit=cover, interactive-widget=resizes-content">
        <style>
          html, body, iframe {
            margin: 0;

View File

@ -32,6 +32,7 @@ class LlmModel < ActiveRecord::Base
      open_ai: {
        organization: :text,
        disable_native_tools: :checkbox,
        disable_streaming: :checkbox,
      },
      mistral: {
        disable_native_tools: :checkbox,
@ -51,11 +52,13 @@ class LlmModel < ActiveRecord::Base
      ollama: {
        disable_system_prompt: :checkbox,
        enable_native_tool: :checkbox,
        disable_streaming: :checkbox,
      },
      open_router: {
        disable_native_tools: :checkbox,
        provider_order: :text,
        provider_quantizations: :text,
        disable_streaming: :checkbox,
      },
    }
  end
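For reference, a sketch of how the new checkbox value surfaces at runtime, using lookup_custom_param as the endpoints below do (the lookup key matches the provider_params entry):

# provider_params is per-model storage; the admin checkbox writes a boolean here
model = LlmModel.find_by(provider: "open_ai")
model.update!(provider_params: { disable_streaming: true })
model.lookup_custom_param("disable_streaming") # => true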

View File

@ -420,6 +420,7 @@ en:
          disable_native_tools: "Disable native tool support (use XML based tools)"
          provider_order: "Provider order (comma delimited list)"
          provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
          disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
      related_topics:
        title: "Related topics"

View File

@ -69,6 +69,27 @@ module DiscourseAi
        model_params = normalize_model_params(model_params)
        orig_blk = blk

        # When streaming is disabled for this model, satisfy a streaming-style
        # call with a single blocking request and replay the result to the
        # caller's block.
        if block_given? && disable_streaming?
          result =
            perform_completion!(
              dialect,
              user,
              model_params,
              feature_name: feature_name,
              feature_context: feature_context,
              partial_tool_calls: partial_tool_calls,
            )
          result = [result] if !result.is_a?(Array)

          # Preserve the streaming cancel contract: the proc passed as the
          # block's second argument lets the caller stop the replay early.
          cancelled_by_caller = false
          cancel_proc = -> { cancelled_by_caller = true }
          result.each do |partial|
            blk.call(partial, cancel_proc)
            break if cancelled_by_caller
          end

          return result
        end

        @streaming_mode = block_given?

        prompt = dialect.translate
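A hypothetical caller illustrating the cancel contract above (llm and user stand in for an existing proxy and a current user): invoking the proc passed as the block's second argument stops the replay of buffered partials.

llm.generate("List ten facts", user: user) do |partial, cancel|
  print partial
  cancel.call if partial.length > 1000 # no further partials are delivered
end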
@ -261,6 +282,10 @@ module DiscourseAi
        raise NotImplementedError
      end

      def disable_streaming?
        @disable_streaming = !!llm_model.lookup_custom_param("disable_streaming")
      end

      private

      def start_log(

View File

@ -42,6 +42,10 @@ module DiscourseAi
      private

      def disable_streaming?
        # unlike Base#disable_streaming?, the raw param value is returned;
        # it is only ever used in a boolean context
        @disable_streaming = llm_model.lookup_custom_param("disable_streaming")
      end

      def model_uri
        if llm_model.url.to_s.starts_with?("srv://")
          service = DiscourseAi::Utils::DnsSrv.lookup(llm_model.url.sub("srv://", ""))

View File

@ -457,6 +457,43 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
    end
  end

  it "falls back to non-streaming mode when streaming is disabled" do
    model.update!(provider_params: { disable_streaming: true })

    response = {
      id: "chatcmpl-123",
      object: "chat.completion",
      created: 1_677_652_288,
      choices: [
        {
          message: {
            role: "assistant",
            content: "Hello there",
          },
          index: 0,
          finish_reason: "stop",
        },
      ],
    }

    parsed_body = nil
    stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
      body:
        proc do |req_body|
          parsed_body = JSON.parse(req_body, symbolize_names: true)
          true
        end,
    ).to_return(status: 200, body: response.to_json)

    chunks = []
    dialect = compliance.dialect(prompt: compliance.generic_prompt)
    endpoint.perform_completion!(dialect, user) { |chunk| chunks << chunk }

    expect(parsed_body).not_to have_key(:stream)
    expect(chunks).to eq(["Hello there"])
  end
describe "when using streaming mode" do
context "with simple prompts" do
it "completes a trivial prompt and logs the response" do