FEATURE: add the ability to disable streaming on an OpenAI LLM

Disabling streaming is required for models such as o1 that do not support streaming yet.

It is worth keeping this feature around in case various APIs drop support for streaming endpoints; Discourse AI can then continue to work just as it did before.
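A minimal sketch of the intended behaviour (the model id and prompt are illustrative; proxy and generate are the existing completion entry points):

# With disable_streaming set on the model, a block-style call is served by a
# single non-streaming request whose result is replayed to the caller's block.
llm = DiscourseAi::Completions::Llm.proxy("custom:#{llm_model.id}")
llm.generate("Say hello", user: Discourse.system_user) do |partial|
  print partial # receives the buffered response instead of live chunks
end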

Also: fixes an issue where shared artifacts were missing a viewport meta tag, leading to tiny artifacts on mobile.
Sam 2025-01-13 17:01:01 +11:00 committed by GitHub
parent 7e9c0dc076
commit 20612fde52
6 changed files with 71 additions and 0 deletions

View File

@ -57,6 +57,7 @@ module DiscourseAi
      <head>
        <meta charset="UTF-8">
        <title>#{ERB::Util.html_escape(name)}</title>
        <meta name="viewport" content="width=device-width, initial-scale=1.0, minimum-scale=1.0, user-scalable=yes, viewport-fit=cover, interactive-widget=resizes-content">
        <style>
          html, body, iframe {
            margin: 0;

View File

@ -32,6 +32,7 @@ class LlmModel < ActiveRecord::Base
      open_ai: {
        organization: :text,
        disable_native_tools: :checkbox,
        disable_streaming: :checkbox,
      },
      mistral: {
        disable_native_tools: :checkbox,
@ -51,11 +52,13 @@ class LlmModel < ActiveRecord::Base
      ollama: {
        disable_system_prompt: :checkbox,
        enable_native_tool: :checkbox,
        disable_streaming: :checkbox,
      },
      open_router: {
        disable_native_tools: :checkbox,
        provider_order: :text,
        provider_quantizations: :text,
        disable_streaming: :checkbox,
      },
    }
  end
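For reference, a sketch of how the new checkbox value surfaces at runtime, using lookup_custom_param as the endpoints below do (the lookup key matches the provider_params entry):

# provider_params is per-model storage; the admin checkbox writes a boolean here
model = LlmModel.find_by(provider: "open_ai")
model.update!(provider_params: { disable_streaming: true })
model.lookup_custom_param("disable_streaming") # => true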

View File

@ -420,6 +420,7 @@ en:
          disable_native_tools: "Disable native tool support (use XML based tools)"
          provider_order: "Provider order (comma delimited list)"
          provider_quantizations: "Order of provider quantizations (comma delimited list eg: fp16,fp8)"
          disable_streaming: "Disable streaming completions (convert streaming to non streaming requests)"
      related_topics:
        title: "Related topics"

View File

@ -69,6 +69,27 @@ module DiscourseAi
        model_params = normalize_model_params(model_params)
        orig_blk = blk

        # When streaming is disabled for this model, satisfy a streaming-style
        # call with a single blocking request and replay the result to the
        # caller's block.
        if block_given? && disable_streaming?
          result =
            perform_completion!(
              dialect,
              user,
              model_params,
              feature_name: feature_name,
              feature_context: feature_context,
              partial_tool_calls: partial_tool_calls,
            )
          result = [result] if !result.is_a?(Array)

          # Preserve the streaming cancel contract: the proc passed as the
          # block's second argument lets the caller stop the replay early.
          cancelled_by_caller = false
          cancel_proc = -> { cancelled_by_caller = true }
          result.each do |partial|
            blk.call(partial, cancel_proc)
            break if cancelled_by_caller
          end

          return result
        end

        @streaming_mode = block_given?

        prompt = dialect.translate
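A hypothetical caller illustrating the cancel contract above (llm and user stand in for an existing proxy and a current user): invoking the proc passed as the block's second argument stops the replay of buffered partials.

llm.generate("List ten facts", user: user) do |partial, cancel|
  print partial
  cancel.call if partial.length > 1000 # no further partials are delivered
end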
@ -261,6 +282,10 @@ module DiscourseAi
        raise NotImplementedError
      end

      def disable_streaming?
        @disable_streaming = !!llm_model.lookup_custom_param("disable_streaming")
      end

      private

      def start_log(

View File

@ -42,6 +42,10 @@ module DiscourseAi
      private

      def disable_streaming?
        # unlike Base#disable_streaming?, the raw param value is returned;
        # it is only ever used in a boolean context
        @disable_streaming = llm_model.lookup_custom_param("disable_streaming")
      end

      def model_uri
        if llm_model.url.to_s.starts_with?("srv://")
          service = DiscourseAi::Utils::DnsSrv.lookup(llm_model.url.sub("srv://", ""))

View File

@ -457,6 +457,43 @@ RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
    end
  end

  it "falls back to non-streaming mode when streaming is disabled" do
    model.update!(provider_params: { disable_streaming: true })

    response = {
      id: "chatcmpl-123",
      object: "chat.completion",
      created: 1_677_652_288,
      choices: [
        {
          message: {
            role: "assistant",
            content: "Hello there",
          },
          index: 0,
          finish_reason: "stop",
        },
      ],
    }

    parsed_body = nil
    stub_request(:post, "https://api.openai.com/v1/chat/completions").with(
      body:
        proc do |req_body|
          parsed_body = JSON.parse(req_body, symbolize_names: true)
          true
        end,
    ).to_return(status: 200, body: response.to_json)

    chunks = []
    dialect = compliance.dialect(prompt: compliance.generic_prompt)
    endpoint.perform_completion!(dialect, user) { |chunk| chunks << chunk }

    expect(parsed_body).not_to have_key(:stream)
    expect(chunks).to eq(["Hello there"])
  end
describe "when using streaming mode" do
context "with simple prompts" do
it "completes a trivial prompt and logs the response" do