discourse-ai/spec/lib/completions/endpoints/open_ai_spec.rb

# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
  # Endpoint under test, built for the model named below with the OpenAI tokenizer.
  subject(:model) do
    described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer)
  end

  let(:model_name) { "gpt-3.5-turbo" }

  # Provider-agnostic prompt; the ChatGpt dialect translates it into the value
  # that the expected request bodies send under :messages.
  let(:generic_prompt) do
    { insts: "You are a helpful bot.", input: "write 3 words" }
  end
  let(:dialect) do
    DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name)
  end
  let(:prompt) { dialect.translate }

  # Three progressively more complete versions of a get_weather tool call: the
  # arguments start empty and end up fully populated.
  # NOTE(review): presumably consumed by the shared examples as streamed deltas.
  let(:tool_deltas) do
    [{}, { location: "" }, { location: "Sydney", unit: "c" }].map do |arguments|
      { id: "get_weather", name: "get_weather", arguments: arguments }
    end
  end

  # The complete get_weather call; same content as the final tool delta.
  let(:tool_call) do
    {
      id: "get_weather",
      name: "get_weather",
      arguments: {
        location: "Sydney",
        unit: "c",
      },
    }
  end

  # JSON string the non-streaming stub matches the outgoing request body against:
  # the endpoint's default options plus the translated messages and, when the
  # generic prompt declares any, its tools wrapped as "function" entries.
  let(:request_body) do
    payload = model.default_options.merge(messages: prompt)

    tools = generic_prompt[:tools]
    payload[:tools] = tools.map { |tool| { type: "function", tool: tool } } if tools

    payload.to_json
  end
  # Streaming counterpart of the non-streaming request body: the same payload
  # with stream: true merged in after the messages.
  let(:stream_request_body) do
    payload = model.default_options.merge(messages: prompt, stream: true)

    tools = generic_prompt[:tools]
    payload[:tools] = tools.map { |tool| { type: "function", tool: tool } } if tools

    payload.to_json
  end

  # Builds a canned, non-streaming chat completion payload.
  #
  # content   - the assistant's reply text; when tool_call is truthy it is
  #             placed under tool_calls[0][:function] instead of :content.
  # tool_call - whether content describes a tool invocation rather than text.
  #
  # Returns a Hash with symbol keys and a single choice whose finish_reason
  # is "stop".
  def response(content, tool_call: false)
    assistant_message = { role: "assistant" }
    if tool_call
      assistant_message[:tool_calls] = [{ function: content }]
    else
      assistant_message[:content] = content
    end

    token_usage = { prompt_tokens: 337, completion_tokens: 162, total_tokens: 499 }

    {
      id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",
      object: "chat.completion",
      created: 1_678_464_820,
      model: "gpt-3.5-turbo-0301",
      usage: token_usage,
      choices: [{ message: assistant_message, finish_reason: "stop", index: 0 }],
    }
  end

  # Registers a WebMock stub for a non-streaming completion request.
  #
  # prompt        - accepted but not read here; the request is matched against
  #                 request_body instead.
  # response_text - forwarded to #response as its content.
  # tool_call     - forwarded to #response.
  #
  # Returns whatever WebMock's to_return returns.
  def stub_response(prompt, response_text, tool_call: false)
    canned_body = JSON.dump(response(response_text, tool_call: tool_call))

    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: request_body)
      .to_return(status: 200, body: canned_body)
  end

  # Serializes one streamed completion chunk as a "data: <json>" line.
  #
  # delta         - the incremental content; when tool_call is truthy it is
  #                 placed under tool_calls[0][:function] instead of :content.
  # finish_reason - value emitted under the chunk's finish_reason key.
  # tool_call     - whether delta describes a tool invocation rather than text.
  #
  # NOTE(review): finish_reason and index are written at the top level of the
  # chunk, not inside the choice — confirm this matches what the endpoint reads.
  #
  # Returns a String. The id embeds a fresh SecureRandom.hex on every call.
  def stream_line(delta, finish_reason: nil, tool_call: false)
    delta_payload = tool_call ? { tool_calls: [{ function: delta }] } : { content: delta }

    event = {
      id: "chatcmpl-#{SecureRandom.hex}",
      object: "chat.completion.chunk",
      created: 1_681_283_881,
      model: "gpt-3.5-turbo-0301",
      choices: [{ delta: delta_payload }],
      finish_reason: finish_reason,
      index: 0,
    }

    "data: #{event.to_json}"
  end

  # Registers a WebMock stub for a streaming completion request.
  #
  # Every delta becomes one "data:" line via #stream_line; only the last one
  # carries finish_reason "stop_sequence". The lines and the closing
  # "data: [DONE]" marker are separated by blank lines, the same layout the
  # handwritten streams in the chunked-encoding examples use. Previously the
  # marker was appended directly onto the final JSON line with no separator.
  #
  # prompt    - accepted but not read here; the request is matched against
  #             stream_request_body instead.
  # deltas    - Array of values handed to #stream_line one at a time.
  # tool_call - forwarded to #stream_line.
  #
  # Returns whatever WebMock's to_return returns.
  def stub_streamed_response(prompt, deltas, tool_call: false)
    last_index = deltas.length - 1

    lines =
      deltas.each_with_index.map do |delta, index|
        finish_reason = index == last_index ? "stop_sequence" : nil
        stream_line(delta, finish_reason: finish_reason, tool_call: tool_call)
      end

    # The body is handed to WebMock as an Array of single characters.
    # NOTE(review): presumably so each character can arrive as its own chunk.
    chunks = [*lines, "data: [DONE]"].join("\n\n").chars

    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: stream_request_body)
      .to_return(status: 200, body: chunks)
  end

  # Shared example group; "endpoint_examples" is required at the top of this file.
  # NOTE(review): presumably it drives the helpers and fixtures defined above
  # (stub_response, stub_streamed_response, tool_deltas, ...) — confirm there.
  it_behaves_like "an endpoint that can communicate with a completion service"

  context "when chunked encoding returns partial chunks" do
    # Temporarily swaps Net::HTTP for a subclass whose responses yield the
    # stubbed body element by element.
    # NOTE(review): presumably needed because WebMock does not deliver a stubbed
    # body in separate chunks on its own — see the linked issue.
    # See: https://github.com/bblimke/webmock/issues/629
    let(:mock_net_http) do
      Class.new(Net::HTTP) do
        def request(*)
          super do |response|
            # Redefine read_body on this one response object so that every
            # element of @body is passed to the caller's block in turn.
            response.instance_eval do
              def read_body(*, &)
                @body.each(&)
              end
            end

            yield response if block_given?

            response
          end
        end
      end
    end

    # remove_const returns the value the constant held, and let memoizes it, so
    # original_http hands back the real Net::HTTP class without removing twice.
    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
    let(:original_http) { remove_original_net_http }
    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }

    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }

    before do
      # Order matters: the subclass must be built while Net::HTTP still exists,
      # then the original is removed, then the subclass takes over its name.
      mock_net_http
      remove_original_net_http
      stub_net_http
    end

    after do
      # Drop the stand-in and put the original class back under Net::HTTP.
      remove_stubbed_net_http
      restore_net_http
    end

    it "will automatically recover from a bad payload" do
      # A malformed event should never arrive, but make sure one does not take
      # its neighbours down with it. The test1 row is missing its closing brace,
      # so it is not valid JSON. Each "|" marks a chunk boundary.
      event_stream = <<~TEXT
d|a|t|a|:| |{|"choices":[{"delta":{"content":"test,"}}]}

data: {"choices":[{"delta":{"content":"test1,"}}]

data: {"choices":[{"delta":|{"content":"test2,"}}]}

data: {"choices":[{"delta":{"content":"test3,"}}]|}

data: {"choices":[{|"|d|elta":{"content":"test4"}}]|}

data: [D|ONE]
    TEXT

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: event_stream.split("|"),
      )

      received = []
      DiscourseAi::Completions::Llm
        .proxy("gpt-3.5-turbo")
        .completion!({ insts: "test" }, Discourse.system_user) { |partial| received << partial }

      expect(received.join).to eq("test,test2,test3,test4")
    end

    it "supports chunked encoding properly" do
      # Every row is valid JSON here; each "|" marks a chunk boundary, so several
      # events arrive split across chunks.
      event_stream = <<~TEXT
da|ta: {"choices":[{"delta":{"content":"test,"}}]}

data: {"choices":[{"delta":{"content":"test1,"}}]}

data: {"choices":[{"delta":|{"content":"test2,"}}]}

data: {"choices":[{"delta":{"content":"test3,"}}]|}

data: {"choices":[{|"|d|elta":{"content":"test4"}}]|}

data: [D|ONE]
    TEXT

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: event_stream.split("|"),
      )

      received = []
      DiscourseAi::Completions::Llm
        .proxy("gpt-3.5-turbo")
        .completion!({ insts: "test" }, Discourse.system_user) { |partial| received << partial }

      expect(received.join).to eq("test,test1,test2,test3,test4")
    end
  end
end
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`# frozen_string_literal: true`

			`require_relative "endpoint_examples"`

DEV: port directory structure to Zeitwerk (#319) Previous to this change we relied on explicit loading for files in Discourse AI. This had a few downsides: - Busywork whenever you add a file (an extra require relative) - We were not keeping to conventions internally ... some places were OpenAI others are OpenAi - Autoloader did not work, which led to lots of full application broken reloads when developing. This moves all of DiscourseAI into a Zeitwerk compatible structure. It also leaves some minimal amount of manual loading (automation - which is loading into an existing namespace that may or may not be there). To avoid needing /lib/discourse_ai/... we mount a namespace, thus we are able to keep /lib pointed at ::DiscourseAi. Various files were renamed to get around Zeitwerk rules and minimize usage of custom inflections. Though we can get custom inflections to work it is not worth it; it would require a Discourse core patch, which means we create a hard dependency. 2023-11-29 15:17:46 +11:00			`RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }`

			`let(:model_name) { "gpt-3.5-turbo" }`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, it adds some buffering when reading chunks to handle cases when streaming is extremely slow. 2023-12-18 18:06:01 -03:00			`let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }`
			`let(:dialect) { DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name) }`
			`let(:prompt) { dialect.translate }`

			`let(:tool_deltas) do`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`[`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`{ id: "get_weather", name: "get_weather", arguments: {} },`
			`{ id: "get_weather", name: "get_weather", arguments: { location: "" } },`
			`{ id: "get_weather", name: "get_weather", arguments: { location: "Sydney", unit: "c" } },`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`]`
			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`let(:tool_call) do`
			`{ id: "get_weather", name: "get_weather", arguments: { location: "Sydney", unit: "c" } }`
			`end`

			`let(:request_body) do`
			`model`
			`.default_options`
			`.merge(messages: prompt)`
			`.tap do \|b\|`
			`b[:tools] = generic_prompt[:tools].map do \|t\|`
			`{ type: "function", tool: t }`
			`end if generic_prompt[:tools]`
			`end`
			`.to_json`
			`end`
			`let(:stream_request_body) do`
			`model`
			`.default_options`
			`.merge(messages: prompt, stream: true)`
			`.tap do \|b\|`
			`b[:tools] = generic_prompt[:tools].map do \|t\|`
			`{ type: "function", tool: t }`
			`end if generic_prompt[:tools]`
			`end`
			`.to_json`
			`end`

			`def response(content, tool_call: false)`
			`message_content =`
			`if tool_call`
			`{ tool_calls: [{ function: content }] }`
			`else`
			`{ content: content }`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00
			`{`
			`id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",`
			`object: "chat.completion",`
			`created: 1_678_464_820,`
			`model: "gpt-3.5-turbo-0301",`
			`usage: {`
			`prompt_tokens: 337,`
			`completion_tokens: 162,`
			`total_tokens: 499,`
			`},`
			`choices: [`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`{ message: { role: "assistant" }.merge(message_content), finish_reason: "stop", index: 0 },`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`],`
			`}`
			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`def stub_response(prompt, response_text, tool_call: false)`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`WebMock`
			`.stub_request(:post, "https://api.openai.com/v1/chat/completions")`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`.with(body: request_body)`
			`.to_return(status: 200, body: JSON.dump(response(response_text, tool_call: tool_call)))`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`def stream_line(delta, finish_reason: nil, tool_call: false)`
			`message_content =`
			`if tool_call`
			`{ tool_calls: [{ function: delta }] }`
			`else`
			`{ content: delta }`
			`end`

REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`+"data: " << {`
			`id: "chatcmpl-#{SecureRandom.hex}",`
			`object: "chat.completion.chunk",`
			`created: 1_681_283_881,`
			`model: "gpt-3.5-turbo-0301",`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`choices: [{ delta: message_content }],`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`finish_reason: finish_reason,`
			`index: 0,`
			`}.to_json`
			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`def stub_streamed_response(prompt, deltas, tool_call: false)`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`chunks =`
			`deltas.each_with_index.map do \|_, index\|`
			`if index == (deltas.length - 1)`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`stream_line(deltas[index], finish_reason: "stop_sequence", tool_call: tool_call)`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`else`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`stream_line(deltas[index], tool_call: tool_call)`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`end`
			`end`

DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`chunks = (chunks.join("\n\n") << "data: [DONE]").split("")`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00
			`WebMock`
			`.stub_request(:post, "https://api.openai.com/v1/chat/completions")`
DEV: Tool support for the LLM service. (#366) This PR adds tool support to available LLMs. We'll buffer tool invocations and return them instead of making users of this service parse the response. It also adds support for conversation context in the generic prompt. It includes bot messages, user messages, and tool invocations, which we'll trim to make sure it doesn't exceed the prompt limit, then translate them to the correct dialect. Finally, It adds some buffering when reading chunks to handle cases when streaming is extremely slow.:M 2023-12-18 18:06:01 -03:00			`.with(body: stream_request_body)`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`.to_return(status: 200, body: chunks)`
			`end`

			`it_behaves_like "an endpoint that can communicate with a completion service"`
FIX: under certain conditions we would get duplicate data from llm (#373) Previously endpoint/base would `+=` decoded_chunk to leftover. This could lead to cases where the leftover buffer had duplicate, previously processed data. The fix ensures we properly skip previously decoded data. 2023-12-21 04:28:05 +11:00
			`context "when chunked encoding returns partial chunks" do`
			`# See: https://github.com/bblimke/webmock/issues/629`
			`let(:mock_net_http) do`
			`Class.new(Net::HTTP) do`
			`def request(*)`
			`super do \|response\|`
			`response.instance_eval do`
			`def read_body(*, &)`
			`@body.each(&)`
			`end`
			`end`

			`yield response if block_given?`

			`response`
			`end`
			`end`
			`end`
			`end`

			`let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }`
			`let(:original_http) { remove_original_net_http }`
			`let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }`

			`let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }`
			`let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }`

			`before do`
			`mock_net_http`
			`remove_original_net_http`
			`stub_net_http`
			`end`

			`after do`
			`remove_stubbed_net_http`
			`restore_net_http`
			`end`

			`it "will automatically recover from a bad payload" do`
			`# this should not happen, but lets ensure nothing bad happens`
			`# the row with test1 is invalid json`
			`raw_data = <<~TEXT`
			`d\|a\|t\|a\|:\| \|{\|"choices":[{"delta":{"content":"test,"}}]}`

			`data: {"choices":[{"delta":{"content":"test1,"}}]`

			`data: {"choices":[{"delta":\|{"content":"test2,"}}]}`

			`data: {"choices":[{"delta":{"content":"test3,"}}]\|}`

			`data: {"choices":[{\|"\|d\|elta":{"content":"test4"}}]\|}`

			`data: [D\|ONE]`
			`TEXT`

			`chunks = raw_data.split("\|")`

			`stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(`
			`status: 200,`
			`body: chunks,`
			`)`

			`partials = []`
			`llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")`
			`llm.completion!({ insts: "test" }, Discourse.system_user) { \|partial\| partials << partial }`

			`expect(partials.join).to eq("test,test2,test3,test4")`
			`end`

			`it "supports chunked encoding properly" do`
			`raw_data = <<~TEXT`
			`da\|ta: {"choices":[{"delta":{"content":"test,"}}]}`

			`data: {"choices":[{"delta":{"content":"test1,"}}]}`

			`data: {"choices":[{"delta":\|{"content":"test2,"}}]}`

			`data: {"choices":[{"delta":{"content":"test3,"}}]\|}`

			`data: {"choices":[{\|"\|d\|elta":{"content":"test4"}}]\|}`

			`data: [D\|ONE]`
			`TEXT`

			`chunks = raw_data.split("\|")`

			`stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(`
			`status: 200,`
			`body: chunks,`
			`)`

			`partials = []`
			`llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")`
			`llm.completion!({ insts: "test" }, Discourse.system_user) { \|partial\| partials << partial }`

			`expect(partials.join).to eq("test,test1,test2,test3,test4")`
			`end`
			`end`
REFACTOR: Summarization and HyDE now use an LLM abstraction. (#297) * DEV: One LLM abstraction to rule them all * REFACTOR: HyDE search uses new LLM abstraction * REFACTOR: Summarization uses the LLM abstraction * Updated documentation and made small fixes. Remove Bedrock claude-2 restriction 2023-11-23 12:58:54 -03:00			`end`