# frozen_string_literal: true

require_relative "endpoint_examples"

RSpec.describe DiscourseAi::Completions::Endpoints::OpenAi do
  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::OpenAiTokenizer) }

  let(:model_name) { "gpt-3.5-turbo" }
  let(:generic_prompt) { { insts: "You are a helpful bot.", input: "write 3 words" } }
  let(:dialect) { DiscourseAi::Completions::Dialects::ChatGpt.new(generic_prompt, model_name) }
  let(:prompt) { dialect.translate }

  let(:tool_deltas) do
    [
      { id: "get_weather", name: "get_weather", arguments: {} },
      { id: "get_weather", name: "get_weather", arguments: { location: "" } },
      { id: "get_weather", name: "get_weather", arguments: { location: "Sydney", unit: "c" } },
    ]
  end

  let(:tool_call) do
    { id: "get_weather", name: "get_weather", arguments: { location: "Sydney", unit: "c" } }
  end

  let(:request_body) do
    model
      .default_options
      .merge(messages: prompt)
      .tap do |b|
        b[:tools] = generic_prompt[:tools].map do |t|
          { type: "function", tool: t }
        end if generic_prompt[:tools]
      end
      .to_json
  end

  let(:stream_request_body) do
    model
      .default_options
      .merge(messages: prompt, stream: true)
      .tap do |b|
        b[:tools] = generic_prompt[:tools].map do |t|
          { type: "function", tool: t }
        end if generic_prompt[:tools]
      end
      .to_json
  end

  def response(content, tool_call: false)
    message_content =
      if tool_call
        { tool_calls: [{ function: content }] }
      else
        { content: content }
      end

    {
      id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S",
      object: "chat.completion",
      created: 1_678_464_820,
      model: "gpt-3.5-turbo-0301",
      usage: {
        prompt_tokens: 337,
        completion_tokens: 162,
        total_tokens: 499,
      },
      choices: [
        { message: { role: "assistant" }.merge(message_content), finish_reason: "stop", index: 0 },
      ],
    }
  end

  def stub_response(prompt, response_text, tool_call: false)
    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: request_body)
      .to_return(status: 200, body: JSON.dump(response(response_text, tool_call: tool_call)))
  end

  def stream_line(delta, finish_reason: nil, tool_call: false)
    message_content =
      if tool_call
        { tool_calls: [{ function: delta }] }
      else
        { content: delta }
      end

    +"data: " << {
      id: "chatcmpl-#{SecureRandom.hex}",
      object: "chat.completion.chunk",
      created: 1_681_283_881,
      model: "gpt-3.5-turbo-0301",
      choices: [{ delta: message_content }],
      finish_reason: finish_reason,
      index: 0,
    }.to_json
  end

  def stub_streamed_response(prompt, deltas, tool_call: false)
    chunks =
      deltas.each_with_index.map do |_, index|
        if index == (deltas.length - 1)
          stream_line(deltas[index], finish_reason: "stop_sequence", tool_call: tool_call)
        else
          stream_line(deltas[index], tool_call: tool_call)
        end
      end

    chunks = (chunks.join("\n\n") << "data: [DONE]").split("")

    WebMock
      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
      .with(body: stream_request_body)
      .to_return(status: 200, body: chunks)
  end

  it_behaves_like "an endpoint that can communicate with a completion service"

  context "when chunked encoding returns partial chunks" do
    # See: https://github.com/bblimke/webmock/issues/629
    let(:mock_net_http) do
      Class.new(Net::HTTP) do
        def request(*)
          super do |response|
            response.instance_eval do
              def read_body(*, &)
                @body.each(&)
              end
            end

            yield response if block_given?
            response
          end
        end
      end
    end

    # swap the real Net::HTTP for the subclass above, restoring it after each example
    let(:remove_original_net_http) { Net.send(:remove_const, :HTTP) }
    let(:original_http) { remove_original_net_http }
    let(:stub_net_http) { Net.send(:const_set, :HTTP, mock_net_http) }

    let(:remove_stubbed_net_http) { Net.send(:remove_const, :HTTP) }
    let(:restore_net_http) { Net.send(:const_set, :HTTP, original_http) }

    before do
      mock_net_http
      remove_original_net_http
      stub_net_http
    end

    after do
      remove_stubbed_net_http
      restore_net_http
    end

    it "will automatically recover from a bad payload" do
      # this should not happen, but let's ensure nothing bad happens
      # the row with test1 is invalid JSON
      raw_data = <<~TEXT
        d|a|t|a|:| |{|"choices":[{"delta":{"content":"test,"}}]}

        data: {"choices":[{"delta":{"content":"test1,"}}]

        data: {"choices":[{"delta":|{"content":"test2,"}}]}

        data: {"choices":[{"delta":{"content":"test3,"}}]|}

        data: {"choices":[{|"|d|elta":{"content":"test4"}}]|}

        data: [D|ONE]
      TEXT

      # "|" marks where the body is split, so chunk boundaries land mid-token
      chunks = raw_data.split("|")

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: chunks,
      )

      partials = []
      llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
      llm.completion!({ insts: "test" }, Discourse.system_user) { |partial| partials << partial }

      expect(partials.join).to eq("test,test2,test3,test4")
    end

    it "supports chunked encoding properly" do
      raw_data = <<~TEXT
        da|ta: {"choices":[{"delta":{"content":"test,"}}]}

        data: {"choices":[{"delta":{"content":"test1,"}}]}

        data: {"choices":[{"delta":|{"content":"test2,"}}]}

        data: {"choices":[{"delta":{"content":"test3,"}}]|}

        data: {"choices":[{|"|d|elta":{"content":"test4"}}]|}

        data: [D|ONE]
      TEXT

      chunks = raw_data.split("|")

      stub_request(:post, "https://api.openai.com/v1/chat/completions").to_return(
        status: 200,
        body: chunks,
      )

      partials = []
      llm = DiscourseAi::Completions::Llm.proxy("gpt-3.5-turbo")
      llm.completion!({ insts: "test" }, Discourse.system_user) { |partial| partials << partial }

      expect(partials.join).to eq("test,test1,test2,test3,test4")
    end
  end
end