discourse-ai/spec/lib/completions/endpoints/ollama_spec.rb

# frozen_string_literal: true

require_relative "endpoint_compliance"

# Test double for the Ollama chat endpoint. It builds canned Ollama-style
# payloads and registers WebMock stubs that serve them, so specs can exercise
# the endpoint without a running Ollama server.
class OllamaMock < EndpointMock
  # URL every stub in this mock is registered against.
  OLLAMA_CHAT_URL = "http://api.ollama.ai/api/chat"

  # Fixed model name and timestamp reported in every canned payload.
  OLLAMA_MODEL_NAME = "llama3.1"
  OLLAMA_CREATED_AT = "2024-09-25T06:47:21.283028Z"

  # Fields that close a finished message. Shared by the non-streamed response
  # and by the terminating object of a streamed response so the two cannot
  # drift apart.
  OLLAMA_DONE_FIELDS = {
    done: true,
    done_reason: "stop",
    total_duration: 7_639_718_541,
    load_duration: 299_886_663,
    prompt_eval_count: 18,
    prompt_eval_duration: 220_447_000,
    eval_count: 18,
    eval_duration: 220_447_000,
  }.freeze

  # Builds a complete (non-streamed) response payload.
  #
  # content   - the assistant text, or, when tool_call is true, the tool call
  #             object to place in the message's tool_calls list.
  # tool_call - when true the message has empty text and a single tool call.
  #
  # Returns a new Hash with symbol keys.
  def response(content, tool_call: false)
    message = { role: "assistant" }

    if tool_call
      message[:content] = ""
      message[:tool_calls] = [content]
    else
      message[:content] = content
    end

    { created_at: OLLAMA_CREATED_AT, model: OLLAMA_MODEL_NAME, message: message }.merge(
      OLLAMA_DONE_FIELDS,
    )
  end

  # Stubs a POST to the chat URL whose body equals request_body(prompt, ...)
  # and answers it with the JSON-encoded #response payload.
  def stub_response(prompt, response_text, tool_call: false)
    WebMock
      .stub_request(:post, OLLAMA_CHAT_URL)
      .with(body: request_body(prompt, tool_call: tool_call))
      .to_return(status: 200, body: JSON.dump(response(response_text, tool_call: tool_call)))
  end

  # Builds one in-progress (done: false) streamed message carrying delta as
  # its content.
  #
  # Returns the JSON String for that message.
  def stream_line(delta)
    payload = {
      model: OLLAMA_MODEL_NAME,
      created_at: OLLAMA_CREATED_AT,
      message: {
        role: "assistant",
        content: delta,
      },
      done: false,
    }

    # Unary plus hands back an unfrozen string, as the original code did.
    +(payload.to_json)
  end

  # Stubs every POST to the chat URL (no body matching) and answers it with
  # the given chunks as the response body.
  def stub_raw(chunks)
    WebMock.stub_request(:post, OLLAMA_CHAT_URL).to_return(status: 200, body: chunks)
  end

  # Stubs a POST to the chat URL whose body equals request_body(prompt) and
  # answers it with a streamed-style body: one #stream_line per delta followed
  # by a terminating done message, handed to WebMock as an array of single
  # characters.
  #
  # Yields (with no arguments) after the stub is registered, if a block is
  # given.
  def stub_streamed_response(prompt, deltas)
    lines = deltas.map { |delta| stream_line(delta) }

    final_line =
      {
        model: OLLAMA_MODEL_NAME,
        created_at: OLLAMA_CREATED_AT,
        message: {
          role: "assistant",
          content: "",
        },
      }.merge(OLLAMA_DONE_FIELDS).to_json

    # NOTE(review): the terminating object is appended directly after the last
    # delta line, without a separator. That matches the bytes this mock has
    # always produced; confirm the decoder's expectations before changing it.
    chunks = (lines.join("\n\n") << final_line).chars

    WebMock
      .stub_request(:post, OLLAMA_CHAT_URL)
      .with(body: request_body(prompt))
      .to_return(status: 200, body: chunks)

    yield if block_given?
  end

  # Canned tool call object: a get_weather invocation for Sydney in unit "c".
  def tool_response
    { function: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } } }
  end

  # Tool definition placed in the request body when tool_call is requested.
  # The strings here are part of the body that #stub_response matches against
  # (via #request_body), so they are kept exactly as they are.
  def tool_payload
    {
      type: "function",
      function: {
        name: "get_weather",
        description: "Get the weather in a city",
        parameters: {
          type: "object",
          properties: {
            location: {
              type: "string",
              description: "the city name",
            },
            unit: {
              type: "string",
              description: "the unit of measurement celcius c or fahrenheit f",
              enum: %w[c f],
            },
          },
          required: %w[location unit],
        },
      },
    }
  end

  # Builds the JSON request body the stubs expect: the model's default options
  # merged with the prompt messages, stream set to false, and the tool
  # definition added when tool_call is true.
  #
  # Returns a JSON String.
  def request_body(prompt, tool_call: false)
    body = model.default_options.merge(messages: prompt)
    body[:stream] = false
    body[:tools] = [tool_payload] if tool_call
    body.to_json
  end
end

# Runs the shared endpoint compliance scenarios against the Ollama endpoint,
# with HTTP traffic served by OllamaMock.
RSpec.describe DiscourseAi::Completions::Endpoints::Ollama do
  subject(:endpoint) { described_class.new(model) }

  fab!(:user)
  fab!(:model) { Fabricate(:ollama_model) }

  # Mock bound to the endpoint under test; the compliance helpers receive it.
  let(:ollama_mock) { OllamaMock.new(endpoint) }

  # Compliance harness built from this example, the endpoint, the Ollama
  # dialect class and the fabricated user.
  let(:compliance) do
    dialect_klass = DiscourseAi::Completions::Dialects::Ollama
    EndpointsCompliance.new(self, endpoint, dialect_klass, user)
  end

  describe "#perform_completion!" do
    context "when using regular mode" do
      it("completes a trivial prompt and logs the response") do
        compliance.regular_mode_simple_prompt(ollama_mock)
      end
    end

    context "with tools" do
      it("returns a function invocation") { compliance.regular_mode_tools(ollama_mock) }
    end
  end

  describe "when using streaming mode" do
    context "with simple prompts" do
      it("completes a trivial prompt and logs the response") do
        compliance.streaming_mode_simple_prompt(ollama_mock)
      end
    end
  end
end
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`# frozen_string_literal: true`

			`require_relative "endpoint_compliance"`

			`class OllamaMock < EndpointMock`
FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00			`def response(content, tool_call: false)`
			`message_content =`
			`if tool_call`
			`{ content: "", tool_calls: [content] }`
			`else`
			`{ content: content }`
			`end`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00
			`{`
			`created_at: "2024-09-25T06:47:21.283028Z",`
			`model: "llama3.1",`
			`message: { role: "assistant" }.merge(message_content),`
			`done: true,`
			`done_reason: "stop",`
			`total_duration: 7_639_718_541,`
			`load_duration: 299_886_663,`
			`prompt_eval_count: 18,`
			`prompt_eval_duration: 220_447_000,`
			`eval_count: 18,`
			`eval_duration: 220_447_000,`
			`}`
			`end`

FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00			`def stub_response(prompt, response_text, tool_call: false)`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`WebMock`
			`.stub_request(:post, "http://api.ollama.ai/api/chat")`
FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00			`.with(body: request_body(prompt, tool_call: tool_call))`
			`.to_return(status: 200, body: JSON.dump(response(response_text, tool_call: tool_call)))`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`end`

			`def stream_line(delta)`
			`message_content = { content: delta }`

			`+{`
			`model: "llama3.1",`
			`created_at: "2024-09-25T06:47:21.283028Z",`
			`message: { role: "assistant" }.merge(message_content),`
			`done: false,`
			`}.to_json`
			`end`

			`def stub_raw(chunks)`
			`WebMock.stub_request(:post, "http://api.ollama.ai/api/chat").to_return(`
			`status: 200,`
			`body: chunks,`
			`)`
			`end`

			`def stub_streamed_response(prompt, deltas)`
			`chunks = deltas.each_with_index.map { \|_, index\| stream_line(deltas[index]) }`

			`chunks =`
			`(`
			`chunks.join("\n\n") << {`
			`model: "llama3.1",`
			`created_at: "2024-09-25T06:47:21.283028Z",`
			`message: {`
			`role: "assistant",`
			`content: "",`
			`},`
			`done: true,`
			`done_reason: "stop",`
			`total_duration: 7_639_718_541,`
			`load_duration: 299_886_663,`
			`prompt_eval_count: 18,`
			`prompt_eval_duration: 220_447_000,`
			`eval_count: 18,`
			`eval_duration: 220_447_000,`
			`}.to_json`
			`).split("")`

			`WebMock`
			`.stub_request(:post, "http://api.ollama.ai/api/chat")`
FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00			`.with(body: request_body(prompt))`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`.to_return(status: 200, body: chunks)`

			`yield if block_given?`
			`end`

FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00			`def tool_response`
			`{ function: { name: "get_weather", arguments: { location: "Sydney", unit: "c" } } }`
			`end`

			`def tool_payload`
			`{`
			`type: "function",`
			`function: {`
			`name: "get_weather",`
			`description: "Get the weather in a city",`
			`parameters: {`
			`type: "object",`
			`properties: {`
			`location: {`
			`type: "string",`
			`description: "the city name",`
			`},`
			`unit: {`
			`type: "string",`
			`description: "the unit of measurement celcius c or fahrenheit f",`
			`enum: %w[c f],`
			`},`
			`},`
			`required: %w[location unit],`
			`},`
			`},`
			`}`
			`end`

			`def request_body(prompt, tool_call: false)`
			`model`
			`.default_options`
			`.merge(messages: prompt)`
			`.tap do \|b\|`
			`b[:stream] = false`
			`b[:tools] = [tool_payload] if tool_call`
			`end`
			`.to_json`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`end`
			`end`

			`RSpec.describe DiscourseAi::Completions::Endpoints::Ollama do`
			`subject(:endpoint) { described_class.new(model) }`

			`fab!(:user)`
			`fab!(:model) { Fabricate(:ollama_model) }`

			`let(:ollama_mock) { OllamaMock.new(endpoint) }`

			`let(:compliance) do`
			`EndpointsCompliance.new(self, endpoint, DiscourseAi::Completions::Dialects::Ollama, user)`
			`end`

			`describe "#perform_completion!" do`
			`context "when using regular mode" do`
			`it "completes a trivial prompt and logs the response" do`
			`compliance.regular_mode_simple_prompt(ollama_mock)`
			`end`
			`end`
FEATURE: Tools for models from Ollama provider (#819) Adds support for Ollama function calling 2024-10-10 16:25:53 -04:00
			`context "with tools" do`
			`it "returns a function invocation" do`
			`compliance.regular_mode_tools(ollama_mock)`
			`end`
			`end`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`end`

			`describe "when using streaming mode" do`
FEATURE: improve tool support (#904) This re-implements tool support in DiscourseAi::Completions::Llm #generate Previously tool support was always returned via XML and it would be the responsibility of the caller to parse XML New implementation has the endpoints return ToolCall objects. Additionally this simplifies the Llm endpoint interface and gives it more clarity. Llms must implement decode, decode_chunk (for streaming) It is the implementers responsibility to figure out how to decode chunks, base no longer implements. To make this easy we ship a flexible json decoder which is easy to wire up. Also (new) Better debugging for PMs, we now have a next / previous button to see all the Llm messages associated with a PM Token accounting is fixed for vllm (we were not correctly counting tokens) 2024-11-11 16:14:30 -05:00			`context "with simple prompts" do`
FEATURE: Add Ollama provider (#812) This allows our users to add the Ollama provider and use it to serve our AI bot (completion/dialect). In this PR, we introduce: DiscourseAi::Completions::Dialects::Ollama which would help us translate by utilizing Completions::Endpoint::Ollama Correct extract_completion_from and partials_from in Endpoints::Ollama Also Add tests for Endpoints::Ollama Introduce ollama_model fabricator 2024-09-30 20:45:03 -04:00			`it "completes a trivial prompt and logs the response" do`
			`compliance.streaming_mode_simple_prompt(ollama_mock)`
			`end`
			`end`
			`end`
			`end`