# frozen_string_literal: true
RSpec.describe DiscourseAi::Completions::Llm do
subject(:llm) do
|
|
|
|
described_class.new(
|
2023-12-18 16:06:01 -05:00
|
|
|
DiscourseAi::Completions::Dialects::OrcaStyle,
|
2023-11-23 10:58:54 -05:00
|
|
|
canned_response,
|
|
|
|
"Upstage-Llama-2-*-instruct-v2",
|
|
|
|
)
|
|
|
|
end
|
|
|
|
|
|
|
|
fab!(:user) { Fabricate(:user) }
|
|
|
|
|
|
|
|
describe ".proxy" do
|
|
|
|
it "raises an exception when we can't proxy the model" do
|
|
|
|
fake_model = "unknown_v2"
|
|
|
|
|
|
|
|
expect { described_class.proxy(fake_model) }.to(
|
2023-11-28 23:17:46 -05:00
|
|
|
raise_error(DiscourseAi::Completions::Llm::UNKNOWN_MODEL),
|
2023-11-23 10:58:54 -05:00
|
|
|
)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2024-01-10 23:56:40 -05:00
|
|
|
describe "#generate with fake model" do
|
|
|
|
before do
|
|
|
|
DiscourseAi::Completions::Endpoints::Fake.delays = []
|
|
|
|
DiscourseAi::Completions::Endpoints::Fake.chunk_count = 10
|
|
|
|
end
|
|
|
|
|
|
|
|
let(:llm) { described_class.proxy("fake") }
|
|
|
|
|
|
|
|
it "can generate a response" do
|
|
|
|
response = llm.generate({ input: "fake prompt" }, user: user)
|
|
|
|
expect(response).to be_present
|
|
|
|
end
|
|
|
|
|
|
|
|
it "can generate content via a block" do
|
|
|
|
partials = []
|
|
|
|
response =
|
|
|
|
llm.generate({ input: "fake prompt" }, user: user) { |partial| partials << partial }
|
|
|
|
|
|
|
|
expect(partials.length).to eq(10)
|
|
|
|
expect(response).to eq(DiscourseAi::Completions::Endpoints::Fake.fake_content)
|
|
|
|
|
|
|
|
expect(partials.join).to eq(response)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
2024-01-04 07:53:47 -05:00
|
|
|
describe "#generate" do
let(:prompt) do
|
|
|
|
{
|
|
|
|
insts: <<~TEXT,
|
|
|
|
I want you to act as a title generator for written pieces. I will provide you with a text,
|
|
|
|
and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
|
|
|
|
and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
|
|
|
|
TEXT
|
|
|
|
input: <<~TEXT,
|
|
|
|
Here is the text, inside <input></input> XML tags:
|
|
|
|
<input>
|
|
|
|
To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
|
|
|
|
discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
|
|
|
|
defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
|
|
|
|
</input>
|
|
|
|
TEXT
|
|
|
|
post_insts:
|
|
|
|
"Please put the translation between <ai></ai> tags and separate each title with a comma.",
|
|
|
|
}
|
|
|
|
end
|
|
|
|
|
|
|
|
let(:canned_response) do
|
|
|
|
DiscourseAi::Completions::Endpoints::CannedResponse.new(
|
|
|
|
[
|
|
|
|
"<ai>The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.</ai>",
|
|
|
|
],
|
|
|
|
)
|
|
|
|
end
|
|
|
|
|
|
|
|
context "when getting the full response" do
|
|
|
|
it "processes the prompt and return the response" do
|
2024-01-04 07:53:47 -05:00
|
|
|
llm_response = llm.generate(prompt, user: user)
|
2023-11-23 10:58:54 -05:00
|
|
|
|
|
|
|
expect(llm_response).to eq(canned_response.responses[0])
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
context "when getting a streamed response" do
|
|
|
|
it "processes the prompt and call the given block with the partial response" do
|
|
|
|
llm_response = +""
|
|
|
|
|
2024-01-04 07:53:47 -05:00
|
|
|
llm.generate(prompt, user: user) { |partial, cancel_fn| llm_response << partial }
|
2023-11-23 10:58:54 -05:00
|
|
|
|
|
|
|
expect(llm_response).to eq(canned_response.responses[0])
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
end