FEATURE: port to use claude-2 for chat bot (#114)

Claude 1 costs the same as Claude 2 but is less capable, so use Claude 2 everywhere. This also fixes streaming so that it uses the far more efficient streaming protocol.
2023-07-27 11:24:44 +10:00 · 2023-07-27 11:24:44 +10:00 · 4b0c077ce5
parent 2031388f9c
commit 4b0c077ce5
12 changed files with 35 additions and 47 deletions
--- a/config/locales/client.en.yml
+++ b/config/locales/client.en.yml
@ -26,7 +26,7 @@ en:
          gpt-4: "GPT-4"
          gpt-3:
            5-turbo: "GPT-3.5"
-          claude-v1: "Claude V1"
+          claude-2: "Claude 2"


    review:
--- a/config/settings.yml
+++ b/config/settings.yml
@ -181,7 +181,7 @@ plugins:
    choices:
     - gpt-3.5-turbo
     - gpt-4
-     - claude-v1
+     - claude-2
  ai_bot_enabled_chat_commands:
    type: list
    default: "categories|google|image|search|tags|time"
--- a/lib/modules/ai_bot/anthropic_bot.rb
+++ b/lib/modules/ai_bot/anthropic_bot.rb
@ -4,7 +4,7 @@ module DiscourseAi
  module AiBot
    class AnthropicBot < Bot
      def self.can_reply_as?(bot_user)
-        bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID
+        bot_user.id == DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID
      end

      def bot_prompt_with_topic_context(post)
@ -12,7 +12,7 @@ module DiscourseAi
      end

      def prompt_limit
-        7500 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
+        50_000 # https://console.anthropic.com/docs/prompt-design#what-is-a-prompt
      end

      def title_prompt(post)
@ -20,14 +20,7 @@ module DiscourseAi
      end

      def get_delta(partial, context)
-        context[:pos] ||= 0
-
-        full = partial[:completion]
-        delta = full[context[:pos]..-1]
-
-        context[:pos] = full.length
-
-        delta
+        partial[:completion]
      end

      private
@ -45,7 +38,7 @@ module DiscourseAi
      end

      def model_for
-        "claude-v1.3"
+        "claude-2"
      end

      def get_updated_title(prompt)
--- a/lib/modules/ai_bot/entry_point.rb
+++ b/lib/modules/ai_bot/entry_point.rb
@ -5,12 +5,8 @@ module DiscourseAi
    class EntryPoint
      GPT4_ID = -110
      GPT3_5_TURBO_ID = -111
-      CLAUDE_V1_ID = -112
-      BOTS = [
-        [GPT4_ID, "gpt4_bot"],
-        [GPT3_5_TURBO_ID, "gpt3.5_bot"],
-        [CLAUDE_V1_ID, "claude_v1_bot"],
-      ]
+      CLAUDE_V2_ID = -112
+      BOTS = [[GPT4_ID, "gpt4_bot"], [GPT3_5_TURBO_ID, "gpt3.5_bot"], [CLAUDE_V2_ID, "claude_bot"]]

      def self.map_bot_model_to_user_id(model_name)
        case model_name
@ -18,8 +14,8 @@ module DiscourseAi
          GPT3_5_TURBO_ID
        in "gpt-4"
          GPT4_ID
-        in "claude-v1"
-          CLAUDE_V1_ID
+        in "claude-2"
+          CLAUDE_V2_ID
        else
          nil
        end
--- a/lib/modules/summarization/entry_point.rb
+++ b/lib/modules/summarization/entry_point.rb
@ -20,8 +20,6 @@ module DiscourseAi
          Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
          Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
-          Models::Anthropic.new("claude-v1", max_tokens: 9000),
-          Models::Anthropic.new("claude-v1-100k", max_tokens: 100_000),
          Models::Anthropic.new("claude-2", max_tokens: 100_000),
        ]

--- a/lib/shared/inference/anthropic_completions.rb
+++ b/lib/shared/inference/anthropic_completions.rb
@ -16,8 +16,9 @@ module ::DiscourseAi
      )
        url = URI("https://api.anthropic.com/v1/complete")
        headers = {
+          "anthropic-version" => "2023-06-01",
          "x-api-key" => SiteSetting.ai_anthropic_api_key,
-          "Content-Type" => "application/json",
+          "content-type" => "application/json",
        }

        payload = { model: model, prompt: prompt }
@ -85,17 +86,18 @@ module ::DiscourseAi
                  .split("\n")
                  .each do |line|
                    data = line.split("data: ", 2)[1]
-                    next if !data || data.squish == "[DONE]"
+                    next if !data

                    if !cancelled
                      begin
-                        # partial contains the entire payload till now
                        partial = JSON.parse(data, symbolize_names: true)
-                        response_data = partial[:completion].to_s
+                        response_data << partial[:completion].to_s

-                        yield partial, cancel
+                        # ping events carry no completion data... do not yield them
+                        yield partial, cancel if partial[:completion]
                      rescue JSON::ParserError
                        nil
+                        # TODO: carry a leftover (incomplete) chunk over to the next one
                      end
                    end
                  end
--- a/spec/lib/modules/ai_bot/anthropic_bot_spec.rb
+++ b/spec/lib/modules/ai_bot/anthropic_bot_spec.rb
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::AiBot::AnthropicBot do
        reply << subject.get_delta({ completion: "Hello " }, context)
        expect(reply).to eq("Hello ")

-        reply << subject.get_delta({ completion: "Hello world" }, context)
+        reply << subject.get_delta({ completion: "world" }, context)
        expect(reply).to eq("Hello world")
      end
    end
--- a/spec/lib/modules/ai_bot/entry_point_spec.rb
+++ b/spec/lib/modules/ai_bot/entry_point_spec.rb
@ -57,7 +57,7 @@ RSpec.describe DiscourseAi::AiBot::EntryPoint do
      end

      it "includes the bot's user_id" do
-        claude_bot = User.find(described_class::CLAUDE_V1_ID)
+        claude_bot = User.find(described_class::CLAUDE_V2_ID)
        claude_post_attrs = post_args.merge(target_usernames: [claude_bot.username].join(","))

        expect { PostCreator.create!(admin, claude_post_attrs) }.to change(
--- a/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb
+++ b/spec/lib/modules/ai_bot/jobs/regular/create_ai_reply_spec.rb
@ -78,12 +78,12 @@ RSpec.describe Jobs::CreateAiReply do
      let(:deltas) { claude_response.split(" ").map { |w| "#{w} " } }

      before do
-        bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID)
+        bot_user = User.find(DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID)

        AnthropicCompletionStubs.stub_streamed_response(
          DiscourseAi::AiBot::AnthropicBot.new(bot_user).bot_prompt_with_topic_context(post),
          deltas,
-          model: "claude-v1.3",
+          model: "claude-2",
          req_opts: {
            max_tokens_to_sample: 3000,
            temperature: 0.4,
@ -95,7 +95,7 @@ RSpec.describe Jobs::CreateAiReply do
      it "adds a reply from the Claude bot" do
        subject.execute(
          post_id: topic.first_post.id,
-          bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V1_ID,
+          bot_user_id: DiscourseAi::AiBot::EntryPoint::CLAUDE_V2_ID,
        )

        expect(topic.posts.last.raw).to eq(expected_response)
--- a/spec/lib/modules/summarization/models/anthropic_spec.rb
+++ b/spec/lib/modules/summarization/models/anthropic_spec.rb
@ -5,7 +5,7 @@ require_relative "../../../../support/anthropic_completion_stubs"
 RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }

-  let(:model_name) { "claude-v1" }
+  let(:model_name) { "claude-2" }
  let(:max_tokens) { 720 }

  let(:content) do
--- a/spec/shared/inference/anthropic_completions_spec.rb
+++ b/spec/shared/inference/anthropic_completions_spec.rb
@ -16,7 +16,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
    completions =
      DiscourseAi::Inference::AnthropicCompletions.perform!(
        prompt,
-        "claude-v1",
+        "claude-2",
        temperature: req_opts[:temperature],
        max_tokens: req_opts[:max_tokens_to_sample],
        user_id: user_id,
@ -27,7 +27,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
    expect(AiApiAuditLog.count).to eq(1)
    log = AiApiAuditLog.first

-    request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
+    request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json
    response_body = AnthropicCompletionStubs.response(response_text).to_json

    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
@ -47,11 +47,11 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do

    DiscourseAi::Inference::AnthropicCompletions.perform!(
      prompt,
-      "claude-v1",
+      "claude-2",
      max_tokens: req_opts[:max_tokens_to_sample],
    ) do |partial, cancel|
      data = partial[:completion]
-      content = data if data
+      content << data if data
      cancel.call if content.split(" ").length == 2
    end

@ -60,7 +60,7 @@ RSpec.describe DiscourseAi::Inference::AnthropicCompletions do
    expect(AiApiAuditLog.count).to eq(1)
    log = AiApiAuditLog.first

-    request_body = { model: "claude-v1", prompt: prompt }.merge(req_opts).to_json
+    request_body = { model: "claude-2", prompt: prompt }.merge(req_opts).to_json

    expect(log.provider_id).to eq(AiApiAuditLog::Provider::Anthropic)
    expect(log.request_tokens).to eq(6)
--- a/spec/support/anthropic_completion_stubs.rb
+++ b/spec/support/anthropic_completion_stubs.rb
@ -9,7 +9,7 @@ class AnthropicCompletionStubs
        stop_reason: "stop_sequence",
        truncated: false,
        log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
-        model: "claude-v1",
+        model: "claude-2",
        exception: nil,
      }
    end
@ -18,7 +18,7 @@ class AnthropicCompletionStubs
      WebMock
        .stub_request(:post, "https://api.anthropic.com/v1/complete")
        .with(
-          body: { model: "claude-v1", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
+          body: { model: "claude-2", prompt: prompt, max_tokens_to_sample: 2000 }.merge(
            req_opts,
          ).to_json,
        )
@ -32,7 +32,7 @@ class AnthropicCompletionStubs
        stop_reason: finish_reason,
        truncated: false,
        log_id: "12b029451c6d18094d868bc04ce83f63",
-        model: "claude-v1",
+        model: "claude-2",
        exception: nil,
      }.to_json
    end
@ -41,18 +41,17 @@ class AnthropicCompletionStubs
      chunks =
        deltas.each_with_index.map do |_, index|
          if index == (deltas.length - 1)
-            stream_line(deltas.join(""), finish_reason: "stop_sequence")
+            stream_line(deltas[index], finish_reason: "stop_sequence")
          else
-            stream_line(deltas[0..index].join(""))
+            stream_line(deltas[index])
          end
        end

-      chunks << "[DONE]"
      chunks = chunks.join("\n\n")

      WebMock
        .stub_request(:post, "https://api.anthropic.com/v1/complete")
-        .with(body: { model: model || "claude-v1", prompt: prompt }.merge(req_opts).to_json)
+        .with(body: { model: model || "claude-2", prompt: prompt }.merge(req_opts).to_json)
        .to_return(status: 200, body: chunks)
    end
  end