diff --git a/lib/completions/dialects/chat_gpt.rb b/lib/completions/dialects/chat_gpt.rb
new file mode 100644
index 00000000..1f4166be
--- /dev/null
+++ b/lib/completions/dialects/chat_gpt.rb
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Dialects
+      class ChatGPT
+        def self.can_translate?(model_name)
+          %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+        end
+
+        def translate(generic_prompt)
+          open_ai_prompt = [
+            {
+              role: "system",
+              content: [generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n"),
+            },
+          ]
+
+          if generic_prompt[:examples]
+            generic_prompt[:examples].each do |example_pair|
+              open_ai_prompt << { role: "user", content: example_pair.first }
+              open_ai_prompt << { role: "assistant", content: example_pair.second }
+            end
+          end
+
+          open_ai_prompt << { role: "user", content: generic_prompt[:input] }
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::OpenAiTokenizer
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/dialects/claude.rb b/lib/completions/dialects/claude.rb
new file mode 100644
index 00000000..07438985
--- /dev/null
+++ b/lib/completions/dialects/claude.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Dialects
+      class Claude
+        def self.can_translate?(model_name)
+          %w[claude-instant-1 claude-2].include?(model_name)
+        end
+
+        def translate(generic_prompt)
+          claude_prompt = +"Human: #{generic_prompt[:insts]}\n"
+
+          claude_prompt << build_examples(generic_prompt[:examples]) if generic_prompt[:examples]
+
+          claude_prompt << "#{generic_prompt[:input]}\n"
+
+          claude_prompt << "#{generic_prompt[:post_insts]}\n" if generic_prompt[:post_insts]
+
+          claude_prompt << "Assistant:\n"
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::AnthropicTokenizer
+        end
+
+        private
+
+        def build_examples(examples_arr)
+          examples_arr.reduce("") do |memo, example|
+            memo += "\nH: #{example[0]}\nA: #{example[1]}\n\n"
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/dialects/llama2_classic.rb b/lib/completions/dialects/llama2_classic.rb
new file mode 100644
index 00000000..b6c58c8b
--- /dev/null
+++ b/lib/completions/dialects/llama2_classic.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Dialects
+      class Llama2Classic
+        def self.can_translate?(model_name)
+          "Llama2-*-chat-hf" == model_name
+        end
+
+        def translate(generic_prompt)
+          llama2_prompt =
+            +"[INST]<<SYS>>#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}<</SYS>>[/INST]\n"
+
+          if generic_prompt[:examples]
+            generic_prompt[:examples].each do |example_pair|
+              llama2_prompt << "[INST]#{example_pair.first}[/INST]\n"
+              llama2_prompt << "#{example_pair.second}\n"
+            end
+          end
+
+          llama2_prompt << "[INST]#{generic_prompt[:input]}[/INST]\n"
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::Llama2Tokenizer
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/dialects/orca_style.rb b/lib/completions/dialects/orca_style.rb
new file mode 100644
index 00000000..3aa11609
--- /dev/null
+++ b/lib/completions/dialects/orca_style.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Dialects
+      class OrcaStyle
+        def self.can_translate?(model_name)
+          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2].include?(model_name)
+        end
+
+        def translate(generic_prompt)
+          orca_style_prompt =
+            +"### System:\n#{[generic_prompt[:insts], generic_prompt[:post_insts].to_s].join("\n")}\n"
+
+          if generic_prompt[:examples]
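+            # Each example pair becomes a "### User:"/"### Assistant:" exchange.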
+            generic_prompt[:examples].each do |example_pair|
+              orca_style_prompt << "### User:\n#{example_pair.first}\n"
+              orca_style_prompt << "### Assistant:\n#{example_pair.second}\n"
+            end
+          end
+
+          orca_style_prompt << "### User:\n#{generic_prompt[:input]}\n"
+
+          orca_style_prompt << "### Assistant:\n"
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::Llama2Tokenizer
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/anthropic.rb b/lib/completions/endpoints/anthropic.rb
new file mode 100644
index 00000000..5216d4e7
--- /dev/null
+++ b/lib/completions/endpoints/anthropic.rb
@@ -0,0 +1,52 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class Anthropic < Base
+        def self.can_contact?(model_name)
+          %w[claude-instant-1 claude-2].include?(model_name)
+        end
+
+        def default_options
+          { max_tokens_to_sample: 2000, model: model }
+        end
+
+        def provider_id
+          AiApiAuditLog::Provider::Anthropic
+        end
+
+        private
+
+        def model_uri
+          @uri ||= URI("https://api.anthropic.com/v1/complete")
+        end
+
+        def prepare_payload(prompt, model_params)
+          default_options
+            .merge(model_params)
+            .merge(prompt: prompt)
+            .tap { |payload| payload[:stream] = true if @streaming_mode }
+        end
+
+        def prepare_request(payload)
+          headers = {
+            "anthropic-version" => "2023-06-01",
+            "x-api-key" => SiteSetting.ai_anthropic_api_key,
+            "content-type" => "application/json",
+          }
+
+          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+        end
+
+        def extract_completion_from(response_raw)
+          JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
+        end
+
+        def partials_from(decoded_chunk)
+          decoded_chunk.split("\n").map { |line| line.split("data: ", 2)[1] }.compact
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/aws_bedrock.rb b/lib/completions/endpoints/aws_bedrock.rb
new file mode 100644
index 00000000..09a8d6d0
--- /dev/null
+++ b/lib/completions/endpoints/aws_bedrock.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class AwsBedrock < Base
+        def self.can_contact?(model_name)
+          SiteSetting.ai_bedrock_access_key_id.present? &&
+            SiteSetting.ai_bedrock_secret_access_key.present? &&
+            SiteSetting.ai_bedrock_region.present?
+        end
+
+        def default_options
+          { max_tokens_to_sample: 20_000 }
+        end
+
+        def provider_id
+          AiApiAuditLog::Provider::Anthropic
+        end
+
+        private
+
+        def model_uri
+          api_url =
+            "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model}/invoke"
+
+          api_url = @streaming_mode ? (api_url + "-with-response-stream") : api_url
+
+          URI(api_url)
+        end
+
+        def prepare_payload(prompt, model_params)
+          default_options.merge(prompt: prompt).merge(model_params)
+        end
+
+        def prepare_request(payload)
+          headers = { "content-type" => "application/json", "Accept" => "*/*" }
+
+          signer =
+            Aws::Sigv4::Signer.new(
+              access_key_id: SiteSetting.ai_bedrock_access_key_id,
+              region: SiteSetting.ai_bedrock_region,
+              secret_access_key: SiteSetting.ai_bedrock_secret_access_key,
+              service: "bedrock",
+            )
+
+          Net::HTTP::Post
+            .new(model_uri, headers)
+            .tap do |r|
+              r.body = payload
+
+              signed_request =
+                signer.sign_request(req: r, http_method: r.method, url: model_uri, body: r.body)
+
+              r.initialize_http_header(headers.merge(signed_request.headers))
+            end
+        end
+
+        def decode(chunk)
+          Aws::EventStream::Decoder
+            .new
+            .decode_chunk(chunk)
+            .first
+            .payload
+            .string
+            .then { JSON.parse(_1) }
+            .dig("bytes")
+            .then { Base64.decode64(_1) }
+        rescue JSON::ParserError,
+               Aws::EventStream::Errors::MessageChecksumError,
+               Aws::EventStream::Errors::PreludeChecksumError => e
+          Rails.logger.error("#{self.class.name}: #{e.message}")
+          nil
+        end
+
+        def extract_completion_from(response_raw)
+          JSON.parse(response_raw, symbolize_names: true)[:completion].to_s
+        end
+
+        def partials_from(decoded_chunk)
+          [decoded_chunk]
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/base.rb b/lib/completions/endpoints/base.rb
new file mode 100644
index 00000000..bfe9c741
--- /dev/null
+++ b/lib/completions/endpoints/base.rb
@@ -0,0 +1,167 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class Base
+        CompletionFailed = Class.new(StandardError)
+        TIMEOUT = 60
+
+        def self.endpoint_for(model_name)
+          # Order is important.
+          # Bedrock has priority over Anthropic if credentials are present.
+          [
+            DiscourseAi::Completions::Endpoints::AwsBedrock,
+            DiscourseAi::Completions::Endpoints::Anthropic,
+            DiscourseAi::Completions::Endpoints::OpenAI,
+            DiscourseAi::Completions::Endpoints::Huggingface,
+          ].detect(-> { raise DiscourseAi::Completions::LLM::UNKNOWN_MODEL }) do |ek|
+            ek.can_contact?(model_name)
+          end
+        end
+
+        def self.can_contact?(_model_name)
+          raise NotImplementedError
+        end
+
+        def initialize(model_name, tokenizer)
+          @model = model_name
+          @tokenizer = tokenizer
+        end
+
+        def perform_completion!(prompt, user, model_params = {})
+          @streaming_mode = block_given?
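+          # When a block is given we stream partials to it as chunks arrive;
+          # otherwise we block until the provider returns the full response.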
+
+          Net::HTTP.start(
+            model_uri.host,
+            model_uri.port,
+            use_ssl: true,
+            read_timeout: TIMEOUT,
+            open_timeout: TIMEOUT,
+            write_timeout: TIMEOUT,
+          ) do |http|
+            response_data = +""
+            response_raw = +""
+            request_body = prepare_payload(prompt, model_params).to_json
+
+            request = prepare_request(request_body)
+
+            http.request(request) do |response|
+              if response.code.to_i != 200
+                Rails.logger.error(
+                  "#{self.class.name}: status: #{response.code.to_i} - body: #{response.body}",
+                )
+                raise CompletionFailed
+              end
+
+              log =
+                AiApiAuditLog.new(
+                  provider_id: provider_id,
+                  user_id: user.id,
+                  raw_request_payload: request_body,
+                  request_tokens: prompt_size(prompt),
+                )
+
+              if !@streaming_mode
+                response_raw = response.read_body
+                response_data = extract_completion_from(response_raw)
+
+                return response_data
+              end
+
+              begin
+                cancelled = false
+                cancel = lambda { cancelled = true }
+
+                leftover = ""
+
+                response.read_body do |chunk|
+                  if cancelled
+                    http.finish
+                    return
+                  end
+
+                  decoded_chunk = decode(chunk)
+                  response_raw << decoded_chunk
+
+                  partials_from(leftover + decoded_chunk).each do |raw_partial|
+                    next if cancelled
+                    next if raw_partial.blank?
+
+                    begin
+                      partial = extract_completion_from(raw_partial)
+                      leftover = ""
+                      response_data << partial
+
+                      yield partial, cancel if partial
+                    rescue JSON::ParserError
+                      leftover = raw_partial
+                    end
+                  end
+                end
+              rescue IOError, StandardError
+                raise if !cancelled
+              end
+
+              return response_data
+            ensure
+              log.raw_response_payload = response_raw
+              log.response_tokens = tokenizer.size(response_data)
+              log.save!
+
+              if Rails.env.development? && log
+                puts "#{self.class.name}: request_tokens #{log.request_tokens} response_tokens #{log.response_tokens}"
+              end
+            end
+          end
+        end
+
+        def default_options
+          raise NotImplementedError
+        end
+
+        def provider_id
+          raise NotImplementedError
+        end
+
+        def prompt_size(prompt)
+          tokenizer.size(extract_prompt_for_tokenizer(prompt))
+        end
+
+        attr_reader :tokenizer
+
+        protected
+
+        attr_reader :model
+
+        def model_uri
+          raise NotImplementedError
+        end
+
+        def prepare_payload(_prompt, _model_params)
+          raise NotImplementedError
+        end
+
+        def prepare_request(_payload)
+          raise NotImplementedError
+        end
+
+        def extract_completion_from(_response_raw)
+          raise NotImplementedError
+        end
+
+        def decode(chunk)
+          chunk
+        end
+
+        def partials_from(_decoded_chunk)
+          raise NotImplementedError
+        end
+
+        def extract_prompt_for_tokenizer(prompt)
+          prompt
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/canned_response.rb b/lib/completions/endpoints/canned_response.rb
new file mode 100644
index 00000000..2bdf7226
--- /dev/null
+++ b/lib/completions/endpoints/canned_response.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class CannedResponse
+        CANNED_RESPONSE_ERROR = Class.new(StandardError)
+
+        def self.can_contact?(_)
+          Rails.env.test?
+        end
+
+        def initialize(responses)
+          @responses = responses
+          @completions = 0
+        end
+
+        attr_reader :responses, :completions
+
+        def perform_completion!(_prompt, _user, _model_params)
+          response = responses[completions]
+          if response.nil?
+            raise CANNED_RESPONSE_ERROR,
+                  "The number of completions you requested exceeds the number of canned responses"
+          end
+
+          @completions += 1
+          if block_given?
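+            # Simulate streaming by yielding the canned response one character at a time.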
+            cancelled = false
+            cancel_fn = lambda { cancelled = true }
+
+            response.each_char do |char|
+              break if cancelled
+              yield(char, cancel_fn)
+            end
+          else
+            response
+          end
+        end
+
+        def tokenizer
+          DiscourseAi::Tokenizer::OpenAiTokenizer
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/hugging_face.rb b/lib/completions/endpoints/hugging_face.rb
new file mode 100644
index 00000000..271a3394
--- /dev/null
+++ b/lib/completions/endpoints/hugging_face.rb
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class Huggingface < Base
+        def self.can_contact?(model_name)
+          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2 Llama2-*-chat-hf].include?(model_name)
+        end
+
+        def default_options
+          { parameters: { repetition_penalty: 1.1, temperature: 0.7 } }
+        end
+
+        def provider_id
+          AiApiAuditLog::Provider::HuggingFaceTextGeneration
+        end
+
+        private
+
+        def model_uri
+          URI(SiteSetting.ai_hugging_face_api_url).tap do |uri|
+            uri.path = @streaming_mode ? "/generate_stream" : "/generate"
+          end
+        end
+
+        def prepare_payload(prompt, model_params)
+          default_options
+            .merge(inputs: prompt)
+            .tap do |payload|
+              payload[:parameters].merge!(model_params)
+
+              token_limit = SiteSetting.ai_hugging_face_token_limit || 2_000
+
+              payload[:parameters][:max_new_tokens] = token_limit - prompt_size(prompt)
+            end
+        end
+
+        def prepare_request(payload)
+          headers =
+            { "Content-Type" => "application/json" }.tap do |h|
+              if SiteSetting.ai_hugging_face_api_key.present?
+                h["Authorization"] = "Bearer #{SiteSetting.ai_hugging_face_api_key}"
+              end
+            end
+
+          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+        end
+
+        def extract_completion_from(response_raw)
+          parsed = JSON.parse(response_raw, symbolize_names: true)
+
+          if @streaming_mode
+            # Last chunk contains full response, which we already yielded.
+            return if parsed.dig(:token, :special)
+
+            parsed.dig(:token, :text).to_s
+          else
+            parsed[:generated_text].to_s
+          end
+        end
+
+        def partials_from(decoded_chunk)
+          decoded_chunk
+            .split("\n")
+            .map do |line|
+              data = line.split("data: ", 2)[1]
+              data&.squish == "[DONE]" ? nil : data
+            end
+            .compact
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/endpoints/open_ai.rb b/lib/completions/endpoints/open_ai.rb
new file mode 100644
index 00000000..3388c00c
--- /dev/null
+++ b/lib/completions/endpoints/open_ai.rb
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    module Endpoints
+      class OpenAI < Base
+        def self.can_contact?(model_name)
+          %w[gpt-3.5-turbo gpt-4 gpt-3.5-turbo-16k gpt-4-32k].include?(model_name)
+        end
+
+        def default_options
+          { model: model }
+        end
+
+        def provider_id
+          AiApiAuditLog::Provider::OpenAI
+        end
+
+        private
+
+        def model_uri
+          url =
+            if model.include?("gpt-4")
+              if model.include?("32k")
+                SiteSetting.ai_openai_gpt4_32k_url
+              else
+                SiteSetting.ai_openai_gpt4_url
+              end
+            else
+              if model.include?("16k")
+                SiteSetting.ai_openai_gpt35_16k_url
+              else
+                SiteSetting.ai_openai_gpt35_url
+              end
+            end
+
+          URI(url)
+        end
+
+        def prepare_payload(prompt, model_params)
+          default_options
+            .merge(model_params)
+            .merge(messages: prompt)
+            .tap { |payload| payload[:stream] = true if @streaming_mode }
+        end
+
+        def prepare_request(payload)
+          headers =
+            { "Content-Type" => "application/json" }.tap do |h|
+              if model_uri.host.include?("azure")
+                h["api-key"] = SiteSetting.ai_openai_api_key
+              else
+                h["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}"
+              end
+
+              if SiteSetting.ai_openai_organization.present?
+                h["OpenAI-Organization"] = SiteSetting.ai_openai_organization
+              end
+            end
+
+          Net::HTTP::Post.new(model_uri, headers).tap { |r| r.body = payload }
+        end
+
+        def extract_completion_from(response_raw)
+          parsed = JSON.parse(response_raw, symbolize_names: true)
+
+          (
+            if @streaming_mode
+              parsed.dig(:choices, 0, :delta, :content)
+            else
+              parsed.dig(:choices, 0, :message, :content)
+            end
+          ).to_s
+        end
+
+        def partials_from(decoded_chunk)
+          decoded_chunk
+            .split("\n")
+            .map do |line|
+              data = line.split("data: ", 2)[1]
+              data == "[DONE]" ? nil : data
+            end
+            .compact
+        end
+
+        def extract_prompt_for_tokenizer(prompt)
+          prompt.map { |message| message[:content] || message["content"] || "" }.join("\n")
+        end
+      end
+    end
+  end
+end
diff --git a/lib/completions/entry_point.rb b/lib/completions/entry_point.rb
new file mode 100644
index 00000000..fa3d2ba6
--- /dev/null
+++ b/lib/completions/entry_point.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module DiscourseAi
+  module Completions
+    class EntryPoint
+      def load_files
+        require_relative "dialects/chat_gpt"
+        require_relative "dialects/llama2_classic"
+        require_relative "dialects/orca_style"
+        require_relative "dialects/claude"
+
+        require_relative "endpoints/canned_response"
+        require_relative "endpoints/base"
+        require_relative "endpoints/anthropic"
+        require_relative "endpoints/aws_bedrock"
+        require_relative "endpoints/open_ai"
+        require_relative "endpoints/hugging_face"
+
+        require_relative "llm"
+      end
+
+      def inject_into(_)
+      end
+    end
+  end
+end
diff --git a/lib/completions/llm.rb b/lib/completions/llm.rb
new file mode 100644
index 00000000..7210aad2
--- /dev/null
+++ b/lib/completions/llm.rb
@@ -0,0 +1,81 @@
+# frozen_string_literal: true
+
+# A facade that abstracts multiple LLMs behind a single interface.
+#
+# Internally, it consists of the combination of a dialect and an endpoint.
+# After receiving a prompt using our generic format, it translates it to
+# the target model and routes the completion request through the correct gateway.
+#
+# Use the .proxy method to instantiate an object.
+# It chooses the best dialect and endpoint for the model you want to interact with.
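+#
+# A minimal usage sketch (the model name, prompt values, and `user` below are
+# illustrative, not part of this change):
+#
+#   llm = DiscourseAi::Completions::LLM.proxy("claude-2")
+#
+#   # Blocking call: returns the whole completion as a String.
+#   llm.completion!({ insts: "You are a bot", input: "Say hello" }, user)
+#
+#   # Streaming call: the block receives partial responses plus a lambda to cancel mid-stream.
+#   llm.completion!({ insts: "You are a bot", input: "Say hello" }, user) do |partial, cancel|
+#     print partial
+#   end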
+#
+# Tests of modules that perform LLM calls can use .with_prepared_responses to return canned responses
+# instead of relying on WebMock stubs like we did in the past.
+#
+module DiscourseAi
+  module Completions
+    class LLM
+      UNKNOWN_MODEL = Class.new(StandardError)
+
+      def self.with_prepared_responses(responses)
+        @canned_response = DiscourseAi::Completions::Endpoints::CannedResponse.new(responses)
+
+        yield(@canned_response).tap { @canned_response = nil }
+      end
+
+      def self.proxy(model_name)
+        dialects = [
+          DiscourseAi::Completions::Dialects::Claude,
+          DiscourseAi::Completions::Dialects::Llama2Classic,
+          DiscourseAi::Completions::Dialects::ChatGPT,
+          DiscourseAi::Completions::Dialects::OrcaStyle,
+        ]
+
+        dialect =
+          dialects.detect(-> { raise UNKNOWN_MODEL }) { |d| d.can_translate?(model_name) }.new
+
+        return new(dialect, @canned_response, model_name) if @canned_response
+
+        gateway =
+          DiscourseAi::Completions::Endpoints::Base.endpoint_for(model_name).new(
+            model_name,
+            dialect.tokenizer,
+          )
+
+        new(dialect, gateway, model_name)
+      end
+
+      def initialize(dialect, gateway, model_name)
+        @dialect = dialect
+        @gateway = gateway
+        @model_name = model_name
+      end
+
+      delegate :tokenizer, to: :dialect
+
+      # @param generic_prompt { Hash } - Prompt using our generic format.
+      # We use the following keys from the hash:
+      #   - insts: String with instructions for the LLM.
+      #   - input: String containing user input
+      #   - examples (optional): Array of arrays with examples of input and responses. Each array is an input/response pair like [[example1, response1], [example2, response2]].
+      #   - post_insts (optional): Additional instructions for the LLM. Some dialects like Claude add these at the end of the prompt.
+      #
+      # @param user { User } - User requesting the summary.
+      #
+      # @param &on_partial_blk { Block - Optional } - The passed block will get called with the LLM partial response alongside a cancel function.
+      #
+      # @returns { String } - Completion result.
+      def completion!(generic_prompt, user, &partial_read_blk)
+        prompt = dialect.translate(generic_prompt)
+
+        model_params = generic_prompt.dig(:params, model_name) || {}
+
+        gateway.perform_completion!(prompt, user, model_params, &partial_read_blk)
+      end
+
+      private
+
+      attr_reader :dialect, :gateway, :model_name
+    end
+  end
+end
diff --git a/lib/modules/embeddings/entry_point.rb b/lib/modules/embeddings/entry_point.rb
index c2582930..bb054bdb 100644
--- a/lib/modules/embeddings/entry_point.rb
+++ b/lib/modules/embeddings/entry_point.rb
@@ -15,11 +15,6 @@ module DiscourseAi
       require_relative "semantic_related"
       require_relative "semantic_topic_query"
 
-      require_relative "hyde_generators/base"
-      require_relative "hyde_generators/openai"
-      require_relative "hyde_generators/anthropic"
-      require_relative "hyde_generators/llama2"
-      require_relative "hyde_generators/llama2_ftos"
       require_relative "semantic_search"
     end
 
diff --git a/lib/modules/embeddings/hyde_generators/anthropic.rb b/lib/modules/embeddings/hyde_generators/anthropic.rb
deleted file mode 100644
index 7a1e87ff..00000000
--- a/lib/modules/embeddings/hyde_generators/anthropic.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Embeddings
-    module HydeGenerators
-      class Anthropic < DiscourseAi::Embeddings::HydeGenerators::Base
-        def prompt(search_term)
-          <<~TEXT
-            Human: Given a search term given between <input> tags, generate a forum post about a given subject.
-            #{basic_prompt_instruction}
-            <input>#{search_term}</input>
-
-            Respond with the generated post between <ai></ai> tags.
-
-            Assistant:\n
-          TEXT
-        end
-
-        def models
-          %w[claude-instant-1 claude-2]
-        end
-
-        def hypothetical_post_from(query)
-          response =
-            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
-              prompt(query),
-              SiteSetting.ai_embeddings_semantic_search_hyde_model,
-              max_tokens: 400,
-              stop_sequences: ["</ai>"],
-            ).dig(:completion)
-
-          Nokogiri::HTML5.fragment(response).at("ai").text
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/embeddings/hyde_generators/base.rb b/lib/modules/embeddings/hyde_generators/base.rb
deleted file mode 100644
index be291b03..00000000
--- a/lib/modules/embeddings/hyde_generators/base.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Embeddings
-    module HydeGenerators
-      class Base
-        def self.current_hyde_model
-          DiscourseAi::Embeddings::HydeGenerators::Base.descendants.find do |generator_klass|
-            generator_klass.new.models.include?(
-              SiteSetting.ai_embeddings_semantic_search_hyde_model,
-            )
-          end
-        end
-
-        def basic_prompt_instruction
-          <<~TEXT
-            Act as a content writer for a forum.
-            The forum description is as follows:
-            #{SiteSetting.title}
-            #{SiteSetting.site_description}
-
-            Given the forum description write a forum post about the following subject:
-          TEXT
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/embeddings/hyde_generators/llama2.rb b/lib/modules/embeddings/hyde_generators/llama2.rb
deleted file mode 100644
index 86ca977a..00000000
--- a/lib/modules/embeddings/hyde_generators/llama2.rb
+++ /dev/null
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Embeddings
-    module HydeGenerators
-      class Llama2 < DiscourseAi::Embeddings::HydeGenerators::Base
-        def prompt(search_term)
-          <<~TEXT
-            [INST] <<SYS>>
-            You are a helpful bot
-            You create forum posts about a given subject
-            <</SYS>>
-
-            #{basic_prompt_instruction}
-            #{search_term}
-            [/INST]
-            Here is a forum post about the above subject:
-          TEXT
-        end
-
-        def models
-          ["Llama2-*-chat-hf"]
-        end
-
-        def hypothetical_post_from(query)
-          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
-            prompt(query),
-            SiteSetting.ai_embeddings_semantic_search_hyde_model,
-            token_limit: 400,
-          ).dig(:generated_text)
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb b/lib/modules/embeddings/hyde_generators/llama2_ftos.rb
deleted file mode 100644
index e5222e78..00000000
--- a/lib/modules/embeddings/hyde_generators/llama2_ftos.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Embeddings
-    module HydeGenerators
-      class Llama2Ftos < DiscourseAi::Embeddings::HydeGenerators::Llama2
-        def prompt(search_term)
-          <<~TEXT
-            ### System:
-            You are a helpful bot
-            You create forum posts about a given subject
-
-            ### User:
-            #{basic_prompt_instruction}
-            #{search_term}
-
-            ### Assistant:
-            Here is a forum post about the above subject:
-          TEXT
-        end
-
-        def models
-          %w[StableBeluga2 Upstage-Llama-2-*-instruct-v2]
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/embeddings/hyde_generators/openai.rb b/lib/modules/embeddings/hyde_generators/openai.rb
deleted file mode 100644
index 75ba2919..00000000
--- a/lib/modules/embeddings/hyde_generators/openai.rb
+++ /dev/null
@@ -1,31 +0,0 @@
-# frozen_string_literal: true
-
-module DiscourseAi
-  module Embeddings
-    module HydeGenerators
-      class OpenAi < DiscourseAi::Embeddings::HydeGenerators::Base
-        def prompt(search_term)
-          [
-            {
-              role: "system",
-              content: "You are a helpful bot. You create forum posts about a given subject.",
-            },
-            { role: "user", content: "#{basic_prompt_instruction}\n#{search_term}" },
-          ]
-        end
-
-        def models
-          %w[gpt-3.5-turbo gpt-4]
-        end
-
-        def hypothetical_post_from(query)
-          ::DiscourseAi::Inference::OpenAiCompletions.perform!(
-            prompt(query),
-            SiteSetting.ai_embeddings_semantic_search_hyde_model,
-            max_tokens: 400,
-          ).dig(:choices, 0, :message, :content)
-        end
-      end
-    end
-  end
-end
diff --git a/lib/modules/embeddings/semantic_search.rb b/lib/modules/embeddings/semantic_search.rb
index 0a2d5ae2..b56004fe 100644
--- a/lib/modules/embeddings/semantic_search.rb
+++ b/lib/modules/embeddings/semantic_search.rb
@@ -55,10 +55,7 @@ module DiscourseAi
       hypothetical_post =
         Discourse
           .cache
-          .fetch(hyde_key, expires_in: 1.week) do
-            hyde_generator = DiscourseAi::Embeddings::HydeGenerators::Base.current_hyde_model.new
-            hyde_generator.hypothetical_post_from(search_term)
-          end
+          .fetch(hyde_key, expires_in: 1.week) { hypothetical_post_from(search_term) }
 
       hypothetical_post_embedding =
         Discourse
@@ -96,6 +93,30 @@
     def build_embedding_key(digest, hyde_model, embedding_model)
       "#{build_hyde_key(digest, hyde_model)}-#{embedding_model}"
     end
+
+    def hypothetical_post_from(search_term)
+      prompt = {
+        insts: <<~TEXT,
+          You are a content creator for a forum. The forum description is as follows:
+          #{SiteSetting.title}
+          #{SiteSetting.site_description}
+          Given the forum description write a forum post about the following subject:
+        TEXT
+        input: <<~TEXT,
+          Using this description, write a forum post about the subject inside the <input></input> XML tags:
+
+          <input>#{search_term}</input>
+        TEXT
+        post_insts: "Put the forum post between <ai></ai> tags.",
+      }
+
+      llm_response =
+        DiscourseAi::Completions::LLM.proxy(
+          SiteSetting.ai_embeddings_semantic_search_hyde_model,
+        ).completion!(prompt, @guardian.user)
+
+      Nokogiri::HTML5.fragment(llm_response).at("ai").text
+    end
   end
 end
diff --git a/lib/modules/summarization/entry_point.rb b/lib/modules/summarization/entry_point.rb
index 50e7ea32..ad582637 100644
--- a/lib/modules/summarization/entry_point.rb
+++ b/lib/modules/summarization/entry_point.rb
@@ -21,7 +21,6 @@ module DiscourseAi
           Models::OpenAi.new("gpt-4-32k", max_tokens: 32_768),
           Models::OpenAi.new("gpt-3.5-turbo", max_tokens: 4096),
           Models::OpenAi.new("gpt-3.5-turbo-16k", max_tokens: 16_384),
-          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
           Models::Anthropic.new("claude-2", max_tokens: 100_000),
          Models::Anthropic.new("claude-instant-1", max_tokens: 100_000),
           Models::Llama2.new("Llama2-chat-hf", max_tokens: SiteSetting.ai_hugging_face_token_limit),
@@ -36,6 +35,7 @@ end
 
         truncable_models = [
+          Models::Discourse.new("long-t5-tglobal-base-16384-book-summary", max_tokens: 16_384),
           Models::Discourse.new("bart-large-cnn-samsum", max_tokens: 1024),
           Models::Discourse.new("flan-t5-base-samsum", max_tokens: 512),
         ]
diff --git a/lib/modules/summarization/models/anthropic.rb b/lib/modules/summarization/models/anthropic.rb
index 151545a3..2ce5ff61 100644
--- a/lib/modules/summarization/models/anthropic.rb
+++ b/lib/modules/summarization/models/anthropic.rb
@@ -19,109 +19,6 @@ module DiscourseAi
           setting: "ai_anthropic_api_key",
         )
       end
-
-      def concatenate_summaries(summaries, &on_partial_blk)
-        instructions = <<~TEXT
-          Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
-          Include only the summary inside <ai> tags.
-        TEXT
-
-        instructions += summaries.reduce("") { |m, s| m += "<input>#{s}</input>\n" }
-        instructions += "Assistant:\n"
-
-        completion(instructions, &on_partial_blk)
-      end
-
-      def summarize_with_truncation(contents, opts, &on_partial_blk)
-        instructions = build_base_prompt(opts)
-
-        text_to_summarize = contents.map { |c| format_content_item(c) }.join
-        truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
-        instructions += "<input>#{truncated_content}</input>\nAssistant:\n"
-
-        completion(instructions, &on_partial_blk)
-      end
-
-      def summarize_single(chunk_text, opts, &on_partial_blk)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
-      end
-
-      private
-
-      def summarize_chunk(chunk_text, opts, &on_partial_blk)
-        completion(
-          build_base_prompt(opts) + "<input>#{chunk_text}</input>\nAssistant:\n",
-          &on_partial_blk
-        )
-      end
-
-      def build_base_prompt(opts)
-        initial_instruction =
-          if opts[:single_chunk]
-            "Summarize the following forum discussion inside the given <input> tag, creating a cohesive narrative."
-          else
-            "Summarize the following forum discussion inside the given <input> tag."
-          end
-
-        base_prompt = <<~TEXT
-          Human: #{initial_instruction}
-          Try to keep the summary in the same language as the forum discussion.
-          Format the response, including links, using markdown.
-        TEXT
-
-        base_prompt += <<~TEXT if opts[:resource_path]
-          Try generating links as well the format is #{opts[:resource_path]}/POST_ID
-          For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
-        TEXT
-
-        base_prompt += "Wrap the whole summary inside <ai> tags.\n"
-
-        base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
-          :content_title
-        ]
-
-        base_prompt += "Don't use more than 400 words.\n" unless opts[:single_chunk]
-
-        base_prompt
-      end
-
-      def completion(prompt, &on_partial_blk)
-        # We need to discard any text that might come before the <ai> tag.
-        # Instructing the model to reply only with the summary seems impossible.
-        pre_tag_partial = +""
-
-        if on_partial_blk
-          on_partial_read =
-            Proc.new do |partial|
-              if pre_tag_partial.include?("<ai>")
-                on_partial_blk.call(partial[:completion])
-              else
-                pre_tag_partial << partial[:completion]
-              end
-            end
-
-          response =
-            ::DiscourseAi::Inference::AnthropicCompletions.perform!(
-              prompt,
-              model,
-              &on_partial_read
-            )
-        else
-          response =
-            ::DiscourseAi::Inference::AnthropicCompletions.perform!(prompt, model).dig(
-              :completion,
-            )
-        end
-
-        Nokogiri::HTML5.fragment(response).at("ai")&.text.presence || response
-      end
-
-      def tokenizer
-        DiscourseAi::Tokenizer::AnthropicTokenizer
-      end
-
-      attr_reader :max_tokens
     end
   end
 end
diff --git a/lib/modules/summarization/models/base.rb b/lib/modules/summarization/models/base.rb
index 1ce220fb..00e4b84f 100644
--- a/lib/modules/summarization/models/base.rb
+++ b/lib/modules/summarization/models/base.rb
@@ -21,29 +21,6 @@ module DiscourseAi
         raise NotImplemented
       end
 
-      def summarize_in_chunks(chunks, opts)
-        chunks.map do |chunk|
-          chunk[:summary] = summarize_chunk(chunk[:summary], opts)
-          chunk
-        end
-      end
-
-      def concatenate_summaries(_summaries)
-        raise NotImplemented
-      end
-
-      def summarize_with_truncation(_contents, _opts)
-        raise NotImplemented
-      end
-
-      def summarize_single(chunk_text, opts)
-        raise NotImplemented
-      end
-
-      def format_content_item(item)
-        "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-      end
-
       def available_tokens
         max_tokens - reserved_tokens
       end
@@ -57,16 +34,6 @@
         # ~500 words
         700
       end
-
-      def summarize_chunk(_chunk_text, _opts)
-        raise NotImplemented
-      end
-
-      def tokenizer
-        raise NotImplemented
-      end
-
-      delegate :can_expand_tokens?, to: :tokenizer
     end
   end
 end
diff --git a/lib/modules/summarization/models/discourse.rb b/lib/modules/summarization/models/discourse.rb
index 79669aa6..c37c6cb9 100644
--- a/lib/modules/summarization/models/discourse.rb
+++ b/lib/modules/summarization/models/discourse.rb
@@ -22,44 +22,11 @@ module DiscourseAi
         )
       end
 
-      def concatenate_summaries(summaries)
-        completion(summaries.join("\n"))
-      end
-
-      def summarize_with_truncation(contents, opts)
-        text_to_summarize = contents.map { |c| format_content_item(c) }.join
-        truncated_content =
-          ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text_to_summarize, available_tokens)
-
-        completion(truncated_content)
-      end
-
-      def summarize_single(chunk_text, _opts)
-        completion(chunk_text)
-      end
-
       private
 
-      def summarize_chunk(chunk_text, _opts)
-        completion(chunk_text)
-      end
-
       def reserved_tokens
         0
       end
-
-      def completion(prompt)
-        ::DiscourseAi::Inference::DiscourseClassifier.perform!(
-          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-          model,
-          prompt,
-          SiteSetting.ai_summarization_discourse_service_api_key,
-        ).dig(:summary_text)
-      end
-
-      def tokenizer
-        DiscourseAi::Tokenizer::BertTokenizer
-      end
     end
   end
 end
diff --git a/lib/modules/summarization/models/llama2.rb b/lib/modules/summarization/models/llama2.rb
index 1dc96088..4942ae5c 100644
--- a/lib/modules/summarization/models/llama2.rb
+++ b/lib/modules/summarization/models/llama2.rb
@@ -19,104 +19,6 @@ module DiscourseAi
           setting: "ai_hugging_face_api_url",
         )
       end
-
-      def concatenate_summaries(summaries, &on_partial_blk)
-        prompt = <<~TEXT
-          [INST] <<SYS>>
-          You are a helpful bot
-          <</SYS>>
-
-          Concatenate these disjoint summaries, creating a cohesive narrative:
-          #{summaries.join("\n")} [/INST]
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
-
-      def summarize_with_truncation(contents, opts, &on_partial_blk)
-        text_to_summarize = contents.map { |c| format_content_item(c) }.join
-        truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
-        prompt = <<~TEXT
-          [INST] <<SYS>>
-          #{build_base_prompt(opts)}
-          <</SYS>>
-
-          Summarize the following in up to 400 words:
-          #{truncated_content} [/INST]
-          Here is a summary of the above topic:
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
-
-      def summarize_single(chunk_text, opts, &on_partial_blk)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
-      end
-
-      private
-
-      def summarize_chunk(chunk_text, opts, &on_partial_blk)
-        summary_instruction =
-          if opts[:single_chunk]
-            "Summarize the following forum discussion, creating a cohesive narrative:"
-          else
-            "Summarize the following in up to 400 words:"
-          end
-
-        prompt = <<~TEXT
-          [INST] <<SYS>>
-          #{build_base_prompt(opts)}
-          <</SYS>>
-
-          #{summary_instruction}
-          #{chunk_text} [/INST]
-          Here is a summary of the above topic:
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
-
-      def build_base_prompt(opts)
-        base_prompt = <<~TEXT
-          You are a summarization bot.
-          You effectively summarise any text and reply ONLY with ONLY the summarized text.
-          You condense it into a shorter version.
-          You understand and generate Discourse forum Markdown.
-        TEXT
-
-        if opts[:resource_path]
-          base_prompt +=
-            "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
-        end
-
-        base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
-          :content_title
-        ]
-
-        base_prompt
-      end
-
-      def completion(prompt, &on_partial_blk)
-        if on_partial_blk
-          on_partial_read =
-            Proc.new { |partial| on_partial_blk.call(partial.dig(:token, :text).to_s) }
-
-          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(
-            prompt,
-            model,
-            &on_partial_read
-          )
-        else
-          ::DiscourseAi::Inference::HuggingFaceTextGeneration.perform!(prompt, model).dig(
-            :generated_text,
-          )
-        end
-      end
-
-      def tokenizer
-        DiscourseAi::Tokenizer::Llama2Tokenizer
-      end
     end
   end
 end
diff --git a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
index acc682f6..81ff6bda 100644
--- a/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
+++ b/lib/modules/summarization/models/llama2_fine_tuned_orca_style.rb
@@ -7,65 +7,6 @@ module DiscourseAi
       def display_name
         "Llama2FineTunedOrcaStyle's #{SiteSetting.ai_hugging_face_model_display_name.presence || model}"
       end
-
-      def concatenate_summaries(summaries, &on_partial_blk)
-        prompt = <<~TEXT
-          ### System:
-          You are a helpful bot
-
-          ### User:
-          Concatenate these disjoint summaries, creating a cohesive narrative:
-          #{summaries.join("\n")}
-
-          ### Assistant:
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
-
-      def summarize_with_truncation(contents, opts, &on_partial_blk)
-        text_to_summarize = contents.map { |c| format_content_item(c) }.join
-        truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
-        prompt = <<~TEXT
-          ### System:
-          #{build_base_prompt(opts)}
-
-          ### User:
-          Summarize the following in up to 400 words:
-          #{truncated_content}
-
-          ### Assistant:
-          Here is a summary of the above topic:
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
-
-      private
-
-      def summarize_chunk(chunk_text, opts, &on_partial_blk)
-        summary_instruction =
-          if opts[:single_chunk]
-            "Summarize the following forum discussion, creating a cohesive narrative:"
-          else
-            "Summarize the following in up to 400 words:"
-          end
-
-        prompt = <<~TEXT
-          ### System:
-          #{build_base_prompt(opts)}
-
-          ### User:
-          #{summary_instruction}
-          #{chunk_text}
-
-          ### Assistant:
-          Here is a summary of the above topic:
-        TEXT
-
-        completion(prompt, &on_partial_blk)
-      end
     end
   end
 end
diff --git a/lib/modules/summarization/models/open_ai.rb b/lib/modules/summarization/models/open_ai.rb
index 79bd63dd..121d71f5 100644
--- a/lib/modules/summarization/models/open_ai.rb
+++ b/lib/modules/summarization/models/open_ai.rb
@@ -19,100 +19,6 @@ module DiscourseAi
           setting: "ai_openai_api_key",
         )
       end
-
-      def concatenate_summaries(summaries, &on_partial_blk)
-        messages = [
-          { role: "system", content: "You are a helpful bot" },
-          {
-            role: "user",
-            content:
-              "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\n#{summaries.join("\n")}",
-          },
-        ]
-
-        completion(messages, &on_partial_blk)
-      end
-
-      def summarize_with_truncation(contents, opts, &on_partial_blk)
-        messages = [{ role: "system", content: build_base_prompt(opts) }]
-
-        text_to_summarize = contents.map { |c| format_content_item(c) }.join
-        truncated_content = tokenizer.truncate(text_to_summarize, available_tokens)
-
-        messages << {
-          role: "user",
-          content:
-            "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{truncated_content}",
-        }
-
-        completion(messages, &on_partial_blk)
-      end
-
-      def summarize_single(chunk_text, opts, &on_partial_blk)
-        summarize_chunk(chunk_text, opts.merge(single_chunk: true), &on_partial_blk)
-      end
-
-      private
-
-      def summarize_chunk(chunk_text, opts, &on_partial_blk)
-        summary_instruction =
-          if opts[:single_chunk]
-            "Summarize the following forum discussion, creating a cohesive narrative. Keep the summary in the same language used in the text below."
-          else
-            "Summarize the following in 400 words. Keep the summary in the same language used in the text below."
-          end
-
-        completion(
-          [
-            { role: "system", content: build_base_prompt(opts) },
-            { role: "user", content: "#{summary_instruction}\n#{chunk_text}" },
-          ],
-          &on_partial_blk
-        )
-      end
-
-      def build_base_prompt(opts)
-        base_prompt = <<~TEXT
-          You are a summarization bot.
-          You effectively summarise any text and reply ONLY with ONLY the summarized text.
-          You condense it into a shorter version.
-          You understand and generate Discourse forum Markdown.
-          You format the response, including links, using markdown.
-        TEXT
-
-        if opts[:resource_path]
-          base_prompt +=
-            "Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)\n"
-        end
-
-        base_prompt += "The discussion title is: #{opts[:content_title]}.\n" if opts[
-          :content_title
-        ]
-
-        base_prompt
-      end
-
-      def completion(prompt, &on_partial_blk)
-        if on_partial_blk
-          on_partial_read =
-            Proc.new do |partial|
-              on_partial_blk.call(partial.dig(:choices, 0, :delta, :content).to_s)
-            end
-
-          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model, &on_partial_read)
-        else
-          ::DiscourseAi::Inference::OpenAiCompletions.perform!(prompt, model).dig(
-            :choices,
-            0,
-            :message,
-            :content,
-          )
-        end
-      end
-
-      def tokenizer
-        DiscourseAi::Tokenizer::OpenAiTokenizer
-      end
     end
   end
 end
diff --git a/lib/modules/summarization/strategies/fold_content.rb b/lib/modules/summarization/strategies/fold_content.rb
index a8d5bd11..5c646ef3 100644
--- a/lib/modules/summarization/strategies/fold_content.rb
+++ b/lib/modules/summarization/strategies/fold_content.rb
@@ -16,22 +16,29 @@ module DiscourseAi
         :model,
         to: :completion_model
 
-      def summarize(content, _user, &on_partial_blk)
+      def summarize(content, user, &on_partial_blk)
         opts = content.except(:contents)
 
-        chunks = split_into_chunks(content[:contents])
+        llm = DiscourseAi::Completions::LLM.proxy(completion_model.model)
+
+        chunks = split_into_chunks(llm.tokenizer, content[:contents])
 
         if chunks.length == 1
           {
-            summary:
-              completion_model.summarize_single(chunks.first[:summary], opts, &on_partial_blk),
+            summary: summarize_single(llm, chunks.first[:summary], user, opts, &on_partial_blk),
             chunks: [],
           }
         else
-          summaries = completion_model.summarize_in_chunks(chunks, opts)
+          summaries = summarize_in_chunks(llm, chunks, user, opts)
 
           {
-            summary: completion_model.concatenate_summaries(summaries, &on_partial_blk),
+            summary:
+              concatenate_summaries(
+                llm,
+                summaries.map { |s| s[:summary] },
+                user,
+                &on_partial_blk
+              ),
             chunks: summaries,
           }
         end
@@ -39,14 +46,18 @@
 
       private
 
-      def split_into_chunks(contents)
+      def format_content_item(item)
+        "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+      end
+
+      def split_into_chunks(tokenizer, contents)
         section = { ids: [], summary: "" }
 
         chunks =
           contents.reduce([]) do |sections, item|
-            new_content = completion_model.format_content_item(item)
+            new_content = format_content_item(item)
 
-            if completion_model.can_expand_tokens?(
+            if tokenizer.can_expand_tokens?(
                  section[:summary],
                  new_content,
                  completion_model.available_tokens,
@@ -65,6 +76,71 @@
 
         chunks
       end
+
+      def summarize_single(llm, text, user, opts, &on_partial_blk)
+        prompt = summarization_prompt(text, opts)
+
+        llm.completion!(prompt, user, &on_partial_blk)
+      end
+
+      def summarize_in_chunks(llm, chunks, user, opts)
+        chunks.map do |chunk|
+          prompt = summarization_prompt(chunk[:summary], opts)
+          prompt[:post_insts] = "Don't use more than 400 words for the summary."
+
+          chunk[:summary] = llm.completion!(prompt, user)
+          chunk
+        end
+      end
+
+      def concatenate_summaries(llm, summaries, user, &on_partial_blk)
+        prompt = summarization_prompt(summaries.join("\n"), {})
+        prompt[:insts] = <<~TEXT
+          You are a bot that can concatenate disjoint summaries, creating a cohesive narrative.
+          Keep the resulting summary in the same language used in the text below.
+        TEXT
+
+        llm.completion!(prompt, user, &on_partial_blk)
+      end
+
+      def summarization_prompt(input, opts)
+        insts = <<~TEXT
+          You are a summarization bot that effectively summarizes any text, creating a cohesive narrative.
+          Your replies contain ONLY a summarized version of the text I provided, using the same language.
+          You understand and generate Discourse forum Markdown.
+          You format the response, including links, using Markdown.
+        TEXT
+
+        insts += <<~TEXT if opts[:resource_path]
+          Each message is formatted as "<POST_NUMBER>) <USERNAME> <MESSAGE>"
+          Append <POST_NUMBER> to #{opts[:resource_path]} when linking posts.
+        TEXT
+
+        insts += "The discussion title is: #{opts[:content_title]}.\n" if opts[:content_title]
+
+        prompt = { insts: insts, input: <<~TEXT }
+          Here is the text, inside <input></input> XML tags:
+
+          <input>
+          #{input}
+          </input>
+        TEXT
+
+        if opts[:resource_path]
+          prompt[:examples] = [
+            [
+              "(1 user1 said: I love Mondays 2) user2 said: I hate Mondays",
+              "Two users are sharing their feelings toward Mondays. [user1](#{opts[:resource_path]}/1) loves them, while [user2](#{opts[:resource_path]}/2) hates them.",
+            ],
+            [
+              "3) usuario1: Amo los lunes 6) usuario2: Odio los lunes",
+              "Dos usuarios charlan sobre los lunes. [usuario1](#{opts[:resource_path]}/3) dice que los ama, mientras que [usuario2](#{opts[:resource_path]}/6) los odia.",
+            ],
+          ]
+        end
+
+        prompt
+      end
     end
   end
 end
diff --git a/lib/modules/summarization/strategies/truncate_content.rb b/lib/modules/summarization/strategies/truncate_content.rb
index c26f2e8a..6e4c368a 100644
--- a/lib/modules/summarization/strategies/truncate_content.rb
+++ b/lib/modules/summarization/strategies/truncate_content.rb
@@ -25,6 +25,32 @@ module DiscourseAi
           chunks: [],
         }
       end
+
+      private
+
+      def format_content_item(item)
+        "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
+      end
+
+      def summarize_with_truncation(contents, opts)
+        text_to_summarize = contents.map { |c| format_content_item(c) }.join
+        truncated_content =
+          ::DiscourseAi::Tokenizer::BertTokenizer.truncate(
+            text_to_summarize,
+            completion_model.available_tokens,
+          )
+
+        completion(truncated_content)
+      end
+
+      def completion(prompt)
+        ::DiscourseAi::Inference::DiscourseClassifier.perform!(
+          "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
+          completion_model.model,
+          prompt,
+          SiteSetting.ai_summarization_discourse_service_api_key,
+        ).dig(:summary_text)
+      end
     end
   end
 end
diff --git a/plugin.rb b/plugin.rb
index 0eb0d8ee..a4022c62 100644
--- a/plugin.rb
+++ b/plugin.rb
@@ -52,6 +52,8 @@ after_initialize do
 
   require_relative "lib/shared/database/connection"
 
+  require_relative "lib/completions/entry_point"
+
   require_relative "lib/modules/nsfw/entry_point"
   require_relative "lib/modules/toxicity/entry_point"
   require_relative "lib/modules/sentiment/entry_point"
@@ -64,6 +66,7 @@
   add_admin_route "discourse_ai.title", "discourse-ai"
 
   [
+    DiscourseAi::Completions::EntryPoint.new,
     DiscourseAi::Embeddings::EntryPoint.new,
     DiscourseAi::NSFW::EntryPoint.new,
     DiscourseAi::Toxicity::EntryPoint.new,
diff --git a/spec/lib/completions/dialects/chat_gpt_spec.rb b/spec/lib/completions/dialects/chat_gpt_spec.rb
new file mode 100644
index 00000000..599f1113
--- /dev/null
+++ b/spec/lib/completions/dialects/chat_gpt_spec.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::ChatGPT do
+  subject(:dialect) { described_class.new }
+
+  let(:prompt) do
+    {
+      insts: <<~TEXT,
+        I want you to act as a title generator for written pieces. I will provide you with a text,
+        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+      TEXT
+      input: <<~TEXT,
+        Here is the text, inside <input></input> XML tags:
+        <input>
+        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+        dies so that a scene may be repeated.
+        </input>
+      TEXT
+      post_insts:
+        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
+    }
+  end
+
+  describe "#translate" do
+    it "translates a prompt written in our generic format to the ChatGPT format" do
+      open_ai_version = [
+        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
+        { role: "user", content: prompt[:input] },
+      ]
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to contain_exactly(*open_ai_version)
+    end
+
+    it "includes examples in the ChatGPT version" do
+      prompt[:examples] = [
+        [
+          "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+        ],
+      ]
+
+      open_ai_version = [
+        { role: "system", content: [prompt[:insts], prompt[:post_insts]].join("\n") },
+        { role: "user", content: prompt[:examples][0][0] },
+        { role: "assistant", content: prompt[:examples][0][1] },
+        { role: "user", content: prompt[:input] },
+      ]
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to contain_exactly(*open_ai_version)
+    end
+  end
+end
diff --git a/spec/lib/completions/dialects/claude_spec.rb b/spec/lib/completions/dialects/claude_spec.rb
new file mode 100644
index 00000000..d26dd570
--- /dev/null
+++ b/spec/lib/completions/dialects/claude_spec.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::Claude do
+  subject(:dialect) { described_class.new }
+
+  let(:prompt) do
+    {
+      insts: <<~TEXT,
+        I want you to act as a title generator for written pieces. I will provide you with a text,
+        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+      TEXT
+      input: <<~TEXT,
+        Here is the text, inside <input></input> XML tags:
+        <input>
+        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+        dies so that a scene may be repeated.
+        </input>
+      TEXT
+      post_insts:
+        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
+    }
+  end
+
+  describe "#translate" do
+    it "translates a prompt written in our generic format to Claude's format" do
+      anthropic_version = <<~TEXT
+        Human: #{prompt[:insts]}
+        #{prompt[:input]}
+        #{prompt[:post_insts]}
+        Assistant:
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(anthropic_version)
+    end
+
+    it "knows how to translate examples to Claude's format" do
+      prompt[:examples] = [
+        [
+          "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+        ],
+      ]
+      anthropic_version = <<~TEXT
+        Human: #{prompt[:insts]}
+
+        H: #{prompt[:examples][0][0]}
+        A: #{prompt[:examples][0][1]}
+
+        #{prompt[:input]}
+        #{prompt[:post_insts]}
+        Assistant:
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(anthropic_version)
+    end
+  end
+end
diff --git a/spec/lib/completions/dialects/llama2_classic_spec.rb b/spec/lib/completions/dialects/llama2_classic_spec.rb
new file mode 100644
index 00000000..2b1d93a2
--- /dev/null
+++ b/spec/lib/completions/dialects/llama2_classic_spec.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::Llama2Classic do
+  subject(:dialect) { described_class.new }
+
+  let(:prompt) do
+    {
+      insts: <<~TEXT,
+        I want you to act as a title generator for written pieces. I will provide you with a text,
+        and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+        and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+      TEXT
+      input: <<~TEXT,
+        Here is the text, inside <input></input> XML tags:
+        <input>
+        To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+        discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+        defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+        Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+        a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+        slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+        dies so that a scene may be repeated.
+        </input>
+      TEXT
+      post_insts:
+        "Please put the translation between <ai></ai> tags and separate each title with a comma.",
+    }
+  end
+
+  describe "#translate" do
+    it "translates a prompt written in our generic format to the Llama2 format" do
+      llama2_classic_version = <<~TEXT
+        [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
+        [INST]#{prompt[:input]}[/INST]
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(llama2_classic_version)
+    end
+
+    it "includes examples in the translation" do
+      prompt[:examples] = [
+        [
+          "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+        ],
+      ]
+
+      llama2_classic_version = <<~TEXT
+        [INST]<<SYS>>#{[prompt[:insts], prompt[:post_insts]].join("\n")}<</SYS>>[/INST]
+        [INST]#{prompt[:examples][0][0]}[/INST]
+        #{prompt[:examples][0][1]}
+        [INST]#{prompt[:input]}[/INST]
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(llama2_classic_version)
+    end
+  end
+end
diff --git a/spec/lib/completions/dialects/orca_style_spec.rb b/spec/lib/completions/dialects/orca_style_spec.rb
new file mode 100644
index 00000000..411a84a8
--- /dev/null
+++ b/spec/lib/completions/dialects/orca_style_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::Dialects::OrcaStyle do
+  subject(:dialect) { described_class.new }
+
+  describe "#translate" do
+    let(:prompt) do
+      {
+        insts: <<~TEXT,
+          I want you to act as a title generator for written pieces. I will provide you with a text,
+          and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+          and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+        TEXT
+        input: <<~TEXT,
+          Here is the text, inside <input></input> XML tags:
+          <input>
+          To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+          discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+          defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+          Destiny favors repetitions, variants, symmetries; nineteen centuries later, in the southern province of Buenos Aires,
+          a gaucho is attacked by other gauchos and, as he falls, recognizes a godson of his and says with gentle rebuke and
+          slow surprise (these words must be heard, not read): 'But, my friend!' He is killed and does not know that he
+          dies so that a scene may be repeated.
+          </input>
+        TEXT
+        post_insts:
+          "Please put the translation between <ai></ai> tags and separate each title with a comma.",
+      }
+    end
+
+    it "translates a prompt written in our generic format to the Orca Style format" do
+      orca_style_version = <<~TEXT
+        ### System:
+        #{[prompt[:insts], prompt[:post_insts]].join("\n")}
+        ### User:
+        #{prompt[:input]}
+        ### Assistant:
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(orca_style_version)
+    end
+
+    it "includes examples in the translated prompt" do
+      prompt[:examples] = [
+        [
+          "In the labyrinth of time, a solitary horse, etched in gold by the setting sun, embarked on an infinite journey.",
+          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+        ],
+      ]
+
+      orca_style_version = <<~TEXT
+        ### System:
+        #{[prompt[:insts], prompt[:post_insts]].join("\n")}
+        ### User:
+        #{prompt[:examples][0][0]}
+        ### Assistant:
+        #{prompt[:examples][0][1]}
+        ### User:
+        #{prompt[:input]}
+        ### Assistant:
+      TEXT
+
+      translated = dialect.translate(prompt)
+
+      expect(translated).to eq(orca_style_version)
+    end
+  end
+end
diff --git a/spec/lib/completions/endpoints/anthropic_spec.rb b/spec/lib/completions/endpoints/anthropic_spec.rb
new file mode 100644
index 00000000..d0309e2f
--- /dev/null
+++ b/spec/lib/completions/endpoints/anthropic_spec.rb
@@ -0,0 +1,64 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::Anthropic do
+  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) }
+
+  let(:model_name) { "claude-2" }
+  let(:prompt) { "Human: write 3 words\n\n" }
+
+  let(:request_body) { model.default_options.merge(prompt: prompt).to_json }
+  let(:stream_request_body) { model.default_options.merge(prompt: prompt, stream: true).to_json }
+
+  def response(content)
+    {
+      completion: content,
+      stop: "\n\nHuman:",
+      stop_reason: "stop_sequence",
+      truncated: false,
+      log_id: "12dcc7feafbee4a394e0de9dffde3ac5",
+      model: model_name,
+      exception: nil,
+    }
+  end
+
+  def stub_response(prompt, response_text)
+    WebMock
+      .stub_request(:post, "https://api.anthropic.com/v1/complete")
+      .with(body: model.default_options.merge(prompt: prompt).to_json)
+      .to_return(status: 200, body: JSON.dump(response(response_text)))
+  end
+
+  def stream_line(delta, finish_reason: nil)
+    +"data: " << {
+      completion: delta,
+      stop: finish_reason ? "\n\nHuman:" : nil,
"\n\nHuman:" : nil, + stop_reason: finish_reason, + truncated: false, + log_id: "12b029451c6d18094d868bc04ce83f63", + model: "claude-2", + exception: nil, + }.to_json + end + + def stub_streamed_response(prompt, deltas) + chunks = + deltas.each_with_index.map do |_, index| + if index == (deltas.length - 1) + stream_line(deltas[index], finish_reason: "stop_sequence") + else + stream_line(deltas[index]) + end + end + + chunks = chunks.join("\n\n") + + WebMock + .stub_request(:post, "https://api.anthropic.com/v1/complete") + .with(body: model.default_options.merge(prompt: prompt, stream: true).to_json) + .to_return(status: 200, body: chunks) + end + + it_behaves_like "an endpoint that can communicate with a completion service" +end diff --git a/spec/lib/completions/endpoints/aws_bedrock_spec.rb b/spec/lib/completions/endpoints/aws_bedrock_spec.rb new file mode 100644 index 00000000..4b655a91 --- /dev/null +++ b/spec/lib/completions/endpoints/aws_bedrock_spec.rb @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +require_relative "endpoint_examples" + +RSpec.describe DiscourseAi::Completions::Endpoints::AwsBedrock do + subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::AnthropicTokenizer) } + + let(:model_name) { "claude-2" } + let(:prompt) { "Human: write 3 words\n\n" } + + let(:request_body) { model.default_options.merge(prompt: prompt).to_json } + let(:stream_request_body) { model.default_options.merge(prompt: prompt).to_json } + + before do + SiteSetting.ai_bedrock_access_key_id = "123456" + SiteSetting.ai_bedrock_secret_access_key = "asd-asd-asd" + SiteSetting.ai_bedrock_region = "us-east-1" + end + + # Copied from https://github.com/bblimke/webmock/issues/629 + # Workaround for stubbing a streamed response + before do + mocked_http = + Class.new(Net::HTTP) do + def request(*) + super do |response| + response.instance_eval do + def read_body(*, &block) + if block_given? + @body.each(&block) + else + super + end + end + end + + yield response if block_given? + + response + end + end + end + + @original_net_http = Net.send(:remove_const, :HTTP) + Net.send(:const_set, :HTTP, mocked_http) + end + + after do + Net.send(:remove_const, :HTTP) + Net.send(:const_set, :HTTP, @original_net_http) + end + + def response(content) + { + completion: content, + stop: "\n\nHuman:", + stop_reason: "stop_sequence", + truncated: false, + log_id: "12dcc7feafbee4a394e0de9dffde3ac5", + model: model_name, + exception: nil, + } + end + + def stub_response(prompt, response_text) + WebMock + .stub_request( + :post, + "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke", + ) + .with(body: request_body) + .to_return(status: 200, body: JSON.dump(response(response_text))) + end + + def stream_line(delta, finish_reason: nil) + encoder = Aws::EventStream::Encoder.new + + message = + Aws::EventStream::Message.new( + payload: + StringIO.new( + { + bytes: + Base64.encode64( + { + completion: delta, + stop: finish_reason ? 
"\n\nHuman:" : nil, + stop_reason: finish_reason, + truncated: false, + log_id: "12b029451c6d18094d868bc04ce83f63", + model: "claude-2", + exception: nil, + }.to_json, + ), + }.to_json, + ), + ) + + encoder.encode(message) + end + + def stub_streamed_response(prompt, deltas) + chunks = + deltas.each_with_index.map do |_, index| + if index == (deltas.length - 1) + stream_line(deltas[index], finish_reason: "stop_sequence") + else + stream_line(deltas[index]) + end + end + + WebMock + .stub_request( + :post, + "https://bedrock-runtime.#{SiteSetting.ai_bedrock_region}.amazonaws.com/model/anthropic.#{model_name}/invoke-with-response-stream", + ) + .with(body: stream_request_body) + .to_return(status: 200, body: chunks) + end + + it_behaves_like "an endpoint that can communicate with a completion service" +end diff --git a/spec/lib/completions/endpoints/endpoint_examples.rb b/spec/lib/completions/endpoints/endpoint_examples.rb new file mode 100644 index 00000000..6ca86070 --- /dev/null +++ b/spec/lib/completions/endpoints/endpoint_examples.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +RSpec.shared_examples "an endpoint that can communicate with a completion service" do + describe "#perform_completion!" do + fab!(:user) { Fabricate(:user) } + + let(:response_text) { "1. Serenity\\n2. Laughter\\n3. Adventure" } + + context "when using regular mode" do + before { stub_response(prompt, response_text) } + + it "can complete a trivial prompt" do + completion_response = model.perform_completion!(prompt, user) + + expect(completion_response).to eq(response_text) + end + + it "creates an audit log for the request" do + model.perform_completion!(prompt, user) + + expect(AiApiAuditLog.count).to eq(1) + log = AiApiAuditLog.first + + response_body = response(response_text).to_json + + expect(log.provider_id).to eq(model.provider_id) + expect(log.user_id).to eq(user.id) + expect(log.raw_request_payload).to eq(request_body) + expect(log.raw_response_payload).to eq(response_body) + expect(log.request_tokens).to eq(model.prompt_size(prompt)) + expect(log.response_tokens).to eq(model.tokenizer.size(response_text)) + end + end + + context "when using stream mode" do + let(:deltas) { ["Mount", "ain", " ", "Tree ", "Frog"] } + + before { stub_streamed_response(prompt, deltas) } + + it "can complete a trivial prompt" do + completion_response = +"" + + model.perform_completion!(prompt, user) do |partial, cancel| + completion_response << partial + cancel.call if completion_response.split(" ").length == 2 + end + + expect(completion_response).to eq(deltas[0...-1].join) + end + + it "creates an audit log and updates is on each read." 
+      it "can complete a trivial prompt" do
+        completion_response = +""
+
+        model.perform_completion!(prompt, user) do |partial, cancel|
+          completion_response << partial
+          cancel.call if completion_response.split(" ").length == 2
+        end
+
+        expect(completion_response).to eq(deltas[0...-1].join)
+      end
+
+      it "creates an audit log and updates it on each read" do
+        completion_response = +""
+
+        model.perform_completion!(prompt, user) do |partial, cancel|
+          completion_response << partial
+          cancel.call if completion_response.split(" ").length == 2
+        end
+
+        expect(AiApiAuditLog.count).to eq(1)
+        log = AiApiAuditLog.first
+
+        expect(log.provider_id).to eq(model.provider_id)
+        expect(log.user_id).to eq(user.id)
+        expect(log.raw_request_payload).to eq(stream_request_body)
+        expect(log.raw_response_payload).to be_present
+        expect(log.request_tokens).to eq(model.prompt_size(prompt))
+        expect(log.response_tokens).to eq(model.tokenizer.size(deltas[0...-1].join))
+      end
+    end
+  end
+end
diff --git a/spec/lib/completions/endpoints/hugging_face_spec.rb b/spec/lib/completions/endpoints/hugging_face_spec.rb
new file mode 100644
index 00000000..0acd480f
--- /dev/null
+++ b/spec/lib/completions/endpoints/hugging_face_spec.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+require_relative "endpoint_examples"
+
+RSpec.describe DiscourseAi::Completions::Endpoints::Huggingface do
+  subject(:model) { described_class.new(model_name, DiscourseAi::Tokenizer::Llama2Tokenizer) }
+
+  let(:model_name) { "Llama2-*-chat-hf" }
+  let(:prompt) { <<~TEXT }
+    [INST]<>You are a helpful bot.<>[/INST]
+    [INST]Write 3 words[/INST]
+  TEXT
+
+  let(:request_body) do
+    model
+      .default_options
+      .merge(inputs: prompt)
+      .tap { |payload| payload[:parameters][:max_new_tokens] = 2_000 - model.prompt_size(prompt) }
+      .to_json
+  end
+  let(:stream_request_body) { request_body }
+
+  before { SiteSetting.ai_hugging_face_api_url = "https://test.dev" }
+
+  def response(content)
+    { generated_text: content }
+  end
+
+  def stub_response(prompt, response_text)
+    WebMock
+      .stub_request(:post, "#{SiteSetting.ai_hugging_face_api_url}/generate")
+      .with(body: request_body)
+      .to_return(status: 200, body: JSON.dump(response(response_text)))
+  end
+
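+  # One SSE event in text-generation-inference's schema; generated_text is only
+  # populated on the final event. (An assumption based on TGI's streaming responses.)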
}, + { role: "user", content: "Write 3 words" }, + ] + end + + let(:request_body) { model.default_options.merge(messages: prompt).to_json } + let(:stream_request_body) { model.default_options.merge(messages: prompt, stream: true).to_json } + + def response(content) + { + id: "chatcmpl-6sZfAb30Rnv9Q7ufzFwvQsMpjZh8S", + object: "chat.completion", + created: 1_678_464_820, + model: "gpt-3.5-turbo-0301", + usage: { + prompt_tokens: 337, + completion_tokens: 162, + total_tokens: 499, + }, + choices: [ + { message: { role: "assistant", content: content }, finish_reason: "stop", index: 0 }, + ], + } + end + + def stub_response(prompt, response_text) + WebMock + .stub_request(:post, "https://api.openai.com/v1/chat/completions") + .with(body: { model: model_name, messages: prompt }) + .to_return(status: 200, body: JSON.dump(response(response_text))) + end + + def stream_line(delta, finish_reason: nil) + +"data: " << { + id: "chatcmpl-#{SecureRandom.hex}", + object: "chat.completion.chunk", + created: 1_681_283_881, + model: "gpt-3.5-turbo-0301", + choices: [{ delta: { content: delta } }], + finish_reason: finish_reason, + index: 0, + }.to_json + end + + def stub_streamed_response(prompt, deltas) + chunks = + deltas.each_with_index.map do |_, index| + if index == (deltas.length - 1) + stream_line(deltas[index], finish_reason: "stop_sequence") + else + stream_line(deltas[index]) + end + end + + chunks = chunks.join("\n\n") + + WebMock + .stub_request(:post, "https://api.openai.com/v1/chat/completions") + .with(body: model.default_options.merge(messages: prompt, stream: true).to_json) + .to_return(status: 200, body: chunks) + end + + it_behaves_like "an endpoint that can communicate with a completion service" +end diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb new file mode 100644 index 00000000..9c7148c0 --- /dev/null +++ b/spec/lib/completions/llm_spec.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +RSpec.describe DiscourseAi::Completions::LLM do + subject(:llm) do + described_class.new( + DiscourseAi::Completions::Dialects::OrcaStyle.new, + canned_response, + "Upstage-Llama-2-*-instruct-v2", + ) + end + + fab!(:user) { Fabricate(:user) } + + describe ".proxy" do + it "raises an exception when we can't proxy the model" do + fake_model = "unknown_v2" + + expect { described_class.proxy(fake_model) }.to( + raise_error(DiscourseAi::Completions::LLM::UNKNOWN_MODEL), + ) + end + end + + describe "#completion!" do + let(:prompt) do + { + insts: <<~TEXT, + I want you to act as a title generator for written pieces. I will provide you with a text, + and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words, + and ensure that the meaning is maintained. Replies will utilize the language type of the topic. + TEXT + input: <<~TEXT, + Here is the text, inside XML tags: + + To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends, + discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer + defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry. 
+  def stream_line(delta, finish_reason: nil)
+    +"data: " << {
+      id: "chatcmpl-#{SecureRandom.hex}",
+      object: "chat.completion.chunk",
+      created: 1_681_283_881,
+      model: "gpt-3.5-turbo-0301",
+      choices: [{ delta: { content: delta } }],
+      finish_reason: finish_reason,
+      index: 0,
+    }.to_json
+  end
+
+  def stub_streamed_response(prompt, deltas)
+    chunks =
+      deltas.each_with_index.map do |_, index|
+        if index == (deltas.length - 1)
+          stream_line(deltas[index], finish_reason: "stop_sequence")
+        else
+          stream_line(deltas[index])
+        end
+      end
+
+    chunks = chunks.join("\n\n")
+
+    WebMock
+      .stub_request(:post, "https://api.openai.com/v1/chat/completions")
+      .with(body: model.default_options.merge(messages: prompt, stream: true).to_json)
+      .to_return(status: 200, body: chunks)
+  end
+
+  it_behaves_like "an endpoint that can communicate with a completion service"
+end
diff --git a/spec/lib/completions/llm_spec.rb b/spec/lib/completions/llm_spec.rb
new file mode 100644
index 00000000..9c7148c0
--- /dev/null
+++ b/spec/lib/completions/llm_spec.rb
@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+
+RSpec.describe DiscourseAi::Completions::LLM do
+  subject(:llm) do
+    described_class.new(
+      DiscourseAi::Completions::Dialects::OrcaStyle.new,
+      canned_response,
+      "Upstage-Llama-2-*-instruct-v2",
+    )
+  end
+
+  fab!(:user) { Fabricate(:user) }
+
+  describe ".proxy" do
+    it "raises an exception when we can't proxy the model" do
+      fake_model = "unknown_v2"
+
+      expect { described_class.proxy(fake_model) }.to(
+        raise_error(DiscourseAi::Completions::LLM::UNKNOWN_MODEL),
+      )
+    end
+  end
+
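+  # CannedResponse stands in for a real endpoint and replays scripted completions,
+  # so these specs exercise the dialect and endpoint pipeline without HTTP calls.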
+  describe "#completion!" do
+    let(:prompt) do
+      {
+        insts: <<~TEXT,
+          I want you to act as a title generator for written pieces. I will provide you with a text,
+          and you will generate five attention-grabbing titles. Please keep the title concise and under 20 words,
+          and ensure that the meaning is maintained. Replies will utilize the language type of the topic.
+        TEXT
+        input: <<~TEXT,
+          Here is the text, inside XML tags:
+
+          To perfect his horror, Caesar, surrounded at the base of the statue by the impatient daggers of his friends,
+          discovers among the faces and blades that of Marcus Brutus, his protege, perhaps his son, and he no longer
+          defends himself, but instead exclaims: 'You too, my son!' Shakespeare and Quevedo capture the pathetic cry.
+
+        TEXT
+        post_insts:
+          "Please put the translation between tags and separate each title with a comma.",
+      }
+    end
+
+    let(:canned_response) do
+      DiscourseAi::Completions::Endpoints::CannedResponse.new(
+        [
+          "The solitary horse.,The horse etched in gold.,A horse's infinite journey.,A horse lost in time.,A horse's last ride.",
+        ],
+      )
+    end
+
+    context "when getting the full response" do
+      it "processes the prompt and returns the response" do
+        llm_response = llm.completion!(prompt, user)
+
+        expect(llm_response).to eq(canned_response.responses[0])
+      end
+    end
+
+    context "when getting a streamed response" do
+      it "processes the prompt and calls the given block with the partial response" do
+        llm_response = +""
+
+        llm.completion!(prompt, user) { |partial, cancel_fn| llm_response << partial }
+
+        expect(llm_response).to eq(canned_response.responses[0])
+      end
+    end
+  end
+end
diff --git a/spec/lib/modules/ai_bot/commands/search_command_spec.rb b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
index 285b88ed..215fd3b2 100644
--- a/spec/lib/modules/ai_bot/commands/search_command_spec.rb
+++ b/spec/lib/modules/ai_bot/commands/search_command_spec.rb
@@ -66,7 +66,10 @@ RSpec.describe DiscourseAi::AiBot::Commands::SearchCommand do
         .expects(:asymmetric_topics_similarity_search)
         .returns([post1.topic_id])
 
-      results = search.process(search_query: "hello world, sam", status: "public")
+      results =
+        DiscourseAi::Completions::LLM.with_prepared_responses(["#{query}"]) do
+          search.process(search_query: "hello world, sam", status: "public")
+        end
 
       expect(results[:args]).to eq({ search_query: "hello world, sam", status: "public" })
       expect(results[:rows].length).to eq(1)
diff --git a/spec/lib/modules/embeddings/semantic_search_spec.rb b/spec/lib/modules/embeddings/semantic_search_spec.rb
index b6bbad11..b406dbaa 100644
--- a/spec/lib/modules/embeddings/semantic_search_spec.rb
+++ b/spec/lib/modules/embeddings/semantic_search_spec.rb
@@ -13,15 +13,6 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
   before do
     SiteSetting.ai_embeddings_discourse_service_api_endpoint = "http://test.com"
 
-    prompt = DiscourseAi::Embeddings::HydeGenerators::OpenAi.new.prompt(query)
-    OpenAiCompletionsInferenceStubs.stub_response(
-      prompt,
-      hypothetical_post,
-      req_opts: {
-        max_tokens: 400,
-      },
-    )
-
     hyde_embedding = [0.049382, 0.9999]
     EmbeddingsGenerationStubs.discourse_service(
       SiteSetting.ai_embeddings_model,
@@ -39,10 +30,16 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
       .returns(candidate_ids)
   end
 
+  def trigger_search(query)
+    DiscourseAi::Completions::LLM.with_prepared_responses(["#{hypothetical_post}"]) do
+      subject.search_for_topics(query)
+    end
+  end
+
   it "returns the first post of a topic included in the asymmetric search results" do
     stub_candidate_ids([post.topic_id])
 
-    posts = subject.search_for_topics(query)
+    posts = trigger_search(query)
 
     expect(posts).to contain_exactly(post)
   end
@@ -53,7 +50,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
       post.topic.update!(visible: false)
       stub_candidate_ids([post.topic_id])
 
-      posts = subject.search_for_topics(query)
+      posts = trigger_search(query)
 
       expect(posts).to be_empty
    end
@@ -64,7 +61,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      pm_post = Fabricate(:private_message_post)
      stub_candidate_ids([pm_post.topic_id])

-      posts = subject.search_for_topics(query)
+      posts = trigger_search(query)

      expect(posts).to be_empty
    end
@@ -75,7 +72,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      post.update!(post_type: Post.types[:whisper])
      stub_candidate_ids([post.topic_id])

-      posts = subject.search_for_topics(query)
+      posts = trigger_search(query)

      expect(posts).to be_empty
    end
@@ -87,7 +84,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      reply.topic.first_post.trash!
      stub_candidate_ids([reply.topic_id])

-      posts = subject.search_for_topics(query)
+      posts = trigger_search(query)

      expect(posts).to be_empty
    end
@@ -98,7 +95,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      post_2 = Fabricate(:post)
      stub_candidate_ids([post.topic_id])

-      posts = subject.search_for_topics(query)
+      posts = trigger_search(query)

      expect(posts).not_to include(post_2)
    end
@@ -114,7 +111,7 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      end

      it "returns an empty list" do
-        posts = subject.search_for_topics(query)
+        posts = trigger_search(query)

        expect(posts).to be_empty
      end
@@ -122,14 +119,17 @@ RSpec.describe DiscourseAi::Embeddings::SemanticSearch do
      it "returns the results if the user has access to the category" do
        group.add(user)

-        posts = subject.search_for_topics(query)
+        posts = trigger_search(query)

        expect(posts).to contain_exactly(post)
      end

      context "while searching as anon" do
        it "returns an empty list" do
-          posts = described_class.new(Guardian.new(nil)).search_for_topics(query)
+          posts =
+            DiscourseAi::Completions::LLM.with_prepared_responses(
+              ["#{hypothetical_post}"],
+            ) { described_class.new(Guardian.new(nil)).search_for_topics(query) }

          expect(posts).to be_empty
        end
diff --git a/spec/lib/modules/summarization/models/anthropic_spec.rb b/spec/lib/modules/summarization/models/anthropic_spec.rb
deleted file mode 100644
index 263ab62f..00000000
--- a/spec/lib/modules/summarization/models/anthropic_spec.rb
+++ /dev/null
@@ -1,122 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::Anthropic do
-  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
-  let(:model_name) { "claude-2" }
-  let(:max_tokens) { 720 }
-
-  let(:content) do
-    {
-      resource_path: "/t/-/1",
-      content_title: "This is a title",
-      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
-    }
-  end
-
-  def as_chunk(item)
-    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
-  end
-
-  def expected_messages(contents, opts)
-    base_prompt = <<~TEXT
-      Human: Summarize the following forum discussion inside the given tag.
-      Try to keep the summary in the same language as the forum discussion.
-      Format the response, including links, using markdown.
-      Try generating links as well the format is #{opts[:resource_path]}/POST_ID
-      For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
-      Wrap the whole the summary inside tags.
-      The discussion title is: #{opts[:content_title]}.
-      Don't use more than 400 words.
-    TEXT
-
-    text =
-      contents.reduce("") do |memo, item|
-        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-      end
-
-    base_prompt += "#{text}\nAssistant:\n"
-  end
-
-  describe "#summarize_in_chunks" do
-    context "when the content fits in a single chunk" do
-      it "performs a request to summarize" do
-        opts = content.except(:contents)
-
-        AnthropicCompletionStubs.stub_response(
-          expected_messages(content[:contents], opts),
-          "This is summary 1",
-        )
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1")
-      end
-    end
-
-    context "when the content fits in multiple chunks" do
-      it "performs a request for each one to summarize" do
-        content[:contents] << {
-          poster: "asd2",
-          id: 2,
-          text: "This is a different text to summarize",
-        }
-        opts = content.except(:contents)
-
-        content[:contents].each_with_index do |item, idx|
-          AnthropicCompletionStubs.stub_response(
-            expected_messages([item], opts),
-            "This is summary #{idx + 1}",
-          )
-        end
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
-      end
-    end
-  end
-
-  describe "#concatenate_summaries" do
-    it "combines all the different summaries into a single one" do
-      messages = <<~TEXT
-        Human: Concatenate the following disjoint summaries inside the given input tags, creating a cohesive narrative.
-        Include only the summary inside tags.
-        summary 1
-        summary 2
-        Assistant:
-      TEXT
-
-      AnthropicCompletionStubs.stub_response(messages, "concatenated summary")
-
-      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
-    end
-  end
-
-  describe "#summarize_with_truncation" do
-    let(:max_tokens) { 709 }
-
-    it "truncates the context to meet the token limit" do
-      opts = content.except(:contents)
-
-      instructions = <<~TEXT
-        Human: Summarize the following forum discussion inside the given tag.
-        Try to keep the summary in the same language as the forum discussion.
-        Format the response, including links, using markdown.
-        Try generating links as well the format is #{opts[:resource_path]}/POST_ID
-        For example, a link to the 3rd post in the topic would be [post 3](#{opts[:resource_path]}/3)
-        Wrap the whole the summary inside tags.
-        The discussion title is: #{opts[:content_title]}.
-        Don't use more than 400 words.
-        (1 asd said: This is a
-      Assistant:
-      TEXT
-
-      AnthropicCompletionStubs.stub_response(instructions, "truncated summary")
-
-      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
-    end
-  end
-end
diff --git a/spec/lib/modules/summarization/models/discourse_spec.rb b/spec/lib/modules/summarization/models/discourse_spec.rb
deleted file mode 100644
index cd1c768a..00000000
--- a/spec/lib/modules/summarization/models/discourse_spec.rb
+++ /dev/null
@@ -1,95 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::Discourse do
-  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
-  let(:model_name) { "bart-large-cnn-samsum" }
-  let(:max_tokens) { 20 }
-
-  let(:content) do
-    {
-      resource_path: "/t/1/POST_NUMBER",
-      content_title: "This is a title",
-      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
-    }
-  end
-
-  before { SiteSetting.ai_summarization_discourse_service_api_endpoint = "https://test.com" }
-
-  def stub_request(prompt, response)
-    WebMock
-      .stub_request(
-        :post,
-        "#{SiteSetting.ai_summarization_discourse_service_api_endpoint}/api/v1/classify",
-      )
-      .with(body: JSON.dump(model: model_name, content: prompt))
-      .to_return(status: 200, body: JSON.dump(summary_text: response))
-  end
-
-  def expected_messages(contents, opts)
-    contents.reduce("") do |memo, item|
-      memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-    end
-  end
-
-  def as_chunk(item)
-    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
-  end
-
-  describe "#summarize_in_chunks" do
-    context "when the content fits in a single chunk" do
-      it "performs a request to summarize" do
-        opts = content.except(:contents)
-
-        stub_request(expected_messages(content[:contents], opts), "This is summary 1")
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1")
-      end
-    end
-
-    context "when the content fits in multiple chunks" do
-      it "performs a request for each one to summarize" do
-        content[:contents] << {
-          poster: "asd2",
-          id: 2,
-          text: "This is a different text to summarize",
-        }
-        opts = content.except(:contents)
-
-        content[:contents].each_with_index do |item, idx|
-          stub_request(expected_messages([item], opts), "This is summary #{idx + 1}")
-        end
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
-      end
-    end
-  end
-
-  describe "#concatenate_summaries" do
-    it "combines all the different summaries into a single one" do
-      messages = ["summary 1", "summary 2"].join("\n")
-
-      stub_request(messages, "concatenated summary")
-
-      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
-    end
-  end
-
-  describe "#summarize_with_truncation" do
-    let(:max_tokens) { 9 }
-
-    it "truncates the context to meet the token limit" do
-      opts = content.except(:contents)
-
-      stub_request("( 1 asd said : this is", "truncated summary")
-
-      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
-    end
-  end
-end
diff --git a/spec/lib/modules/summarization/models/open_ai_spec.rb b/spec/lib/modules/summarization/models/open_ai_spec.rb
deleted file mode 100644
index bf2773e7..00000000
--- a/spec/lib/modules/summarization/models/open_ai_spec.rb
+++ /dev/null
@@ -1,121 +0,0 @@
-# frozen_string_literal: true
-
-RSpec.describe DiscourseAi::Summarization::Models::OpenAi do
-  subject(:model) { described_class.new(model_name, max_tokens: max_tokens) }
-
-  let(:model_name) { "gpt-3.5-turbo" }
-  let(:max_tokens) { 720 }
-
-  let(:content) do
-    {
-      resource_path: "/t/1/POST_NUMBER",
-      content_title: "This is a title",
-      contents: [{ poster: "asd", id: 1, text: "This is a text" }],
-    }
-  end
-
-  def as_chunk(item)
-    { ids: [item[:id]], summary: "(#{item[:id]} #{item[:poster]} said: #{item[:text]} " }
-  end
-
-  def expected_messages(contents, opts)
-    base_prompt = <<~TEXT
-      You are a summarization bot.
-      You effectively summarise any text and reply ONLY with ONLY the summarized text.
-      You condense it into a shorter version.
-      You understand and generate Discourse forum Markdown.
-      You format the response, including links, using markdown.
-      Try generating links as well the format is #{opts[:resource_path]}. eg: [ref](#{opts[:resource_path]}/77)
-      The discussion title is: #{opts[:content_title]}.
-    TEXT
-
-    messages = [{ role: "system", content: base_prompt }]
-
-    text =
-      contents.reduce("") do |memo, item|
-        memo += "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-      end
-
-    messages << {
-      role: "user",
-      content:
-        "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n#{text}",
-    }
-  end
-
-  describe "#summarize_in_chunks" do
-    context "when the content fits in a single chunk" do
-      it "performs a request to summarize" do
-        opts = content.except(:contents)
-
-        OpenAiCompletionsInferenceStubs.stub_response(
-          expected_messages(content[:contents], opts),
-          "This is summary 1",
-        )
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1")
-      end
-    end
-
-    context "when the content fits in multiple chunks" do
-      it "performs a request for each one to summarize" do
-        content[:contents] << {
-          poster: "asd2",
-          id: 2,
-          text: "This is a different text to summarize",
-        }
-        opts = content.except(:contents)
-
-        content[:contents].each_with_index do |item, idx|
-          OpenAiCompletionsInferenceStubs.stub_response(
-            expected_messages([item], opts),
-            "This is summary #{idx + 1}",
-          )
-        end
-
-        chunks = content[:contents].map { |c| as_chunk(c) }
-        summarized_chunks = model.summarize_in_chunks(chunks, opts).map { |c| c[:summary] }
-
-        expect(summarized_chunks).to contain_exactly("This is summary 1", "This is summary 2")
-      end
-    end
-  end
-
-  describe "#concatenate_summaries" do
-    it "combines all the different summaries into a single one" do
-      messages = [
-        { role: "system", content: "You are a helpful bot" },
-        {
-          role: "user",
-          content:
-            "Concatenate these disjoint summaries, creating a cohesive narrative. Keep the summary in the same language used in the text below.\nsummary 1\nsummary 2",
-        },
-      ]
-
-      OpenAiCompletionsInferenceStubs.stub_response(messages, "concatenated summary")
-
-      expect(model.concatenate_summaries(["summary 1", "summary 2"])).to eq("concatenated summary")
-    end
-  end
-
-  describe "#summarize_with_truncation" do
-    let(:max_tokens) { 709 }
-
-    it "truncates the context to meet the token limit" do
-      opts = content.except(:contents)
-
-      truncated_version = expected_messages(content[:contents], opts)
-
-      truncated_version.last[
-        :content
-      ] = "Summarize the following in 400 words. Keep the summary in the same language used in the text below.\n(1 asd said: This is a"
-
-      OpenAiCompletionsInferenceStubs.stub_response(truncated_version, "truncated summary")
-
-      expect(model.summarize_with_truncation(content[:contents], opts)).to eq("truncated summary")
-    end
-  end
-end
diff --git a/spec/lib/modules/summarization/strategies/fold_content_spec.rb b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
index abe664f3..dfe35528 100644
--- a/spec/lib/modules/summarization/strategies/fold_content_spec.rb
+++ b/spec/lib/modules/summarization/strategies/fold_content_spec.rb
@@ -1,28 +1,35 @@
 # frozen_string_literal: true

-require_relative "../../../../support/summarization/dummy_completion_model"
-
 RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
   describe "#summarize" do
     subject(:strategy) { described_class.new(model) }

     let(:summarize_text) { "This is a text" }
-    let(:model) { DummyCompletionModel.new(model_tokens) }
     let(:model_tokens) do
       # Make sure each content fits in a single chunk.
-      DiscourseAi::Tokenizer::BertTokenizer.size("(1 asd said: This is a text ") + 3
+      # 700 is the number of tokens reserved for the prompt.
+      700 + DiscourseAi::Tokenizer::OpenAiTokenizer.size("(1 asd said: This is a text ") + 3
     end

-    let(:user) { User.new }
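+    # The strategy now drives a real OpenAi summarization model; completions are
+    # scripted through LLM.with_prepared_responses instead of a dummy model.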
+    let(:model) do
+      DiscourseAi::Summarization::Models::OpenAi.new("gpt-4", max_tokens: model_tokens)
+    end

     let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }

+    let(:single_summary) { "this is a single summary" }
+    let(:concatenated_summary) { "this is a concatenated summary" }
+
+    let(:user) { User.new }
+
     context "when the content to summarize fits in a single call" do
       it "does one call to summarize content" do
-        result = strategy.summarize(content, user)
+        result =
+          DiscourseAi::Completions::LLM.with_prepared_responses([single_summary]) do |spy|
+            strategy.summarize(content, user).tap { expect(spy.completions).to eq(1) }
+          end

-        expect(model.summarization_calls).to eq(1)
-        expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
+        expect(result[:summary]).to eq(single_summary)
       end
     end
@@ -30,10 +37,12 @@ RSpec.describe DiscourseAi::Summarization::Strategies::FoldContent do
       it "summarizes each chunk and then concatenates them" do
         content[:contents] << { poster: "asd2", id: 2, text: summarize_text }

-        result = strategy.summarize(content, user)
+        result =
+          DiscourseAi::Completions::LLM.with_prepared_responses(
+            [single_summary, single_summary, concatenated_summary],
+          ) { |spy| strategy.summarize(content, user).tap { expect(spy.completions).to eq(3) } }

-        expect(model.summarization_calls).to eq(3)
-        expect(result[:summary]).to eq(DummyCompletionModel::CONCATENATED_SUMMARIES)
+        expect(result[:summary]).to eq(concatenated_summary)
       end
     end
   end
diff --git a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb b/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
deleted file mode 100644
index 1e7cc6ea..00000000
--- a/spec/lib/modules/summarization/strategies/truncate_content_spec.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "../../../../support/summarization/dummy_completion_model"
-
-RSpec.describe DiscourseAi::Summarization::Strategies::TruncateContent do
-  describe "#summarize" do
-    subject(:strategy) { described_class.new(model) }
-
-    let(:summarize_text) { "This is a text" }
-    let(:model_tokens) { summarize_text.length }
-    let(:model) { DummyCompletionModel.new(model_tokens) }
-
-    let(:user) { User.new }
-
-    let(:content) { { contents: [{ poster: "asd", id: 1, text: summarize_text }] } }
-
-    context "when the content to summarize doesn't fit in a single call" do
-      it "summarizes a truncated version" do
-        content[:contents] << { poster: "asd2", id: 2, text: summarize_text }
-
-        result = strategy.summarize(content, user)
-
-        expect(model.summarization_calls).to eq(1)
-        expect(result[:summary]).to eq(DummyCompletionModel::SINGLE_SUMMARY)
-      end
-    end
-  end
-end
diff --git a/spec/support/summarization/dummy_completion_model.rb b/spec/support/summarization/dummy_completion_model.rb
deleted file mode 100644
index a83a434a..00000000
--- a/spec/support/summarization/dummy_completion_model.rb
+++ /dev/null
@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-
-class DummyCompletionModel
-  SINGLE_SUMMARY = "this is a single summary"
-  CONCATENATED_SUMMARIES = "this is a concatenated summary"
-
-  def initialize(max_tokens)
-    @summarization_calls = 0
-    @available_tokens = max_tokens
-  end
-
-  attr_reader :max_length, :summarization_calls, :available_tokens
-
-  delegate :can_expand_tokens?, to: :tokenizer
-
-  def summarize_single(single_chunk, opts)
-    @summarization_calls += 1
-    SINGLE_SUMMARY
-  end
-
-  def summarize_in_chunks(chunks, opts)
-    chunks.map do |chunk|
-      chunk[:summary] = SINGLE_SUMMARY
-      @summarization_calls += 1
-      chunk
-    end
-  end
-
-  def concatenate_summaries(summaries)
-    @summarization_calls += 1
-    CONCATENATED_SUMMARIES
-  end
-
-  def summarize_with_truncation(_contents, _opts)
-    @summarization_calls += 1
-    SINGLE_SUMMARY
-  end
-
-  def format_content_item(item)
-    "(#{item[:id]} #{item[:poster]} said: #{item[:text]} "
-  end
-
-  def tokenizer
-    DiscourseAi::Tokenizer::BertTokenizer
-  end
-end