diff --git a/config/settings.yml b/config/settings.yml index ff6f975e..a96c2035 100644 --- a/config/settings.yml +++ b/config/settings.yml @@ -236,6 +236,8 @@ discourse_ai: choices: - all-mpnet-base-v2 - text-embedding-ada-002 + - text-embedding-3-small + - text-embedding-3-large - multilingual-e5-large - bge-large-en - gemini diff --git a/db/migrate/20240126013358_create_openai_text_embedding_tables.rb b/db/migrate/20240126013358_create_openai_text_embedding_tables.rb new file mode 100644 index 00000000..6758ba17 --- /dev/null +++ b/db/migrate/20240126013358_create_openai_text_embedding_tables.rb @@ -0,0 +1,49 @@ +# frozen_string_literal: true + +class CreateOpenaiTextEmbeddingTables < ActiveRecord::Migration[7.0] + def change + create_table :ai_topic_embeddings_6_1, id: false do |t| + t.integer :topic_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(1536)", null: false + t.timestamps + + t.index :topic_id, unique: true + end + + create_table :ai_topic_embeddings_7_1, id: false do |t| + t.integer :topic_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(2000)", null: false + t.timestamps + + t.index :topic_id, unique: true + end + + create_table :ai_post_embeddings_6_1, id: false do |t| + t.integer :post_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(1536)", null: false + t.timestamps + + t.index :post_id, unique: true + end + + create_table :ai_post_embeddings_7_1, id: false do |t| + t.integer :post_id, null: false + t.integer :model_version, null: false + t.integer :strategy_version, null: false + t.text :digest, null: false + t.column :embeddings, "vector(2000)", null: false + t.timestamps + + t.index :post_id, unique: true + end + 
end +end diff --git a/lib/ai_bot/entry_point.rb b/lib/ai_bot/entry_point.rb index 3c9d9a75..f0a48fa0 100644 --- a/lib/ai_bot/entry_point.rb +++ b/lib/ai_bot/entry_point.rb @@ -46,7 +46,8 @@ module DiscourseAi def inject_into(plugin) plugin.on(:site_setting_changed) do |name, _old_value, _new_value| - if name == :ai_bot_enabled_chat_bots || name == :ai_bot_enabled + if name == :ai_bot_enabled_chat_bots || name == :ai_bot_enabled || + name == :discourse_ai_enabled DiscourseAi::AiBot::SiteSettingsExtension.enable_or_disable_ai_bots end end diff --git a/lib/embeddings/semantic_related.rb b/lib/embeddings/semantic_related.rb index da813703..e9f5ed75 100644 --- a/lib/embeddings/semantic_related.rb +++ b/lib/embeddings/semantic_related.rb @@ -18,20 +18,19 @@ module DiscourseAi DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy) cache_for = results_ttl(topic) - asd = - Discourse - .cache - .fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do - vector_rep - .symmetric_topics_similarity_search(topic) - .tap do |candidate_ids| - # Happens when the topic doesn't have any embeddings - # I'd rather not use Exceptions to control the flow, so this should be refactored soon - if candidate_ids.empty? || !candidate_ids.include?(topic.id) - raise MissingEmbeddingError, "No embeddings found for topic #{topic.id}" - end + Discourse + .cache + .fetch(semantic_suggested_key(topic.id), expires_in: cache_for) do + vector_rep + .symmetric_topics_similarity_search(topic) + .tap do |candidate_ids| + # Happens when the topic doesn't have any embeddings + # I'd rather not use Exceptions to control the flow, so this should be refactored soon + if candidate_ids.empty? 
|| !candidate_ids.include?(topic.id) + raise MissingEmbeddingError, "No embeddings found for topic #{topic.id}" end - end + end + end rescue MissingEmbeddingError # avoid a flood of jobs when visiting topic if Discourse.redis.set( @@ -71,15 +70,14 @@ module DiscourseAi return "" if related_topics.empty? - render_result = - ApplicationController.render( - template: "list/related_topics", - layout: false, - assigns: { - list: related_topics, - topic: topic, - }, - ) + ApplicationController.render( + template: "list/related_topics", + layout: false, + assigns: { + list: related_topics, + topic: topic, + }, + ) end private diff --git a/lib/embeddings/vector_representations/base.rb b/lib/embeddings/vector_representations/base.rb index 3eb68245..58226913 100644 --- a/lib/embeddings/vector_representations/base.rb +++ b/lib/embeddings/vector_representations/base.rb @@ -13,6 +13,8 @@ module DiscourseAi DiscourseAi::Embeddings::VectorRepresentations::Gemini, DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large, DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002, + DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Small, + DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Large, ].map { _1.new(strategy) }.find { _1.name == SiteSetting.ai_embeddings_model } end diff --git a/lib/embeddings/vector_representations/text_embedding_3_large.rb b/lib/embeddings/vector_representations/text_embedding_3_large.rb new file mode 100644 index 00000000..ab88b238 --- /dev/null +++ b/lib/embeddings/vector_representations/text_embedding_3_large.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module DiscourseAi + module Embeddings + module VectorRepresentations + class TextEmbedding3Large < Base + def id + 7 + end + + def version + 1 + end + + def name + "text-embedding-3-large" + end + + def dimensions + # real dimensions are 3072, but we only support up to 2000 in the + # indexes, so we downsample to 2000 via API + 2000 + end + + def 
max_sequence_length + 8191 + end + + def pg_function + "<=>" + end + + def pg_index_type + "vector_cosine_ops" + end + + def vector_from(text) + response = + DiscourseAi::Inference::OpenAiEmbeddings.perform!( + text, + model: name, + dimensions: dimensions, + ) + response[:data].first[:embedding] + end + + def tokenizer + DiscourseAi::Tokenizer::OpenAiTokenizer + end + end + end + end +end diff --git a/lib/embeddings/vector_representations/text_embedding_3_small.rb b/lib/embeddings/vector_representations/text_embedding_3_small.rb new file mode 100644 index 00000000..a32544d9 --- /dev/null +++ b/lib/embeddings/vector_representations/text_embedding_3_small.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +module DiscourseAi + module Embeddings + module VectorRepresentations + class TextEmbedding3Small < Base + def id + 6 + end + + def version + 1 + end + + def name + "text-embedding-3-small" + end + + def dimensions + 1536 + end + + def max_sequence_length + 8191 + end + + def pg_function + "<=>" + end + + def pg_index_type + "vector_cosine_ops" + end + + def vector_from(text) + response = DiscourseAi::Inference::OpenAiEmbeddings.perform!(text, model: name) + response[:data].first[:embedding] + end + + def tokenizer + DiscourseAi::Tokenizer::OpenAiTokenizer + end + end + end + end +end diff --git a/lib/embeddings/vector_representations/text_embedding_ada_002.rb b/lib/embeddings/vector_representations/text_embedding_ada_002.rb index 3d1fc0ff..2bce079b 100644 --- a/lib/embeddings/vector_representations/text_embedding_ada_002.rb +++ b/lib/embeddings/vector_representations/text_embedding_ada_002.rb @@ -33,7 +33,7 @@ module DiscourseAi end def vector_from(text) - response = DiscourseAi::Inference::OpenAiEmbeddings.perform!(text) + response = DiscourseAi::Inference::OpenAiEmbeddings.perform!(text, model: name) response[:data].first[:embedding] end diff --git a/lib/inference/open_ai_embeddings.rb b/lib/inference/open_ai_embeddings.rb index 0879a9f6..6826a896 100644 --- 
a/lib/inference/open_ai_embeddings.rb +++ b/lib/inference/open_ai_embeddings.rb @@ -3,7 +3,7 @@ module ::DiscourseAi module Inference class OpenAiEmbeddings - def self.perform!(content, model = nil) + def self.perform!(content, model:, dimensions: nil) headers = { "Content-Type" => "application/json" } if SiteSetting.ai_openai_embeddings_url.include?("azure") @@ -12,14 +12,10 @@ module ::DiscourseAi headers["Authorization"] = "Bearer #{SiteSetting.ai_openai_api_key}" end - model ||= "text-embedding-ada-002" + payload = { model: model, input: content } + payload[:dimensions] = dimensions if dimensions.present? - response = - Faraday.post( - SiteSetting.ai_openai_embeddings_url, - { model: model, input: content }.to_json, - headers, - ) + response = Faraday.post(SiteSetting.ai_openai_embeddings_url, payload.to_json, headers) case response.status when 200 diff --git a/spec/shared/inference/openai_embeddings_spec.rb b/spec/shared/inference/openai_embeddings_spec.rb index a8e2c3a3..e938b6a4 100644 --- a/spec/shared/inference/openai_embeddings_spec.rb +++ b/spec/shared/inference/openai_embeddings_spec.rb @@ -25,7 +25,8 @@ describe DiscourseAi::Inference::OpenAiEmbeddings do }, ).to_return(status: 200, body: body_json, headers: {}) - result = DiscourseAi::Inference::OpenAiEmbeddings.perform!("hello") + result = + DiscourseAi::Inference::OpenAiEmbeddings.perform!("hello", model: "text-embedding-ada-002") expect(result[:usage]).to eq({ prompt_tokens: 1, total_tokens: 1 }) expect(result[:data].first).to eq({ object: "embedding", embedding: [0.0, 0.1] }) @@ -42,15 +43,22 @@ describe DiscourseAi::Inference::OpenAiEmbeddings do data: [{ object: "embedding", embedding: [0.0, 0.1] }], }.to_json + body = { model: "text-embedding-ada-002", input: "hello", dimensions: 1000 }.to_json + stub_request(:post, "https://api.openai.com/v1/embeddings").with( - body: "{\"model\":\"text-embedding-ada-002\",\"input\":\"hello\"}", + body: body, headers: { "Authorization" => "Bearer 123456", 
"Content-Type" => "application/json", }, ).to_return(status: 200, body: body_json, headers: {}) - result = DiscourseAi::Inference::OpenAiEmbeddings.perform!("hello") + result = + DiscourseAi::Inference::OpenAiEmbeddings.perform!( + "hello", + model: "text-embedding-ada-002", + dimensions: 1000, + ) expect(result[:usage]).to eq({ prompt_tokens: 1, total_tokens: 1 }) expect(result[:data].first).to eq({ object: "embedding", embedding: [0.0, 0.1] })