mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-03-09 11:48:47 +00:00
63 lines
1.7 KiB
Ruby
63 lines
1.7 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
module DiscourseAi
  module Embeddings
    # Static registry of the embedding models known to the plugin, plus the
    # lookup tables that translate a similarity function into its PostgreSQL
    # (pgvector) counterpart.
    class Models
      # Immutable description of a single embedding model.
      #
      # Members:
      #   name                - model identifier, matched against the site setting
      #   dimensions          - size of the vectors the model produces
      #   max_sequence_lenght - maximum input length accepted by the model
      #                         (misspelling kept: it is part of the public API)
      #   functions           - supported similarity functions (:dot, :cosine, :euclidean)
      #   type                - list containing :symmetric and/or :asymmetric
      #   provider            - name of the service that serves the model
      MODEL =
        Data.define(:name, :dimensions, :max_sequence_lenght, :functions, :type, :provider) do
          # Correctly spelled reader for +max_sequence_lenght+; the original
          # member name stays available for existing callers.
          def max_sequence_length
            max_sequence_lenght
          end
        end

      # Similarity function => pgvector index operator class.
      SEARCH_FUNCTION_TO_PG_INDEX = {
        dot: "vector_ip_ops",
        cosine: "vector_cosine_ops",
        euclidean: "vector_l2_ops",
      }.freeze

      # Similarity function => pgvector distance operator.
      SEARCH_FUNCTION_TO_PG_FUNCTION = { dot: "<#>", cosine: "<=>", euclidean: "<->" }.freeze

      # Returns the models whose name appears in the pipe-separated
      # +ai_embeddings_models+ site setting. Names in the setting that do not
      # correspond to a known model are ignored.
      #
      # @return [Array<MODEL>]
      def self.enabled_models
        enabled_names = SiteSetting.ai_embeddings_models.split("|").map(&:strip)
        list.select { |model| enabled_names.include?(model.name) }
      end

      # Returns every known model. The array is built once and memoized on the
      # class, so repeated calls return the same object.
      #
      # @return [Array<MODEL>]
      def self.list
        @list ||= [
          MODEL.new(
            name: "all-mpnet-base-v2",
            dimensions: 768,
            max_sequence_lenght: 384,
            functions: %i[dot cosine euclidean],
            type: [:symmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "all-distilroberta-v1",
            dimensions: 768,
            max_sequence_lenght: 512,
            functions: %i[dot cosine euclidean],
            type: [:symmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "multi-qa-mpnet-base-dot-v1",
            dimensions: 768,
            max_sequence_lenght: 512,
            functions: [:dot],
            type: [:symmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "paraphrase-multilingual-mpnet-base-v2",
            dimensions: 768,
            max_sequence_lenght: 128,
            functions: [:cosine],
            type: [:symmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "msmarco-distilbert-base-v4",
            dimensions: 768,
            max_sequence_lenght: 512,
            functions: [:cosine],
            type: [:asymmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "msmarco-distilbert-base-tas-b",
            dimensions: 768,
            max_sequence_lenght: 512,
            functions: [:dot],
            type: [:asymmetric],
            provider: "discourse",
          ),
          MODEL.new(
            name: "text-embedding-ada-002",
            dimensions: 1536,
            max_sequence_lenght: 2048,
            functions: [:cosine],
            # %i[] already yields symbols; writing `:symmetric` inside it would
            # produce the symbol :":symmetric" instead of :symmetric.
            type: %i[symmetric asymmetric],
            provider: "openai",
          ),
        ]
      end
    end
  end
end
|