Work in progress: per-post embeddings and an embedding version column
Feel free to continue any work here; I will pick it up again next week.
This commit is contained in:
parent
93d9d9ea91
commit
6e6ced4554
|
@ -47,6 +47,7 @@ en:
|
|||
ai_embeddings_generate_for_pms: "Generate embeddings for personal messages."
|
||||
ai_embeddings_semantic_related_topics_enabled: "Use Semantic Search for related topics."
|
||||
ai_embeddings_semantic_related_topics: "Maximum number of topics to show in the related topics section."
|
||||
ai_embeddings_semantic_related_include_closed_topics: "Include closed topics in related topics."
|
||||
ai_embeddings_pg_connection_string: "PostgreSQL connection string for the embeddings module. Needs pgvector extension enabled and a series of tables created. See docs for more info."
|
||||
ai_embeddings_semantic_search_model: "Model to use for semantic search."
|
||||
ai_embeddings_semantic_search_enabled: "Enable full-page semantic search."
|
||||
|
|
|
@ -156,7 +156,9 @@ plugins:
|
|||
ai_embeddings_semantic_related_topics_enabled: false
|
||||
ai_embeddings_semantic_related_topics: 5
|
||||
ai_embeddings_semantic_related_include_closed_topics: true
|
||||
ai_embeddings_pg_connection_string: ""
|
||||
ai_embeddings_pg_connection_string:
|
||||
default: ""
|
||||
secret: true
|
||||
ai_embeddings_semantic_search_enabled:
|
||||
default: false
|
||||
client: true
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
module DiscourseAi
|
||||
module Embeddings
|
||||
class Topic
|
||||
def generate_and_store_embeddings_for(topic)
|
||||
VERSION = 1
|
||||
|
||||
def generate_and_store_embeddings_for(topic, include_posts: true)
|
||||
return unless SiteSetting.ai_embeddings_enabled
|
||||
return if topic.blank? || topic.first_post.blank?
|
||||
|
||||
|
@ -13,6 +15,18 @@ module DiscourseAi
|
|||
enabled_models.each do |model|
|
||||
embedding = model.generate_embedding(topic.first_post.raw)
|
||||
persist_embedding(topic, model, embedding) if embedding
|
||||
|
||||
if include_posts
|
||||
persist_embedding(topic.first_post, model, embedding) if embedding
|
||||
|
||||
topic
|
||||
.posts
|
||||
.where("post_number > 1 AND post_type = 1")
|
||||
.each do |post|
|
||||
embedding = model.generate_embedding(post.raw)
|
||||
persist_embedding(post, model, embedding) if embedding
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -70,13 +84,20 @@ module DiscourseAi
|
|||
|
||||
private
|
||||
|
||||
def persist_embedding(topic, model, embedding)
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL, topic_id: topic.id, embedding: embedding)
|
||||
INSERT INTO topic_embeddings_#{model.name.underscore} (topic_id, embedding)
|
||||
VALUES (:topic_id, '[:embedding]')
|
||||
ON CONFLICT (topic_id)
|
||||
def persist_embedding(topic_or_post, model, embedding)
|
||||
table = topic_or_post.is_a?(Topic) ? "topic" : "post"
|
||||
|
||||
DiscourseAi::Database::Connection.db.exec(
|
||||
<<~SQL,
|
||||
INSERT INTO #{table}_embeddings_#{model.name.underscore} (#{table}_id, embedding, version)
|
||||
VALUES (:id, '[:embedding]', :version)
|
||||
ON CONFLICT (#{table}_id)
|
||||
DO UPDATE SET embedding = '[:embedding]'
|
||||
SQL
|
||||
id: topic_or_post.id,
|
||||
embedding: embedding,
|
||||
version: VERSION,
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -7,17 +7,24 @@ task "ai:embeddings:create_table" => [:environment] do
|
|||
SQL
|
||||
|
||||
DiscourseAi::Embeddings::Model.enabled_models.each do |model|
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
CREATE TABLE IF NOT EXISTS topic_embeddings_#{model.name.underscore} (
|
||||
topic_id bigint PRIMARY KEY,
|
||||
embedding vector(#{model.dimensions})
|
||||
);
|
||||
%w[topic post].each do |table|
|
||||
table_name = "#{table}_embeddings_#{model.name.underscore}"
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
CREATE TABLE IF NOT EXISTS #{table_name} (
|
||||
#{table}_id bigint PRIMARY KEY,
|
||||
embedding vector(#{model.dimensions}),
|
||||
version smallint
|
||||
)
|
||||
SQL
|
||||
DiscourseAi::Database::Connection.db.exec(<<~SQL)
|
||||
ALTER TABLE #{table_name} ADD COLUMN IF NOT EXISTS version smallint
|
||||
SQL
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
desc "Backfill embeddings for all topics"
|
||||
task "ai:embeddings:backfill", [:start_topic] => [:environment] do
|
||||
task "ai:embeddings:backfill", [:start_topic] => [:environment] do |_, args|
|
||||
public_categories = Category.where(read_restricted: false).pluck(:id)
|
||||
topic_embeddings = DiscourseAi::Embeddings::Topic.new
|
||||
Topic
|
||||
|
|
Loading…
Reference in New Issue