FIX: Update migrations with latest vector rep changes (#199)
This commit is contained in:
parent
ee734a340a
commit
175def1267
|
@ -2,26 +2,23 @@
|
|||
|
||||
class CreateAiTopicEmbeddingsTable < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
models = [
|
||||
DiscourseAi::Embeddings::Models::AllMpnetBaseV2,
|
||||
DiscourseAi::Embeddings::Models::TextEmbeddingAda002,
|
||||
]
|
||||
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
|
||||
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
|
||||
vector_reps =
|
||||
[
|
||||
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
|
||||
DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
|
||||
].map { |k| k.new(truncation) }
|
||||
|
||||
models.each do |model|
|
||||
strategies.each do |strategy|
|
||||
table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}".to_sym
|
||||
vector_reps.each do |vector_rep|
|
||||
create_table vector_rep.table_name.to_sym, id: false do |t|
|
||||
t.integer :topic_id, null: false
|
||||
t.integer :model_version, null: false
|
||||
t.integer :strategy_version, null: false
|
||||
t.text :digest, null: false
|
||||
t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false
|
||||
t.timestamps
|
||||
|
||||
create_table table_name, id: false do |t|
|
||||
t.integer :topic_id, null: false
|
||||
t.integer :model_version, null: false
|
||||
t.integer :strategy_version, null: false
|
||||
t.text :digest, null: false
|
||||
t.column :embeddings, "vector(#{model.dimensions})", null: false
|
||||
t.timestamps
|
||||
|
||||
t.index :topic_id, unique: true
|
||||
end
|
||||
t.index :topic_id, unique: true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,54 +5,54 @@ class MigrateEmbeddingsFromDedicatedDatabase < ActiveRecord::Migration[7.0]
|
|||
return unless SiteSetting.ai_embeddings_enabled
|
||||
return unless SiteSetting.ai_embeddings_pg_connection_string.present?
|
||||
|
||||
models = [
|
||||
DiscourseAi::Embeddings::Models::AllMpnetBaseV2,
|
||||
DiscourseAi::Embeddings::Models::TextEmbeddingAda002,
|
||||
]
|
||||
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
|
||||
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
|
||||
|
||||
models.each do |model|
|
||||
strategies.each do |strategy|
|
||||
new_table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}"
|
||||
old_table_name = "topic_embeddings_#{model.name.underscore}"
|
||||
vector_reps =
|
||||
[
|
||||
DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
|
||||
DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
|
||||
].map { |k| k.new(truncation) }
|
||||
|
||||
begin
|
||||
row_count =
|
||||
DiscourseAi::Database::Connection
|
||||
.db
|
||||
.query_single("SELECT COUNT(*) FROM #{old_table_name}")
|
||||
.first
|
||||
vector_reps.each do |vector_rep|
|
||||
new_table_name = vector_rep.table_name
|
||||
old_table_name = "topic_embeddings_#{vector_rep.name.underscore}"
|
||||
|
||||
if row_count > 0
|
||||
puts "Migrating #{row_count} embeddings from #{old_table_name} to #{new_table_name}"
|
||||
begin
|
||||
row_count =
|
||||
DiscourseAi::Database::Connection
|
||||
.db
|
||||
.query_single("SELECT COUNT(*) FROM #{old_table_name}")
|
||||
.first
|
||||
|
||||
last_topic_id = 0
|
||||
if row_count > 0
|
||||
puts "Migrating #{row_count} embeddings from #{old_table_name} to #{new_table_name}"
|
||||
|
||||
loop do
|
||||
batch = DiscourseAi::Database::Connection.db.query(<<-SQL)
|
||||
last_topic_id = 0
|
||||
|
||||
loop do
|
||||
batch = DiscourseAi::Database::Connection.db.query(<<-SQL)
|
||||
SELECT topic_id, embedding
|
||||
FROM #{old_table_name}
|
||||
WHERE topic_id > #{last_topic_id}
|
||||
ORDER BY topic_id ASC
|
||||
LIMIT 50
|
||||
SQL
|
||||
break if batch.empty?
|
||||
break if batch.empty?
|
||||
|
||||
DB.exec(<<-SQL)
|
||||
DB.exec(<<-SQL)
|
||||
INSERT INTO #{new_table_name} (topic_id, model_version, strategy_version, digest, embeddings, created_at, updated_at)
|
||||
VALUES #{batch.map { |r| "(#{r.topic_id}, 0, 0, '', '#{r.embedding}', CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)" }.join(", ")}
|
||||
ON CONFLICT (topic_id)
|
||||
DO NOTHING
|
||||
SQL
|
||||
|
||||
last_topic_id = batch.last.topic_id
|
||||
end
|
||||
last_topic_id = batch.last.topic_id
|
||||
end
|
||||
rescue PG::Error => e
|
||||
Rails.logger.error(
|
||||
"Error #{e} migrating embeddings from #{old_table_name} to #{new_table_name}",
|
||||
)
|
||||
end
|
||||
rescue PG::Error => e
|
||||
Rails.logger.error(
|
||||
"Error #{e} migrating embeddings from #{old_table_name} to #{new_table_name}",
|
||||
)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -2,24 +2,18 @@
|
|||
|
||||
class CreateMultilingualTopicEmbeddingsTable < ActiveRecord::Migration[7.0]
|
||||
def change
|
||||
models = [DiscourseAi::Embeddings::Models::MultilingualE5Large]
|
||||
strategies = [DiscourseAi::Embeddings::Strategies::Truncation]
|
||||
truncation = DiscourseAi::Embeddings::Strategies::Truncation.new
|
||||
vector_rep = DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large.new(truncation)
|
||||
|
||||
models.each do |model|
|
||||
strategies.each do |strategy|
|
||||
table_name = "ai_topic_embeddings_#{model.id}_#{strategy.id}".to_sym
|
||||
create_table vector_rep.table_name.to_sym, id: false do |t|
|
||||
t.integer :topic_id, null: false
|
||||
t.integer :model_version, null: false
|
||||
t.integer :strategy_version, null: false
|
||||
t.text :digest, null: false
|
||||
t.column :embeddings, "vector(#{vector_rep.dimensions})", null: false
|
||||
t.timestamps
|
||||
|
||||
create_table table_name, id: false do |t|
|
||||
t.integer :topic_id, null: false
|
||||
t.integer :model_version, null: false
|
||||
t.integer :strategy_version, null: false
|
||||
t.text :digest, null: false
|
||||
t.column :embeddings, "vector(#{model.dimensions})", null: false
|
||||
t.timestamps
|
||||
|
||||
t.index :topic_id, unique: true
|
||||
end
|
||||
end
|
||||
t.index :topic_id, unique: true
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue