DEV: Embedding tables' model_id has to be a bigint (#1058)
* DEV: Embedding tables' model_id has to be a bigint * Drop old search_bit indexes * Copy rag fragment embeddings created during deploy window
This commit is contained in:
parent
d07cf51653
commit
65bbcd71fc
|
@ -0,0 +1,85 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
# Creates the new pluralised embeddings tables (ai_topics_embeddings,
# ai_posts_embeddings and ai_document_fragments_embeddings) whose target id and
# model_id columns are bigints, copies the rows of the old singular tables into
# them and then builds the binary-quantized HNSW search indexes.
class NewEmbeddingsTables < ActiveRecord::Migration[7.2]
  def up
    create_embeddings_table(
      :ai_topics_embeddings,
      :topic_id,
      "index_ai_topics_embeddings_on_model_strategy_topic",
    )
    create_embeddings_table(
      :ai_posts_embeddings,
      :post_id,
      "index_ai_posts_embeddings_on_model_strategy_post",
    )
    create_embeddings_table(
      :ai_document_fragments_embeddings,
      :rag_document_fragment_id,
      "index_ai_fragments_embeddings_on_model_strategy_fragment",
    )

    # Load the data first and index afterwards: building an HNSW index over
    # already-loaded rows is faster than maintaining it on every inserted row
    # (see the pgvector documentation on index build time).
    copy_existing_embeddings
    create_search_indexes
  end

  def down
    raise ActiveRecord::IrreversibleMigration
  end

  private

  # Creates one embeddings table without a surrogate primary key.
  #
  # @param table_name [Symbol] name of the table to create
  # @param target_column [Symbol] bigint column holding the id of the embedded record
  # @param unique_index_name [String] name of the unique
  #   (model_id, strategy_id, target) index
  def create_embeddings_table(table_name, target_column, unique_index_name)
    create_table table_name, id: false do |t|
      t.bigint target_column, null: false
      t.bigint :model_id, null: false
      t.integer :model_version, null: false
      t.integer :strategy_id, null: false
      t.integer :strategy_version, null: false
      t.text :digest, null: false
      t.column :embeddings, "halfvec", null: false
      t.timestamps

      t.index [:model_id, :strategy_id, target_column], unique: true, name: unique_index_name
    end
  end

  # Copy data from old tables to new tables.
  #
  # NOTE: `SELECT *` maps columns by position, so this relies on the old tables
  # declaring their columns in the same order as the INSERT column lists below.
  def copy_existing_embeddings
    execute <<~SQL
      INSERT INTO ai_topics_embeddings (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_topic_embeddings;

      INSERT INTO ai_posts_embeddings (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_post_embeddings;

      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_document_fragment_embeddings;
    SQL
  end

  # Builds one partial HNSW index per (table, model) pair over the
  # binary-quantized embeddings, restricted to strategy_id = 1.
  #
  # Copied from 20241008054440_create_binary_indexes_for_embeddings
  def create_search_indexes
    # our supported embeddings models IDs and dimensions
    model_dimensions = {
      1 => 768,
      2 => 1536,
      3 => 1024,
      4 => 1024,
      5 => 768,
      6 => 1536,
      7 => 2000,
      8 => 1024,
    }

    %w[topics posts document_fragments].each do |type|
      model_dimensions.each do |model_id, dimensions|
        execute <<~SQL
          CREATE INDEX ai_#{type}_embeddings_#{model_id}_1_search_bit ON ai_#{type}_embeddings
          USING hnsw ((binary_quantize(embeddings)::bit(#{dimensions})) bit_hamming_ops)
          WHERE model_id = #{model_id} AND strategy_id = 1;
        SQL
      end
    end
  end
end
|
|
@ -0,0 +1,51 @@
|
|||
# frozen_string_literal: true
|
||||
# Finishes the move to the pluralised embeddings tables: copies over any RAG
# fragment embeddings that were written to the old table during the deploy
# window, then drops the old search_bit indexes and the old tables themselves.
class DropOldEmbeddingTables < ActiveRecord::Migration[7.2]
  def up
    # Copy rag embeddings created during deploy.
    #
    # Rows are deduplicated on the new table's unique key
    # (model_id, strategy_id, rag_document_fragment_id) rather than on the
    # fragment id alone, so an embedding for a fragment that already exists
    # under a different model or strategy is still copied.
    # `SELECT *` maps columns by position onto the INSERT column list.
    execute <<~SQL
      INSERT INTO ai_document_fragments_embeddings (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      SELECT * FROM ai_document_fragment_embeddings
      ON CONFLICT (model_id, strategy_id, rag_document_fragment_id) DO NOTHING
    SQL

    # Drop the old per-model search indexes, named
    # ai_<type>_embeddings_<model_id>_1_search_bit for model ids 1 through 8.
    %w[topic post document_fragment].each do |type|
      (1..8).each do |model_id|
        execute "DROP INDEX IF EXISTS ai_#{type}_embeddings_#{model_id}_1_search_bit;"
      end
    end

    drop_table :ai_topic_embeddings
    drop_table :ai_post_embeddings
    drop_table :ai_document_fragment_embeddings
  end

  # The old tables and their data are gone after `up`, so a rollback cannot
  # restore them; fail loudly instead of silently marking it as reverted.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end
|
|
@ -8,9 +8,9 @@
|
|||
module DiscourseAi
|
||||
module Embeddings
|
||||
class Schema
|
||||
TOPICS_TABLE = "ai_topic_embeddings"
|
||||
POSTS_TABLE = "ai_post_embeddings"
|
||||
RAG_DOCS_TABLE = "ai_document_fragment_embeddings"
|
||||
TOPICS_TABLE = "ai_topics_embeddings"
|
||||
POSTS_TABLE = "ai_posts_embeddings"
|
||||
RAG_DOCS_TABLE = "ai_document_fragments_embeddings"
|
||||
|
||||
def self.for(
|
||||
target_klass,
|
||||
|
|
Loading…
Reference in New Issue