mirror of
https://github.com/discourse/discourse-ai.git
synced 2025-07-08 23:32:45 +00:00
62 lines
1.7 KiB
Ruby
62 lines
1.7 KiB
Ruby
|
# frozen_string_literal: true
|
||
|
# Cleans up the strategy-1 `*_search_bit` indexes for embedding definition
# ids 1 through 8 on the topics, posts and document_fragments embedding tables.
#
# An id is considered stale when its `embedding_definitions` row is missing or
# when the row's `dimensions` differs from the value in `seeded_dimensions`.
# Stale ids get their indexes dropped; stale ids whose row still exists then get
# the indexes recreated using the row's current `dimensions`.
class CleanUnusedEmbeddingSearchIndexes < ActiveRecord::Migration[7.2]
  def up
    definitions = DB.query("SELECT id, dimensions FROM embedding_definitions WHERE id <= 8")

    # Pair every id in 1..8 with its row; the row is nil when no definition exists.
    candidates =
      (1..8).map { |seeded_id| [seeded_id, definitions.find { |row| row&.id == seeded_id }] }

    stale = candidates.select { |_seeded_id, row| row.blank? || !correctly_indexed?(row) }

    drop_sql =
      stale
        .flat_map do |seeded_id, _row|
          embedding_tables.map do |table|
            "DROP INDEX IF EXISTS ai_#{table}_embeddings_#{seeded_id}_1_search_bit;"
          end
        end
        .join("\n")

    DB.exec(drop_sql) unless drop_sql.blank?

    # Only definitions that still exist can be re-indexed; missing ones stay dropped.
    create_sql =
      stale.filter_map { |_seeded_id, row| amended_idxs(row) if row.present? }.join("\n")

    DB.exec(create_sql) unless create_sql.blank?
  end

  # Table name fragments interpolated into `ai_<name>_embeddings`.
  def embedding_tables
    ["topics", "posts", "document_fragments"]
  end

  # Builds one CREATE INDEX statement per embedding table for the given
  # definition row (must respond to `id` and `dimensions`) and returns them as a
  # single newline-joined SQL string.
  def amended_idxs(model)
    statements =
      embedding_tables.map do |table|
        <<~SQL
          CREATE INDEX IF NOT EXISTS ai_#{table}_embeddings_#{model.id}_1_search_bit ON ai_#{table}_embeddings
          USING hnsw ((binary_quantize(embeddings)::bit(#{model.dimensions})) bit_hamming_ops)
          WHERE model_id = #{model.id} AND strategy_id = 1;
        SQL
      end

    statements.join("\n")
  end

  # True when the row's dimensions equal the value recorded for its id in
  # `seeded_dimensions`.
  def correctly_indexed?(edef)
    expected = seeded_dimensions[edef.id]
    expected == edef.dimensions
  end

  # Expected dimensions keyed by definition id.
  # NOTE(review): presumably the dimensions the original indexes for these ids
  # were created with - confirm against the migration that created them.
  def seeded_dimensions
    {
      1 => 768,
      2 => 1536,
      3 => 1024,
      4 => 1024,
      5 => 768,
      6 => 1536,
      7 => 2000,
      8 => 1024,
    }
  end

  # This migration cannot be rolled back.
  def down
    raise ActiveRecord::IrreversibleMigration
  end
end
|