2023-09-05 10:08:23 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module Embeddings
|
|
|
|
module VectorRepresentations
|
|
|
|
class Base
|
2024-02-01 14:54:09 -05:00
|
|
|
class << self
  # Looks up the representation class whose `name` equals +model_name+.
  #
  # The candidates are listed explicitly because the loader may not have
  # loaded the subclasses yet.
  #
  # @param model_name [String] value compared against each candidate's `name`
  # @return [Class, nil] the matching class, or nil when nothing matches
  def find_representation(model_name)
    candidates = [
      DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
      DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn,
      DiscourseAi::Embeddings::VectorRepresentations::BgeM3,
      DiscourseAi::Embeddings::VectorRepresentations::Gemini,
      DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large,
      DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Large,
      DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Small,
      DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
    ]

    candidates.find { |candidate| candidate.name == model_name }
  end

  # Instantiates the representation selected by
  # SiteSetting.ai_embeddings_model with the given strategy.
  #
  # @param strategy object forwarded to the representation's constructor
  # @return an instance of the class returned by find_representation
  def current_representation(strategy)
    representation_class = find_representation(SiteSetting.ai_embeddings_model)
    representation_class.new(strategy)
  end

  # Hook for subclasses; this base implementation always raises.
  #
  # @raise [NotImplementedError]
  def correctly_configured?
    raise NotImplementedError
  end

  # Hook for subclasses; this base implementation always raises.
  # configuration_hint joins and counts the returned collection.
  #
  # @raise [NotImplementedError]
  def dependant_setting_names
    raise NotImplementedError
  end

  # Builds the translated hint that names the settings this representation
  # depends on.
  #
  # @return the result of I18n.t for "discourse_ai.embeddings.configuration.hint"
  def configuration_hint
    names = dependant_setting_names

    I18n.t(
      "discourse_ai.embeddings.configuration.hint",
      settings: names.join(", "),
      count: names.length,
    )
  end
end
|
|
|
|
|
|
|
|
# Binds this representation to a strategy.
#
# @param strategy object stored in @strategy; other methods of this class
#   call `id`, `version` and `prepare_text_from` on it
def initialize(strategy)
  @strategy = strategy
end
|
|
|
|
|
2023-10-26 11:07:37 -04:00
|
|
|
# Creates or refreshes the ivfflat search index of the topic and post
# embeddings tables for this model/strategy pair.
#
# For each table it sizes `lists` and `probes` from the current row count,
# stores the probes value in Discourse.cache, and then:
#   * creates the index when pg_indexes has no entry for it;
#   * rebuilds it when it is older than the refresh window (1 hour when
#     SiteSetting.ai_embeddings_semantic_related_topics_enabled, 1 day
#     otherwise) and either more than 10_000 rows were added since it was
#     built or the wanted `lists` differs from the one in the index definition;
#   * otherwise keeps it and logs its current state.
#
# NOTE(review): every branch that builds an index returns from the whole
# method, so at most one table is (re)indexed per call and the other table is
# only examined on a later call -- confirm this is intended.
#
# @param memory [String] forwarded to create_index!
# @return the result of create_index! when an index was built; otherwise the
#   array of table names returned by `each`
def consider_indexing(memory: "100MB")
  [topic_table_name, post_table_name].each do |table_name|
    index_name = index_name(table_name)

    # Using extension maintainer's recommendation for ivfflat indexes
    # Results are not as good as without indexes, but it's much faster
    # Disk usage is ~1x the size of the table, so this doubles table total size
    count =
      DB.query_single(
        "SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id};",
      ).first
    lists = [count < 1_000_000 ? count / 1000 : Math.sqrt(count).to_i, 10].max
    probes = [count < 1_000_000 ? lists / 10 : Math.sqrt(lists).to_i, 1].max
    Discourse.cache.write("#{table_name}-#{id}-#{@strategy.id}-probes", probes)

    existing_index = DB.query_single(<<~SQL, index_name: index_name).first
      SELECT
        indexdef
      FROM
        pg_indexes
      WHERE
        indexname = :index_name
        AND schemaname = 'public'
      LIMIT 1
    SQL

    unless existing_index.present?
      Rails.logger.info("Index #{index_name} does not exist, creating...")
      return create_index!(table_name, memory, lists, probes)
    end

    # The index comment is read back as the build time in Unix seconds.
    # A missing comment gives nil, and nil.to_i is 0.
    index_built_at =
      DB
        .query_single(
          "SELECT pg_catalog.obj_description((:index_name)::regclass, 'pg_class');",
          index_name: index_name,
        )
        .first
        .to_i
    index_age = Time.now.to_i - index_built_at

    new_rows =
      DB.query_single(
        "SELECT count(*) FROM #{table_name} WHERE model_id = #{id} AND strategy_id = #{@strategy.id} AND created_at > '#{Time.at(index_built_at)}';",
      ).first
    existing_lists = existing_index[/lists='(\d+)'/, 1]&.to_i

    refresh_window =
      SiteSetting.ai_embeddings_semantic_related_topics_enabled ? 1.hour : 1.day

    # An index without a timestamp comment (0) is never treated as stale.
    if index_built_at > 0 && index_built_at < refresh_window.ago.to_i
      if new_rows > 10_000
        # Report the elapsed seconds, not the raw build timestamp.
        Rails.logger.info(
          "Index #{index_name} is #{index_age} seconds old, and there are #{new_rows} new rows, updating...",
        )
        return create_index!(table_name, memory, lists, probes)
      elsif existing_lists != lists
        Rails.logger.info(
          "Index #{index_name} already exists, but lists is #{existing_lists} instead of #{lists}, updating...",
        )
        return create_index!(table_name, memory, lists, probes)
      end
    end

    Rails.logger.info(
      "Index #{index_name} kept. #{index_age} seconds old, #{new_rows} new rows, #{existing_lists} lists, #{probes} probes.",
    )
  end
end
|
2023-09-05 10:08:23 -04:00
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Drops and recreates the partial ivfflat index of +table_name+ for this
# model/strategy pair, then stamps it with the current Unix time as its
# comment.
#
# work_mem and maintenance_work_mem are raised to +memory+ for the build and
# are always reset afterwards, including when the build fails.
#
# When PostgreSQL rejects the build with PG::ProgramLimitExceeded and the
# message names the memory it needs, that setting is raised to the requested
# amount and the build is retried, for at most three attempts in total. If the
# message cannot be parsed, or the attempts are exhausted, the error is
# re-raised and the index comment is left untouched.
#
# @param table_name [String] table to index
# @param memory [String] value for work_mem / maintenance_work_mem
# @param lists [Integer] ivfflat `lists` storage parameter
# @param probes unused here; kept so existing callers keep working
# @raise [PG::ProgramLimitExceeded] when the index cannot be built
def create_index!(table_name, memory, lists, probes)
  attempts = 0
  index_name = index_name(table_name)

  DB.exec("SET work_mem TO '#{memory}';")
  DB.exec("SET maintenance_work_mem TO '#{memory}';")

  begin
    DB.exec(<<~SQL)
      DROP INDEX IF EXISTS #{index_name};
      CREATE INDEX IF NOT EXISTS
        #{index_name}
      ON
        #{table_name}
      USING
        ivfflat ((embeddings::halfvec(#{dimensions})) #{pg_index_type})
      WITH
        (lists = #{lists})
      WHERE
        model_id = #{id} AND strategy_id = #{@strategy.id};
    SQL
  rescue PG::ProgramLimitExceeded => e
    required = e.message.match(/memory required is (\d+ [A-Z]{2}), ([a-z_]+)/)
    attempts += 1
    # Nothing actionable in the message, or out of attempts: surface the
    # original error instead of pretending the index was built.
    raise if required.nil? || attempts >= 3

    DB.exec("SET #{required[2]} TO '#{required[1].delete(" ")}';")
    retry
  end

  DB.exec("COMMENT ON INDEX #{index_name} IS '#{Time.now.to_i}';")
ensure
  DB.exec("RESET work_mem;")
  DB.exec("RESET maintenance_work_mem;")
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Hook for subclasses: turns +text+ into an embedding vector.
# generate_representation_from passes the result on to save_to_db.
#
# @param text [String] text to embed
# @param asymetric [Boolean] keyword accepted by the hook; its meaning is
#   defined by subclasses
# @raise [NotImplementedError] always, in this base class
def vector_from(text, asymetric: false)
  raise NotImplementedError
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Generates the embedding for +target+ and, unless told otherwise, stores it.
#
# Nothing happens when the prepared text is blank or when the digest already
# stored for this target/model/strategy equals the SHA1 of the prepared text.
#
# @param target [Topic, Post, RagDocumentFragment] record to embed
# @param persist [Boolean] when false the vector is computed but not saved
# @return nil when skipped or when persist is false, otherwise the result of save_to_db
# @raise [ArgumentError] when target is none of the supported types
def generate_representation_from(target, persist: true)
  # NOTE(review): the "- 2" presumably reserves room for two special tokens -- confirm.
  text = @strategy.prepare_text_from(target, tokenizer, max_sequence_length - 2)
  return if text.blank?

  target_column =
    case target
    when Topic then "topic_id"
    when Post then "post_id"
    when RagDocumentFragment then "rag_document_fragment_id"
    else raise ArgumentError, "Invalid target type"
    end

  fresh_digest = OpenSSL::Digest::SHA1.hexdigest(text)

  digest_sql = <<~SQL
    SELECT
      digest
    FROM
      #{table_name(target)}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id} AND
      #{target_column} = :target_id
    LIMIT 1
  SQL
  stored_digest = DB.query_single(digest_sql, target_id: target.id).first

  # Same digest means the text did not change since the last embedding.
  return if stored_digest == fresh_digest

  embedding = vector_from(text)

  save_to_db(target, embedding, fresh_digest) if persist
end
|
|
|
|
|
|
|
|
# Finds the topic whose stored embedding ranks first against +raw_vector+
# under this model's pg_function operator.
#
# @param raw_vector vector bound to the :query_embedding placeholder
# @return the topic_id of the first row, or nil when the query returns no rows
def topic_id_from_representation(raw_vector)
  sql = <<~SQL
    SELECT
      topic_id
    FROM
      #{topic_table_name}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT 1
  SQL

  DB.query_single(sql, query_embedding: raw_vector).first
end
|
|
|
|
|
|
|
|
# Finds the post whose stored embedding ranks first against +raw_vector+
# under this model's pg_function operator.
#
# @param raw_vector vector bound to the :query_embedding placeholder
# @return the post_id of the first row, or nil when the query returns no rows
def post_id_from_representation(raw_vector)
  sql = <<~SQL
    SELECT
      post_id
    FROM
      #{post_table_name}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT 1
  SQL

  DB.query_single(sql, query_embedding: raw_vector).first
end
|
|
|
|
|
|
|
|
# Ranks topic embeddings of this model/strategy against +raw_vector+.
#
# @param raw_vector vector bound to the :query_embedding placeholder
# @param limit value bound to LIMIT
# @param offset value bound to OFFSET
# @param return_distance [Boolean] when true each result is [topic_id, distance]
# @return [Array] topic ids, or [topic_id, distance] pairs
# @raise [MissingEmbeddingError] when the query fails with a PG::Error
def asymmetric_topics_similarity_search(raw_vector, limit:, offset:, return_distance: false)
  sql = <<~SQL
    #{probes_sql(topic_table_name)}
    SELECT
      topic_id,
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
    FROM
      #{topic_table_name}
    WHERE
      model_id = #{id} AND strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT :limit
    OFFSET :offset
  SQL

  rows = DB.query(sql, query_embedding: raw_vector, limit: limit, offset: offset)
  return rows.map(&:topic_id) unless return_distance

  rows.map { |row| [row.topic_id, row.distance] }
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Ranks post embeddings of this model/strategy against +raw_vector+,
# restricted by the join to posts whose topic has archetype 'regular'.
#
# @param raw_vector vector bound to the :query_embedding placeholder
# @param limit value bound to LIMIT
# @param offset value bound to OFFSET
# @param return_distance [Boolean] when true each result is [post_id, distance]
# @return [Array] post ids, or [post_id, distance] pairs
# @raise [MissingEmbeddingError] when the query fails with a PG::Error
def asymmetric_posts_similarity_search(raw_vector, limit:, offset:, return_distance: false)
  sql = <<~SQL
    #{probes_sql(post_table_name)}
    SELECT
      post_id,
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
    FROM
      #{post_table_name}
    INNER JOIN
      posts AS p ON p.id = post_id
    INNER JOIN
      topics AS t ON t.id = p.topic_id AND t.archetype = 'regular'
    WHERE
      model_id = #{id} AND strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT :limit
    OFFSET :offset
  SQL

  rows = DB.query(sql, query_embedding: raw_vector, limit: limit, offset: offset)
  return rows.map(&:post_id) unless return_distance

  rows.map { |row| [row.post_id, row.distance] }
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
# Ranks RAG document fragment embeddings of this model/strategy against
# +raw_vector+, restricted to fragments that belong to the given target.
#
# The probes statement is built for the RAG fragments table, the table this
# query actually reads, matching how the topic and post searches each use
# their own table.
#
# @param raw_vector vector bound to the :query_embedding placeholder
# @param target_id value matched against rag_document_fragments.target_id
# @param target_type value matched against rag_document_fragments.target_type
# @param limit value bound to LIMIT
# @param offset value bound to OFFSET
# @param return_distance [Boolean] when true each result is
#   [rag_document_fragment_id, distance]
# @return [Array] fragment ids, or [fragment_id, distance] pairs
# @raise [MissingEmbeddingError] when the query fails with a PG::Error
def asymmetric_rag_fragment_similarity_search(
  raw_vector,
  target_id:,
  target_type:,
  limit:,
  offset:,
  return_distance: false
)
  results =
    DB.query(
      <<~SQL,
        #{probes_sql(rag_fragments_table_name)}
        SELECT
          rag_document_fragment_id,
          embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
        FROM
          #{rag_fragments_table_name}
        INNER JOIN
          rag_document_fragments AS rdf ON rdf.id = rag_document_fragment_id
        WHERE
          model_id = #{id} AND
          strategy_id = #{@strategy.id} AND
          rdf.target_id = :target_id AND
          rdf.target_type = :target_type
        ORDER BY
          embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
        LIMIT :limit
        OFFSET :offset
      SQL
      query_embedding: raw_vector,
      target_id: target_id,
      target_type: target_type,
      limit: limit,
      offset: offset,
    )

  if return_distance
    results.map { |r| [r.rag_document_fragment_id, r.distance] }
  else
    results.map(&:rag_document_fragment_id)
  end
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
# Lists up to 100 topic ids ordered by how their embedding compares, under
# pg_function, with the embedding already stored for +topic+.
#
# @param topic record whose `id` selects the reference embedding
# @return [Array] topic ids in query order
# @raise [MissingEmbeddingError] when the query fails with a PG::Error
def symmetric_topics_similarity_search(topic)
  sql = <<~SQL
    #{probes_sql(topic_table_name)}
    SELECT
      topic_id
    FROM
      #{topic_table_name}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} (
        SELECT
          embeddings
        FROM
          #{topic_table_name}
        WHERE
          model_id = #{id} AND
          strategy_id = #{@strategy.id} AND
          topic_id = :topic_id
        LIMIT 1
      )::halfvec(#{dimensions})
    LIMIT 100
  SQL

  rows = DB.query(sql, topic_id: topic.id)
  rows.map(&:topic_id)
rescue PG::Error => e
  Rails.logger.error(
    "Error #{e} querying embeddings for topic #{topic.id} and model #{name}",
  )
  raise MissingEmbeddingError
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Name of the table that stores topic embeddings.
#
# @return [String]
def topic_table_name
  "ai_topic_embeddings"
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Name of the table that stores post embeddings.
#
# @return [String]
def post_table_name
  "ai_post_embeddings"
end
|
|
|
|
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
# Name of the table that stores RAG document fragment embeddings.
#
# @return [String]
def rag_fragments_table_name
  "ai_document_fragment_embeddings"
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Maps a record to the embeddings table that holds its vectors.
#
# @param target [Topic, Post, RagDocumentFragment]
# @return [String] the matching table name
# @raise [ArgumentError] when target is none of the supported types
def table_name(target)
  case target
  when Topic then topic_table_name
  when Post then post_table_name
  when RagDocumentFragment then rag_fragments_table_name
  else raise ArgumentError, "Invalid target type"
  end
end
|
|
|
|
|
|
|
|
# Builds the search index name for +table_name+ under this model and strategy.
#
# @param table_name [String]
# @return [String] "<table_name>_<id>_<strategy id>_search"
def index_name(table_name)
  format("%s_%s_%s_search", table_name, id, @strategy.id)
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Returns the statement that applies the probes value cached for
# +table_name+ (written by consider_indexing), or an empty string when
# nothing is cached.
#
# @param table_name [String]
# @return [String] "SET LOCAL ivfflat.probes TO <n>;" or ""
def probes_sql(table_name)
  cache_key = "#{table_name}-#{id}-#{@strategy.id}-probes"
  cached_probes = Discourse.cache.read(cache_key)
  return "" unless cached_probes.present?

  "SET LOCAL ivfflat.probes TO #{cached_probes};"
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
# Hook for subclasses: the model's name, interpolated into the error logs
# written by the similarity searches.
#
# @raise [NotImplementedError] always, in this base class
def name
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the vector size, interpolated into the
# halfvec(...) casts of the SQL built by this class.
#
# @raise [NotImplementedError] always, in this base class
def dimensions
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the numeric limit from which
# generate_representation_from subtracts 2 before handing it to the
# strategy's prepare_text_from.
#
# @raise [NotImplementedError] always, in this base class
def max_sequence_length
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the value used as model_id in the SQL built by this
# class and as part of index names and cache keys.
#
# @raise [NotImplementedError] always, in this base class
def id
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the SQL operator placed between the stored embedding
# and the query vector in the similarity queries.
#
# @raise [NotImplementedError] always, in this base class
def pg_function
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the value stored as model_version when embeddings
# are saved.
#
# @raise [NotImplementedError] always, in this base class
def version
  raise NotImplementedError
end
|
|
|
|
|
|
|
|
# Hook for subclasses: the tokenizer handed to the strategy's
# prepare_text_from by generate_representation_from.
#
# @raise [NotImplementedError] always, in this base class
def tokenizer
  raise NotImplementedError
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Hook for subclasses.
# NOTE(review): presumably the prefix added to query text for asymmetric
# searches -- no caller is visible in this part of the file, confirm.
#
# @raise [NotImplementedError] always, in this base class
def asymmetric_query_prefix
  raise NotImplementedError
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
protected
|
|
|
|
|
|
|
|
def save_to_db(target, vector, digest)
|
2023-12-29 10:28:45 -05:00
|
|
|
if target.is_a?(Topic)
|
|
|
|
DB.exec(
|
|
|
|
<<~SQL,
|
2024-08-08 10:55:20 -04:00
|
|
|
INSERT INTO #{topic_table_name} (topic_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
|
2024-08-30 00:37:55 -04:00
|
|
|
VALUES (:topic_id, :model_id, :model_version, :strategy_id, :strategy_version, :digest, '[:embeddings]', :now, :now)
|
2024-08-08 10:55:20 -04:00
|
|
|
ON CONFLICT (strategy_id, model_id, topic_id)
|
2023-12-29 10:28:45 -05:00
|
|
|
DO UPDATE SET
|
|
|
|
model_version = :model_version,
|
|
|
|
strategy_version = :strategy_version,
|
|
|
|
digest = :digest,
|
|
|
|
embeddings = '[:embeddings]',
|
2024-08-30 00:37:55 -04:00
|
|
|
updated_at = :now
|
2023-12-29 10:28:45 -05:00
|
|
|
SQL
|
|
|
|
topic_id: target.id,
|
2024-08-08 10:55:20 -04:00
|
|
|
model_id: id,
|
2023-12-29 10:28:45 -05:00
|
|
|
model_version: version,
|
2024-08-08 10:55:20 -04:00
|
|
|
strategy_id: @strategy.id,
|
2023-12-29 10:28:45 -05:00
|
|
|
strategy_version: @strategy.version,
|
|
|
|
digest: digest,
|
|
|
|
embeddings: vector,
|
2024-08-30 00:37:55 -04:00
|
|
|
now: Time.zone.now,
|
2023-12-29 10:28:45 -05:00
|
|
|
)
|
|
|
|
elsif target.is_a?(Post)
|
|
|
|
DB.exec(
|
|
|
|
<<~SQL,
|
2024-08-08 10:55:20 -04:00
|
|
|
INSERT INTO #{post_table_name} (post_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
|
2024-08-30 00:37:55 -04:00
|
|
|
VALUES (:post_id, :model_id, :model_version, :strategy_id, :strategy_version, :digest, '[:embeddings]', :now, :now)
|
2024-08-08 10:55:20 -04:00
|
|
|
ON CONFLICT (model_id, strategy_id, post_id)
|
2023-12-29 10:28:45 -05:00
|
|
|
DO UPDATE SET
|
|
|
|
model_version = :model_version,
|
|
|
|
strategy_version = :strategy_version,
|
|
|
|
digest = :digest,
|
|
|
|
embeddings = '[:embeddings]',
|
2024-08-30 00:37:55 -04:00
|
|
|
updated_at = :now
|
2023-12-29 10:28:45 -05:00
|
|
|
SQL
|
|
|
|
post_id: target.id,
|
2024-08-08 10:55:20 -04:00
|
|
|
model_id: id,
|
2023-12-29 10:28:45 -05:00
|
|
|
model_version: version,
|
2024-08-08 10:55:20 -04:00
|
|
|
strategy_id: @strategy.id,
|
2023-12-29 10:28:45 -05:00
|
|
|
strategy_version: @strategy.version,
|
|
|
|
digest: digest,
|
|
|
|
embeddings: vector,
|
2024-08-30 00:37:55 -04:00
|
|
|
now: Time.zone.now,
|
2023-12-29 10:28:45 -05:00
|
|
|
)
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
elsif target.is_a?(RagDocumentFragment)
|
|
|
|
DB.exec(
|
|
|
|
<<~SQL,
|
2024-08-08 10:55:20 -04:00
|
|
|
INSERT INTO #{rag_fragments_table_name} (rag_document_fragment_id, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
|
2024-08-30 00:37:55 -04:00
|
|
|
VALUES (:fragment_id, :model_id, :model_version, :strategy_id, :strategy_version, :digest, '[:embeddings]', :now, :now)
|
2024-08-08 10:55:20 -04:00
|
|
|
ON CONFLICT (model_id, strategy_id, rag_document_fragment_id)
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
DO UPDATE SET
|
|
|
|
model_version = :model_version,
|
|
|
|
strategy_version = :strategy_version,
|
|
|
|
digest = :digest,
|
|
|
|
embeddings = '[:embeddings]',
|
2024-08-30 00:37:55 -04:00
|
|
|
updated_at = :now
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
SQL
|
|
|
|
fragment_id: target.id,
|
2024-08-08 10:55:20 -04:00
|
|
|
model_id: id,
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
model_version: version,
|
2024-08-08 10:55:20 -04:00
|
|
|
strategy_id: @strategy.id,
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
strategy_version: @strategy.version,
|
|
|
|
digest: digest,
|
|
|
|
embeddings: vector,
|
2024-08-30 00:37:55 -04:00
|
|
|
now: Time.zone.now,
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
)
|
2023-12-29 10:28:45 -05:00
|
|
|
else
|
|
|
|
raise ArgumentError, "Invalid target type"
|
|
|
|
end
|
2023-09-05 10:08:23 -04:00
|
|
|
end
|
2024-01-10 17:23:07 -05:00
|
|
|
|
|
|
|
# Resolves the base URL of the Discourse-hosted embeddings service.
#
# When the `ai_embeddings_discourse_service_api_endpoint_srv` site setting is
# present, its value is passed to DiscourseAi::Utils::DnsSrv.lookup and the
# URL is assembled from the returned record's `target` and `port`, always
# using the https scheme. Otherwise the value of the
# `ai_embeddings_discourse_service_api_endpoint` site setting is returned
# as-is.
#
# @return the "https://target:port" string built from the SRV record, or the
#   configured endpoint setting when no SRV name is set
def discourse_embeddings_endpoint
  srv_name = SiteSetting.ai_embeddings_discourse_service_api_endpoint_srv

  # No SRV record configured: fall back to the statically configured endpoint.
  return SiteSetting.ai_embeddings_discourse_service_api_endpoint unless srv_name.present?

  service = DiscourseAi::Utils::DnsSrv.lookup(srv_name)
  "https://#{service.target}:#{service.port}"
end
|
2023-09-05 10:08:23 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|