2023-09-05 10:08:23 -04:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module Embeddings
|
|
|
|
module VectorRepresentations
|
|
|
|
class Base
|
2024-02-01 14:54:09 -05:00
|
|
|
class << self
|
|
|
|
# Looks up the vector representation class whose `name` equals `model_name`.
#
# @param model_name [String] value compared against each candidate's `name`
# @return [Class, nil] the first matching class, or nil when none matches
def find_representation(model_name)
  # we are explicit here cause the loader may have not
  # loaded the subclasses yet
  [
    DiscourseAi::Embeddings::VectorRepresentations::AllMpnetBaseV2,
    DiscourseAi::Embeddings::VectorRepresentations::BgeLargeEn,
    DiscourseAi::Embeddings::VectorRepresentations::BgeM3,
    DiscourseAi::Embeddings::VectorRepresentations::Gemini,
    DiscourseAi::Embeddings::VectorRepresentations::MultilingualE5Large,
    DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Large,
    DiscourseAi::Embeddings::VectorRepresentations::TextEmbedding3Small,
    DiscourseAi::Embeddings::VectorRepresentations::TextEmbeddingAda002,
  ].find { |klass| klass.name == model_name }
end
|
|
|
|
|
|
|
|
# Instantiates the representation selected by the `ai_embeddings_model`
# site setting.
#
# @param strategy the strategy object handed to the representation's constructor
# @return an instance of the class returned by `find_representation`
# @raise [ArgumentError] when the configured model name matches no known
#   representation (previously this surfaced as a NoMethodError on nil)
def current_representation(strategy)
  model_name = SiteSetting.ai_embeddings_model
  representation = find_representation(model_name)

  if representation.nil?
    raise ArgumentError, "Unknown embeddings model: #{model_name.inspect}"
  end

  representation.new(strategy)
end
|
|
|
|
|
|
|
|
# Abstract class-level hook; this base implementation always raises.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def correctly_configured?
  raise NotImplementedError, "#{self} must implement .#{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract class-level hook; this base implementation always raises.
# `configuration_hint` calls `join` and `length` on the returned value.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def dependant_setting_names
  raise NotImplementedError, "#{self} must implement .#{__method__}"
end
|
|
|
|
|
|
|
|
# Builds the translated configuration hint for this representation.
#
# The translation receives the comma-separated setting names as `settings`
# and the number of names as `count`.
#
# @return the result of `I18n.t` for
#   "discourse_ai.embeddings.configuration.hint"
def configuration_hint
  setting_names = dependant_setting_names

  I18n.t(
    "discourse_ai.embeddings.configuration.hint",
    settings: setting_names.join(", "),
    count: setting_names.length,
  )
end
|
2023-09-05 10:08:23 -04:00
|
|
|
end
|
|
|
|
|
|
|
|
# @param strategy object stored as `@strategy`; other methods in this class
#   read its `id` and `version` and call `prepare_text_from` on it
def initialize(strategy)
  @strategy = strategy
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Abstract hook; this base implementation always raises.
# `generate_representation_from` calls it with the prepared text only.
#
# @param text the text to embed
# @param asymetric [Boolean] keyword spelled this way in the public
#   interface; kept as-is so existing callers keep working
# @raise [NotImplementedError] always, naming the hook that is missing
def vector_from(text, asymetric: false)
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
2024-11-26 12:12:32 -05:00
|
|
|
# Generates and stores embeddings for every record in `relation`, running the
# inference calls on a dedicated thread pool.
#
# Records whose prepared text is blank, or whose stored digest already equals
# the SHA1 of the prepared text, are skipped.
#
# The pool is shut down in an `ensure` so its threads are released even when
# an inference call or a database write raises (`value!` re-raises failures).
#
# @param relation collection responding to `map` that yields the records
# @return the result of `pool.wait_for_termination`
def gen_bulk_reprensentations(relation)
  http_pool_size = 100
  pool =
    Concurrent::CachedThreadPool.new(
      min_threads: 0,
      max_threads: http_pool_size,
      idletime: 30,
    )
  terminated = nil

  begin
    embedding_gen = inference_client

    promised_embeddings =
      relation
        .map do |record|
          prepared_text = prepare_text(record)
          next if prepared_text.blank?

          new_digest = OpenSSL::Digest::SHA1.hexdigest(prepared_text)
          next if find_digest_of(record) == new_digest

          Concurrent::Promises
            .fulfilled_future(
              { target: record, text: prepared_text, digest: new_digest },
              pool,
            )
            .then_on(pool) do |w_prepared_text|
              w_prepared_text.merge(embedding: embedding_gen.perform!(w_prepared_text[:text]))
            end
        end
        .compact

    Concurrent::Promises
      .zip(*promised_embeddings)
      .value!
      .each { |e| save_to_db(e[:target], e[:embedding], e[:digest]) }
  ensure
    pool.shutdown
    terminated = pool.wait_for_termination
  end

  terminated
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Computes the embedding for a single target and, when `persist` is true,
# stores it.
#
# Nothing is computed when the prepared text is blank or when the stored
# digest already equals the SHA1 of the prepared text.
#
# NOTE(review): the computed vector is never returned. With `persist: false`
# the method computes it and returns nil — confirm that is intended.
#
# @param target the record to embed
# @param persist [Boolean] whether to write the vector via `save_to_db`
# @return the result of `save_to_db` when persisted, otherwise nil
def generate_representation_from(target, persist: true)
  text = prepare_text(target)
  return if text.blank?

  new_digest = OpenSSL::Digest::SHA1.hexdigest(text)
  return if find_digest_of(target) == new_digest

  vector = vector_from(text)

  save_to_db(target, vector, new_digest) if persist
end
|
|
|
|
|
|
|
|
# Finds the topic whose stored embedding ranks first when ordered by
# `pg_function` against `raw_vector`, restricted to this model and strategy.
#
# @param raw_vector the query embedding bound as `:query_embedding`
# @return the first `topic_id` of the query result, or nil when it is empty
def topic_id_from_representation(raw_vector)
  DB.query_single(<<~SQL, query_embedding: raw_vector).first
    SELECT
      topic_id
    FROM
      #{topic_table_name}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT 1
  SQL
end
|
|
|
|
|
|
|
|
# Finds the post whose stored embedding ranks first when ordered by
# `pg_function` against `raw_vector`, restricted to this model and strategy.
#
# @param raw_vector the query embedding bound as `:query_embedding`
# @return the first `post_id` of the query result, or nil when it is empty
def post_id_from_representation(raw_vector)
  DB.query_single(<<~SQL, query_embedding: raw_vector).first
    SELECT
      post_id
    FROM
      #{post_table_name}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id}
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT 1
  SQL
end
|
|
|
|
|
2024-11-28 20:17:28 -05:00
|
|
|
# Two-stage topic similarity search: a binary-quantized pass picks a
# candidate window, which is then re-ranked with `pg_function` on the
# halfvec embeddings.
#
# The candidate window covers `limit + offset` rows (doubled). Sizing it from
# `limit` alone meant any page with `offset >= 2 * limit` re-ranked a window
# that did not reach that far and came back empty.
#
# @param raw_vector the query embedding bound as `:query_embedding`
# @param limit number of rows to return
# @param offset number of ranked rows to skip
# @param return_distance [Boolean] return `[topic_id, distance]` pairs
#   instead of bare topic ids
# @param exclude_category_ids ids of categories to exclude; topics in their
#   direct subcategories are excluded as well
# @return [Array] topic ids, or `[topic_id, distance]` pairs
# @raise [MissingEmbeddingError] when the query raises `PG::Error`
def asymmetric_topics_similarity_search(
  raw_vector,
  limit:,
  offset:,
  return_distance: false,
  exclude_category_ids: nil
)
  builder = DB.build(<<~SQL)
    WITH candidates AS (
      SELECT
        topic_id,
        embeddings::halfvec(#{dimensions}) AS embeddings
      FROM
        #{topic_table_name}
      /*join*/
      /*where*/
      ORDER BY
        binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
      LIMIT (:limit + COALESCE(:offset, 0)) * 2
    )
    SELECT
      topic_id,
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
    FROM
      candidates
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT :limit
    OFFSET :offset
  SQL

  builder.where(
    "model_id = :model_id AND strategy_id = :strategy_id",
    model_id: id,
    strategy_id: @strategy.id,
  )

  if exclude_category_ids.present?
    builder.join("topics t on t.id = topic_id")
    builder.where(<<~SQL, exclude_category_ids: exclude_category_ids.map(&:to_i))
      t.category_id NOT IN (:exclude_category_ids) AND
      t.category_id NOT IN (SELECT categories.id FROM categories WHERE categories.parent_category_id IN (:exclude_category_ids))
    SQL
  end

  results = builder.query(query_embedding: raw_vector, limit: limit, offset: offset)

  if return_distance
    results.map { |r| [r.topic_id, r.distance] }
  else
    results.map(&:topic_id)
  end
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Two-stage post similarity search: a binary-quantized pass picks a candidate
# window, which is then re-ranked with `pg_function` on the halfvec
# embeddings.
#
# The candidate window covers `limit + offset` rows (doubled). Sizing it from
# `limit` alone meant any page with `offset >= 2 * limit` came back empty.
#
# @param raw_vector the query embedding bound as `:query_embedding`
# @param limit number of rows to return
# @param offset number of ranked rows to skip
# @param return_distance [Boolean] return `[post_id, distance]` pairs instead
#   of bare post ids
# @return [Array] post ids, or `[post_id, distance]` pairs
# @raise [MissingEmbeddingError] when the query raises `PG::Error`
def asymmetric_posts_similarity_search(raw_vector, limit:, offset:, return_distance: false)
  results = DB.query(<<~SQL, query_embedding: raw_vector, limit: limit, offset: offset)
    WITH candidates AS (
      SELECT
        post_id,
        embeddings::halfvec(#{dimensions}) AS embeddings
      FROM
        #{post_table_name}
      WHERE
        model_id = #{id} AND strategy_id = #{@strategy.id}
      ORDER BY
        binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
      LIMIT (:limit + COALESCE(:offset, 0)) * 2
    )
    SELECT
      post_id,
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
    FROM
      candidates
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
    LIMIT :limit
    OFFSET :offset
  SQL

  if return_distance
    results.map { |r| [r.post_id, r.distance] }
  else
    results.map(&:post_id)
  end
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
# Two-stage similarity search over the RAG document fragments that belong to
# one target: a binary-quantized pass picks a candidate window, which is then
# re-ranked with `pg_function` on the halfvec embeddings.
#
# @param raw_vector the query embedding bound as `:query_embedding`
# @param target_id value matched against `rag_document_fragments.target_id`
# @param target_type value matched against `rag_document_fragments.target_type`
# @param limit number of rows to return
# @param offset number of ranked rows to skip
# @param return_distance [Boolean] return `[fragment_id, distance]` pairs
#   instead of bare fragment ids
# @return [Array] fragment ids, or `[fragment_id, distance]` pairs
# @raise [MissingEmbeddingError] when the query raises `PG::Error`
def asymmetric_rag_fragment_similarity_search(
  raw_vector,
  target_id:,
  target_type:,
  limit:,
  offset:,
  return_distance: false
)
  # A too low limit exacerbates the recall loss of binary quantization.
  # The window also has to reach past `offset`, otherwise later pages
  # re-rank a candidate set that does not contain them and come back empty.
  binary_search_limit = [(limit + offset.to_i) * 2, 100].max

  results =
    DB.query(
      <<~SQL,
        WITH candidates AS (
          SELECT
            rag_document_fragment_id,
            embeddings::halfvec(#{dimensions}) AS embeddings
          FROM
            #{rag_fragments_table_name}
          INNER JOIN
            rag_document_fragments ON
              rag_document_fragments.id = rag_document_fragment_id AND
              rag_document_fragments.target_id = :target_id AND
              rag_document_fragments.target_type = :target_type
          WHERE
            model_id = #{id} AND strategy_id = #{@strategy.id}
          ORDER BY
            binary_quantize(embeddings)::bit(#{dimensions}) <~> binary_quantize('[:query_embedding]'::halfvec(#{dimensions}))
          LIMIT :binary_search_limit
        )
        SELECT
          rag_document_fragment_id,
          embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions}) AS distance
        FROM
          candidates
        ORDER BY
          embeddings::halfvec(#{dimensions}) #{pg_function} '[:query_embedding]'::halfvec(#{dimensions})
        LIMIT :limit
        OFFSET :offset
      SQL
      query_embedding: raw_vector,
      target_id: target_id,
      target_type: target_type,
      limit: limit,
      offset: offset,
      binary_search_limit: binary_search_limit,
    )

  if return_distance
    results.map { |r| [r.rag_document_fragment_id, r.distance] }
  else
    results.map(&:rag_document_fragment_id)
  end
rescue PG::Error => e
  Rails.logger.error("Error #{e} querying embeddings for model #{name}")
  raise MissingEmbeddingError
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
# Finds topics similar to `topic` using that topic's own stored embedding.
#
# A binary-quantized pass selects up to 200 candidates for this model and
# strategy; they are then re-ranked with `pg_function` on the halfvec
# embeddings and capped at 100 rows.
#
# @param topic object whose `id` selects the reference embedding
# @return [Array] topic ids from the re-ranked result
# @raise [MissingEmbeddingError] when the query raises `PG::Error`
def symmetric_topics_similarity_search(topic)
  DB.query(<<~SQL, topic_id: topic.id).map(&:topic_id)
    WITH le_target AS (
      SELECT
        embeddings
      FROM
        #{topic_table_name}
      WHERE
        model_id = #{id} AND
        strategy_id = #{@strategy.id} AND
        topic_id = :topic_id
      LIMIT 1
    )
    SELECT topic_id FROM (
      SELECT
        topic_id, embeddings
      FROM
        #{topic_table_name}
      WHERE
        model_id = #{id} AND
        strategy_id = #{@strategy.id}
      ORDER BY
        binary_quantize(embeddings)::bit(#{dimensions}) <~> (
          SELECT
            binary_quantize(embeddings)::bit(#{dimensions})
          FROM
            le_target
          LIMIT 1
        )
      LIMIT 200
    ) AS widenet
    ORDER BY
      embeddings::halfvec(#{dimensions}) #{pg_function} (
        SELECT
          embeddings::halfvec(#{dimensions})
        FROM
          le_target
        LIMIT 1
      )
    LIMIT 100;
  SQL
rescue PG::Error => e
  Rails.logger.error(
    "Error #{e} querying embeddings for topic #{topic.id} and model #{name}",
  )
  raise MissingEmbeddingError
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# @return [String] name of the table holding topic embeddings
def topic_table_name
  "ai_topic_embeddings"
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# @return [String] name of the table holding post embeddings
def post_table_name
  "ai_post_embeddings"
end
|
|
|
|
|
FEATURE: AI Bot RAG support. (#537)
This PR lets you associate uploads to an AI persona, which we'll split and generate embeddings from. When building the system prompt to get a bot reply, we'll do a similarity search followed by a re-ranking (if available). This will let us find the most relevant fragments from the body of knowledge you associated with the persona, resulting in better, more informed responses.
For now, we'll only allow plain-text files, but this will change in the future.
Commits:
* FEATURE: RAG embeddings for the AI Bot
This first commit introduces a UI where admins can upload text files, which we'll store, split into fragments,
and generate embeddings of. In a next commit, we'll use those to give the bot additional information during
conversations.
* Basic asymmetric similarity search to provide guidance in system prompt
* Fix tests and lint
* Apply reranker to fragments
* Uploads filter, css adjustments and file validations
* Add placeholder for rag fragments
* Update annotations
2024-04-01 12:43:34 -04:00
|
|
|
# @return [String] name of the table holding RAG document fragment embeddings
def rag_fragments_table_name
  "ai_document_fragment_embeddings"
end
|
|
|
|
|
2023-12-29 10:28:45 -05:00
|
|
|
# Maps a target record to the embeddings table that stores its vectors.
#
# @param target a Topic, Post or RagDocumentFragment
# @return [String] the matching embeddings table name
# @raise [ArgumentError] when `target` is none of the supported types
def table_name(target)
  case target
  when Topic
    topic_table_name
  when Post
    post_table_name
  when RagDocumentFragment
    rag_fragments_table_name
  else
    raise ArgumentError, "Invalid target type"
  end
end
|
|
|
|
|
|
|
|
# Builds the search index name for a table, scoped to this model and strategy.
#
# @param table_name [String] embeddings table name (shadows the `table_name`
#   method inside this body)
# @return [String] "<table_name>_<model id>_<strategy id>_search"
def index_name(table_name)
  "#{table_name}_#{id}_#{@strategy.id}_search"
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
# Abstract hook; this base implementation always raises.
# The value is interpolated into the error logs of the search methods.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def name
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# The value is interpolated into the `halfvec(...)` and `bit(...)` casts of
# the search queries.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def dimensions
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# `prepare_text` subtracts 2 from the value before passing it to the strategy.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def max_sequence_length
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# The value is used as `model_id` in every query of this class.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def id
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# The value is interpolated between two halfvec operands in the ORDER BY
# clauses of the search queries.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def pg_function
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# The value is stored as `model_version` by `save_to_db`.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def version
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
|
|
|
# Abstract hook; this base implementation always raises.
# `prepare_text` passes the value to the strategy's `prepare_text_from`.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def tokenizer
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
2024-03-08 11:02:50 -05:00
|
|
|
# Abstract hook; this base implementation always raises.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def asymmetric_query_prefix
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
protected
|
|
|
|
|
2024-12-04 15:47:28 -05:00
|
|
|
# Reads the digest stored alongside the embedding of `target` for this model
# and strategy.
#
# @param target a Topic, Post or RagDocumentFragment
# @return the stored digest, or nil when the query returns no row
# @raise [ArgumentError] when `target` is none of the supported types
def find_digest_of(target)
  target_column =
    case target
    when Topic
      "topic_id"
    when Post
      "post_id"
    when RagDocumentFragment
      "rag_document_fragment_id"
    else
      raise ArgumentError, "Invalid target type"
    end

  DB.query_single(<<~SQL, target_id: target.id).first
    SELECT
      digest
    FROM
      #{table_name(target)}
    WHERE
      model_id = #{id} AND
      strategy_id = #{@strategy.id} AND
      #{target_column} = :target_id
    LIMIT 1
  SQL
end
|
|
|
|
|
2023-09-05 10:08:23 -04:00
|
|
|
# Upserts the embedding of `target` for this model and strategy.
#
# The three supported target types share one statement; only the table and
# the id column differ. On conflict the existing row's versions, digest,
# embeddings and `updated_at` are overwritten.
#
# The conflict target lists the same three columns for every table; the
# topic table previously listed them in a different order.
# NOTE(review): relies on PostgreSQL inferring the unique index regardless of
# column order — see the INSERT ... ON CONFLICT documentation.
#
# @param target a Topic, Post or RagDocumentFragment
# @param vector the embedding, bound inside `'[:embeddings]'`
# @param digest digest of the text the embedding was generated from
# @return the result of `DB.exec`
# @raise [ArgumentError] when `target` is none of the supported types
def save_to_db(target, vector, digest)
  target_column =
    case target
    when Topic
      "topic_id"
    when Post
      "post_id"
    when RagDocumentFragment
      "rag_document_fragment_id"
    else
      raise ArgumentError, "Invalid target type"
    end

  DB.exec(
    <<~SQL,
      INSERT INTO #{table_name(target)} (#{target_column}, model_id, model_version, strategy_id, strategy_version, digest, embeddings, created_at, updated_at)
      VALUES (:target_id, :model_id, :model_version, :strategy_id, :strategy_version, :digest, '[:embeddings]', :now, :now)
      ON CONFLICT (model_id, strategy_id, #{target_column})
      DO UPDATE SET
        model_version = :model_version,
        strategy_version = :strategy_version,
        digest = :digest,
        embeddings = '[:embeddings]',
        updated_at = :now
    SQL
    target_id: target.id,
    model_id: id,
    model_version: version,
    strategy_id: @strategy.id,
    strategy_version: @strategy.version,
    digest: digest,
    embeddings: vector,
    now: Time.zone.now,
  )
end
|
2024-01-10 17:23:07 -05:00
|
|
|
|
2024-11-25 11:12:43 -05:00
|
|
|
# Abstract hook; this base implementation always raises.
# `gen_bulk_reprensentations` calls `perform!(text)` on the returned object.
#
# @raise [NotImplementedError] always, naming the hook that is missing
def inference_client
  raise NotImplementedError, "#{self.class} must implement ##{__method__}"
end
|
2024-11-26 12:12:32 -05:00
|
|
|
|
|
|
|
# Asks the strategy to turn `record` into the text that will be embedded.
#
# @param record the record to extract text from
# @return the result of the strategy's `prepare_text_from`
def prepare_text(record)
  # NOTE(review): the 2-token margin presumably leaves room for special
  # tokens added by the model — confirm.
  @strategy.prepare_text_from(record, tokenizer, max_sequence_length - 2)
end
|
2023-09-05 10:08:23 -04:00
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|