DEV: Improve embedding configs validations (#1101)

Before this change, we let you set the embeddings selected model back to blank ("") even with embeddings enabled. This would leave the site in a broken state.

Additionally, it adds a fail-safe for these scenarios to avoid errors on the topics page.
This commit is contained in:
Roman Rizzi 2025-01-30 14:16:56 -03:00 committed by GitHub
parent 8f0756fbca
commit 1572068735
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 19 additions and 8 deletions

View File

@ -434,6 +434,7 @@ en:
cannot_edit_builtin: "You can't edit a built-in model."
configuration:
  disable_embeddings: "You have to disable 'ai embeddings enabled' first."
  invalid_config: "You selected an invalid option."
  choose_model: "Set 'ai embeddings selected model' first."
llm_models:

View File

@ -8,10 +8,19 @@ module DiscourseAi
end
# Validates the `ai_embeddings_selected_model` site setting value.
#
# A blank value is acceptable only while the embeddings module is disabled;
# clearing the model with `ai_embeddings_enabled` still on would leave the
# site broken. A non-blank value must reference an existing
# EmbeddingDefinition record.
#
# Side effect: records the failure reason in @module_enabled /
# @invalid_option so #error_message can report it.
def valid_value?(val)
  if val.blank?
    # Only allow clearing the setting when embeddings are turned off.
    @module_enabled = SiteSetting.ai_embeddings_enabled
    !@module_enabled
  else
    EmbeddingDefinition.exists?(id: val).tap { |def_exists| @invalid_option = !def_exists }
  end
end
# Explains why #valid_value? rejected the value, based on the flags it set.
# Returns a localized message, or "" when no failure was recorded.
def error_message
  if @module_enabled
    I18n.t("discourse_ai.embeddings.configuration.disable_embeddings")
  elsif @invalid_option
    I18n.t("discourse_ai.embeddings.configuration.invalid_config")
  else
    ""
  end
end
end

View File

@ -10,6 +10,7 @@ module DiscourseAi
def related_topic_ids_for(topic)
return [] if SiteSetting.ai_embeddings_semantic_related_topics < 1
return [] if SiteSetting.ai_embeddings_selected_model.blank? # fail-safe in case something ends up in a broken state.
cache_for = results_ttl(topic)

View File

@ -10,8 +10,8 @@ module DiscourseAi
"semantic-search-#{digest}-#{SiteSetting.ai_embeddings_semantic_search_hyde_model}"
Discourse.cache.delete(hyde_key)
Discourse.cache.delete("#{hyde_key}-#{SiteSetting.ai_embeddings_model}")
Discourse.cache.delete("-#{SiteSetting.ai_embeddings_model}")
Discourse.cache.delete("#{hyde_key}-#{SiteSetting.ai_embeddings_selected_model}")
Discourse.cache.delete("-#{SiteSetting.ai_embeddings_selected_model}")
end
def initialize(guardian)
@ -24,7 +24,7 @@ module DiscourseAi
build_embedding_key(
digest,
SiteSetting.ai_embeddings_semantic_search_hyde_model,
SiteSetting.ai_embeddings_model,
SiteSetting.ai_embeddings_selected_model,
)
Discourse.cache.read(embedding_key).present?
@ -42,7 +42,7 @@ module DiscourseAi
build_embedding_key(
digest,
SiteSetting.ai_embeddings_semantic_search_hyde_model,
SiteSetting.ai_embeddings_model,
SiteSetting.ai_embeddings_selected_model,
)
hypothetical_post =
@ -57,7 +57,7 @@ module DiscourseAi
# Returns (and caches) the vector embedding for +search_term+.
#
# The cache key includes a SHA1 digest of the term and the currently
# selected embeddings model, so changing the model naturally misses the
# old cache entries. Cached entries expire after one week.
def embedding(search_term)
  digest = OpenSSL::Digest::SHA1.hexdigest(search_term)
  embedding_key = build_embedding_key(digest, "", SiteSetting.ai_embeddings_selected_model)
  Discourse.cache.fetch(embedding_key, expires_in: 1.week) { vector.vector_from(search_term) }
end
@ -120,7 +120,7 @@ module DiscourseAi
build_embedding_key(
digest,
SiteSetting.ai_embeddings_semantic_search_hyde_model,
SiteSetting.ai_embeddings_model,
SiteSetting.ai_embeddings_selected_model,
)
search_term_embedding =

View File

@ -97,7 +97,7 @@ RSpec.describe RagDocumentFragment do
vector.generate_representation_from(rag_document_fragment_1)
end
it "regenerates all embeddings if ai_embeddings_model changes" do
it "regenerates all embeddings if ai_embeddings_selected_model changes" do
old_id = rag_document_fragment_1.id
UploadReference.create!(upload_id: upload_1.id, target: persona)