mirror of
				https://github.com/discourse/discourse-ai.git
				synced 2025-10-31 06:28:48 +00:00 
			
		
		
		
	* FEATURE: HyDE-powered semantic search. It relies on the new outlet added in discourse/discourse#23390 to display semantic search results in an unobtrusive way. We'll use a HyDE-backed approach for semantic search, which consists of generating a hypothetical document from the given keywords, which gets transformed into a vector and used in an asymmetric similarity topic search. This PR also reorganizes the internals to have fewer moving parts, maintaining one hierarchy of DAOish classes for vector-related operations like transformations and querying. Completions and vectors created by HyDE will remain cached on Redis for now, but we could later use Postgres instead. * Missing translation and rate limiting --------- Co-authored-by: Roman Rizzi <rizziromanalejandro@gmail.com>
		
			
				
	
	
		
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			73 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
| # frozen_string_literal: true
 | |
| 
 | |
module DiscourseAi
  module Embeddings
    # Looks up the topics related to a given topic by running a symmetric
    # similarity search through the current vector representation, and caches
    # the resulting ids in Discourse.cache.
    class SemanticRelated
      # Raised internally when the similarity search result does not contain
      # the topic itself; rescued inside #related_topic_ids_for.
      MissingEmbeddingError = Class.new(StandardError)

      class << self
        # Drops both the cached search result and the "job queued" marker
        # stored for +topic+.
        def clear_cache_for(topic)
          Discourse.cache.delete(semantic_suggested_key(topic.id))
          Discourse.redis.del(build_semantic_suggested_key(topic.id))
        end

        # Cache key under which the related topic ids of a topic are stored.
        def semantic_suggested_key(topic_id)
          "semantic-suggested-topic-#{topic_id}"
        end

        # Redis key marking that an embeddings job was queued for a topic.
        def build_semantic_suggested_key(topic_id)
          "build-semantic-suggested-topic-#{topic_id}"
        end
      end

      # Returns the ids produced by the similarity search for +topic+.
      #
      # Returns an empty array when the feature is disabled through
      # SiteSetting.ai_embeddings_semantic_related_topics (value below 1), or
      # when the search result does not include the topic itself. In the
      # latter case nothing is cached and a :generate_embeddings job is
      # enqueued for the topic.
      def related_topic_ids_for(topic)
        return [] if SiteSetting.ai_embeddings_semantic_related_topics < 1

        strategy = DiscourseAi::Embeddings::Strategies::Truncation.new
        vector_rep =
          DiscourseAi::Embeddings::VectorRepresentations::Base.current_representation(strategy)

        Discourse
          .cache
          .fetch(semantic_suggested_key(topic.id), expires_in: results_ttl(topic)) do
            candidate_ids = vector_rep.symmetric_topics_similarity_search(topic)

            # Happens when the topic doesn't have any embeddings. An empty
            # result never includes the topic, so a single check covers both.
            # Raising is what keeps the cache block from storing a result.
            # I'd rather not use Exceptions to control the flow, so this should be refactored soon
            unless candidate_ids.include?(topic.id)
              raise MissingEmbeddingError, "No embeddings found for topic #{topic.id}"
            end

            candidate_ids
          end
      rescue MissingEmbeddingError
        # avoid a flood of jobs when visiting topic: the marker is written with
        # nx: true and a 15 minute expiry, and the job is only enqueued when
        # that write succeeds.
        if Discourse.redis.set(
             build_semantic_suggested_key(topic.id),
             "queued",
             ex: 15.minutes.to_i,
             nx: true,
           )
          Jobs.enqueue(:generate_embeddings, topic_id: topic.id)
        end
        []
      end

      # How long the search result for +topic+ stays cached; the newer the
      # topic, the shorter the duration.
      #
      #   created within the last 6 hours  -> 15 minutes
      #   created within the last 3 days   -> 1 hour
      #   created within the last 15 days  -> 12 hours
      #   anything older                   -> 1 week
      def results_ttl(topic)
        created_at = topic.created_at

        # Without a creation time freshness cannot be judged; use the longest TTL.
        return 1.week unless created_at

        # Only lower bounds are checked, so a timestamp slightly ahead of the
        # local clock is treated as brand new instead of falling through to
        # the longest TTL.
        if created_at >= 6.hours.ago
          15.minutes
        elsif created_at >= 3.days.ago
          1.hour
        elsif created_at >= 15.days.ago
          12.hours
        else
          1.week
        end
      end

      private

      # Instance-level shortcuts so the key formats are defined only once.
      def semantic_suggested_key(topic_id)
        self.class.semantic_suggested_key(topic_id)
      end

      def build_semantic_suggested_key(topic_id)
        self.class.build_semantic_suggested_key(topic_id)
      end
    end
  end
end
 |