mirror of
				https://github.com/discourse/discourse-ai.git
				synced 2025-10-24 19:18:39 +00:00 
			
		
		
		
	This makes it simple to scope search results to specific domains and to prepend arbitrary snippets to the searches that are made
		
			
				
	
	
		
			109 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
			
		
		
	
	
			109 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Ruby
		
	
	
	
	
	
# frozen_string_literal: true

module DiscourseAi
  module AiBot
    module Tools
      # AI bot tool that sends a query to the Google Custom Search JSON API and
      # returns the results with each field truncated to a token budget.
      #
      # Accepts one tool option, :base_query. When present it is prepended
      # (separated by a space) to every query before the request is made.
      class Google < Tool
        SEARCH_ENDPOINT = "https://www.googleapis.com/customsearch/v1"

        # Tool definition: name, description and the single required
        # "query" string parameter.
        def self.signature
          {
            name: name,
            description:
              "Will search using Google - global internet search (supports all Google search operators)",
            parameters: [
              { name: "query", description: "The search query", type: "string", required: true },
            ],
          }
        end

        # Extra system-prompt text associated with this tool.
        def self.custom_system_message
          "You were trained on OLD data, lean on search to get up to date information from the web"
        end

        # Identifier of this tool.
        def self.name
          "google"
        end

        # Options this tool accepts; only :base_query (a string).
        def self.accepted_options
          [option(:base_query, type: :string)]
        end

        # The "query" parameter as a stripped string ("" when it is missing).
        def query
          parameters[:query].to_s.strip
        end

        # Runs the search.
        #
        # Yields the query exactly as supplied (before any :base_query prefix
        # is applied), performs the HTTP request and returns whatever
        # parse_search_json produces: either the formatted results or a
        # plain-text error message.
        def invoke
          user_query = query

          yield(user_query)

          base_query = options[:base_query]
          full_query = base_query.present? ? "#{base_query} #{user_query}" : user_query

          escaped_query = CGI.escape(full_query)
          body = Net::HTTP.get(search_uri(escaped_query))

          parse_search_json(body, escaped_query, llm)
        end

        # Total result count taken from the last successfully parsed response
        # (nil until a response without an error code has been parsed).
        attr_reader :results_count

        protected

        # Values describing this invocation: result count, the query as
        # supplied (without :base_query) and a google.com search link for it.
        def description_args
          {
            count: results_count || 0,
            query: query,
            url: "https://google.com/search?q=#{CGI.escape(query)}",
          }
        end

        private

        # Builds the Custom Search request URI for an already-escaped query.
        #
        # The API key and engine id come from site settings and are escaped as
        # well, so a reserved character in either cannot corrupt the query
        # string. `to_s` keeps an unset (nil) setting rendering as "".
        def search_uri(escaped_query)
          api_key = CGI.escape(SiteSetting.ai_google_custom_search_api_key.to_s)
          cx = CGI.escape(SiteSetting.ai_google_custom_search_cx.to_s)

          URI("#{SEARCH_ENDPOINT}?key=#{api_key}&cx=#{cx}&q=#{escaped_query}&num=10")
        end

        # Returns result[field] truncated to max_tokens by the LLM's tokenizer
        # and whitespace-squished, or "" when the field is blank.
        def minimize_field(result, field, llm, max_tokens: 100)
          data = result[field]
          return "" if data.blank?

          llm.tokenizer.truncate(data, max_tokens).squish
        end

        # Parses the raw response body.
        #
        # Returns a plain-text message (and logs a warning) when the body is
        # not valid JSON, when the API reports rate limiting (error code 429),
        # or when it reports any other error code. Otherwise records the total
        # result count and returns the formatted, token-trimmed result rows.
        def parse_search_json(json_data, escaped_query, llm)
          begin
            parsed = JSON.parse(json_data)
          rescue JSON::ParserError
            # A non-JSON body (for example an HTML error page) is reported
            # like any other API failure instead of raising out of the tool.
            Rails.logger.warn(
              "Google Custom Search returned an unparseable response. #{json_data.to_s[0..1000]}",
            )
            return "Google Custom Search returned an error. Let the user know there is a problem."
          end

          error_code = parsed.dig("error", "code")

          if error_code == 429
            Rails.logger.warn(
              "Google Custom Search is Rate Limited, no search can be performed at the moment. #{json_data[0..1000]}",
            )
            return(
              "Google Custom Search is Rate Limited, no search can be performed at the moment. Let the user know there is a problem."
            )
          end

          if error_code
            Rails.logger.warn("Google Custom Search returned an error. #{json_data[0..1000]}")
            return "Google Custom Search returned an error. Let the user know there is a problem."
          end

          @results_count = parsed.dig("searchInformation", "totalResults").to_i

          format_results(parsed["items"], args: escaped_query) do |result|
            {
              title: minimize_field(result, "title", llm),
              link: minimize_field(result, "link", llm),
              snippet: minimize_field(result, "snippet", llm, max_tokens: 120),
              displayLink: minimize_field(result, "displayLink", llm),
              formattedUrl: minimize_field(result, "formattedUrl", llm),
            }
          end
        end
      end
    end
  end
end
 |