2024-01-04 08:44:07 -05:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi
|
|
|
|
module AiBot
|
|
|
|
module Tools
|
|
|
|
class Google < Tool
|
|
|
|
# Builds the hash that describes this tool: its name, a human-readable
# description, and the list of parameters it accepts (a single required
# "query" string).
#
# @return [Hash] with :name, :description and :parameters keys
def self.signature
  query_param = {
    name: "query",
    description: "The search query",
    type: "string",
    required: true,
  }

  {
    name: name,
    description:
      "Will search using Google - global internet search (supports all Google search operators)",
    parameters: [query_param],
  }
end
|
|
|
|
|
|
|
|
# Fixed instruction text associated with this tool, nudging the model to
# rely on search for current information.
# NOTE(review): presumably merged into the bot's system prompt by the Tool
# base class / persona code — confirm against the caller.
#
# @return [String]
def self.custom_system_message
  "You were trained on OLD data, lean on search to get up to date information from the web"
end
|
|
|
|
|
|
|
|
# Short identifier of this tool. It is also the value placed under :name
# in the hash returned by .signature.
#
# @return [String] always "google"
def self.name
  "google"
end
|
|
|
|
|
|
|
|
# The search query taken from the tool parameters.
#
# Reads parameters[:query], coerces it to a String (so nil becomes "") and
# strips leading/trailing whitespace.
#
# @return [String]
def query
  raw_query = parameters[:query]
  raw_query.to_s.strip
end
|
|
|
|
|
2024-05-07 07:55:46 -04:00
|
|
|
# Performs the search.
#
# Yields the normalized query to the caller's block before any network
# call, requests up to 10 results from the Google Custom Search JSON API
# and hands the raw response body to #parse_search_json.
#
# The API key and search engine id (cx) come from site settings. They are
# URL-escaped before being interpolated, exactly like the query, so that a
# stray space in a setting cannot make URI() raise URI::InvalidURIError and
# a "&" or "#" cannot alter the query string. Values made only of
# unreserved characters are sent unchanged; other characters (such as ":")
# are percent-encoded, which is the standard query-string encoding.
#
# @yield [String] the value of #query
# @return the value returned by #parse_search_json
def invoke
  yield(query)

  # to_s mirrors what string interpolation did with a nil setting.
  api_key = CGI.escape(SiteSetting.ai_google_custom_search_api_key.to_s)
  cx = CGI.escape(SiteSetting.ai_google_custom_search_cx.to_s)
  escaped_query = CGI.escape(query)
  uri =
    URI(
      "https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{escaped_query}&num=10",
    )

  body = Net::HTTP.get(uri)

  parse_search_json(body, escaped_query, llm)
end
|
|
|
|
|
|
|
|
attr_reader :results_count
|
|
|
|
|
|
|
|
protected
|
|
|
|
|
|
|
|
# Values describing the search that was run: the result count (0 when
# results_count has not been set), the query text, and a google.com search
# URL for the same query.
#
# @return [Hash] with :count, :query and :url keys
def description_args
  search_terms = query
  search_url = "https://google.com/search?q=#{CGI.escape(search_terms)}"

  { count: results_count || 0, query: search_terms, url: search_url }
end
|
|
|
|
|
|
|
|
private
|
|
|
|
|
|
|
|
# Extracts one field of a search result in a compact form.
#
# Returns "" when result[field] is blank; otherwise the value truncated to
# max_tokens by llm.tokenizer and then whitespace-squished.
#
# @param result the search result item, indexed with field
# @param field [String] key to read from result
# @param llm object exposing tokenizer.truncate(text, max_tokens)
# @param max_tokens [Integer] truncation limit passed to the tokenizer
# @return [String]
def minimize_field(result, field, llm, max_tokens: 100)
  value = result[field]

  if value.blank?
    ""
  else
    shortened = llm.tokenizer.truncate(value, max_tokens)
    shortened.squish
  end
end
|
|
|
|
|
|
|
|
# Turns the raw Custom Search response body into the tool's output.
#
# When the response carries an error code, a warning (including the first
# 1001 characters of the body) is logged and a plain explanatory String is
# returned instead of results; code 429 gets a dedicated rate-limit message.
# Otherwise @results_count is set from searchInformation.totalResults and
# the "items" array is passed to format_results with each item reduced to
# its title, link, snippet, displayLink and formattedUrl.
#
# @param json_data [String] response body; must be valid JSON
# @param escaped_query [String] forwarded to format_results as args:
# @param llm forwarded to #minimize_field for token truncation
# @return [String] on API error, otherwise the value of format_results
def parse_search_json(json_data, escaped_query, llm)
  payload = JSON.parse(json_data)

  code = payload.dig("error", "code")
  if code
    excerpt = json_data[0..1000]

    if code == 429
      Rails.logger.warn(
        "Google Custom Search is Rate Limited, no search can be performed at the moment. #{excerpt}",
      )
      return "Google Custom Search is Rate Limited, no search can be performed at the moment. Let the user know there is a problem."
    end

    Rails.logger.warn("Google Custom Search returned an error. #{excerpt}")
    return "Google Custom Search returned an error. Let the user know there is a problem."
  end

  @results_count = payload.dig("searchInformation", "totalResults").to_i

  format_results(payload["items"], args: escaped_query) do |item|
    {
      title: minimize_field(item, "title", llm),
      link: minimize_field(item, "link", llm),
      snippet: minimize_field(item, "snippet", llm, max_tokens: 120),
      displayLink: minimize_field(item, "displayLink", llm),
      formattedUrl: minimize_field(item, "formattedUrl", llm),
    }
  end
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|