2023-05-20 03:45:54 -04:00
|
|
|
#frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi::AiBot::Commands
  # Bot command that sends a query to the Google Custom Search JSON API and
  # hands a trimmed-down version of each hit to `format_results`.
  #
  # Reads the API credentials from
  # `SiteSetting.ai_google_custom_search_api_key` and
  # `SiteSetting.ai_google_custom_search_cx`.
  class GoogleCommand < Command
    class << self
      # @return [String] identifier of this command
      def name
        "google"
      end

      # @return [String] short description of what the command does
      def desc
        "Will search using Google - global internet search (supports all Google search operators)"
      end

      # @return [Array<Parameter>] the single required string parameter, "query"
      def parameters
        [
          Parameter.new(
            name: "query",
            description: "The search query",
            type: "string",
            required: true,
          ),
        ]
      end

      # Additional instruction text supplied by this command.
      # NOTE(review): presumably merged into the bot's system prompt by the
      # Command framework - confirm against the caller.
      #
      # @return [String]
      def custom_system_message
        "You were trained on OLD data, lean on search to get up to date information from the web"
      end
    end

    # @return [String] label used for this command's results
    def result_name
      "results"
    end

    # Values describing the most recent search. Before any search has run the
    # count is 0 and the query is an empty string.
    #
    # @return [Hash] with :count, :query and :url keys
    def description_args
      last_query = @last_query || ""

      {
        count: @last_num_results || 0,
        query: last_query,
        url: "https://google.com/search?q=#{CGI.escape(last_query)}",
      }
    end

    # Runs the search and returns the formatted results.
    #
    # @param query [String] raw search query
    # @return whatever `format_results` returns for the parsed response
    def process(query:)
      @last_query = query

      show_progress(localized_description)

      api_key = SiteSetting.ai_google_custom_search_api_key
      cx = SiteSetting.ai_google_custom_search_cx

      # Keep the escaped form in its own local instead of overwriting the
      # `query` argument, so it is obvious which form is used where.
      escaped_query = CGI.escape(query)
      uri =
        URI(
          "https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{escaped_query}&num=10",
        )
      body = Net::HTTP.get(uri)

      # NOTE(review): the escaped query (not the raw one) is what has always
      # been forwarded here; kept as-is to preserve behavior - confirm intent.
      parse_search_json(body, escaped_query)
    end

    # Collapses whitespace in one field of a search hit and truncates it to a
    # token budget.
    #
    # @param result [Hash] a single search hit
    # @param field [String] key to read from +result+
    # @param max_tokens [Integer] token limit passed to the tokenizer
    # @return [String] the cleaned value, or "" when the field is missing or blank
    def minimize_field(result, field, max_tokens: 100)
      # `to_s` guards against hits that lack the field entirely (nil), which
      # would otherwise raise NoMethodError on `squish`.
      text = result[field].to_s.squish
      return text if text.empty?

      ::DiscourseAi::Tokenizer::BertTokenizer.truncate(text, max_tokens).squish
    end

    # Parses the API response body, records the reported total number of
    # results in @last_num_results and formats each hit.
    #
    # @param json_data [String] JSON response body
    # @param query [String] value forwarded to `format_results` as `args`
    # @return whatever `format_results` returns
    # @raise [JSON::ParserError] if +json_data+ is not valid JSON
    def parse_search_json(json_data, query)
      parsed = JSON.parse(json_data)
      results = parsed["items"]

      # A missing "searchInformation"/"totalResults" entry yields 0 (nil.to_i).
      @last_num_results = parsed.dig("searchInformation", "totalResults").to_i

      format_results(results, args: query) do |result|
        {
          title: minimize_field(result, "title"),
          link: minimize_field(result, "link"),
          snippet: minimize_field(result, "snippet", max_tokens: 120),
          displayLink: minimize_field(result, "displayLink"),
          formattedUrl: minimize_field(result, "formattedUrl"),
        }
      end
    end
  end
end
|