FIX: Google command was including full payload (#128)

* FIX: Google command was including full payload

Additionally, no truncation was happening, meaning you could easily blow the
token budget on a single search.

This made Google search mostly useless, and it meant that after using
Google we would revert to a clean slate, which is very confusing.

* no need for nil there
This commit is contained in:
Sam 2023-08-08 15:41:57 +10:00 committed by GitHub
parent 7edb57c005
commit 03e689deb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 8 deletions

View File

@ -48,22 +48,28 @@ module DiscourseAi::AiBot::Commands
URI("https://www.googleapis.com/customsearch/v1?key=#{api_key}&cx=#{cx}&q=#{query}&num=10")
body = Net::HTTP.get(uri)
parse_search_json(body)
parse_search_json(body, query)
end
def parse_search_json(json_data)
# Returns a whitespace-normalized, token-limited copy of one field of a
# search result.
#
# @param result [Hash] a single search result item
# @param field [String] key of the field to read from +result+
# @param max_tokens [Integer] limit handed to BertTokenizer.truncate
# @return [String] the squished, truncated value; "" when the field is
#   missing, nil, or contains only whitespace
def minimize_field(result, field, max_tokens: 100)
# to_s keeps a missing/nil field from raising NoMethodError on squish.
data = result[field].to_s.squish
# Nothing to truncate; skip the tokenizer for empty values.
return "" if data.empty?
::DiscourseAi::Tokenizer::BertTokenizer.truncate(data, max_tokens).squish
end
# Parses the JSON response body of a search request, records the reported
# total number of results in @last_num_results, and hands the result items
# to format_results together with a block that builds one hash per item.
#
# NOTE(review): this listing is a rendered diff without +/- markers. The two
# consecutive format_results lines and the two runs of hash keys below look
# like the pre-change and post-change versions of the same code (the commit
# message says the old code included the full payload) — confirm against the
# real file before editing.
def parse_search_json(json_data, query)
parsed = JSON.parse(json_data)
results = parsed["items"]
# to_i turns a missing (nil) totalResults into 0.
@last_num_results = parsed.dig("searchInformation", "totalResults").to_i
# Presumably the removed line: passes the entire raw JSON body as args.
format_results(results, args: json_data) do |result|
# Presumably the replacement line: passes only the search query as args.
format_results(results, args: query) do |result|
{
# Presumably the removed keys: raw, untruncated field values.
title: result["title"],
link: result["link"],
snippet: result["snippet"],
displayLink: result["displayLink"],
formattedUrl: result["formattedUrl"],
# Presumably the replacement keys: each field goes through minimize_field;
# snippet gets max_tokens: 120, the others use minimize_field's default.
title: minimize_field(result, "title"),
link: minimize_field(result, "link"),
snippet: minimize_field(result, "snippet", max_tokens: 120),
displayLink: minimize_field(result, "displayLink"),
formattedUrl: minimize_field(result, "formattedUrl"),
}
end
end

View File

@ -23,6 +23,7 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do
snippet: "snippet1",
displayLink: "displayLink1",
formattedUrl: "formattedUrl1",
oops: "do no include me ... oops",
},
],
}.to_json
@ -38,6 +39,8 @@ RSpec.describe DiscourseAi::AiBot::Commands::GoogleCommand do
expect(google.description_args[:count]).to eq(1)
expect(info).to include("title1")
expect(info).to include("snippet1")
expect(info).to include("some+search+term")
expect(info).to_not include("oops")
end
end
end