2023-05-20 17:45:54 +10:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module DiscourseAi::AiBot::Commands
|
|
|
|
class SearchCommand < Command
|
|
|
|
class << self
|
|
|
|
# Name under which this command is exposed.
#
# @return [String] always "search"
def name
  "search"
end
|
|
|
|
|
|
|
|
# Human/LLM readable summary of what the command does.
#
# @return [String]
def desc
  "Will search topics in the current discourse instance, " \
    "when rendering always prefer to link to the topics you find"
end
|
|
|
|
|
2023-12-08 08:42:56 +11:00
|
|
|
# Options this command declares (built with the inherited `option` helper):
# - :base_query  (string)  -- read by #process and appended to every search
# - :max_results (integer) -- read by #max_results as the result cap
#
# @return [Array] the two option descriptors, in that order
def options
  base_query_option = option(:base_query, type: :string)
  max_results_option = option(:max_results, type: :integer)

  [base_query_option, max_results_option]
end
|
|
|
|
|
2023-06-20 08:45:31 +10:00
|
|
|
# Arguments the model may supply when invoking the search command.
#
# Each entry of the table below is splatted into `Parameter.new` as keyword
# arguments, so only the keys listed for an entry are passed (`enum` is sent
# solely for "order" and "status").
#
# @return [Array<Parameter>] ten parameter descriptors, in declaration order
def parameters
  [
    {
      name: "search_query",
      description:
        "Specific keywords to search for, space seperated (correct bad spelling, remove connector words)",
      type: "string",
    },
    {
      name: "user",
      description:
        "Filter search results to this username (only include if user explicitly asks to filter by user)",
      type: "string",
    },
    {
      name: "order",
      description: "search result order",
      type: "string",
      enum: %w[latest latest_topic oldest views likes],
    },
    {
      name: "limit",
      description:
        "Number of results to return. Defaults to maximum number of results. Only set if absolutely necessary",
      type: "integer",
    },
    {
      name: "max_posts",
      description: "maximum number of posts on the topics (topics where lots of people posted)",
      type: "integer",
    },
    {
      name: "tags",
      description: "list of tags to search for. Use + to join with OR, use , to join with AND",
      type: "string",
    },
    { name: "category", description: "category name to filter to", type: "string" },
    {
      name: "before",
      description: "only topics created before a specific date YYYY-MM-DD",
      type: "string",
    },
    {
      name: "after",
      description: "only topics created after a specific date YYYY-MM-DD",
      type: "string",
    },
    {
      name: "status",
      description: "search for topics in a particular state",
      type: "string",
      enum: %w[open closed archived noreplies single_user],
    },
  ].map { |attrs| Parameter.new(**attrs) }
end
|
2023-05-20 17:45:54 +10:00
|
|
|
|
2023-06-20 08:45:31 +10:00
|
|
|
# Extra system-prompt text contributed by this command: three
# newline-terminated lines nudging the model towards searching and towards
# simpler search terms.
#
# @return [String]
def custom_system_message
  <<~PROMPT
    You were trained on OLD data, lean on search to get up to date information about this forum
    When searching try to SIMPLIFY search terms
    Discourse search joins all terms with AND. Reduce and simplify terms to find more results.
  PROMPT
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Label used for this command's output.
#
# @return [String] always "results"
def result_name
  "results"
end
|
|
|
|
|
|
|
|
# Values describing the most recent #process run.
#
# Falls back to a count of 0 and an empty query when nothing has been
# searched yet.
#
# @return [Hash] :count (results returned), :query (the query as recorded by
#   #process) and :url (search page link with the query URL-escaped)
def description_args
  query = @last_query || ""
  search_url = "#{Discourse.base_path}/search?q=#{CGI.escape(query)}"

  { count: @last_num_results || 0, query: query, url: search_url }
end
|
|
|
|
|
2023-09-07 13:25:26 +10:00
|
|
|
# NOTE(review): no method visible in this class references this constant;
# presumably a floor for semantic-search results -- confirm whether it is
# still used elsewhere before relying on it or removing it.
MIN_SEMANTIC_RESULTS = 5
|
|
|
|
|
2023-12-11 16:54:16 +11:00
|
|
|
# Share of the result budget that #process sets aside for semantic-search
# hits: a quarter of #max_results (integer division, so it rounds down).
#
# @return [Integer]
def max_semantic_results
  max_results / 4
end
|
|
|
|
|
|
|
|
# Upper bound on how many search results are handed back.
#
# Resolution order:
# 1. 20 when no bot is attached.
# 2. The persona's :max_results option when it is positive, capped at 100.
# 3. Otherwise derived from bot.prompt_limit(allow_commands: false):
#    above 30_000 -> 60, above 10_000 -> 40, else 20.
#
# @return [Integer]
def max_results
  return 20 unless bot

  # nil.to_i is 0, so an unset option falls through to the prompt-based tiers.
  configured = persona_options[:max_results].to_i
  return [configured, 100].min if configured > 0

  # Query the bot once instead of once per branch.
  prompt_limit = bot.prompt_limit(allow_commands: false)

  if prompt_limit > 30_000
    60
  elsif prompt_limit > 10_000
    40
  else
    20
  end
end
|
|
|
|
|
2023-08-04 09:37:58 +10:00
|
|
|
# Runs a forum search built from the supplied arguments and returns the
# formatted rows.
#
# Steps:
# 1. :search_query is used verbatim; every other argument except :limit
#    becomes a "key:value" search filter. The resulting string is remembered
#    in @last_query (before the persona's base query is appended).
# 2. The persona's :base_query option, when present, is appended and the
#    search is executed with an anonymous Guardian and " status:public".
# 3. When semantic search is enabled, no narrower limit was requested and a
#    search_query was given, #max_semantic_results slots are reserved and
#    filled with semantic hits from topics not already present. A failing
#    semantic search is logged and otherwise ignored.
# 4. The number of posts is stored in @last_num_results and the posts are
#    rendered through format_results.
#
# @param search_args [Hash] keyword arguments as declared by .parameters
# @return [Hash] `{ args:, rows: [], instruction: }` when nothing was found,
#   otherwise whatever format_results returns for the per-post rows
def process(**search_args)
  # A missing, non-numeric, zero or negative limit used to leak through to the
  # slice below (`posts[0..-1]` returns *everything*); anything outside
  # 1..cap is normalised to the cap once it is known.
  limit = search_args[:limit].to_i

  search_string =
    search_args
      .reject { |key, _| key == :limit }
      .map { |key, value| key == :search_query ? value : "#{key}:#{value}" }
      .compact
      .join(" ")

  @last_query = search_string

  show_progress(I18n.t("discourse_ai.ai_bot.searching", query: search_string))

  if persona_options[:base_query].present?
    search_string = "#{search_string} #{persona_options[:base_query]}"
  end

  results =
    Search.execute(
      search_string.to_s + " status:public",
      search_type: :full_page,
      guardian: Guardian.new(),
    )

  # let's be frugal with tokens, too many results and stuff gets cut off.
  # Resolve the cap once: max_results may consult the bot on every call.
  result_cap = max_results
  limit = result_cap unless limit.between?(1, result_cap)

  should_try_semantic_search = SiteSetting.ai_embeddings_semantic_search_enabled
  should_try_semantic_search &&= (limit == result_cap)
  should_try_semantic_search &&= (search_args[:search_query].present?)

  # leave room for the semantic hits appended below
  limit -= max_semantic_results if should_try_semantic_search

  posts = (results&.posts || []).first(limit)

  if should_try_semantic_search
    semantic_search = DiscourseAi::Embeddings::SemanticSearch.new(Guardian.new())
    topic_ids = Set.new(posts.map(&:topic_id))

    search = Search.new(search_string, guardian: Guardian.new)

    semantic_results = nil
    begin
      semantic_results = semantic_search.search_for_topics(search.term)
    rescue => e
      Discourse.warn_exception(e, message: "Semantic search failed")
    end

    if semantic_results
      search
        .apply_filters(semantic_results)
        .each do |post|
          # add? returns nil when the topic is already represented
          next unless topic_ids.add?(post.topic_id)

          posts << post

          break if posts.length >= result_cap
        end
    end
  end

  @last_num_results = posts.length

  if posts.blank?
    return { args: search_args, rows: [], instruction: "nothing was found, expand your search" }
  end

  # this is the general pattern from core
  # if there are millions of hidden tags it may fail
  hidden_tags = nil

  format_results(posts, args: search_args) do |post|
    category_names = [
      post.topic.category&.parent_category&.name,
      post.topic.category&.name,
    ].compact.join(" > ")

    row = {
      title: post.topic.title,
      url: Discourse.base_path + post.url,
      username: post.user&.username,
      excerpt: post.excerpt,
      created: post.created_at,
      category: category_names,
      likes: post.like_count,
      topic_views: post.topic.views,
      topic_likes: post.topic.like_count,
      topic_replies: post.topic.posts_count - 1,
    }

    if SiteSetting.tagging_enabled
      # looked up lazily, once, and reused for every row
      hidden_tags ||= DiscourseTagging.hidden_tag_names
      # using map over pluck to avoid n+1 (assuming caller preloading)
      tags = post.topic.tags.map(&:name) - hidden_tags
      row[:tags] = tags.join(", ") if tags.present?
    end

    row
  end
end
|
|
|
|
end
|
|
|
|
end
|