FIX: Experimental hashtag search result matching and limit fixes (#19144)

This changes the hashtag search to first do a lookup to find
results where the slug exactly matches the
search term. Now when we search for hashtags, the
exact matches will be found first and put at the top of
the results.

`ChatChannelFetcher` has also been modified here to allow
for more options for performance -- we do not need to
query DM channels for secured IDs when looking up or searching
channels for hashtags, since they should never show in
results there (they have no slugs). Nor do we need to include
the channel archive records.

Also changes the limit of hashtag results to 20 by default
with a hidden site setting, and makes it so the scroll for the
results is overflowed.
This commit is contained in:
Martin Brennan 2022-11-24 10:07:59 +10:00 committed by GitHub
parent 6b7bdc991d
commit 274b21663e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 131 additions and 45 deletions

View File

@ -30,6 +30,9 @@ a.hashtag-cooked {
}
.hashtag-autocomplete {
max-height: 300px;
overflow-y: scroll;
.hashtag-autocomplete__option {
.hashtag-autocomplete__link {
align-items: center;

View File

@ -2,7 +2,7 @@
class HashtagAutocompleteService
HASHTAGS_PER_REQUEST = 20
SEARCH_MAX_LIMIT = 20
SEARCH_MAX_LIMIT = 50
attr_reader :guardian
cattr_reader :data_sources, :contexts
@ -77,7 +77,7 @@ class HashtagAutocompleteService
icon: self.icon,
type: self.type,
ref: self.ref,
slug: self.slug
slug: self.slug,
}
end
end
@ -122,9 +122,8 @@ class HashtagAutocompleteService
# composer we want a slug without a suffix to be a category first, tag second.
if slugs_without_suffixes.any?
types_in_priority_order.each do |type|
found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_without_suffixes))
found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase }
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
found_from_slugs = execute_lookup!(lookup_results, type, guardian, slugs_without_suffixes)
slugs_without_suffixes = slugs_without_suffixes - found_from_slugs.map(&:ref)
break if slugs_without_suffixes.empty?
end
@ -139,9 +138,7 @@ class HashtagAutocompleteService
.select { |slug| slug.ends_with?("::#{type}") }
.map { |slug| slug.gsub("::#{type}", "") }
next if slugs_for_type.empty?
found_from_slugs = set_refs(@@data_sources[type].lookup(guardian, slugs_for_type))
found_from_slugs.each { |item| item.type = type }.sort_by! { |item| item.text.downcase }
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
execute_lookup!(lookup_results, type, guardian, slugs_for_type)
end
end
@ -156,6 +153,9 @@ class HashtagAutocompleteService
# searching tags. The @guardian handles permissions around which results should
# be returned here.
#
# Items which have a slug that exactly matches the search term via lookup will be found
# first and floated to the top of the results, and still be ordered by type.
#
# @param {String} term Search term, from the UI generally where the user is typing #has...
# @param {Array} types_in_priority_order The resource types we are searching for
# and the priority order in which we should
@ -164,32 +164,46 @@ class HashtagAutocompleteService
# bother searching subsequent types if the first types in
# the array already reach the limit.
# @returns {Array} The results as HashtagItems
def search(term, types_in_priority_order, limit: 5)
def search(
term,
types_in_priority_order,
limit: SiteSetting.experimental_hashtag_search_result_limit
)
raise Discourse::InvalidParameters.new(:order) if !types_in_priority_order.is_a?(Array)
limit = [limit, SEARCH_MAX_LIMIT].min
limited_results = []
slugs_by_type = {}
top_ranked_type = nil
term = term.downcase
types_in_priority_order =
types_in_priority_order.select { |type| @@data_sources.keys.include?(type) }
# Float exact matches by slug to the top of the list, any of these will be excluded
# from further results.
types_in_priority_order.each do |type|
search_results = execute_lookup!(nil, type, guardian, [term])
limited_results.concat(search_results) if search_results
break if limited_results.length >= limit
end
return limited_results if limited_results.length >= limit
# Search the data source for each type, validate and sort results,
# and break off from searching more data sources if we reach our limit
types_in_priority_order.each do |type|
search_results =
set_refs(@@data_sources[type].search(guardian, term, limit - limited_results.length))
search_results = search_for_type(type, guardian, term, limit - limited_results.length)
next if search_results.empty?
all_data_items_valid =
search_results.all? do |item|
item.kind_of?(HashtagItem) && item.slug.present? && item.text.present?
next if !all_data_items_valid?(search_results)
search_results =
search_results
.reject do |item|
limited_results.any? { |exact| exact.type == type && exact.slug === item.slug }
end
next if !all_data_items_valid
search_results.each { |item| item.type = type }.sort_by! { |item| item.text.downcase }
slugs_by_type[type] = search_results.map(&:slug)
.sort_by { |item| item.text.downcase }
top_ranked_type = type if top_ranked_type.nil?
limited_results.concat(search_results)
break if limited_results.length >= limit
end
@ -205,7 +219,6 @@ class HashtagAutocompleteService
#
# For example, if there is a category with the slug #general and a tag
# with the slug #general, then the tag will have its ref changed to #general::tag
top_ranked_type = slugs_by_type.keys.first
limited_results.each do |hashtag_item|
next if hashtag_item.type == top_ranked_type
@ -272,4 +285,28 @@ class HashtagAutocompleteService
def set_refs(hashtag_items)
hashtag_items.each { |item| item.ref ||= item.slug }
end
def all_data_items_valid?(items)
items.all? { |item| item.kind_of?(HashtagItem) && item.slug.present? && item.text.present? }
end
def search_for_type(type, guardian, term, limit)
set_refs(@@data_sources[type].search(guardian, term, limit)).each { |item| item.type = type }
end
def execute_lookup!(lookup_results, type, guardian, slugs)
found_from_slugs = lookup_for_type(type, guardian, slugs)
return if !all_data_items_valid?(found_from_slugs)
found_from_slugs.sort_by! { |item| item.text.downcase }
if lookup_results.present?
lookup_results[type.to_sym] = lookup_results[type.to_sym].concat(found_from_slugs)
end
found_from_slugs
end
def lookup_for_type(type, guardian, slugs)
set_refs(@@data_sources[type].lookup(guardian, slugs)).each { |item| item.type = type }
end
end

View File

@ -28,7 +28,6 @@ class TagHashtagDataSource
def self.lookup(guardian, slugs)
return [] if !SiteSetting.tagging_enabled
DiscourseTagging
.filter_visible(Tag.where_name(slugs), guardian)
.map { |tag| tag_to_hashtag_item(tag) }
@ -46,6 +45,7 @@ class TagHashtagDataSource
for_input: true,
order_search_results: true,
)
TagsController
.tag_counts_json(tags_with_counts)
.take(limit)

View File

@ -2036,6 +2036,10 @@ developer:
default: false
client: true
hidden: true
experimental_hashtag_search_result_limit:
default: 20
client: true
hidden: true
enable_new_user_profile_nav_groups:
client: true
type: group_list

View File

@ -29,10 +29,8 @@ module Chat::ChatChannelFetcher
SQL
end
def self.generate_allowed_channel_ids_sql(guardian)
<<~SQL
-- secured category chat channels
#{
def self.generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: false)
category_channel_sql =
ChatChannel
.select(:id)
.joins(
@ -43,8 +41,10 @@ module Chat::ChatChannelFetcher
allowed_category_ids: guardian.allowed_category_ids,
)
.to_sql
}
dm_channel_sql = ""
if !exclude_dm_channels
dm_channel_sql = <<~SQL
UNION
-- secured direct message chat channels
@ -62,21 +62,45 @@ module Chat::ChatChannelFetcher
SQL
end
def self.secured_public_channel_search(guardian, options = {})
channels =
<<~SQL
-- secured category chat channels
#{category_channel_sql}
#{dm_channel_sql}
SQL
end
def self.secured_public_channel_slug_lookup(guardian, slugs)
allowed_channel_ids = generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: true)
ChatChannel
.includes(:chat_channel_archive)
.includes(chatable: [:topic_only_relative_url])
.joins(
"LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'",
)
.where(chatable_type: ChatChannel.public_channel_chatable_types)
.where("chat_channels.id IN (#{generate_allowed_channel_ids_sql(guardian)})")
.where("chat_channels.id IN (#{allowed_channel_ids})")
.where("chat_channels.slug IN (:slugs)", slugs: slugs)
.limit(1)
end
def self.secured_public_channel_search(guardian, options = {})
allowed_channel_ids =
generate_allowed_channel_ids_sql(guardian, exclude_dm_channels: options[:exclude_dm_channels])
channels = ChatChannel.includes(chatable: [:topic_only_relative_url])
channels = channels.includes(:chat_channel_archive) if options[:include_archives]
channels =
channels
.joins(
"LEFT JOIN categories ON categories.id = chat_channels.chatable_id AND chat_channels.chatable_type = 'Category'",
)
.where(chatable_type: ChatChannel.public_channel_chatable_types)
.where("chat_channels.id IN (#{allowed_channel_ids})")
channels = channels.where(status: options[:status]) if options[:status].present?
if options[:filter].present?
sql = "chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter"
sql =
"chat_channels.name ILIKE :filter OR chat_channels.slug ILIKE :filter OR categories.name ILIKE :filter"
channels =
channels.where(sql, filter: "%#{options[:filter].downcase}%").order(
"chat_channels.name ASC, categories.name ASC",
@ -115,7 +139,11 @@ module Chat::ChatChannelFetcher
end
def self.secured_public_channels(guardian, memberships, options = { following: true })
channels = secured_public_channel_search(guardian, options)
channels =
secured_public_channel_search(
guardian,
options.merge(include_archives: true),
)
decorate_memberships_with_tracking_data(guardian, channels, memberships)
channels = channels.to_a
preload_custom_fields_for(channels)

View File

@ -19,7 +19,7 @@ class Chat::ChatChannelHashtagDataSource
def self.lookup(guardian, slugs)
if SiteSetting.enable_experimental_hashtag_autocomplete
Chat::ChatChannelFetcher
.secured_public_channel_search(guardian, slugs: slugs)
.secured_public_channel_slug_lookup(guardian, slugs)
.map { |channel| channel_to_hashtag_item(guardian, channel) }
else
[]
@ -29,7 +29,12 @@ class Chat::ChatChannelHashtagDataSource
def self.search(guardian, term, limit)
if SiteSetting.enable_experimental_hashtag_autocomplete
Chat::ChatChannelFetcher
.secured_public_channel_search(guardian, filter: term, limit: limit)
.secured_public_channel_search(
guardian,
filter: term,
limit: limit,
exclude_dm_channels: true,
)
.map { |channel| channel_to_hashtag_item(guardian, channel) }
else
[]

View File

@ -92,7 +92,9 @@ RSpec.describe HashtagAutocompleteService do
end
it "does not search other data sources if the limit is reached by earlier type data sources" do
Site.any_instance.expects(:categories).never
# only expected once to try get the exact matches first
site_guardian_categories = Site.new(guardian).categories
Site.any_instance.expects(:categories).once.returns(site_guardian_categories)
subject.search("book", %w[tag category], limit: 1)
end
@ -167,19 +169,26 @@ RSpec.describe HashtagAutocompleteService do
context "when multiple tags and categories are returned" do
fab!(:category2) { Fabricate(:category, name: "Book Zone", slug: "book-zone") }
fab!(:category3) { Fabricate(:category, name: "Book Dome", slug: "book-dome") }
fab!(:category4) { Fabricate(:category, name: "Bookworld", slug: "book") }
fab!(:tag2) { Fabricate(:tag, name: "mid-books") }
fab!(:tag3) { Fabricate(:tag, name: "terrible-books") }
fab!(:tag4) { Fabricate(:tag, name: "book") }
it "orders them by name within their type order" do
expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq(
%w[book-club book-dome book-zone book great-books mid-books terrible-books],
%w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books],
)
end
it "orders correctly with lower limits" do
expect(subject.search("book", %w[category tag], limit: 5).map(&:ref)).to eq(
%w[book-club book-dome book-zone book great-books],
%w[book book::tag book-club book-dome book-zone],
)
end
it "prioritises exact matches to the top of the list" do
expect(subject.search("book", %w[category tag], limit: 10).map(&:ref)).to eq(
%w[book book::tag book-club book-dome book-zone great-books mid-books terrible-books],
)
end
end