FEATURE: Use similarity in user search (#20112)

Currently, when `@mention`-ing users, we have zero tolerance for typos and misspellings.

With this patch, if a user search doesn't return enough results, we fall back to `pg_trgm` similarity matching to find more matches based on trigrams of usernames and names.

It also introduces GiST indexes on those fields in order to improve performance of this search, going from 130ms down to 15ms in my tests.

This is all gated behind a feature flag and can be enabled by running `SiteSetting.user_search_similar_results = true` in the Rails console.
This commit is contained in:
Rafael dos Santos Silva 2023-02-02 13:35:04 -03:00 committed by GitHub
parent ca2b2d034f
commit 14cf8eacf1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 8 deletions

View File

@ -2222,12 +2222,14 @@ end
#
# Indexes
#
# idx_users_admin (id) WHERE admin
# idx_users_moderator (id) WHERE moderator
# index_users_on_last_posted_at (last_posted_at)
# index_users_on_last_seen_at (last_seen_at)
# index_users_on_secure_identifier (secure_identifier) UNIQUE
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
# index_users_on_username (username) UNIQUE
# index_users_on_username_lower (username_lower) UNIQUE
# idx_users_admin (id) WHERE admin
# idx_users_moderator (id) WHERE moderator
# index_users_on_last_posted_at (last_posted_at)
# index_users_on_last_seen_at (last_seen_at)
# index_users_on_name_trgm (name) USING gist
# index_users_on_secure_identifier (secure_identifier) UNIQUE
# index_users_on_uploaded_avatar_id (uploaded_avatar_id)
# index_users_on_username (username) UNIQUE
# index_users_on_username_lower (username_lower) UNIQUE
# index_users_on_username_lower_trgm (username_lower) USING gist
#

View File

@ -160,6 +160,8 @@ class UserSearch
.each { |id| users << id }
end
return users.to_a if users.size >= @limit
# 5. last seen users (for search auto-suggestions)
if @last_seen_users
scoped_users
@ -169,6 +171,32 @@ class UserSearch
.each { |id| users << id }
end
return users.to_a if users.size >= @limit
if SiteSetting.user_search_similar_results
# 6. similar usernames
if @term.present?
scoped_users
.where("username_lower <-> ? < 1", @term)
.order(["username_lower <-> ? ASC", @term])
.limit(@limit - users.size)
.pluck(:id)
.each { |id| users << id }
end
return users.to_a if users.size >= @limit
# 7. similar names
if SiteSetting.enable_names? && @term.present?
scoped_users
.where("name <-> ? < 1", @term)
.order(["name <-> ? ASC", @term])
.limit(@limit - users.size)
.pluck(:id)
.each { |id| users << id }
end
end
users.to_a
end

View File

@ -2194,6 +2194,9 @@ backups:
client: true
search:
user_search_similar_results:
default: false
hidden: true
prioritize_exact_search_title_match:
default: false
hidden: true

View File

@ -0,0 +1,24 @@
# frozen_string_literal: true
# Adds GiST trigram indexes on users.username_lower and users.name.
class AddTrigramIndexesToUsers < ActiveRecord::Migration[7.0]
  # The indexes below are built with `algorithm: :concurrently`, which
  # PostgreSQL does not allow inside a transaction block.
  disable_ddl_transaction!

  def change
    # Indexed column => index name, in creation order.
    trigram_indexes = {
      username_lower: "index_users_on_username_lower_trgm",
      name: "index_users_on_name_trgm",
    }

    trigram_indexes.each do |column, index_name|
      add_index(
        :users,
        column,
        name: index_name,
        using: "gist",
        opclass: :gist_trgm_ops,
        algorithm: :concurrently,
      )
    end
  end
end

View File

@ -267,4 +267,35 @@ RSpec.describe UserSearch do
expect(results[2]).to eq("mrorange")
end
end
context "when using SiteSetting.user_search_similar_results" do
  it "should find the user even with a typo if the setting is enabled" do
    rafael = Fabricate(:user, username: "rafael", name: "Rafael Silva")
    codinghorror = Fabricate(:user, username: "codinghorror", name: "Jeff Atwood")
    pfaffman = Fabricate(:user, username: "pfaffman")
    zogstrip = Fabricate(:user, username: "zogstrip", name: "Régis Hanol")

    # Misspelled usernames mapped to the user each one is expected to find.
    username_typos = { "rafel" => rafael, "codding" => codinghorror, "pffman" => pfaffman }

    # Misspelled or partial names mapped to the user each one is expected to find.
    name_typos = {
      "silvia" => rafael,
      "atwod" => codinghorror,
      "regis" => zogstrip,
      "reg" => zogstrip,
    }

    [username_typos, name_typos].each do |typos|
      # With the setting off, none of the terms should return anything.
      SiteSetting.user_search_similar_results = false
      typos.each_key { |term| expect(UserSearch.new(term).search).to be_blank }

      # With the setting on, each term should surface its intended user.
      SiteSetting.user_search_similar_results = true
      typos.each { |term, user| expect(UserSearch.new(term).search).to include(user) }
    end
  end
end
end