FIX: do not allow title stuffing to dominate search (#21464)
We were giving topics with repeated words extra weight in the search index. This made it trivial to stuff words into a title to dominate search results, given that we search for exact title matches first. With the following tweak, `invite invited invites` and `invite some stuff` both rank the same for title searching. Titles are short and punchy; duplicating words should not give special weight. Requires a full reindex to take effect.
This commit is contained in:
parent
a010c3495b
commit
bd32912c5e
|
@ -118,7 +118,9 @@ class SearchIndexer
|
||||||
.each do |index|
|
.each do |index|
|
||||||
family = nil
|
family = nil
|
||||||
family = index[-1] if index[-1].match?(/[A-D]/)
|
family = index[-1] if index[-1].match?(/[A-D]/)
|
||||||
if (family_counts[family] += 1) <= max_dupes
|
# title dupes can completely dominate the index
|
||||||
|
# so we limit them to 1
|
||||||
|
if (family_counts[family] += 1) <= (family == "A" ? 1 : max_dupes)
|
||||||
new_index_array << index
|
new_index_array << index
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -2658,6 +2658,37 @@ RSpec.describe Search do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "when some categories are prioritized" do
|
||||||
|
before { SearchIndexer.enable }
|
||||||
|
after { SearchIndexer.disable }
|
||||||
|
|
||||||
|
it "correctly ranks topics with prioritized categories and stuffed topic terms" do
|
||||||
|
topic1 = Fabricate(:topic, title: "invite invited invites testing stuff with things")
|
||||||
|
post1 =
|
||||||
|
Fabricate(
|
||||||
|
:post,
|
||||||
|
topic: topic1,
|
||||||
|
raw: "this topic is a story about some person invites are fun",
|
||||||
|
)
|
||||||
|
|
||||||
|
category = Fabricate(:category, search_priority: Searchable::PRIORITIES[:high])
|
||||||
|
|
||||||
|
topic2 = Fabricate(:topic, title: "invite is the bestest", category: category)
|
||||||
|
post2 =
|
||||||
|
Fabricate(
|
||||||
|
:post,
|
||||||
|
topic: topic2,
|
||||||
|
raw: "this topic is a story about some other person invites are fun",
|
||||||
|
)
|
||||||
|
|
||||||
|
result = Search.execute("invite")
|
||||||
|
expect(result.posts.length).to eq(2)
|
||||||
|
|
||||||
|
# title match should win cause we limited duplication
|
||||||
|
expect(result.posts.pluck(:id)).to eq([post2.id, post1.id])
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "when max_duplicate_search_index_terms limits duplication" do
|
context "when max_duplicate_search_index_terms limits duplication" do
|
||||||
before { SearchIndexer.enable }
|
before { SearchIndexer.enable }
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue