FIX: Ignore document length in search when ranking by relevance.

Considering document length in search introduced too much variance in
our search results: it made certain searches better but at the
same time made certain searches worse. Instead, we want to have a more
deterministic way of ranking search so that it is easier to reason about
why a post is ranked higher in search than another.

The long term plan to tackle repeated terms is to restrict the number of
positions for a given lexeme in our search index.
This commit is contained in:
Guo Xiang Tan 2020-07-15 13:25:15 +08:00
parent 0058a15266
commit 6385fbbfbf
No known key found for this signature in database
GPG Key ID: FBD110179AAC1F20
2 changed files with 4 additions and 27 deletions

View File

@ -1748,7 +1748,7 @@ backups:
search:
search_ranking_normalization:
default: '1'
default: '0'
hidden: true
min_search_term_length:
client: true

View File

@ -402,29 +402,6 @@ describe Search do
expect(result.blurb(reply)).to eq(expected_blurb)
end
it 'does not allow a post with repeated words to dominate the ranking' do
category = Fabricate(:category_with_definition, name: "winter is coming")
post = Fabricate(:post,
raw: "I think winter will end soon",
topic: Fabricate(:topic,
title: "dragon john snow winter",
category: category
)
)
post2 = Fabricate(:post,
raw: "I think #{'winter' * 20} will end soon",
topic: Fabricate(:topic, title: "dragon john snow summer", category: category)
)
result = Search.execute('winter')
expect(result.posts.pluck(:id)).to eq([
post.id, category.topic.first_post.id, post2.id
])
end
it 'applies a small penalty to closed topic when ranking' do
post = Fabricate(:post,
raw: "My weekly update",
@ -698,12 +675,12 @@ describe Search do
expect(search.posts.map(&:id)).to eq([
child_of_ignored_category.topic.first_post,
category.topic.first_post,
post,
post2
post2,
post
].map(&:id))
search = Search.execute("snow")
expect(search.posts).to eq([post, post2])
expect(search.posts.map(&:id)).to eq([post2.id, post.id])
category.set_permissions({})
category.save