FIX: Ignore document length in search when ranking by relevance.
Considering document length in search introduced too much variance in our search results: it made certain searches better but at the same time made certain searches worse. Instead, we want a more deterministic way of ranking search results so that it is easier to reason about why one post is ranked higher in search than another. The long-term plan to tackle repeated terms is to restrict the number of positions for a given lexeme in our search index.
This commit is contained in:
parent
0058a15266
commit
6385fbbfbf
|
@ -1748,7 +1748,7 @@ backups:
|
||||||
|
|
||||||
search:
|
search:
|
||||||
search_ranking_normalization:
|
search_ranking_normalization:
|
||||||
default: '1'
|
default: '0'
|
||||||
hidden: true
|
hidden: true
|
||||||
min_search_term_length:
|
min_search_term_length:
|
||||||
client: true
|
client: true
|
||||||
|
|
|
@ -402,29 +402,6 @@ describe Search do
|
||||||
expect(result.blurb(reply)).to eq(expected_blurb)
|
expect(result.blurb(reply)).to eq(expected_blurb)
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'does not allow a post with repeated words to dominate the ranking' do
|
|
||||||
category = Fabricate(:category_with_definition, name: "winter is coming")
|
|
||||||
|
|
||||||
post = Fabricate(:post,
|
|
||||||
raw: "I think winter will end soon",
|
|
||||||
topic: Fabricate(:topic,
|
|
||||||
title: "dragon john snow winter",
|
|
||||||
category: category
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
post2 = Fabricate(:post,
|
|
||||||
raw: "I think #{'winter' * 20} will end soon",
|
|
||||||
topic: Fabricate(:topic, title: "dragon john snow summer", category: category)
|
|
||||||
)
|
|
||||||
|
|
||||||
result = Search.execute('winter')
|
|
||||||
|
|
||||||
expect(result.posts.pluck(:id)).to eq([
|
|
||||||
post.id, category.topic.first_post.id, post2.id
|
|
||||||
])
|
|
||||||
end
|
|
||||||
|
|
||||||
it 'applies a small penalty to closed topic when ranking' do
|
it 'applies a small penalty to closed topic when ranking' do
|
||||||
post = Fabricate(:post,
|
post = Fabricate(:post,
|
||||||
raw: "My weekly update",
|
raw: "My weekly update",
|
||||||
|
@ -698,12 +675,12 @@ describe Search do
|
||||||
expect(search.posts.map(&:id)).to eq([
|
expect(search.posts.map(&:id)).to eq([
|
||||||
child_of_ignored_category.topic.first_post,
|
child_of_ignored_category.topic.first_post,
|
||||||
category.topic.first_post,
|
category.topic.first_post,
|
||||||
post,
|
post2,
|
||||||
post2
|
post
|
||||||
].map(&:id))
|
].map(&:id))
|
||||||
|
|
||||||
search = Search.execute("snow")
|
search = Search.execute("snow")
|
||||||
expect(search.posts).to eq([post, post2])
|
expect(search.posts.map(&:id)).to eq([post2.id, post.id])
|
||||||
|
|
||||||
category.set_permissions({})
|
category.set_permissions({})
|
||||||
category.save
|
category.save
|
||||||
|
|
Loading…
Reference in New Issue