FIX: correct issue with search omitting words with multiple dots

Previously, words containing dots were split up incorrectly, which led to
missing search terms.
This commit is contained in:
Sam 2017-12-19 16:04:24 +11:00
parent e63a2487e3
commit 57a1190b07
2 changed files with 4 additions and 5 deletions

View File

@ -22,9 +22,7 @@ class SearchIndexer
# insert some extra words for I.am.a.word so "word" is tokenized
# I.am.a.word becomes I.am.a.word am a word
# uses \p{L} which matches a single code point in category letter
# uses \p{N} which matches a single code point in category number
search_data = raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot|
search_data = raw_data.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot|
split = with_dot.split(".")
if split.length > 1
with_dot + (" " << split[1..-1].join(" "))

View File

@ -760,8 +760,9 @@ describe Search do
end
it 'can tokanize website names correctly' do
post = Fabricate(:post, raw: 'i like wb.camra.org.uk so yay')
expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id])
post = Fabricate(:post, raw: 'i like http://wb.camra.org.uk/latest#test so yay')
expect(Search.execute('http://wb.camra.org.uk/latest#test').posts.map(&:id)).to eq([post.id])
expect(Search.execute('camra').posts.map(&:id)).to eq([post.id])
end
it 'supports category slug and tags' do