FIX: correct issue with search omitting words with multiple dots
Previously we broke up words containing dots incorrectly, leading to missing search terms
This commit is contained in:
parent
e63a2487e3
commit
57a1190b07
|
@ -22,9 +22,7 @@ class SearchIndexer
|
||||||
|
|
||||||
# insert some extra words for I.am.a.word so "word" is tokenized
|
# insert some extra words for I.am.a.word so "word" is tokenized
|
||||||
# I.am.a.word becomes I.am.a.word am a word
|
# I.am.a.word becomes I.am.a.word am a word
|
||||||
# uses \p{L} which matches a single code point in category letter
|
search_data = raw_data.gsub(/[^[:space:]]*[\.]+[^[:space:]]*/) do |with_dot|
|
||||||
# uses \p{N} which matches a single code point in category number
|
|
||||||
search_data = raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot|
|
|
||||||
split = with_dot.split(".")
|
split = with_dot.split(".")
|
||||||
if split.length > 1
|
if split.length > 1
|
||||||
with_dot + (" " << split[1..-1].join(" "))
|
with_dot + (" " << split[1..-1].join(" "))
|
||||||
|
|
|
@ -760,8 +760,9 @@ describe Search do
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'can tokenize website names correctly' do
|
it 'can tokenize website names correctly' do
|
||||||
post = Fabricate(:post, raw: 'i like wb.camra.org.uk so yay')
|
post = Fabricate(:post, raw: 'i like http://wb.camra.org.uk/latest#test so yay')
|
||||||
expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id])
|
expect(Search.execute('http://wb.camra.org.uk/latest#test').posts.map(&:id)).to eq([post.id])
|
||||||
|
expect(Search.execute('camra').posts.map(&:id)).to eq([post.id])
|
||||||
end
|
end
|
||||||
|
|
||||||
it 'supports category slug and tags' do
|
it 'supports category slug and tags' do
|
||||||
|
|
Loading…
Reference in New Issue