FIX: search tokenizer not splitting words correctly

Sam 2017-08-14 16:19:28 -04:00
parent 0ef7a969f2
commit 6002f2ca4a
2 changed files with 9 additions and 1 deletion


@@ -21,7 +21,10 @@ class SearchIndexer
     foreign_key = "#{table}_id"

     # insert some extra words for I.am.a.word so "word" is tokenized
-    search_data = raw_data.gsub(/\p{L}*\.\p{L}*/) do |with_dot|
+    # I.am.a.word becomes I.am.a.word am a word
+    # uses \p{L} which matches a single code point in category letter
+    # uses \p{N} which matches a single code point in category number
+    search_data = raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot|
       split = with_dot.split(".")
       if split.length > 1
         with_dot + (" " << split[1..-1].join(" "))

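The practical effect: the old pattern restarted at every dot, so a multi-part name was indexed only as fragments. A minimal plain-Ruby sketch of the substitution (OLD, NEW, and expand are illustrative names, not from the commit, and the else branch is an assumption since the diff cuts off before the block ends):

OLD = /\p{L}*\.\p{L}*/
NEW = /(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/

def expand(raw_data, pattern)
  raw_data.gsub(pattern) do |with_dot|
    split = with_dot.split(".")
    if split.length > 1
      with_dot + (" " << split[1..-1].join(" "))
    else
      with_dot # assumed: leave other matches untouched
    end
  end
end

puts expand("i like wb.camra.org.uk so yay", OLD)
# => i like wb.camra camra.org org.uk uk so yay
#    the old pattern restarts at each dot, so the full domain is destroyed

puts expand("i like wb.camra.org.uk so yay", NEW)
# => i like wb.camra.org.uk camra org uk so yay
#    the whole domain survives as one token, plus the extra suffix words

puts expand("I.am.a.word", NEW)
# => I.am.a.word am a word

Letting the token include \p{N}, "_", "-" and "." is what keeps a full hostname together as a single searchable word.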

@@ -703,6 +703,11 @@ describe Search do
       expect(Search.execute('bill').posts.map(&:id)).to eq([post.id])
     end

+    it 'can tokenize website names correctly' do
+      post = Fabricate(:post, raw: 'i like wb.camra.org.uk so yay')
+      expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id])
+    end
+
     it 'supports category slug and tags' do
       # main category
       category = Fabricate(:category, name: 'category 24', slug: 'category-24')
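For reference, a quick plain-Ruby check of what each pattern actually matches in the spec's sample string (variable names are illustrative):

old_pattern = /\p{L}*\.\p{L}*/
new_pattern = /(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/

p "i like wb.camra.org.uk so yay".scan(old_pattern)
# => ["wb.camra", ".org", ".uk"]   three fragments, none equal to the domain
p "i like wb.camra.org.uk so yay"[new_pattern]
# => "wb.camra.org.uk"             one match spanning the whole domain
# (String#scan would return capture groups for new_pattern, hence the
#  [] first-match form here)

Because the new pattern yields the whole domain as a single match, the expanded index data contains wb.camra.org.uk verbatim, which is what the new example asserts.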