FIX: not splitting words correctly for search tokenizer
parent 0ef7a969f2
commit 6002f2ca4a
@@ -21,7 +21,10 @@ class SearchIndexer
     foreign_key = "#{table}_id"

     # insert some extra words for I.am.a.word so "word" is tokenized
-    search_data = raw_data.gsub(/\p{L}*\.\p{L}*/) do |with_dot|
+    # I.am.a.word becomes I.am.a.word am a word
+    # uses \p{L} which matches a single code point in category letter
+    # uses \p{N} which matches a single code point in category number
+    search_data = raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot|
       split = with_dot.split(".")
       if split.length > 1
         with_dot + (" " << split[1..-1].join(" "))
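As a reading aid, not part of the commit: a minimal standalone sketch of the new tokenizer pass. The helper name and the else branch are assumptions, since the hunk cuts off before the end of the block.

# Hypothetical helper wrapping the new gsub pass; illustrative only.
def expand_dotted_words(raw_data)
  raw_data.gsub(/(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/) do |with_dot|
    split = with_dot.split(".")
    if split.length > 1
      # keep the original dotted term and append its tail words
      with_dot + (" " << split[1..-1].join(" "))
    else
      with_dot # assumed fall-through; the hunk ends before this branch
    end
  end
end

expand_dotted_words("I.am.a.word")   # => "I.am.a.word am a word"
expand_dotted_words("wiki_1.2-rc.3") # => "wiki_1.2-rc.3 2-rc 3"

The widened character set (\p{N}, underscore, hyphen) is what lets digits and punctuation-bearing terms stay attached to their dots instead of being cut at the first non-letter.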
@@ -703,6 +703,11 @@ describe Search do
       expect(Search.execute('bill').posts.map(&:id)).to eq([post.id])
     end

+    it 'can tokenize website names correctly' do
+      post = Fabricate(:post, raw: 'i like wb.camra.org.uk so yay')
+      expect(Search.execute('wb.camra.org.uk').posts.map(&:id)).to eq([post.id])
+    end
+
     it 'supports category slug and tags' do
       # main category
       category = Fabricate(:category, name: 'category 24', slug: 'category-24')
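For context only, not part of the commit: an illustrative trace of how each pattern rewrites the raw string used in the spec above. expand is a hypothetical helper that takes the pattern as a parameter and mirrors the gsub block from the first hunk.

# Illustrative only: compare old and new patterns on the spec's raw string.
def expand(raw, pattern)
  raw.gsub(pattern) do |with_dot|
    split = with_dot.split(".")
    split.length > 1 ? with_dot + " " + split[1..-1].join(" ") : with_dot
  end
end

raw = "i like wb.camra.org.uk so yay"

# Old pattern matched runs of letters only, so the hostname was carved into
# "wb.camra", ".org" and ".uk", and the full term no longer appeared intact:
expand(raw, /\p{L}*\.\p{L}*/)
# => "i like wb.camra camra.org org.uk uk so yay"

# New pattern swallows the whole dotted term, keeps it intact and appends the
# tail words, so a search for 'wb.camra.org.uk' can match the post:
expand(raw, /(\p{L}|\p{N}|_|-|\.)*\.(\p{L}|\p{N}|_|-|\.)*/)
# => "i like wb.camra.org.uk camra org uk so yay"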