FIX: search within topic not working correctly in CJK
We were splitting the search term into tokens before searching within a topic, which caused every lookup to miss.
This commit is contained in:
parent
30501b6660
commit
06b9d8223a
|
@ -55,20 +55,26 @@ class Search
|
|||
end
|
||||
|
||||
def self.prepare_data(search_data, purpose = :query)
|
||||
data = search_data.squish
|
||||
# TODO cppjieba_rb is designed for Chinese; we need something else for Japanese
|
||||
# Korean appears to be safe because words are already space-separated
|
||||
# For Japanese we should investigate using kakasi
|
||||
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
||||
require 'cppjieba_rb' unless defined? CppjiebaRb
|
||||
mode = (purpose == :query ? :query : :mix)
|
||||
data = CppjiebaRb.segment(search_data, mode: mode)
|
||||
data = CppjiebaRb.filter_stop_word(data).join(' ')
|
||||
end
|
||||
purpose ||= :query
|
||||
|
||||
data = search_data.dup
|
||||
data.force_encoding("UTF-8")
|
||||
if SiteSetting.search_ignore_accents
|
||||
data = strip_diacritics(data)
|
||||
if purpose != :topic
|
||||
# TODO cppjieba_rb is designed for Chinese; we need something else for Japanese
|
||||
# Korean appears to be safe because words are already space-separated
|
||||
# For Japanese we should investigate using kakasi
|
||||
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
||||
require 'cppjieba_rb' unless defined? CppjiebaRb
|
||||
mode = (purpose == :query ? :query : :mix)
|
||||
data = CppjiebaRb.segment(search_data, mode: mode)
|
||||
data = CppjiebaRb.filter_stop_word(data).join(' ')
|
||||
else
|
||||
data.squish!
|
||||
end
|
||||
|
||||
if SiteSetting.search_ignore_accents
|
||||
data = strip_diacritics(data)
|
||||
end
|
||||
end
|
||||
data
|
||||
end
|
||||
|
@ -155,7 +161,7 @@ class Search
|
|||
term = process_advanced_search!(term)
|
||||
|
||||
if term.present?
|
||||
@term = Search.prepare_data(term)
|
||||
@term = Search.prepare_data(term, Topic === @search_context ? :topic : nil)
|
||||
@original_term = PG::Connection.escape_string(@term)
|
||||
end
|
||||
|
||||
|
|
|
@ -246,10 +246,19 @@ describe Search do
|
|||
|
||||
context 'search within topic' do
|
||||
|
||||
def new_post(raw, topic)
|
||||
def new_post(raw, topic = nil)
|
||||
topic ||= Fabricate(:topic)
|
||||
Fabricate(:post, topic: topic, topic_id: topic.id, user: topic.user, raw: raw)
|
||||
end
|
||||
|
||||
it 'works in Chinese' do
|
||||
SiteSetting.search_tokenize_chinese_japanese_korean = true
|
||||
post = new_post('I am not in English 何点になると思いますか')
|
||||
|
||||
results = Search.execute('何点になると思', search_context: post.topic)
|
||||
expect(results.posts.map(&:id)).to eq([post.id])
|
||||
end
|
||||
|
||||
it 'displays multiple results within a topic' do
|
||||
topic = Fabricate(:topic)
|
||||
topic2 = Fabricate(:topic)
|
||||
|
|
Loading…
Reference in New Issue