FIX: Korean needs no word segmentation
This commit is contained in:
parent
4bd24e78fc
commit
c677877e4f
|
@ -1324,6 +1324,8 @@ search:
|
||||||
locale_default:
|
locale_default:
|
||||||
zh_CN: 2
|
zh_CN: 2
|
||||||
zh_TW: 2
|
zh_TW: 2
|
||||||
|
ko: 2
|
||||||
|
ja: 2
|
||||||
|
|
||||||
search_tokenize_chinese_japanese_korean: false
|
search_tokenize_chinese_japanese_korean: false
|
||||||
search_prefer_recent_posts: false
|
search_prefer_recent_posts: false
|
||||||
|
|
|
@ -48,8 +48,10 @@ class Search
|
||||||
|
|
||||||
def self.prepare_data(search_data, purpose = :query)
|
def self.prepare_data(search_data, purpose = :query)
|
||||||
data = search_data.squish
|
data = search_data.squish
|
||||||
# TODO cppjieba_rb is designed for chinese, we need something else for Korean / Japanese
|
# TODO: cppjieba_rb is designed for Chinese; we need something else for Japanese
|
||||||
if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
# Korean appears to be safe because words are already space-separated
|
||||||
|
# For Japanese we should investigate using kakasi
|
||||||
|
if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
||||||
require 'cppjieba_rb' unless defined? CppjiebaRb
|
require 'cppjieba_rb' unless defined? CppjiebaRb
|
||||||
mode = (purpose == :query ? :query : :mix)
|
mode = (purpose == :query ? :query : :mix)
|
||||||
data = CppjiebaRb.segment(search_data, mode: mode)
|
data = CppjiebaRb.segment(search_data, mode: mode)
|
||||||
|
|
Loading…
Reference in New Issue