FIX: Korean needs no word segmentation

Sam 2018-05-28 09:37:57 +10:00
parent 4bd24e78fc
commit c677877e4f
2 changed files with 6 additions and 2 deletions

config/site_settings.yml

@@ -1324,6 +1324,8 @@ search:
     locale_default:
       zh_CN: 2
       zh_TW: 2
+      ko: 2
+      ja: 2
   search_tokenize_chinese_japanese_korean: false
   search_prefer_recent_posts: false
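
The hunk header cuts off the parent setting, but judging by the values this is the per-locale table of minimum search term lengths. A quick illustration (plain Ruby, not Discourse code) of why 2 is the right floor for Korean and Japanese, where many common words are only one or two characters:

    # String#length counts characters, not bytes, so these counts are honest.
    puts '검색'.length # => 2 ("search" in Korean)
    puts '犬'.length   # => 1 ("dog" in Japanese)
    puts 'dog'.length  # => 3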

lib/search.rb

@@ -48,8 +48,10 @@ class Search
   def self.prepare_data(search_data, purpose = :query)
     data = search_data.squish
-    # TODO cppjieba_rb is designed for chinese, we need something else for Korean / Japanese
-    if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
+    # TODO cppjieba_rb is designed for Chinese; we need something else for Japanese.
+    # Korean appears to be safe because words are already space separated.
+    # For Japanese we should investigate using kakasi.
+    if ['zh_TW', 'zh_CN', 'ja'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
       require 'cppjieba_rb' unless defined? CppjiebaRb
       mode = (purpose == :query ? :query : :mix)
       data = CppjiebaRb.segment(search_data, mode: mode)
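
For context, a minimal standalone sketch of the behaviour after this change; segment_for_search and its locale argument are hypothetical stand-ins for Search.prepare_data and SiteSetting.default_locale, and the trailing join is an assumption mirroring how the surrounding Discourse code rejoins tokens:

    require 'cppjieba_rb' # the gem Discourse uses for Chinese segmentation

    def segment_for_search(search_data, locale)
      data = search_data.gsub(/\s+/, ' ').strip # plain-Ruby stand-in for squish
      if ['zh_TW', 'zh_CN', 'ja'].include?(locale)
        # Chinese (and, for now, Japanese) goes through the jieba tokenizer;
        # segment returns a list of tokens that we rejoin on single spaces.
        data = CppjiebaRb.segment(data, mode: :query).join(' ')
      end
      data
    end

    segment_for_search('中文搜索测试', 'zh_CN')    # tokenised, e.g. "中文 搜索 测试"
    segment_for_search('한국어 검색 테스트', 'ko') # unchanged: already space separated

Korean text falls through untouched, which is exactly the point of the commit: with words already delimited by spaces, the default whitespace tokenizer is sufficient.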