FEATURE: allow CJK text to be tokenized on non-CJK sites.

This means a mixed English/Chinese site can still have a functioning search.
This commit is contained in:
Sam 2015-11-27 16:35:27 +11:00
parent b0905bee15
commit f74a6457ee
4 changed files with 16 additions and 1 deletions

View File

@ -792,6 +792,7 @@ en:
max_topic_title_length: "Maximum allowed topic title length in characters"
min_private_message_title_length: "Minimum allowed title length for a message in characters"
min_search_term_length: "Minimum valid search term length in characters"
search_tokenize_chinese_japanese_korean: "Force search to tokenize Chinese/Japanese/Korean even on non CJK sites"
allow_uncategorized_topics: "Allow topics to be created without a category. WARNING: If there are any uncategorized topics, you must recategorize them before turning this off."
uncategorized_description: "The description of the uncategorized category. Leave blank for no description."
allow_duplicate_topic_titles: "Allow topics with identical, duplicate titles."

View File

@ -887,6 +887,8 @@ uncategorized:
min_search_term_length:
client: true
default: 3
search_tokenize_chinese_japanese_korean: false
max_similar_results: 5
minimum_topics_similar: 50

View File

@ -76,7 +76,7 @@ class Search
def self.prepare_data(search_data)
data = search_data.squish
# TODO rmmseg is designed for chinese, we need something else for Korean / Japanese
if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale)
if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
unless defined? RMMSeg
require 'rmmseg'
RMMSeg::Dictionary.load_dictionaries

View File

@ -392,6 +392,18 @@ describe Search do
expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
expect(Search.execute('指南').posts.first.id).to eq(post.id)
end
# Verifies that a topic whose title mixes English and Chinese can be found by
# its Chinese terms once the search_tokenize_chinese_japanese_korean site
# setting is switched on.
it 'finds chinese topic based on title if tokenization is forced' do
# Not run on darwin hosts; the skip message records the reason.
skip("skipped until pg app installs the db correctly") if RbConfig::CONFIG["arch"] =~ /darwin/
# Force CJK tokenization for this example.
# NOTE(review): presumably the default locale here is a non-CJK one, so this
# setting is what triggers tokenization — confirm against the spec setup.
SiteSetting.search_tokenize_chinese_japanese_korean = true
topic = Fabricate(:topic, title: 'My Title Discourse社區指南')
post = Fabricate(:post, topic: topic)
# Both the full Chinese phrase and a shorter part of it must return the
# fabricated post as the first result.
expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
expect(Search.execute('指南').posts.first.id).to eq(post.id)
end
end
describe 'Advanced search' do