FEATURE: allow CJK text to be tokenized on non-CJK sites.
This means a mixed English/Chinese site can still have a functioning search.
This commit is contained in:
parent
b0905bee15
commit
f74a6457ee
|
@ -792,6 +792,7 @@ en:
|
||||||
max_topic_title_length: "Maximum allowed topic title length in characters"
|
max_topic_title_length: "Maximum allowed topic title length in characters"
|
||||||
min_private_message_title_length: "Minimum allowed title length for a message in characters"
|
min_private_message_title_length: "Minimum allowed title length for a message in characters"
|
||||||
min_search_term_length: "Minimum valid search term length in characters"
|
min_search_term_length: "Minimum valid search term length in characters"
|
||||||
|
search_tokenize_chinese_japanese_korean: "Force search to tokenize Chinese/Japanese/Korean even on non CJK sites"
|
||||||
allow_uncategorized_topics: "Allow topics to be created without a category. WARNING: If there are any uncategorized topics, you must recategorize them before turning this off."
|
allow_uncategorized_topics: "Allow topics to be created without a category. WARNING: If there are any uncategorized topics, you must recategorize them before turning this off."
|
||||||
uncategorized_description: "The description of the uncategorized category. Leave blank for no description."
|
uncategorized_description: "The description of the uncategorized category. Leave blank for no description."
|
||||||
allow_duplicate_topic_titles: "Allow topics with identical, duplicate titles."
|
allow_duplicate_topic_titles: "Allow topics with identical, duplicate titles."
|
||||||
|
|
|
@ -887,6 +887,8 @@ uncategorized:
|
||||||
min_search_term_length:
|
min_search_term_length:
|
||||||
client: true
|
client: true
|
||||||
default: 3
|
default: 3
|
||||||
|
|
||||||
|
search_tokenize_chinese_japanese_korean: false
|
||||||
max_similar_results: 5
|
max_similar_results: 5
|
||||||
minimum_topics_similar: 50
|
minimum_topics_similar: 50
|
||||||
|
|
||||||
|
|
|
@ -76,7 +76,7 @@ class Search
|
||||||
def self.prepare_data(search_data)
|
def self.prepare_data(search_data)
|
||||||
data = search_data.squish
|
data = search_data.squish
|
||||||
# TODO rmmseg is designed for chinese, we need something else for Korean / Japanese
|
# TODO rmmseg is designed for chinese, we need something else for Korean / Japanese
|
||||||
if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale)
|
if ['zh_TW', 'zh_CN', 'ja', 'ko'].include?(SiteSetting.default_locale) || SiteSetting.search_tokenize_chinese_japanese_korean
|
||||||
unless defined? RMMSeg
|
unless defined? RMMSeg
|
||||||
require 'rmmseg'
|
require 'rmmseg'
|
||||||
RMMSeg::Dictionary.load_dictionaries
|
RMMSeg::Dictionary.load_dictionaries
|
||||||
|
|
|
@ -392,6 +392,18 @@ describe Search do
|
||||||
expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
|
expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
|
||||||
expect(Search.execute('指南').posts.first.id).to eq(post.id)
|
expect(Search.execute('指南').posts.first.id).to eq(post.id)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it 'finds chinese topic based on title if tokenization is forced' do
|
||||||
|
skip("skipped until pg app installs the db correctly") if RbConfig::CONFIG["arch"] =~ /darwin/
|
||||||
|
|
||||||
|
SiteSetting.search_tokenize_chinese_japanese_korean = true
|
||||||
|
|
||||||
|
topic = Fabricate(:topic, title: 'My Title Discourse社區指南')
|
||||||
|
post = Fabricate(:post, topic: topic)
|
||||||
|
|
||||||
|
expect(Search.execute('社區指南').posts.first.id).to eq(post.id)
|
||||||
|
expect(Search.execute('指南').posts.first.id).to eq(post.id)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
describe 'Advanced search' do
|
describe 'Advanced search' do
|
||||||
|
|
Loading…
Reference in New Issue