Fix handling of stop word _lang_ notation

Fixes #2412
This commit is contained in:
Igor Motov 2012-11-14 18:47:27 -05:00 committed by Shay Banon
parent 2094207bf1
commit 65a43d3ad4
3 changed files with 113 additions and 23 deletions

View File

@ -62,10 +62,7 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.*;
/**
*
@ -157,37 +154,45 @@ public class Analysis {
if ("_none_".equals(value)) {
return CharArraySet.EMPTY_SET;
} else {
return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case);
return resolveNamedStopWords(Strings.commaDelimitedListToSet(value), version, ignore_case);
}
}
String[] stopWords = settings.getAsArray("stopwords", null);
if (stopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case);
for (String stopWord : stopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));
} else {
setStopWords.add(stopWord);
}
}
return setStopWords;
return resolveNamedStopWords(stopWords, version, ignore_case);
}
List<String> pathLoadedStopWords = getWordList(env, settings, "stopwords");
if (pathLoadedStopWords != null) {
CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case);
for (String stopWord : pathLoadedStopWords) {
if (namedStopWords.containsKey(stopWord)) {
setStopWords.addAll(namedStopWords.get(stopWord));
} else {
setStopWords.add(stopWord);
}
}
return setStopWords;
// Resolve the words loaded from the stopwords path; the inline stopWords array is always null at this point.
return resolveNamedStopWords(pathLoadedStopWords, version, ignore_case);
}
return defaultStopWords;
}
/**
 * Builds a stop word set from the given words, expanding named stop word lists.
 * <p>
 * A word that is a key of {@code namedStopWords} contributes every entry of the
 * set mapped to it; any other word is added to the result as-is.
 *
 * @param words       literal stop words and/or keys of {@code namedStopWords}
 * @param version     version handed to the {@link CharArraySet} constructor
 * @param ignore_case ignore-case flag handed to the {@link CharArraySet} constructor
 * @return a newly created set holding the resolved stop words
 */
private static CharArraySet resolveNamedStopWords(Collection<String> words, Version version, boolean ignore_case) {
    CharArraySet setStopWords = new CharArraySet(version, words.size(), ignore_case);
    for (String stopWord : words) {
        if (namedStopWords.containsKey(stopWord)) {
            setStopWords.addAll(namedStopWords.get(stopWord));
        } else {
            setStopWords.add(stopWord);
        }
    }
    return setStopWords;
}

/**
 * Array variant of {@link #resolveNamedStopWords(Collection, Version, boolean)}.
 *
 * @param words       literal stop words and/or keys of {@code namedStopWords}
 * @param version     version handed to the {@link CharArraySet} constructor
 * @param ignore_case ignore-case flag handed to the {@link CharArraySet} constructor
 * @return a newly created set holding the resolved stop words
 */
private static CharArraySet resolveNamedStopWords(String[] words, Version version, boolean ignore_case) {
    // Arrays.asList wraps the array without copying and yields a List, so this call
    // binds to the Collection overload and the expansion logic lives in one place.
    return resolveNamedStopWords(Arrays.asList(words), version, ignore_case);
}
public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix, Version version) {
List<String> wordList = getWordList(env, settings, settingsPrefix);
if (wordList == null) {

View File

@ -0,0 +1,67 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.test.unit.index.analysis;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.testng.annotations.Test;
import java.io.StringReader;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.test.unit.index.analysis.AnalysisTestsHelper.assertSimpleTSOutput;
/**
 * Checks "stop" analyzers configured in {@code stop.json}, where the stop words
 * are given once as an array and once as a plain string.
 */
public class StopAnalyzerTests {

    /**
     * Loads the analyzers "analyzer1" and "analyzer2" from the classpath settings
     * and asserts that each one produces no tokens for "to be or not to be".
     */
    @Test
    public void testDefaultsCompoundAnalysis() throws Exception {
        Settings settings = settingsBuilder()
                .loadFromClasspath("org/elasticsearch/test/unit/index/analysis/stop.json")
                .build();
        Index index = new Index("test");

        // Parent injector supplies the node-level services the index-level modules depend on.
        Injector parentInjector = new ModulesBuilder()
                .add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule())
                .createInjector();

        ModulesBuilder indexModules = new ModulesBuilder().add(
                new IndexSettingsModule(index, settings),
                new IndexNameModule(index),
                new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)));
        Injector injector = indexModules.createChildInjector(parentInjector);

        AnalysisService analysisService = injector.getInstance(AnalysisService.class);

        // Both analyzers are expected to emit an empty token stream for the same input.
        for (String analyzerName : new String[]{"analyzer1", "analyzer2"}) {
            NamedAnalyzer analyzer = analysisService.analyzer(analyzerName);
            assertSimpleTSOutput(analyzer.tokenStream("test", new StringReader("to be or not to be")), new String[0]);
        }
    }
}

View File

@ -0,0 +1,18 @@
{
"index":{
"number_of_shards":1,
"number_of_replicas":0,
"analysis":{
"analyzer":{
"analyzer1":{
"type":"stop",
"stopwords":["_english_"]
},
"analyzer2":{
"type":"stop",
"stopwords":"_english_"
}
}
}
}
}