diff --git a/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java index 66e6564b0c1..b72b6507a48 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/ShingleTokenFilterFactory.java @@ -32,48 +32,83 @@ import org.elasticsearch.index.settings.IndexSettings; */ public class ShingleTokenFilterFactory extends AbstractTokenFilterFactory { - private final int maxShingleSize; - - private final boolean outputUnigrams; - - private final boolean outputUnigramsIfNoShingles; - - private String tokenSeparator; - - private int minShingleSize; + private final Factory factory; @Inject public ShingleTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); - minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE); - outputUnigrams = settings.getAsBoolean("output_unigrams", true); - outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false); - tokenSeparator = settings.get("token_separator", ShingleFilter.TOKEN_SEPARATOR); + Integer maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); + Integer minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE); + Boolean outputUnigrams = settings.getAsBoolean("output_unigrams", true); + Boolean outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false); + String tokenSeparator = settings.get("token_separator", ShingleFilter.TOKEN_SEPARATOR); + factory = new Factory("shingle", minShingleSize, maxShingleSize, outputUnigrams, outputUnigramsIfNoShingles, tokenSeparator); } + @Override public TokenStream create(TokenStream tokenStream) { - ShingleFilter filter = new ShingleFilter(tokenStream, minShingleSize, maxShingleSize); - filter.setOutputUnigrams(outputUnigrams); - filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles); - filter.setTokenSeparator(tokenSeparator); - return filter; + return factory.create(tokenStream); } - public int getMaxShingleSize() { - return maxShingleSize; + + public Factory getInnerFactory() { + return this.factory; } - public int getMinShingleSize() { - return minShingleSize; - } - - public boolean getOutputUnigrams() { - return outputUnigrams; - } - - public boolean getOutputUnigramsIfNoShingles() { - return outputUnigramsIfNoShingles; + public static final class Factory implements TokenFilterFactory { + private final int maxShingleSize; + + private final boolean outputUnigrams; + + private final boolean outputUnigramsIfNoShingles; + + private final String tokenSeparator; + + private int minShingleSize; + + private final String name; + + public Factory(String name) { + this(name, ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE, true, false, ShingleFilter.TOKEN_SEPARATOR); + } + + Factory(String name, int minShingleSize, int maxShingleSize, boolean outputUnigrams, boolean outputUnigramsIfNoShingles, String tokenSeparator) { + this.maxShingleSize = maxShingleSize; + this.outputUnigrams = outputUnigrams; + this.outputUnigramsIfNoShingles = outputUnigramsIfNoShingles; + this.tokenSeparator = tokenSeparator; + this.minShingleSize = minShingleSize; + this.name = name; + } + + public TokenStream create(TokenStream tokenStream) { + ShingleFilter filter = new ShingleFilter(tokenStream, minShingleSize, maxShingleSize); + filter.setOutputUnigrams(outputUnigrams); + filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles); + filter.setTokenSeparator(tokenSeparator); + return filter; + } + + public int getMaxShingleSize() { + return maxShingleSize; + } + + public int getMinShingleSize() { + return minShingleSize; + } + + public boolean getOutputUnigrams() { + return outputUnigrams; + } + + public boolean getOutputUnigramsIfNoShingles() { + return outputUnigramsIfNoShingles; + } + + @Override + public String name() { + return name; + } } } diff --git a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java index e9f185a60d0..32f9c2f1b55 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java +++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java @@ -504,17 +504,7 @@ public class IndicesAnalysisService extends AbstractComponent { } })); - tokenFilterFactories.put("shingle", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { - @Override - public String name() { - return "shingle"; - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return new ShingleFilter(tokenStream, ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE); - } - })); + tokenFilterFactories.put("shingle", new PreBuiltTokenFilterFactoryFactory(new ShingleTokenFilterFactory.Factory("shingle"))); tokenFilterFactories.put("unique", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override diff --git a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java index 75fffd5a144..32336123df9 100644 --- a/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java +++ b/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java @@ -23,7 +23,6 @@ import java.util.Comparator; import java.util.Locale; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CustomAnalyzerWrapper; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -276,21 +275,21 @@ public final class SuggestUtils { } - public static ShingleTokenFilterFactory getShingleFilterFactory(Analyzer analyzer) { + public static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) { if (analyzer instanceof NamedAnalyzer) { analyzer = ((NamedAnalyzer)analyzer).analyzer(); } if (analyzer instanceof CustomAnalyzer) { - CustomAnalyzer a = (CustomAnalyzer) analyzer; - TokenFilterFactory[] tokenFilters = a.tokenFilters(); + final CustomAnalyzer a = (CustomAnalyzer) analyzer; + final TokenFilterFactory[] tokenFilters = a.tokenFilters(); for (TokenFilterFactory tokenFilterFactory : tokenFilters) { if (tokenFilterFactory instanceof ShingleTokenFilterFactory) { - return ((ShingleTokenFilterFactory) tokenFilterFactory); + return ((ShingleTokenFilterFactory)tokenFilterFactory).getInnerFactory(); + } else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) { + return (ShingleTokenFilterFactory.Factory) tokenFilterFactory; } } } return null; } - - } diff --git a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java index 96a4fd1339e..10373bed595 100644 --- a/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java +++ b/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggestParser.java @@ -216,7 +216,7 @@ public final class PhraseSuggestParser implements SuggestContextParser { } if (!gramSizeSet || suggestion.generators().isEmpty()) { - final ShingleTokenFilterFactory shingleFilterFactory = SuggestUtils.getShingleFilterFactory(suggestion.getAnalyzer() == null ? context.mapperService().fieldSearchAnalyzer(suggestion.getField()) : suggestion.getAnalyzer()); ; + final ShingleTokenFilterFactory.Factory shingleFilterFactory = SuggestUtils.getShingleFilterFactory(suggestion.getAnalyzer() == null ? context.mapperService().fieldSearchAnalyzer(suggestion.getField()) : suggestion.getAnalyzer()); ; if (!gramSizeSet) { // try to detect the shingle size if (shingleFilterFactory != null) { diff --git a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java index b4c0d96be18..2942ce2e527 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/suggest/SuggestSearchTests.java @@ -740,6 +740,8 @@ public class SuggestSearchTests extends AbstractNodesTests { builder.putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase"); builder.put("index.analysis.analyzer.ngram.tokenizer", "standard"); builder.putArray("index.analysis.analyzer.ngram.filter", "my_shingle2", "lowercase"); + builder.put("index.analysis.analyzer.myDefAnalyzer.tokenizer", "standard"); + builder.putArray("index.analysis.analyzer.myDefAnalyzer.filter", "shingle", "lowercase"); builder.put("index.analysis.filter.my_shingle.type", "shingle"); builder.put("index.analysis.filter.my_shingle.output_unigrams", false); builder.put("index.analysis.filter.my_shingle.min_shingle_size", 2); @@ -833,6 +835,23 @@ public class SuggestSearchTests extends AbstractNodesTests { SearchResponse search = client.prepareSearch() .setSearchType(SearchType.COUNT) .setSuggestText("Xor the Got-Jewel") + .addSuggestion( + phraseSuggestion("simple_phrase").maxErrors(0.5f).field("ngram").analyzer("myDefAnalyzer") + .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always")) + .size(1)).execute().actionGet(); + + assertThat(Arrays.toString(search.getShardFailures()), search.getFailedShards(), equalTo(0)); + assertThat(search.getSuggest(), notNullValue()); + assertThat(search.getSuggest().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getName(), equalTo("simple_phrase")); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel")); + assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel")); + + search = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setSuggestText("Xor the Got-Jewel") .addSuggestion( phraseSuggestion("simple_phrase").maxErrors(0.5f).field("ngram") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always")) @@ -846,6 +865,7 @@ public class SuggestSearchTests extends AbstractNodesTests { assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().size(), equalTo(1)); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel")); assertThat(search.getSuggest().getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getText().string(), equalTo("xorr the god jewel")); + }