Upgrade to lucene-8.0.0-snapshot-67cdd21996 (#35816)
This commit is contained in:
parent
96a741f2c2
commit
e37a0ef844
|
@ -1,5 +1,5 @@
|
|||
elasticsearch = 7.0.0
|
||||
lucene = 8.0.0-snapshot-6d9c714052
|
||||
lucene = 8.0.0-snapshot-67cdd21996
|
||||
|
||||
# optional dependencies
|
||||
spatial4j = 0.7
|
||||
|
|
|
@ -26,14 +26,14 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for ASCIIFoldingFilter.
|
||||
*/
|
||||
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
|
||||
implements MultiTermAwareComponent {
|
||||
implements NormalizingTokenFilterFactory {
|
||||
|
||||
public static final ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
|
||||
public static final boolean DEFAULT_PRESERVE_ORIGINAL = false;
|
||||
|
||||
|
@ -51,21 +51,8 @@ public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
if (preserveOriginal == false) {
|
||||
return this;
|
||||
} else {
|
||||
// See https://issues.apache.org/jira/browse/LUCENE-7536 for the reasoning
|
||||
return new TokenFilterFactory() {
|
||||
@Override
|
||||
public String name() {
|
||||
return ASCIIFoldingTokenFilterFactory.this.name();
|
||||
}
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new ASCIIFoldingFilter(tokenStream, false);
|
||||
}
|
||||
};
|
||||
}
|
||||
public TokenStream normalize(TokenStream tokenStream) {
|
||||
// Normalization should only emit a single token, so always turn off preserveOriginal
|
||||
return new ASCIIFoldingFilter(tokenStream, false);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,9 +24,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -37,8 +37,4 @@ public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory
|
|||
return new ArabicNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link BengaliNormalizationFilter}
|
||||
*/
|
||||
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
BengaliNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactor
|
|||
return new BengaliNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
CJKWidthFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -38,9 +38,4 @@ public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory impl
|
|||
return new CJKWidthFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.analysis.common;
|
|||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
|
@ -492,35 +491,26 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
|
|||
@Override
|
||||
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
|
||||
List<PreConfiguredTokenizer> tokenizers = new ArrayList<>();
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("edge_ngram",
|
||||
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1), null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new, null));
|
||||
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
|
||||
// TODO deprecate and remove in API
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new, () -> new TokenFilterFactory() {
|
||||
@Override
|
||||
public String name() {
|
||||
return "lowercase";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TokenStream create(TokenStream tokenStream) {
|
||||
return new LowerCaseFilter(tokenStream);
|
||||
}
|
||||
}));
|
||||
// This is already broken with normalization, so backwards compat isn't necessary?
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
|
||||
|
||||
// Temporary shim for aliases. TODO deprecate after they are moved
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new, null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("edgeNGram",
|
||||
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new, null));
|
||||
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
|
||||
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
|
||||
|
||||
return tokenizers;
|
||||
}
|
||||
|
|
|
@ -25,12 +25,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link DecimalDigitFilter}
|
||||
*/
|
||||
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -41,8 +41,4 @@ public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory
|
|||
return new DecimalDigitFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,9 +27,9 @@ import org.elasticsearch.env.Environment;
|
|||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.Analysis;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
private final CharArraySet articles;
|
||||
|
||||
|
@ -43,8 +43,4 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem
|
|||
return new ElisionFilter(tokenStream, articles);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link GermanNormalizationFilter}
|
||||
*/
|
||||
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
GermanNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory
|
|||
return new GermanNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link HindiNormalizationFilter}
|
||||
*/
|
||||
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
HindiNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory
|
|||
return new HindiNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link IndicNormalizationFilter}
|
||||
*/
|
||||
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
IndicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory
|
|||
return new IndicNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link LowerCaseFilter} and some language-specific variants
|
||||
|
@ -39,7 +39,7 @@ import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
|||
* <li>turkish: {@link TurkishLowerCaseFilter}
|
||||
* </ul>
|
||||
*/
|
||||
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
private final String lang;
|
||||
|
||||
|
@ -63,10 +63,6 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -26,14 +26,14 @@ import org.elasticsearch.env.Environment;
|
|||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.Analysis;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
|
||||
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
|
||||
|
||||
private final NormalizeCharMap normMap;
|
||||
|
||||
|
@ -118,8 +118,4 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
|
|||
return new String(out, 0, writePos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,9 +18,6 @@
|
|||
*/
|
||||
package org.elasticsearch.analysis.common;
|
||||
|
||||
import java.io.Reader;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
|
@ -28,9 +25,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
|
||||
|
||||
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
|
||||
import java.io.Reader;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
|
||||
|
||||
private final Pattern pattern;
|
||||
private final String replacement;
|
||||
|
@ -59,8 +59,4 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
|
|||
return new PatternReplaceCharFilter(pattern, replacement, tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,9 +24,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
PersianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -37,8 +37,4 @@ public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactor
|
|||
return new PersianNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link ScandinavianFoldingFilter}
|
||||
*/
|
||||
public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
ScandinavianFoldingFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory
|
|||
return new ScandinavianFoldingFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link ScandinavianNormalizationFilter}
|
||||
*/
|
||||
public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
ScandinavianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,8 +40,4 @@ public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterF
|
|||
return new ScandinavianNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
SerbianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -38,8 +38,4 @@ public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactor
|
|||
return new SerbianNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
/**
|
||||
* Factory for {@link SoraniNormalizationFilter}
|
||||
*/
|
||||
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
public SoraniNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -40,9 +40,4 @@ public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory
|
|||
return new SoraniNormalizationFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
|
||||
|
||||
|
@ -43,8 +43,4 @@ public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implement
|
|||
return new TrimFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
|
||||
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
|
||||
super(indexSettings, name, settings);
|
||||
|
@ -38,10 +38,6 @@ public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
|
|||
return new UpperCaseFilter(tokenStream);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
|||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.ESTokenStreamTestCase;
|
||||
|
@ -64,11 +63,9 @@ public class ASCIIFoldingTokenFilterFactoryTests extends ESTokenStreamTestCase {
|
|||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||
|
||||
// but the multi-term aware component still emits a single token
|
||||
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter)
|
||||
.getMultiTermComponent();
|
||||
tokenizer = new WhitespaceTokenizer();
|
||||
tokenizer.setReader(new StringReader(source));
|
||||
expected = new String[]{"Anspruche"};
|
||||
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
|
||||
assertTokenStreamContents(tokenFilter.normalize(tokenizer), expected);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
65b85d26f4eb4d23b98aaeffc9b1054c23d0227b
|
|
@ -1 +0,0 @@
|
|||
8f76b85824b273fafa1e25610c3aff66b97b0dd1
|
|
@ -0,0 +1 @@
|
|||
2c31180c0afaf7ce10244175c68a9189e57b456b
|
|
@ -1 +0,0 @@
|
|||
ee5e4e4341fdde3978b01945bbfaac72a200fa04
|
|
@ -41,7 +41,7 @@ import org.elasticsearch.index.IndexSettings;
|
|||
*
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
/** Store here the same Normalizer used by the lucene ICUFoldingFilter */
|
||||
private static final Normalizer2 ICU_FOLDING_NORMALIZER = Normalizer2.getInstance(
|
||||
ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), "utr30", Normalizer2.Mode.COMPOSE);
|
||||
|
@ -58,8 +58,4 @@ public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory imp
|
|||
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,7 +36,7 @@ import java.io.Reader;
|
|||
* <p>The {@code mode} can be used to provide 'compose' or 'decompose'. Default is compose.</p>
|
||||
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
|
||||
*/
|
||||
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
|
||||
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
|
||||
|
||||
private final Normalizer2 normalizer;
|
||||
|
||||
|
@ -57,8 +57,4 @@ public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory im
|
|||
return new ICUNormalizer2CharFilter(reader, normalizer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@ import org.elasticsearch.index.IndexSettings;
|
|||
* <p>The {@code name} can be used to provide the type of normalization to perform.</p>
|
||||
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
|
||||
*/
|
||||
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
private static final DeprecationLogger deprecationLogger =
|
||||
new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class));
|
||||
|
@ -56,11 +56,6 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
|
|||
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
|
||||
static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings,
|
||||
final Normalizer2 normalizer,
|
||||
final Settings settings) {
|
||||
|
|
|
@ -26,7 +26,7 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
|
||||
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
|
||||
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
|
||||
|
||||
private final String id;
|
||||
private final int dir;
|
||||
|
@ -45,8 +45,4 @@ public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory i
|
|||
return new ICUTransformFilter(tokenStream, transliterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
d39dee7d510aecb9437a1e438ec19cf4398d8792
|
|
@ -1 +0,0 @@
|
|||
34dfcdd2e37b62ad01a8bb4fbda66ea6bf513c28
|
|
@ -26,7 +26,7 @@ import org.elasticsearch.index.IndexSettings;
|
|||
|
||||
import java.io.Reader;
|
||||
|
||||
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
|
||||
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
|
||||
|
||||
private final boolean normalizeKanji;
|
||||
private final boolean normalizeKana;
|
||||
|
@ -42,8 +42,4 @@ public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFa
|
|||
return new JapaneseIterationMarkCharFilter(reader, normalizeKanji, normalizeKana);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
1f3ce32163fbf344f82d18b61715dc0891c22e00
|
|
@ -1 +0,0 @@
|
|||
25f02c3dfee4efbfe74d87558a6bdd0ea8389e12
|
|
@ -0,0 +1 @@
|
|||
6d378fb5b5a904cd3e3a1b1f3bab8b7c5cbc9d85
|
|
@ -1 +0,0 @@
|
|||
1023375e89d6340a93c2409c726a881752eb4ac1
|
|
@ -0,0 +1 @@
|
|||
df4957389f85da32b553dd901f30767879a507f2
|
|
@ -1 +0,0 @@
|
|||
70e598154fb5cb3dced5e82de4afcde2009f1755
|
|
@ -0,0 +1 @@
|
|||
210ea4e9423e03cd3f6ea9b8e81cab727101d3cb
|
|
@ -1 +0,0 @@
|
|||
e8b4634d426efee1515fc289b4ad67d1c714d14d
|
|
@ -0,0 +1 @@
|
|||
3c345959ae03ae458be1590c2ac782b2a621abb2
|
|
@ -1 +0,0 @@
|
|||
9f53e03113ca04c337d678126acf025cfeccff6e
|
|
@ -0,0 +1 @@
|
|||
1e557f096cd55fd1f20104b1fb4c0d0095e03fd2
|
|
@ -1 +0,0 @@
|
|||
ee88dcf4ea69de2a13df7b76d5524e8fd442f243
|
|
@ -0,0 +1 @@
|
|||
77c1844fd0b17e26fb4facb94f6140e98a6bbd49
|
|
@ -1 +0,0 @@
|
|||
ec090fd8bd804775aa128ccb20467b062b72d625
|
|
@ -0,0 +1 @@
|
|||
20b559db91bda12f7b242c516915aad26e654baa
|
|
@ -1 +0,0 @@
|
|||
0bba71a2e8bfd1c15db407ff06ee4185a091d5ec
|
|
@ -0,0 +1 @@
|
|||
24e4eb6703be36c910bd0d7e3f060259602131b8
|
|
@ -1 +0,0 @@
|
|||
fcee5b1586f7c695c65863ca9ee3a8ebe99c3242
|
|
@ -0,0 +1 @@
|
|||
1a9acefd0d7a9348f62fb0ea307853fe06cebc63
|
|
@ -1 +0,0 @@
|
|||
0a26a4870e9fddae497be6899fe9a0a2d3002294
|
|
@ -0,0 +1 @@
|
|||
941fa34281837c5d2a62d67657618b4d6e92c6d7
|
|
@ -1 +0,0 @@
|
|||
700722c50f8bfcb2d1773b50f43519603961d0ce
|
|
@ -0,0 +1 @@
|
|||
eb78318f2a76b2013857ba72e0ddc42141bad36e
|
|
@ -1 +0,0 @@
|
|||
9c9657903e4ade7773aaaf76f19d96e2a936e42d
|
|
@ -0,0 +1 @@
|
|||
ce90ede863c08726d7ae70f9f15443f122674d89
|
|
@ -1 +0,0 @@
|
|||
58ce1753cc41dfe445423c4cee42c129576a2ca2
|
|
@ -0,0 +1 @@
|
|||
e3b889834b8b43f3c5b718ee0b1b2fd198aa9467
|
|
@ -1 +0,0 @@
|
|||
bf1ee7b66f6e6349624d8760c00669480460a55d
|
|
@ -0,0 +1 @@
|
|||
f4c6c02a0834d582a918c895a715a74f40195297
|
|
@ -1 +0,0 @@
|
|||
2ed20db0ccc53f966cc211aeb3b623dcf69d2cca
|
|
@ -0,0 +1 @@
|
|||
7ed65e999af74d9356180c91176bcf0bcdf80b6a
|
|
@ -1 +0,0 @@
|
|||
e06d99480f44eede9302fb7dda3c62f3e8ff68e1
|
|
@ -0,0 +1 @@
|
|||
28a64cb272639b610064291e726f2a1792c224f2
|
|
@ -1 +0,0 @@
|
|||
64ff3b354c21fc371cfeef208158af92cdf93316
|
|
@ -0,0 +1 @@
|
|||
6af61d6e2d22be8cf0d7afb42ea61e73a59e6708
|
|
@ -1 +0,0 @@
|
|||
2dffc0dec40028ca958a0a2fdf0628fd8e8354d0
|
|
@ -0,0 +1 @@
|
|||
7e7d3d4c5b7a3a4a065db5c7e4a22d75c11191ff
|
|
@ -1 +0,0 @@
|
|||
d0ed3d77875bab18abe45706ec8b5d441cf46bdc
|
|
@ -0,0 +1 @@
|
|||
b2993443ae730960c22a2c9050f58d943fb8797c
|
|
@ -1 +0,0 @@
|
|||
8bb05a98bb9c2615ad1262980dd6b07802bafa1d
|
|
@ -108,7 +108,7 @@ public final class BinaryDocValuesRangeQuery extends Query {
|
|||
return 4; // at most 4 comparisons
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), iterator);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, iterator);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -92,7 +92,7 @@ public final class MinDocQuery extends Query {
|
|||
}
|
||||
final int segmentMinDoc = Math.max(0, minDoc - context.docBase);
|
||||
final DocIdSetIterator disi = new MinDocIterator(segmentMinDoc, maxDoc);
|
||||
return new ConstantScoreScorer(this, score(), disi);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, disi);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -87,7 +87,7 @@ public class SearchAfterSortedDocQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
final DocIdSetIterator disi = new MinDocQuery.MinDocIterator(firstDoc, maxDoc);
|
||||
return new ConstantScoreScorer(this, score(), disi);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, disi);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.core.internal.io.IOUtils;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.support.ActionFilters;
|
||||
|
@ -42,6 +41,7 @@ import org.elasticsearch.common.UUIDs;
|
|||
import org.elasticsearch.common.collect.Tuple;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.core.internal.io.IOUtils;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
|
@ -49,7 +49,8 @@ import org.elasticsearch.index.analysis.AnalysisRegistry;
|
|||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.CustomAnalyzer;
|
||||
import org.elasticsearch.index.analysis.IndexAnalyzers;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
|
||||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
import org.elasticsearch.index.analysis.TokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.TokenizerFactory;
|
||||
|
@ -575,11 +576,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
if (normalizer) {
|
||||
if (charFilterFactory instanceof MultiTermAwareComponent == false) {
|
||||
if (charFilterFactory instanceof NormalizingCharFilterFactory == false) {
|
||||
throw new IllegalArgumentException("Custom normalizer may not use char filter ["
|
||||
+ charFilterFactory.name() + "]");
|
||||
}
|
||||
charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
|
||||
}
|
||||
charFilterFactoryList.add(charFilterFactory);
|
||||
}
|
||||
|
@ -677,11 +677,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
if (normalizer) {
|
||||
if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
|
||||
if (tokenFilterFactory instanceof NormalizingTokenFilterFactory == false) {
|
||||
throw new IllegalArgumentException("Custom normalizer may not use filter ["
|
||||
+ tokenFilterFactory.name() + "]");
|
||||
}
|
||||
tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
|
||||
}
|
||||
tokenFilterFactoryList.add(tokenFilterFactory);
|
||||
}
|
||||
|
|
|
@ -26,4 +26,8 @@ public interface CharFilterFactory {
|
|||
String name();
|
||||
|
||||
Reader create(Reader reader);
|
||||
|
||||
default Reader normalize(Reader reader) {
|
||||
return reader;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -107,10 +107,7 @@ public final class CustomAnalyzer extends Analyzer {
|
|||
@Override
|
||||
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
|
||||
for (CharFilterFactory charFilter : charFilters) {
|
||||
if (charFilter instanceof MultiTermAwareComponent) {
|
||||
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
|
||||
reader = charFilter.create(reader);
|
||||
}
|
||||
reader = charFilter.normalize(reader);
|
||||
}
|
||||
return reader;
|
||||
}
|
||||
|
@ -119,10 +116,7 @@ public final class CustomAnalyzer extends Analyzer {
|
|||
protected TokenStream normalize(String fieldName, TokenStream in) {
|
||||
TokenStream result = in;
|
||||
for (TokenFilterFactory filter : tokenFilters) {
|
||||
if (filter instanceof MultiTermAwareComponent) {
|
||||
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
|
||||
result = filter.create(result);
|
||||
}
|
||||
result = filter.normalize(result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -57,11 +57,10 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
|
|||
throw new IllegalArgumentException("Custom normalizer [" + name() + "] failed to find char_filter under name ["
|
||||
+ charFilterName + "]");
|
||||
}
|
||||
if (charFilter instanceof MultiTermAwareComponent == false) {
|
||||
if (charFilter instanceof NormalizingCharFilterFactory == false) {
|
||||
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use char filter ["
|
||||
+ charFilterName + "]");
|
||||
}
|
||||
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
|
||||
charFiltersList.add(charFilter);
|
||||
}
|
||||
|
||||
|
@ -73,10 +72,9 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
|
|||
throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name ["
|
||||
+ tokenFilterName + "]");
|
||||
}
|
||||
if (tokenFilter instanceof MultiTermAwareComponent == false) {
|
||||
if (tokenFilter instanceof NormalizingTokenFilterFactory == false) {
|
||||
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use filter [" + tokenFilterName + "]");
|
||||
}
|
||||
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter).getMultiTermComponent();
|
||||
tokenFilterList.add(tokenFilter);
|
||||
}
|
||||
|
||||
|
|
|
@ -19,12 +19,19 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
/** Elasticsearch counterpart of {@link org.apache.lucene.analysis.util.MultiTermAwareComponent}. */
|
||||
public interface MultiTermAwareComponent {
|
||||
import java.io.Reader;
|
||||
|
||||
/** Returns an analysis component to handle analysis of multi-term queries.
|
||||
* The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
|
||||
*/
|
||||
Object getMultiTermComponent();
|
||||
/**
|
||||
* A CharFilterFactory that also supports normalization
|
||||
*
|
||||
* The default implementation of {@link #normalize(Reader)} delegates to
|
||||
* {@link #create(Reader)}
|
||||
*/
|
||||
public interface NormalizingCharFilterFactory extends CharFilterFactory {
|
||||
|
||||
@Override
|
||||
default Reader normalize(Reader reader) {
|
||||
return create(reader);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
/**
|
||||
* A TokenFilterFactory that may be used for normalization
|
||||
*
|
||||
* The default implementation delegates {@link #normalize(TokenStream)} to
|
||||
* {@link #create(TokenStream)}.
|
||||
*/
|
||||
public interface NormalizingTokenFilterFactory extends TokenFilterFactory {
|
||||
|
||||
@Override
|
||||
default TokenStream normalize(TokenStream tokenStream) {
|
||||
return create(tokenStream);
|
||||
}
|
||||
|
||||
}
|
|
@ -83,12 +83,10 @@ public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<Char
|
|||
return useFilterForMultitermQueries;
|
||||
}
|
||||
|
||||
private interface MultiTermAwareCharFilterFactory extends CharFilterFactory, MultiTermAwareComponent {}
|
||||
|
||||
@Override
|
||||
protected CharFilterFactory create(Version version) {
|
||||
if (useFilterForMultitermQueries) {
|
||||
return new MultiTermAwareCharFilterFactory() {
|
||||
return new NormalizingCharFilterFactory() {
|
||||
@Override
|
||||
public String name() {
|
||||
return getName();
|
||||
|
@ -98,11 +96,6 @@ public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<Char
|
|||
public Reader create(Reader reader) {
|
||||
return create.apply(reader, version);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
};
|
||||
}
|
||||
return new CharFilterFactory() {
|
||||
|
|
|
@ -84,12 +84,16 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
|
|||
return useFilterForMultitermQueries;
|
||||
}
|
||||
|
||||
private interface MultiTermAwareTokenFilterFactory extends TokenFilterFactory, MultiTermAwareComponent {}
|
||||
|
||||
@Override
|
||||
protected TokenFilterFactory create(Version version) {
|
||||
if (useFilterForMultitermQueries) {
|
||||
return new MultiTermAwareTokenFilterFactory() {
|
||||
return new NormalizingTokenFilterFactory() {
|
||||
|
||||
@Override
|
||||
public TokenStream normalize(TokenStream tokenStream) {
|
||||
return create.apply(tokenStream, version);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return getName();
|
||||
|
@ -100,10 +104,6 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
|
|||
return create.apply(tokenStream, version);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
};
|
||||
}
|
||||
return new TokenFilterFactory() {
|
||||
|
|
|
@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
|
|||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
|
||||
|
||||
|
@ -37,77 +36,40 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent
|
|||
*
|
||||
* @param name the name of the tokenizer in the api
|
||||
* @param create builds the tokenizer
|
||||
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
|
||||
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
|
||||
*/
|
||||
public static PreConfiguredTokenizer singleton(String name, Supplier<Tokenizer> create,
|
||||
@Nullable Supplier<TokenFilterFactory> multiTermComponent) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.ONE, version -> create.get(),
|
||||
multiTermComponent == null ? null : version -> multiTermComponent.get());
|
||||
public static PreConfiguredTokenizer singleton(String name, Supplier<Tokenizer> create) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.ONE, version -> create.get());
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a pre-configured tokenizer that may vary based on the Lucene version.
|
||||
*
|
||||
*
|
||||
* @param name the name of the tokenizer in the api
|
||||
* @param create builds the tokenizer
|
||||
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
|
||||
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
|
||||
*/
|
||||
public static PreConfiguredTokenizer luceneVersion(String name, Function<org.apache.lucene.util.Version, Tokenizer> create,
|
||||
@Nullable Function<org.apache.lucene.util.Version, TokenFilterFactory> multiTermComponent) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.LUCENE, version -> create.apply(version.luceneVersion),
|
||||
multiTermComponent == null ? null : version -> multiTermComponent.apply(version.luceneVersion));
|
||||
public static PreConfiguredTokenizer luceneVersion(String name, Function<org.apache.lucene.util.Version, Tokenizer> create) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.LUCENE, version -> create.apply(version.luceneVersion));
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a pre-configured tokenizer that may vary based on the Elasticsearch version.
|
||||
*
|
||||
*
|
||||
* @param name the name of the tokenizer in the api
|
||||
* @param create builds the tokenizer
|
||||
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
|
||||
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
|
||||
*/
|
||||
public static PreConfiguredTokenizer elasticsearchVersion(String name, Function<org.elasticsearch.Version, Tokenizer> create,
|
||||
@Nullable Function<Version, TokenFilterFactory> multiTermComponent) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create, multiTermComponent);
|
||||
public static PreConfiguredTokenizer elasticsearchVersion(String name, Function<org.elasticsearch.Version, Tokenizer> create) {
|
||||
return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create);
|
||||
}
|
||||
|
||||
private final Function<Version, Tokenizer> create;
|
||||
private final Function<Version, TokenFilterFactory> multiTermComponent;
|
||||
|
||||
private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Tokenizer> create,
|
||||
@Nullable Function<Version, TokenFilterFactory> multiTermComponent) {
|
||||
private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Tokenizer> create) {
|
||||
super(name, cache);
|
||||
this.create = create;
|
||||
this.multiTermComponent = multiTermComponent;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this tokenizer have an equivalent component for analyzing multi-term queries?
|
||||
*/
|
||||
public boolean hasMultiTermComponent() {
|
||||
return multiTermComponent != null;
|
||||
}
|
||||
|
||||
private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {}
|
||||
|
||||
@Override
|
||||
protected TokenizerFactory create(Version version) {
|
||||
if (multiTermComponent != null) {
|
||||
return new MultiTermAwareTokenizerFactory() {
|
||||
@Override
|
||||
public Tokenizer create() {
|
||||
return create.apply(version);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return multiTermComponent.apply(version);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
return () -> create.apply(version);
|
||||
}
|
||||
return () -> create.apply(version);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,6 +31,15 @@ public interface TokenFilterFactory {
|
|||
|
||||
TokenStream create(TokenStream tokenStream);
|
||||
|
||||
/**
|
||||
* Normalize a tokenStream for use in multi-term queries
|
||||
*
|
||||
* The default implementation is a no-op
|
||||
*/
|
||||
default TokenStream normalize(TokenStream tokenStream) {
|
||||
return tokenStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this analyzer mess up the {@link OffsetAttribute}s in such a way as to break the
|
||||
* {@link FastVectorHighlighter}? If this is {@code true} then the
|
||||
|
|
|
@ -191,7 +191,7 @@ public class ScriptQueryBuilder extends AbstractQueryBuilder<ScriptQueryBuilder>
|
|||
return 1000f;
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -114,7 +114,7 @@ final class ShardSplittingQuery extends Query {
|
|||
TwoPhaseIterator twoPhaseIterator =
|
||||
parentBitSet == null ? new RoutingPartitionedDocIdSetIterator(visitor) :
|
||||
new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
|
||||
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhaseIterator);
|
||||
} else {
|
||||
// here we potentially guard the docID consumers with our parent bitset if we have one.
|
||||
// this ensures that we are only marking root documents in the nested case and if necessary
|
||||
|
@ -155,7 +155,7 @@ final class ShardSplittingQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bitSet, bitSet.length()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -212,8 +212,7 @@ public final class AnalysisModule {
|
|||
PreConfiguredTokenizer preConfigured;
|
||||
switch (tokenizer.getCachingStrategy()) {
|
||||
case ONE:
|
||||
preConfigured = PreConfiguredTokenizer.singleton(name,
|
||||
() -> tokenizer.create(Version.CURRENT), null);
|
||||
preConfigured = PreConfiguredTokenizer.singleton(name, () -> tokenizer.create(Version.CURRENT));
|
||||
break;
|
||||
default:
|
||||
throw new UnsupportedOperationException(
|
||||
|
|
|
@ -75,7 +75,7 @@ public final class DocValuesSliceQuery extends SliceQuery {
|
|||
return 10;
|
||||
}
|
||||
};
|
||||
return new ConstantScoreScorer(this, score(), twoPhase);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -62,7 +62,7 @@ public final class TermsSliceQuery extends SliceQuery {
|
|||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
final DocIdSet disi = build(context.reader());
|
||||
final DocIdSetIterator leafIt = disi.iterator();
|
||||
return new ConstantScoreScorer(this, score(), leafIt);
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, leafIt);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -98,7 +98,7 @@ public class GetTermVectorsTests extends ESSingleNodeTestCase {
|
|||
@Override
|
||||
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
|
||||
return Collections.singletonList(PreConfiguredTokenizer.singleton("mock-whitespace",
|
||||
() -> new MockTokenizer(MockTokenizer.WHITESPACE, false), null));
|
||||
() -> new MockTokenizer(MockTokenizer.WHITESPACE, false)));
|
||||
}
|
||||
|
||||
// Based on DelimitedPayloadTokenFilter:
|
||||
|
|
|
@ -137,7 +137,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
|
|||
@Override
|
||||
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
|
||||
return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> {
|
||||
class Factory implements CharFilterFactory, MultiTermAwareComponent {
|
||||
class Factory implements NormalizingCharFilterFactory {
|
||||
@Override
|
||||
public String name() {
|
||||
return name;
|
||||
|
@ -162,10 +162,6 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
|
|||
}
|
||||
};
|
||||
}
|
||||
@Override
|
||||
public Object getMultiTermComponent() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
return new Factory();
|
||||
});
|
||||
|
|
|
@ -78,7 +78,7 @@ public class IndicesQueryCacheTests extends ESTestCase {
|
|||
return new ConstantScoreWeight(this, boost) {
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
|
||||
return new ConstantScoreScorer(this, score(), scoreMode, DocIdSetIterator.all(context.reader().maxDoc()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -371,9 +371,6 @@ public class AnalysisModuleTests extends ESTestCase {
|
|||
* and that do not vary based on version at all.
|
||||
*/
|
||||
public void testPluginPreConfiguredTokenizers() throws IOException {
|
||||
boolean noVersionSupportsMultiTerm = randomBoolean();
|
||||
boolean luceneVersionSupportsMultiTerm = randomBoolean();
|
||||
boolean elasticsearchVersionSupportsMultiTerm = randomBoolean();
|
||||
|
||||
// Simple tokenizer that always spits out a single token with some preconfigured characters
|
||||
final class FixedTokenizer extends Tokenizer {
|
||||
|
@ -409,16 +406,11 @@ public class AnalysisModuleTests extends ESTestCase {
|
|||
@Override
|
||||
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
|
||||
return Arrays.asList(
|
||||
PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version"),
|
||||
noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null),
|
||||
PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version")),
|
||||
PreConfiguredTokenizer.luceneVersion("lucene_version",
|
||||
luceneVersion -> new FixedTokenizer(luceneVersion.toString()),
|
||||
luceneVersionSupportsMultiTerm ?
|
||||
luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null),
|
||||
luceneVersion -> new FixedTokenizer(luceneVersion.toString())),
|
||||
PreConfiguredTokenizer.elasticsearchVersion("elasticsearch_version",
|
||||
esVersion -> new FixedTokenizer(esVersion.toString()),
|
||||
elasticsearchVersionSupportsMultiTerm ?
|
||||
esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null)
|
||||
esVersion -> new FixedTokenizer(esVersion.toString()))
|
||||
);
|
||||
}
|
||||
})).getAnalysisRegistry();
|
||||
|
|
|
@ -19,24 +19,17 @@
|
|||
|
||||
package org.elasticsearch.indices.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.elasticsearch.common.collect.MapBuilder;
|
||||
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
|
||||
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
|
||||
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
|
||||
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
|
||||
import org.elasticsearch.plugins.AnalysisPlugin;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
@ -46,9 +39,6 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
import static java.util.Collections.emptyMap;
|
||||
import static java.util.Collections.singletonList;
|
||||
import static org.hamcrest.Matchers.empty;
|
||||
import static org.hamcrest.Matchers.typeCompatibleWith;
|
||||
|
||||
/**
|
||||
* Alerts us if new analysis components are added to Lucene, so we don't miss them.
|
||||
|
@ -308,142 +298,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
|
|||
assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
|
||||
}
|
||||
|
||||
public void testMultiTermAware() {
|
||||
Collection<Class<?>> expected = new HashSet<>();
|
||||
for (Map.Entry<String, Class<?>> entry : getTokenizers().entrySet()) {
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
|
||||
org.apache.lucene.analysis.util.TokenizerFactory.lookupClass(entry.getKey()))) {
|
||||
expected.add(entry.getValue());
|
||||
}
|
||||
}
|
||||
for (Map.Entry<String, Class<?>> entry : getTokenFilters().entrySet()) {
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
|
||||
org.apache.lucene.analysis.util.TokenFilterFactory.lookupClass(entry.getKey()))) {
|
||||
expected.add(entry.getValue());
|
||||
}
|
||||
}
|
||||
for (Map.Entry<String, Class<?>> entry : getCharFilters().entrySet()) {
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
|
||||
org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(entry.getKey()))) {
|
||||
expected.add(entry.getValue());
|
||||
}
|
||||
}
|
||||
expected.remove(Void.class);
|
||||
expected.remove(MovedToAnalysisCommon.class);
|
||||
expected.remove(Deprecated.class);
|
||||
|
||||
Collection<Class<?>> actual = new HashSet<>();
|
||||
for (Class<?> clazz : getTokenizers().values()) {
|
||||
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
|
||||
actual.add(clazz);
|
||||
}
|
||||
}
|
||||
for (Class<?> clazz : getTokenFilters().values()) {
|
||||
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
|
||||
actual.add(clazz);
|
||||
}
|
||||
}
|
||||
for (Class<?> clazz : getCharFilters().values()) {
|
||||
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
|
||||
actual.add(clazz);
|
||||
}
|
||||
}
|
||||
|
||||
Set<Class<?>> classesMissingMultiTermSupport = new HashSet<>(expected);
|
||||
classesMissingMultiTermSupport.removeAll(actual);
|
||||
assertTrue("Classes are missing multi-term support: " + classesMissingMultiTermSupport,
|
||||
classesMissingMultiTermSupport.isEmpty());
|
||||
|
||||
Set<Class<?>> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
|
||||
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
|
||||
assertTrue("Classes should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
|
||||
classesThatShouldNotHaveMultiTermSupport.isEmpty());
|
||||
}
|
||||
|
||||
public void testPreBuiltMultiTermAware() {
|
||||
Collection<Object> expected = new HashSet<>();
|
||||
Collection<Object> actual = new HashSet<>();
|
||||
|
||||
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
|
||||
new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
|
||||
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
|
||||
String name = entry.getKey();
|
||||
Class<?> luceneFactory = entry.getValue();
|
||||
PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
|
||||
assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
|
||||
if (luceneFactory == Void.class) {
|
||||
continue;
|
||||
}
|
||||
if (luceneFactory == null) {
|
||||
luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
|
||||
}
|
||||
assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
|
||||
if (filter.shouldUseFilterForMultitermQueries()) {
|
||||
actual.add("token filter [" + name + "]");
|
||||
}
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
|
||||
expected.add("token filter [" + name + "]");
|
||||
}
|
||||
}
|
||||
assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
|
||||
|
||||
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
|
||||
AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
|
||||
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
|
||||
String name = entry.getKey();
|
||||
Class<?> luceneFactory = entry.getValue();
|
||||
PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
|
||||
assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
|
||||
if (luceneFactory == Void.class) {
|
||||
continue;
|
||||
}
|
||||
if (luceneFactory == null) {
|
||||
luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
|
||||
}
|
||||
assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
|
||||
if (tokenizer.hasMultiTermComponent()) {
|
||||
actual.add(tokenizer);
|
||||
}
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
|
||||
expected.add(tokenizer);
|
||||
}
|
||||
}
|
||||
assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
|
||||
|
||||
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
|
||||
AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
|
||||
for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
|
||||
String name = entry.getKey();
|
||||
Class<?> luceneFactory = entry.getValue();
|
||||
PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
|
||||
assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
|
||||
if (luceneFactory == Void.class) {
|
||||
continue;
|
||||
}
|
||||
if (luceneFactory == null) {
|
||||
luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
|
||||
}
|
||||
assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
|
||||
if (filter.shouldUseFilterForMultitermQueries()) {
|
||||
actual.add(filter);
|
||||
}
|
||||
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
|
||||
expected.add("token filter [" + name + "]");
|
||||
}
|
||||
}
|
||||
assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
|
||||
|
||||
Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
|
||||
classesMissingMultiTermSupport.removeAll(actual);
|
||||
assertTrue("Pre-built components are missing multi-term support: " + classesMissingMultiTermSupport,
|
||||
classesMissingMultiTermSupport.isEmpty());
|
||||
|
||||
Set<Object> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
|
||||
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
|
||||
assertTrue("Pre-built components should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
|
||||
classesThatShouldNotHaveMultiTermSupport.isEmpty());
|
||||
}
|
||||
|
||||
/**
|
||||
* Marker class for components that have moved to the analysis-common modules. This will be
|
||||
* removed when the module is complete and these analysis components aren't available to core.
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
20b559db91bda12f7b242c516915aad26e654baa
|
|
@ -1 +0,0 @@
|
|||
0bba71a2e8bfd1c15db407ff06ee4185a091d5ec
|
Loading…
Reference in New Issue