Upgrade to lucene-8.0.0-snapshot-67cdd21996 (#35816)

This commit is contained in:
Jim Ferenczi 2018-11-22 15:42:59 +01:00 committed by GitHub
parent 96a741f2c2
commit e37a0ef844
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
98 changed files with 196 additions and 473 deletions

View File

@ -1,5 +1,5 @@
elasticsearch = 7.0.0
lucene = 8.0.0-snapshot-6d9c714052
lucene = 8.0.0-snapshot-67cdd21996
# optional dependencies
spatial4j = 0.7

View File

@ -26,14 +26,14 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for ASCIIFoldingFilter.
*/
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
implements MultiTermAwareComponent {
implements NormalizingTokenFilterFactory {
public static final ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
public static final boolean DEFAULT_PRESERVE_ORIGINAL = false;
@ -51,21 +51,8 @@ public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory
}
@Override
public Object getMultiTermComponent() {
if (preserveOriginal == false) {
return this;
} else {
// See https://issues.apache.org/jira/browse/LUCENE-7536 for the reasoning
return new TokenFilterFactory() {
@Override
public String name() {
return ASCIIFoldingTokenFilterFactory.this.name();
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new ASCIIFoldingFilter(tokenStream, false);
}
};
}
public TokenStream normalize(TokenStream tokenStream) {
// Normalization should only emit a single token, so always turn off preserveOriginal
return new ASCIIFoldingFilter(tokenStream, false);
}
}

View File

@ -24,9 +24,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -37,8 +37,4 @@ public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory
return new ArabicNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link BengaliNormalizationFilter}
*/
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
BengaliNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class BengaliNormalizationFilterFactory extends AbstractTokenFilterFactor
return new BengaliNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
CJKWidthFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,9 +38,4 @@ public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory impl
return new CJKWidthFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -22,7 +22,6 @@ package org.elasticsearch.analysis.common;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
@ -492,35 +491,26 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
@Override
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
List<PreConfiguredTokenizer> tokenizers = new ArrayList<>();
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("keyword", KeywordTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("classic", ClassicTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("uax_url_email", UAX29URLEmailTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("path_hierarchy", PathHierarchyTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("letter", LetterTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("whitespace", WhitespaceTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("ngram", NGramTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("edge_ngram",
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1), null));
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new, null));
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
tokenizers.add(PreConfiguredTokenizer.singleton("pattern", () -> new PatternTokenizer(Regex.compile("\\W+", null), -1)));
tokenizers.add(PreConfiguredTokenizer.singleton("thai", ThaiTokenizer::new));
// TODO deprecate and remove in API
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new, () -> new TokenFilterFactory() {
@Override
public String name() {
return "lowercase";
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new LowerCaseFilter(tokenStream);
}
}));
// This is already broken with normalization, so backwards compat isn't necessary?
tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", XLowerCaseTokenizer::new));
// Temporary shim for aliases. TODO deprecate after they are moved
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new, null));
tokenizers.add(PreConfiguredTokenizer.singleton("nGram", NGramTokenizer::new));
tokenizers.add(PreConfiguredTokenizer.singleton("edgeNGram",
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE), null));
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new, null));
() -> new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE)));
tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new));
return tokenizers;
}

View File

@ -25,12 +25,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link DecimalDigitFilter}
*/
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
@ -41,8 +41,4 @@ public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory
return new DecimalDigitFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -27,9 +27,9 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private final CharArraySet articles;
@ -43,8 +43,4 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implem
return new ElisionFilter(tokenStream, articles);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link GermanNormalizationFilter}
*/
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
GermanNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory
return new GermanNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link HindiNormalizationFilter}
*/
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
HindiNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory
return new HindiNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link IndicNormalizationFilter}
*/
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
IndicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory
return new IndicNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -28,7 +28,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link LowerCaseFilter} and some language-specific variants
@ -39,7 +39,7 @@ import org.elasticsearch.index.analysis.MultiTermAwareComponent;
* <li>turkish: {@link TurkishLowerCaseFilter}
* </ul>
*/
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private final String lang;
@ -63,10 +63,6 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
}
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -26,14 +26,14 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import java.io.Reader;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
private final NormalizeCharMap normMap;
@ -118,8 +118,4 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory implemen
return new String(out, 0, writePos);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -18,9 +18,6 @@
*/
package org.elasticsearch.analysis.common;
import java.io.Reader;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.pattern.PatternReplaceCharFilter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.regex.Regex;
@ -28,9 +25,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractCharFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
import java.io.Reader;
import java.util.regex.Pattern;
public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
private final Pattern pattern;
private final String replacement;
@ -59,8 +59,4 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
return new PatternReplaceCharFilter(pattern, replacement, tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,9 +24,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
PersianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -37,8 +37,4 @@ public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactor
return new PersianNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link ScandinavianFoldingFilter}
*/
public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
ScandinavianFoldingFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class ScandinavianFoldingFilterFactory extends AbstractTokenFilterFactory
return new ScandinavianFoldingFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link ScandinavianNormalizationFilter}
*/
public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
ScandinavianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,8 +40,4 @@ public class ScandinavianNormalizationFilterFactory extends AbstractTokenFilterF
return new ScandinavianNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
SerbianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,8 +38,4 @@ public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactor
return new SerbianNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,12 +24,12 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
/**
* Factory for {@link SoraniNormalizationFilter}
*/
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
public SoraniNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -40,9 +40,4 @@ public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory
return new SoraniNormalizationFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private static final String UPDATE_OFFSETS_KEY = "update_offsets";
@ -43,8 +43,4 @@ public class TrimTokenFilterFactory extends AbstractTokenFilterFactory implement
return new TrimFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -25,9 +25,9 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
@ -38,10 +38,6 @@ public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory impl
return new UpperCaseFilter(tokenStream);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.AnalysisTestsHelper;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.ESTokenStreamTestCase;
@ -64,11 +63,9 @@ public class ASCIIFoldingTokenFilterFactoryTests extends ESTokenStreamTestCase {
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
// but the multi-term aware component still emits a single token
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter)
.getMultiTermComponent();
tokenizer = new WhitespaceTokenizer();
tokenizer.setReader(new StringReader(source));
expected = new String[]{"Anspruche"};
assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
assertTokenStreamContents(tokenFilter.normalize(tokenizer), expected);
}
}

View File

@ -0,0 +1 @@
65b85d26f4eb4d23b98aaeffc9b1054c23d0227b

View File

@ -1 +0,0 @@
8f76b85824b273fafa1e25610c3aff66b97b0dd1

View File

@ -0,0 +1 @@
2c31180c0afaf7ce10244175c68a9189e57b456b

View File

@ -1 +0,0 @@
ee5e4e4341fdde3978b01945bbfaac72a200fa04

View File

@ -41,7 +41,7 @@ import org.elasticsearch.index.IndexSettings;
*
* @author kimchy (shay.banon)
*/
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
/** Store here the same Normalizer used by the lucene ICUFoldingFilter */
private static final Normalizer2 ICU_FOLDING_NORMALIZER = Normalizer2.getInstance(
ICUFoldingFilter.class.getResourceAsStream("utr30.nrm"), "utr30", Normalizer2.Mode.COMPOSE);
@ -58,8 +58,4 @@ public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory imp
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -36,7 +36,7 @@ import java.io.Reader;
* <p>The {@code mode} can be used to provide 'compose' or 'decompose'. Default is compose.</p>
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
*/
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
private final Normalizer2 normalizer;
@ -57,8 +57,4 @@ public class IcuNormalizerCharFilterFactory extends AbstractCharFilterFactory im
return new ICUNormalizer2CharFilter(reader, normalizer);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -37,7 +37,7 @@ import org.elasticsearch.index.IndexSettings;
* <p>The {@code name} can be used to provide the type of normalization to perform.</p>
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UniCodeSet for filtering.</p>
*/
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private static final DeprecationLogger deprecationLogger =
new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class));
@ -56,11 +56,6 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
return new org.apache.lucene.analysis.icu.ICUNormalizer2Filter(tokenStream, normalizer);
}
@Override
public Object getMultiTermComponent() {
return this;
}
static Normalizer2 wrapWithUnicodeSetFilter(final IndexSettings indexSettings,
final Normalizer2 normalizer,
final Settings settings) {

View File

@ -26,7 +26,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
private final String id;
private final int dir;
@ -45,8 +45,4 @@ public class IcuTransformTokenFilterFactory extends AbstractTokenFilterFactory i
return new ICUTransformFilter(tokenStream, transliterator);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -0,0 +1 @@
d39dee7d510aecb9437a1e438ec19cf4398d8792

View File

@ -1 +0,0 @@
34dfcdd2e37b62ad01a8bb4fbda66ea6bf513c28

View File

@ -26,7 +26,7 @@ import org.elasticsearch.index.IndexSettings;
import java.io.Reader;
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {
public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFactory implements NormalizingCharFilterFactory {
private final boolean normalizeKanji;
private final boolean normalizeKana;
@ -42,8 +42,4 @@ public class KuromojiIterationMarkCharFilterFactory extends AbstractCharFilterFa
return new JapaneseIterationMarkCharFilter(reader, normalizeKanji, normalizeKana);
}
@Override
public Object getMultiTermComponent() {
return this;
}
}

View File

@ -0,0 +1 @@
1f3ce32163fbf344f82d18b61715dc0891c22e00

View File

@ -1 +0,0 @@
25f02c3dfee4efbfe74d87558a6bdd0ea8389e12

View File

@ -0,0 +1 @@
6d378fb5b5a904cd3e3a1b1f3bab8b7c5cbc9d85

View File

@ -1 +0,0 @@
1023375e89d6340a93c2409c726a881752eb4ac1

View File

@ -0,0 +1 @@
df4957389f85da32b553dd901f30767879a507f2

View File

@ -1 +0,0 @@
70e598154fb5cb3dced5e82de4afcde2009f1755

View File

@ -0,0 +1 @@
210ea4e9423e03cd3f6ea9b8e81cab727101d3cb

View File

@ -1 +0,0 @@
e8b4634d426efee1515fc289b4ad67d1c714d14d

View File

@ -0,0 +1 @@
3c345959ae03ae458be1590c2ac782b2a621abb2

View File

@ -1 +0,0 @@
9f53e03113ca04c337d678126acf025cfeccff6e

View File

@ -0,0 +1 @@
1e557f096cd55fd1f20104b1fb4c0d0095e03fd2

View File

@ -1 +0,0 @@
ee88dcf4ea69de2a13df7b76d5524e8fd442f243

View File

@ -0,0 +1 @@
77c1844fd0b17e26fb4facb94f6140e98a6bbd49

View File

@ -1 +0,0 @@
ec090fd8bd804775aa128ccb20467b062b72d625

View File

@ -0,0 +1 @@
20b559db91bda12f7b242c516915aad26e654baa

View File

@ -1 +0,0 @@
0bba71a2e8bfd1c15db407ff06ee4185a091d5ec

View File

@ -0,0 +1 @@
24e4eb6703be36c910bd0d7e3f060259602131b8

View File

@ -1 +0,0 @@
fcee5b1586f7c695c65863ca9ee3a8ebe99c3242

View File

@ -0,0 +1 @@
1a9acefd0d7a9348f62fb0ea307853fe06cebc63

View File

@ -1 +0,0 @@
0a26a4870e9fddae497be6899fe9a0a2d3002294

View File

@ -0,0 +1 @@
941fa34281837c5d2a62d67657618b4d6e92c6d7

View File

@ -1 +0,0 @@
700722c50f8bfcb2d1773b50f43519603961d0ce

View File

@ -0,0 +1 @@
eb78318f2a76b2013857ba72e0ddc42141bad36e

View File

@ -1 +0,0 @@
9c9657903e4ade7773aaaf76f19d96e2a936e42d

View File

@ -0,0 +1 @@
ce90ede863c08726d7ae70f9f15443f122674d89

View File

@ -1 +0,0 @@
58ce1753cc41dfe445423c4cee42c129576a2ca2

View File

@ -0,0 +1 @@
e3b889834b8b43f3c5b718ee0b1b2fd198aa9467

View File

@ -1 +0,0 @@
bf1ee7b66f6e6349624d8760c00669480460a55d

View File

@ -0,0 +1 @@
f4c6c02a0834d582a918c895a715a74f40195297

View File

@ -1 +0,0 @@
2ed20db0ccc53f966cc211aeb3b623dcf69d2cca

View File

@ -0,0 +1 @@
7ed65e999af74d9356180c91176bcf0bcdf80b6a

View File

@ -1 +0,0 @@
e06d99480f44eede9302fb7dda3c62f3e8ff68e1

View File

@ -0,0 +1 @@
28a64cb272639b610064291e726f2a1792c224f2

View File

@ -1 +0,0 @@
64ff3b354c21fc371cfeef208158af92cdf93316

View File

@ -0,0 +1 @@
6af61d6e2d22be8cf0d7afb42ea61e73a59e6708

View File

@ -1 +0,0 @@
2dffc0dec40028ca958a0a2fdf0628fd8e8354d0

View File

@ -0,0 +1 @@
7e7d3d4c5b7a3a4a065db5c7e4a22d75c11191ff

View File

@ -1 +0,0 @@
d0ed3d77875bab18abe45706ec8b5d441cf46bdc

View File

@ -0,0 +1 @@
b2993443ae730960c22a2c9050f58d943fb8797c

View File

@ -1 +0,0 @@
8bb05a98bb9c2615ad1262980dd6b07802bafa1d

View File

@ -108,7 +108,7 @@ public final class BinaryDocValuesRangeQuery extends Query {
return 4; // at most 4 comparisons
}
};
return new ConstantScoreScorer(this, score(), iterator);
return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}
@Override

View File

@ -92,7 +92,7 @@ public final class MinDocQuery extends Query {
}
final int segmentMinDoc = Math.max(0, minDoc - context.docBase);
final DocIdSetIterator disi = new MinDocIterator(segmentMinDoc, maxDoc);
return new ConstantScoreScorer(this, score(), disi);
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
@Override

View File

@ -87,7 +87,7 @@ public class SearchAfterSortedDocQuery extends Query {
return null;
}
final DocIdSetIterator disi = new MinDocQuery.MinDocIterator(firstDoc, maxDoc);
return new ConstantScoreScorer(this, score(), disi);
return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
@Override

View File

@ -27,7 +27,6 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.ActionFilters;
@ -42,6 +41,7 @@ import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.core.internal.io.IOUtils;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
@ -49,7 +49,8 @@ import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@ -575,11 +576,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
if (normalizer) {
if (charFilterFactory instanceof MultiTermAwareComponent == false) {
if (charFilterFactory instanceof NormalizingCharFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer may not use char filter ["
+ charFilterFactory.name() + "]");
}
charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
}
charFilterFactoryList.add(charFilterFactory);
}
@ -677,11 +677,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
}
if (normalizer) {
if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
if (tokenFilterFactory instanceof NormalizingTokenFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer may not use filter ["
+ tokenFilterFactory.name() + "]");
}
tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
}
tokenFilterFactoryList.add(tokenFilterFactory);
}

View File

@ -26,4 +26,8 @@ public interface CharFilterFactory {
String name();
Reader create(Reader reader);
default Reader normalize(Reader reader) {
return reader;
}
}

View File

@ -107,10 +107,7 @@ public final class CustomAnalyzer extends Analyzer {
@Override
protected Reader initReaderForNormalization(String fieldName, Reader reader) {
for (CharFilterFactory charFilter : charFilters) {
if (charFilter instanceof MultiTermAwareComponent) {
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
reader = charFilter.create(reader);
}
reader = charFilter.normalize(reader);
}
return reader;
}
@ -119,10 +116,7 @@ public final class CustomAnalyzer extends Analyzer {
protected TokenStream normalize(String fieldName, TokenStream in) {
TokenStream result = in;
for (TokenFilterFactory filter : tokenFilters) {
if (filter instanceof MultiTermAwareComponent) {
filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
result = filter.create(result);
}
result = filter.normalize(result);
}
return result;
}

View File

@ -57,11 +57,10 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
throw new IllegalArgumentException("Custom normalizer [" + name() + "] failed to find char_filter under name ["
+ charFilterName + "]");
}
if (charFilter instanceof MultiTermAwareComponent == false) {
if (charFilter instanceof NormalizingCharFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use char filter ["
+ charFilterName + "]");
}
charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
charFiltersList.add(charFilter);
}
@ -73,10 +72,9 @@ public final class CustomNormalizerProvider extends AbstractIndexAnalyzerProvide
throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find filter under name ["
+ tokenFilterName + "]");
}
if (tokenFilter instanceof MultiTermAwareComponent == false) {
if (tokenFilter instanceof NormalizingTokenFilterFactory == false) {
throw new IllegalArgumentException("Custom normalizer [" + name() + "] may not use filter [" + tokenFilterName + "]");
}
tokenFilter = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilter).getMultiTermComponent();
tokenFilterList.add(tokenFilter);
}

View File

@ -19,12 +19,19 @@
package org.elasticsearch.index.analysis;
/** Elasticsearch counterpart of {@link org.apache.lucene.analysis.util.MultiTermAwareComponent}. */
public interface MultiTermAwareComponent {
import java.io.Reader;
/** Returns an analysis component to handle analysis of multi-term queries.
* The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
*/
Object getMultiTermComponent();
/**
 * A {@link CharFilterFactory} that also supports normalization, i.e. it can be
 * used inside a normalizer or when analyzing the term of a multi-term query.
 *
 * The default implementation of {@link #normalize(Reader)} delegates to
 * {@link #create(Reader)}, so a filter whose output is already suitable for
 * normalization only needs to implement this marker interface.
 */
public interface NormalizingCharFilterFactory extends CharFilterFactory {
@Override
default Reader normalize(Reader reader) {
return create(reader);
}
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.TokenStream;
/**
* A TokenFilterFactory that may be used for normalization
*
* The default implementation delegates {@link #normalize(TokenStream)} to
* {@link #create(TokenStream)}}.
*/
public interface NormalizingTokenFilterFactory extends TokenFilterFactory {
@Override
default TokenStream normalize(TokenStream tokenStream) {
return create(tokenStream);
}
}

View File

@ -83,12 +83,10 @@ public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<Char
return useFilterForMultitermQueries;
}
private interface MultiTermAwareCharFilterFactory extends CharFilterFactory, MultiTermAwareComponent {}
@Override
protected CharFilterFactory create(Version version) {
if (useFilterForMultitermQueries) {
return new MultiTermAwareCharFilterFactory() {
return new NormalizingCharFilterFactory() {
@Override
public String name() {
return getName();
@ -98,11 +96,6 @@ public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<Char
public Reader create(Reader reader) {
return create.apply(reader, version);
}
@Override
public Object getMultiTermComponent() {
return this;
}
};
}
return new CharFilterFactory() {

View File

@ -84,12 +84,16 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
return useFilterForMultitermQueries;
}
private interface MultiTermAwareTokenFilterFactory extends TokenFilterFactory, MultiTermAwareComponent {}
@Override
protected TokenFilterFactory create(Version version) {
if (useFilterForMultitermQueries) {
return new MultiTermAwareTokenFilterFactory() {
return new NormalizingTokenFilterFactory() {
@Override
public TokenStream normalize(TokenStream tokenStream) {
return create.apply(tokenStream, version);
}
@Override
public String name() {
return getName();
@ -100,10 +104,6 @@ public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisCompone
return create.apply(tokenStream, version);
}
@Override
public Object getMultiTermComponent() {
return this;
}
};
}
return new TokenFilterFactory() {

View File

@ -21,7 +21,6 @@ package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
@ -37,77 +36,40 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent
*
* @param name the name of the tokenizer in the api
* @param create builds the tokenizer
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
*/
public static PreConfiguredTokenizer singleton(String name, Supplier<Tokenizer> create,
@Nullable Supplier<TokenFilterFactory> multiTermComponent) {
return new PreConfiguredTokenizer(name, CachingStrategy.ONE, version -> create.get(),
multiTermComponent == null ? null : version -> multiTermComponent.get());
public static PreConfiguredTokenizer singleton(String name, Supplier<Tokenizer> create) {
return new PreConfiguredTokenizer(name, CachingStrategy.ONE, version -> create.get());
}
/**
* Create a pre-configured tokenizer that may vary based on the Lucene version.
*
*
* @param name the name of the tokenizer in the api
* @param create builds the tokenizer
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
*/
public static PreConfiguredTokenizer luceneVersion(String name, Function<org.apache.lucene.util.Version, Tokenizer> create,
@Nullable Function<org.apache.lucene.util.Version, TokenFilterFactory> multiTermComponent) {
return new PreConfiguredTokenizer(name, CachingStrategy.LUCENE, version -> create.apply(version.luceneVersion),
multiTermComponent == null ? null : version -> multiTermComponent.apply(version.luceneVersion));
public static PreConfiguredTokenizer luceneVersion(String name, Function<org.apache.lucene.util.Version, Tokenizer> create) {
return new PreConfiguredTokenizer(name, CachingStrategy.LUCENE, version -> create.apply(version.luceneVersion));
}
/**
* Create a pre-configured tokenizer that may vary based on the Elasticsearch version.
*
*
* @param name the name of the tokenizer in the api
* @param create builds the tokenizer
* @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the
* {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries.
*/
public static PreConfiguredTokenizer elasticsearchVersion(String name, Function<org.elasticsearch.Version, Tokenizer> create,
@Nullable Function<Version, TokenFilterFactory> multiTermComponent) {
return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create, multiTermComponent);
public static PreConfiguredTokenizer elasticsearchVersion(String name, Function<org.elasticsearch.Version, Tokenizer> create) {
return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create);
}
private final Function<Version, Tokenizer> create;
private final Function<Version, TokenFilterFactory> multiTermComponent;
private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Tokenizer> create,
@Nullable Function<Version, TokenFilterFactory> multiTermComponent) {
private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Tokenizer> create) {
super(name, cache);
this.create = create;
this.multiTermComponent = multiTermComponent;
}
/**
* Does this tokenizer has an equivalent component for analyzing multi-term queries?
*/
public boolean hasMultiTermComponent() {
return multiTermComponent != null;
}
private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {}
@Override
protected TokenizerFactory create(Version version) {
if (multiTermComponent != null) {
return new MultiTermAwareTokenizerFactory() {
@Override
public Tokenizer create() {
return create.apply(version);
}
@Override
public Object getMultiTermComponent() {
return multiTermComponent.apply(version);
}
};
} else {
return () -> create.apply(version);
}
return () -> create.apply(version);
}
}

View File

@ -31,6 +31,15 @@ public interface TokenFilterFactory {
TokenStream create(TokenStream tokenStream);
/**
* Normalize a tokenStream for use in multi-term queries
*
* The default implementation is a no-op
*/
default TokenStream normalize(TokenStream tokenStream) {
return tokenStream;
}
/**
* Does this analyzer mess up the {@link OffsetAttribute}s in such as way as to break the
* {@link FastVectorHighlighter}? If this is {@code true} then the

View File

@ -191,7 +191,7 @@ public class ScriptQueryBuilder extends AbstractQueryBuilder<ScriptQueryBuilder>
return 1000f;
}
};
return new ConstantScoreScorer(this, score(), twoPhase);
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
}
@Override

View File

@ -114,7 +114,7 @@ final class ShardSplittingQuery extends Query {
TwoPhaseIterator twoPhaseIterator =
parentBitSet == null ? new RoutingPartitionedDocIdSetIterator(visitor) :
new NestedRoutingPartitionedDocIdSetIterator(visitor, parentBitSet);
return new ConstantScoreScorer(this, score(), twoPhaseIterator);
return new ConstantScoreScorer(this, score(), scoreMode, twoPhaseIterator);
} else {
// here we potentially guard the docID consumers with our parent bitset if we have one.
// this ensures that we are only marking root documents in the nested case and if necessary
@ -155,7 +155,7 @@ final class ShardSplittingQuery extends Query {
}
}
return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bitSet, bitSet.length()));
}
@Override

View File

@ -212,8 +212,7 @@ public final class AnalysisModule {
PreConfiguredTokenizer preConfigured;
switch (tokenizer.getCachingStrategy()) {
case ONE:
preConfigured = PreConfiguredTokenizer.singleton(name,
() -> tokenizer.create(Version.CURRENT), null);
preConfigured = PreConfiguredTokenizer.singleton(name, () -> tokenizer.create(Version.CURRENT));
break;
default:
throw new UnsupportedOperationException(

View File

@ -75,7 +75,7 @@ public final class DocValuesSliceQuery extends SliceQuery {
return 10;
}
};
return new ConstantScoreScorer(this, score(), twoPhase);
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
}
@Override

View File

@ -62,7 +62,7 @@ public final class TermsSliceQuery extends SliceQuery {
public Scorer scorer(LeafReaderContext context) throws IOException {
final DocIdSet disi = build(context.reader());
final DocIdSetIterator leafIt = disi.iterator();
return new ConstantScoreScorer(this, score(), leafIt);
return new ConstantScoreScorer(this, score(), scoreMode, leafIt);
}
@Override

View File

@ -98,7 +98,7 @@ public class GetTermVectorsTests extends ESSingleNodeTestCase {
@Override
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
return Collections.singletonList(PreConfiguredTokenizer.singleton("mock-whitespace",
() -> new MockTokenizer(MockTokenizer.WHITESPACE, false), null));
() -> new MockTokenizer(MockTokenizer.WHITESPACE, false)));
}
// Based on DelimitedPayloadTokenFilter:

View File

@ -137,7 +137,7 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
@Override
public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> {
class Factory implements CharFilterFactory, MultiTermAwareComponent {
class Factory implements NormalizingCharFilterFactory {
@Override
public String name() {
return name;
@ -162,10 +162,6 @@ public class CustomNormalizerTests extends ESTokenStreamTestCase {
}
};
}
@Override
public Object getMultiTermComponent() {
return this;
}
}
return new Factory();
});

View File

@ -78,7 +78,7 @@ public class IndicesQueryCacheTests extends ESTestCase {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
return new ConstantScoreScorer(this, score(), scoreMode, DocIdSetIterator.all(context.reader().maxDoc()));
}
@Override

View File

@ -371,9 +371,6 @@ public class AnalysisModuleTests extends ESTestCase {
* and that do not vary based on version at all.
*/
public void testPluginPreConfiguredTokenizers() throws IOException {
boolean noVersionSupportsMultiTerm = randomBoolean();
boolean luceneVersionSupportsMultiTerm = randomBoolean();
boolean elasticsearchVersionSupportsMultiTerm = randomBoolean();
// Simple tokenizer that always spits out a single token with some preconfigured characters
final class FixedTokenizer extends Tokenizer {
@ -409,16 +406,11 @@ public class AnalysisModuleTests extends ESTestCase {
@Override
public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
return Arrays.asList(
PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version"),
noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null),
PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version")),
PreConfiguredTokenizer.luceneVersion("lucene_version",
luceneVersion -> new FixedTokenizer(luceneVersion.toString()),
luceneVersionSupportsMultiTerm ?
luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null),
luceneVersion -> new FixedTokenizer(luceneVersion.toString())),
PreConfiguredTokenizer.elasticsearchVersion("elasticsearch_version",
esVersion -> new FixedTokenizer(esVersion.toString()),
elasticsearchVersionSupportsMultiTerm ?
esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null)
esVersion -> new FixedTokenizer(esVersion.toString()))
);
}
})).getAnalysisRegistry();

View File

@ -19,24 +19,17 @@
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.elasticsearch.common.collect.MapBuilder;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
@ -46,9 +39,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singletonList;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.typeCompatibleWith;
/**
* Alerts us if new analysis components are added to Lucene, so we don't miss them.
@ -308,142 +298,6 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
}
public void testMultiTermAware() {
Collection<Class<?>> expected = new HashSet<>();
for (Map.Entry<String, Class<?>> entry : getTokenizers().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.TokenizerFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
for (Map.Entry<String, Class<?>> entry : getTokenFilters().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.TokenFilterFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
for (Map.Entry<String, Class<?>> entry : getCharFilters().entrySet()) {
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(
org.apache.lucene.analysis.util.CharFilterFactory.lookupClass(entry.getKey()))) {
expected.add(entry.getValue());
}
}
expected.remove(Void.class);
expected.remove(MovedToAnalysisCommon.class);
expected.remove(Deprecated.class);
Collection<Class<?>> actual = new HashSet<>();
for (Class<?> clazz : getTokenizers().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
for (Class<?> clazz : getTokenFilters().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
for (Class<?> clazz : getCharFilters().values()) {
if (MultiTermAwareComponent.class.isAssignableFrom(clazz)) {
actual.add(clazz);
}
}
Set<Class<?>> classesMissingMultiTermSupport = new HashSet<>(expected);
classesMissingMultiTermSupport.removeAll(actual);
assertTrue("Classes are missing multi-term support: " + classesMissingMultiTermSupport,
classesMissingMultiTermSupport.isEmpty());
Set<Class<?>> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
assertTrue("Classes should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
classesThatShouldNotHaveMultiTermSupport.isEmpty());
}
public void testPreBuiltMultiTermAware() {
Collection<Object> expected = new HashSet<>();
Collection<Object> actual = new HashSet<>();
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters =
new HashMap<>(AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)));
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenFilters().entrySet()) {
String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
PreConfiguredTokenFilter filter = preConfiguredTokenFilters.remove(name);
assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter);
if (luceneFactory == Void.class) {
continue;
}
if (luceneFactory == null) {
luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
}
assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class));
if (filter.shouldUseFilterForMultitermQueries()) {
actual.add("token filter [" + name + "]");
}
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
expected.add("token filter [" + name + "]");
}
}
assertThat("pre configured token filter not registered with test", preConfiguredTokenFilters.keySet(), empty());
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = new HashMap<>(
AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)));
for (Map.Entry<String, Class<?>> entry : getPreConfiguredTokenizers().entrySet()) {
String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.remove(name);
assertNotNull("test claims pre built tokenizer [" + name + "] should be available but it wasn't", tokenizer);
if (luceneFactory == Void.class) {
continue;
}
if (luceneFactory == null) {
luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name));
}
assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class));
if (tokenizer.hasMultiTermComponent()) {
actual.add(tokenizer);
}
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
expected.add(tokenizer);
}
}
assertThat("pre configured tokenizer not registered with test", preConfiguredTokenizers.keySet(), empty());
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = new HashMap<>(
AnalysisModule.setupPreConfiguredCharFilters(singletonList(plugin)));
for (Map.Entry<String, Class<?>> entry : getPreConfiguredCharFilters().entrySet()) {
String name = entry.getKey();
Class<?> luceneFactory = entry.getValue();
PreConfiguredCharFilter filter = preConfiguredCharFilters.remove(name);
assertNotNull("test claims pre built char filter [" + name + "] should be available but it wasn't", filter);
if (luceneFactory == Void.class) {
continue;
}
if (luceneFactory == null) {
luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name));
}
assertThat(luceneFactory, typeCompatibleWith(CharFilterFactory.class));
if (filter.shouldUseFilterForMultitermQueries()) {
actual.add(filter);
}
if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) {
expected.add("token filter [" + name + "]");
}
}
assertThat("pre configured char filter not registered with test", preConfiguredCharFilters.keySet(), empty());
Set<Object> classesMissingMultiTermSupport = new HashSet<>(expected);
classesMissingMultiTermSupport.removeAll(actual);
assertTrue("Pre-built components are missing multi-term support: " + classesMissingMultiTermSupport,
classesMissingMultiTermSupport.isEmpty());
Set<Object> classesThatShouldNotHaveMultiTermSupport = new HashSet<>(actual);
classesThatShouldNotHaveMultiTermSupport.removeAll(expected);
assertTrue("Pre-built components should not have multi-term support: " + classesThatShouldNotHaveMultiTermSupport,
classesThatShouldNotHaveMultiTermSupport.isEmpty());
}
/**
* Marker class for components that have moved to the analysis-common modules. This will be
* removed when the module is complete and these analysis components aren't available to core.

View File

@ -0,0 +1 @@
20b559db91bda12f7b242c516915aad26e654baa

View File

@ -1 +0,0 @@
0bba71a2e8bfd1c15db407ff06ee4185a091d5ec