Upgrade Lucene 3.1: Analyzers, closes #826.
This commit is contained in:
parent
4e4495ff1d
commit
5d6e84f206
|
@ -27,8 +27,10 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.index.analysis.AnalyzerScope;
|
||||
import org.elasticsearch.index.analysis.NamedAnalyzer;
|
||||
|
||||
|
@ -52,6 +54,20 @@ public class Lucene {
|
|||
|
||||
public static final int NO_DOC = -1;
|
||||
|
||||
public static Version parseVersion(@Nullable String version, Version defaultVersion, ESLogger logger) {
|
||||
if (version == null) {
|
||||
return defaultVersion;
|
||||
}
|
||||
if ("3.1".equals(version)) {
|
||||
return Version.LUCENE_31;
|
||||
}
|
||||
if ("3.0".equals(version)) {
|
||||
return Version.LUCENE_30;
|
||||
}
|
||||
logger.warn("no version match {}, default to {}", version, defaultVersion);
|
||||
return defaultVersion;
|
||||
}
|
||||
|
||||
public static long count(IndexSearcher searcher, Query query, float minScore) throws IOException {
|
||||
CountCollector countCollector = new CountCollector(minScore);
|
||||
searcher.search(query, countCollector);
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public ASCIIFoldingTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -32,6 +34,8 @@ public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends
|
|||
|
||||
private final String name;
|
||||
|
||||
protected final Version version;
|
||||
|
||||
/**
|
||||
* Constructs a new analyzer component, with the index name and its settings and the analyzer name.
|
||||
*
|
||||
|
@ -39,9 +43,10 @@ public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends
|
|||
* @param indexSettings The index settings
|
||||
* @param name The analyzer name
|
||||
*/
|
||||
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String name) {
|
||||
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
|
||||
super(index, indexSettings);
|
||||
this.name = name;
|
||||
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -52,9 +57,10 @@ public abstract class AbstractIndexAnalyzerProvider<T extends Analyzer> extends
|
|||
* @param prefixSettings A settings prefix (like "com.mycompany") to simplify extracting the component settings
|
||||
* @param name The analyzer name
|
||||
*/
|
||||
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String prefixSettings, String name) {
|
||||
public AbstractIndexAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, String prefixSettings, String name, Settings settings) {
|
||||
super(index, indexSettings, prefixSettings);
|
||||
this.name = name;
|
||||
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
|
||||
}
|
||||
|
||||
@Override public String name() {
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -31,9 +33,12 @@ public abstract class AbstractTokenFilterFactory extends AbstractIndexComponent
|
|||
|
||||
private final String name;
|
||||
|
||||
public AbstractTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, String name) {
|
||||
protected final Version version;
|
||||
|
||||
public AbstractTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
|
||||
super(index, indexSettings);
|
||||
this.name = name;
|
||||
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
|
||||
}
|
||||
|
||||
@Override public String name() {
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -31,9 +33,12 @@ public abstract class AbstractTokenizerFactory extends AbstractIndexComponent im
|
|||
|
||||
private final String name;
|
||||
|
||||
public AbstractTokenizerFactory(Index index, @IndexSettings Settings indexSettings, String name) {
|
||||
protected final Version version;
|
||||
|
||||
public AbstractTokenizerFactory(Index index, @IndexSettings Settings indexSettings, String name, Settings settings) {
|
||||
super(index, indexSettings);
|
||||
this.name = name;
|
||||
this.version = Lucene.parseVersion(settings.get("version"), Lucene.ANALYZER_VERSION, logger);
|
||||
}
|
||||
|
||||
@Override public String name() {
|
||||
|
|
|
@ -302,6 +302,7 @@ public class AnalysisModule extends AbstractModule {
|
|||
|
||||
@Override public void processTokenFilters(TokenFiltersBindings tokenFiltersBindings) {
|
||||
tokenFiltersBindings.processTokenFilter("stop", StopTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("reverse", ReverseTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("asciifolding", ASCIIFoldingTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("length", LengthTokenFilterFactory.class);
|
||||
tokenFiltersBindings.processTokenFilter("lowercase", LowerCaseTokenFilterFactory.class);
|
||||
|
@ -322,6 +323,10 @@ public class AnalysisModule extends AbstractModule {
|
|||
|
||||
@Override public void processTokenizers(TokenizersBindings tokenizersBindings) {
|
||||
tokenizersBindings.processTokenizer("standard", StandardTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("uax_url_email", UAX29URLEmailTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("uaUrlEmail", UAX29URLEmailTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("path_hierarchy", PathHierarchyTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("pathHierarchy", PathHierarchyTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("keyword", KeywordTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("letter", LetterTokenizerFactory.class);
|
||||
tokenizersBindings.processTokenizer("lowercase", LowerCaseTokenizerFactory.class);
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,9 +36,9 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<Arabic
|
|||
private final ArabicAnalyzer arabicAnalyzer;
|
||||
|
||||
@Inject public ArabicAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, ArabicAnalyzer.getDefaultStopSet());
|
||||
arabicAnalyzer = new ArabicAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
arabicAnalyzer = new ArabicAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public ArabicAnalyzer get() {
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class ArabicStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public ArabicStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.elasticsearch.common.collect.ImmutableSet;
|
|||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -41,7 +40,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
|
|||
private final BrazilianAnalyzer analyzer;
|
||||
|
||||
@Inject public BrazilianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, BrazilianAnalyzer.getDefaultStopSet());
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
|
@ -49,7 +48,7 @@ public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Bra
|
|||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new BrazilianAnalyzer(Lucene.ANALYZER_VERSION, stopWords, this.stemExclusion);
|
||||
analyzer = new BrazilianAnalyzer(version, stopWords, this.stemExclusion);
|
||||
}
|
||||
|
||||
@Override public BrazilianAnalyzer get() {
|
||||
|
|
|
@ -39,7 +39,7 @@ public class BrazilianStemTokenFilterFactory extends AbstractTokenFilterFactory
|
|||
private final Set<?> exclusions;
|
||||
|
||||
@Inject public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
|
|
|
@ -34,7 +34,7 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Chine
|
|||
private final ChineseAnalyzer analyzer;
|
||||
|
||||
@Inject public ChineseAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new ChineseAnalyzer();
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,10 +36,10 @@ public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyz
|
|||
private final CJKAnalyzer analyzer;
|
||||
|
||||
@Inject public CjkAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, CJKAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new CJKAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
analyzer = new CJKAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public CJKAnalyzer get() {
|
||||
|
|
|
@ -53,7 +53,7 @@ public class CustomAnalyzerProvider extends AbstractIndexAnalyzerProvider<Custom
|
|||
Map<String, TokenFilterFactoryFactory> tokenFilterFactories,
|
||||
@IndexSettings Settings indexSettings,
|
||||
@Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
String tokenizerName = settings.get("tokenizer");
|
||||
if (tokenizerName == null) {
|
||||
throw new IllegalArgumentException("Custom Analyzer [" + name + "] must be configured with a tokenizer");
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.cz.CzechAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,10 +36,10 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAn
|
|||
private final CzechAnalyzer analyzer;
|
||||
|
||||
@Inject public CzechAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, CzechAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new CzechAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
analyzer = new CzechAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public CzechAnalyzer get() {
|
||||
|
|
|
@ -11,7 +11,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class CzechStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public CzechStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.elasticsearch.common.collect.ImmutableSet;
|
|||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -41,7 +40,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
|
|||
private final DutchAnalyzer analyzer;
|
||||
|
||||
@Inject public DutchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, DutchAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
|
@ -50,7 +49,7 @@ public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAn
|
|||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new DutchAnalyzer(Lucene.VERSION, stopWords, this.stemExclusion);
|
||||
analyzer = new DutchAnalyzer(version, stopWords, this.stemExclusion);
|
||||
}
|
||||
|
||||
@Override public DutchAnalyzer get() {
|
||||
|
|
|
@ -39,7 +39,7 @@ public class DutchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final Set<?> exclusions;
|
||||
|
||||
@Inject public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
|
|
|
@ -42,7 +42,7 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final EdgeNGramTokenFilter.Side side;
|
||||
|
||||
@Inject public EdgeNGramTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
this.side = EdgeNGramTokenFilter.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
|
|
|
@ -42,7 +42,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
private final EdgeNGramTokenizer.Side side;
|
||||
|
||||
@Inject public EdgeNGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
this.side = EdgeNGramTokenizer.Side.getSide(settings.get("side", EdgeNGramTokenizer.DEFAULT_SIDE.getLabel()));
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.elasticsearch.common.collect.ImmutableSet;
|
|||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -41,7 +40,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
|
|||
private final FrenchAnalyzer analyzer;
|
||||
|
||||
@Inject public FrenchAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, FrenchAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
|
@ -50,7 +49,7 @@ public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<French
|
|||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new FrenchAnalyzer(Lucene.ANALYZER_VERSION, stopWords, this.stemExclusion);
|
||||
analyzer = new FrenchAnalyzer(version, stopWords, this.stemExclusion);
|
||||
}
|
||||
|
||||
@Override public FrenchAnalyzer get() {
|
||||
|
|
|
@ -39,7 +39,7 @@ public class FrenchStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final Set<?> exclusions;
|
||||
|
||||
@Inject public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
|
|
|
@ -24,7 +24,6 @@ import org.elasticsearch.common.collect.ImmutableSet;
|
|||
import org.elasticsearch.common.collect.Iterators;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -41,7 +40,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
|
|||
private final GermanAnalyzer analyzer;
|
||||
|
||||
@Inject public GermanAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, GermanAnalyzer.getDefaultStopSet());
|
||||
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
|
@ -50,7 +49,7 @@ public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<German
|
|||
} else {
|
||||
this.stemExclusion = ImmutableSet.of();
|
||||
}
|
||||
analyzer = new GermanAnalyzer(Lucene.ANALYZER_VERSION, stopWords, this.stemExclusion);
|
||||
analyzer = new GermanAnalyzer(version, stopWords, this.stemExclusion);
|
||||
}
|
||||
|
||||
@Override public GermanAnalyzer get() {
|
||||
|
|
|
@ -39,7 +39,7 @@ public class GermanStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final Set<?> exclusions;
|
||||
|
||||
@Inject public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
String[] stemExclusion = settings.getAsArray("stem_exclusion");
|
||||
if (stemExclusion.length > 0) {
|
||||
this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion));
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.el.GreekAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,10 +36,10 @@ public class GreekAnalyzerProvider extends AbstractIndexAnalyzerProvider<GreekAn
|
|||
private final GreekAnalyzer analyzer;
|
||||
|
||||
@Inject public GreekAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, GreekAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new GreekAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
analyzer = new GreekAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public GreekAnalyzer get() {
|
||||
|
|
|
@ -34,7 +34,7 @@ public class KeywordAnalyzerProvider extends AbstractIndexAnalyzerProvider<Keywo
|
|||
private final KeywordAnalyzer keywordAnalyzer;
|
||||
|
||||
@Inject public KeywordAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.keywordAnalyzer = new KeywordAnalyzer();
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,7 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory {
|
|||
private final int bufferSize;
|
||||
|
||||
@Inject public KeywordTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
bufferSize = settings.getAsInt("buffer_size", 256);
|
||||
}
|
||||
|
||||
|
|
|
@ -33,17 +33,18 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
private final int min;
|
||||
|
||||
private final int max;
|
||||
private final boolean enablePositionIncrements;
|
||||
|
||||
@Inject public LengthTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
min = settings.getAsInt("min", 0);
|
||||
max = settings.getAsInt("max", Integer.MAX_VALUE);
|
||||
enablePositionIncrements = settings.getAsBoolean("enabled_position_increments", false);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new LengthFilter(tokenStream, min, max);
|
||||
return new LengthFilter(enablePositionIncrements, tokenStream, min, max);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,10 +35,10 @@ import java.io.Reader;
|
|||
public class LetterTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
@Inject public LetterTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new LetterTokenizer(reader);
|
||||
return new LetterTokenizer(version, reader);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,11 +33,11 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public LowerCaseTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new LowerCaseFilter(tokenStream);
|
||||
return new LowerCaseFilter(version, tokenStream);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -35,10 +35,10 @@ import java.io.Reader;
|
|||
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
@Inject public LowerCaseTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new LowerCaseTokenizer(reader);
|
||||
return new LowerCaseTokenizer(version, reader);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
|
||||
|
||||
@Inject public NGramTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public class NGramTokenizerFactory extends AbstractTokenizerFactory {
|
|||
private final int maxGram;
|
||||
|
||||
@Inject public NGramTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
|
||||
this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
||||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
private final int bufferSize;
|
||||
|
||||
private final char delimiter;
|
||||
private final char replacement;
|
||||
|
||||
@Inject public PathHierarchyTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
bufferSize = settings.getAsInt("buffer_size", 1024);
|
||||
String delimiter = settings.get("delimiter");
|
||||
if (delimiter == null) {
|
||||
this.delimiter = PathHierarchyTokenizer.DEFAULT_DELIMITER;
|
||||
} else if (delimiter.length() > 1) {
|
||||
throw new ElasticSearchIllegalArgumentException("delimiter can only be a one char value");
|
||||
} else {
|
||||
this.delimiter = delimiter.charAt(0);
|
||||
}
|
||||
|
||||
String replacement = settings.get("replacement");
|
||||
if (replacement == null) {
|
||||
this.replacement = PathHierarchyTokenizer.DEFAULT_DELIMITER;
|
||||
} else if (replacement.length() > 1) {
|
||||
throw new ElasticSearchIllegalArgumentException("replacement can only be a one char value");
|
||||
} else {
|
||||
this.replacement = replacement.charAt(0);
|
||||
}
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new PathHierarchyTokenizer(reader, bufferSize, delimiter, replacement);
|
||||
}
|
||||
}
|
|
@ -24,7 +24,6 @@ import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer;
|
|||
import org.elasticsearch.ElasticSearchIllegalArgumentException;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
|
@ -41,7 +40,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
|
|||
private final PatternAnalyzer analyzer;
|
||||
|
||||
@Inject public PatternAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
|
||||
boolean lowercase = settings.getAsBoolean("lowercase", true);
|
||||
|
||||
|
@ -53,7 +52,7 @@ public class PatternAnalyzerProvider extends AbstractIndexAnalyzerProvider<Patte
|
|||
}
|
||||
Pattern pattern = Regex.compile(sPattern, settings.get("flags"));
|
||||
|
||||
analyzer = new PatternAnalyzer(Lucene.ANALYZER_VERSION, pattern, lowercase, stopWords);
|
||||
analyzer = new PatternAnalyzer(version, pattern, lowercase, stopWords);
|
||||
}
|
||||
|
||||
@Override public PatternAnalyzer get() {
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.fa.PersianAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,10 +36,10 @@ public class PersianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Persi
|
|||
private final PersianAnalyzer analyzer;
|
||||
|
||||
@Inject public PersianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, PersianAnalyzer.getDefaultStopSet());
|
||||
|
||||
analyzer = new PersianAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
analyzer = new PersianAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public PersianAnalyzer get() {
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class PorterStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public PorterStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
* @author kimchy (Shay Banon)
|
||||
*/
|
||||
public class ReverseTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public ReverseTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new ReverseStringFilter(version, tokenStream);
|
||||
}
|
||||
}
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.analysis.ru.RussianAnalyzer;
|
|||
import org.elasticsearch.common.collect.ImmutableSet;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -38,15 +37,15 @@ public class RussianAnalyzerProvider extends AbstractIndexAnalyzerProvider<Russi
|
|||
private final RussianAnalyzer analyzer;
|
||||
|
||||
@Inject public RussianAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
if (Analysis.isNoStopwords(settings)) {
|
||||
analyzer = new RussianAnalyzer(Lucene.ANALYZER_VERSION, ImmutableSet.of());
|
||||
analyzer = new RussianAnalyzer(version, ImmutableSet.of());
|
||||
} else {
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, ImmutableSet.of());
|
||||
if (!stopWords.isEmpty()) {
|
||||
analyzer = new RussianAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
analyzer = new RussianAnalyzer(version, stopWords);
|
||||
} else {
|
||||
analyzer = new RussianAnalyzer(Lucene.ANALYZER_VERSION);
|
||||
analyzer = new RussianAnalyzer(version);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ import java.io.Reader;
|
|||
public class RussianLetterTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
@Inject public RussianLetterTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
|
|
|
@ -33,7 +33,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class RussianStemTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public RussianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -37,7 +37,7 @@ public class ShingleTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final boolean outputUnigrams;
|
||||
|
||||
@Inject public ShingleTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
|
||||
outputUnigrams = settings.getAsBoolean("output_unigrams", true);
|
||||
}
|
||||
|
|
|
@ -34,8 +34,8 @@ public class SimpleAnalyzerProvider extends AbstractIndexAnalyzerProvider<Simple
|
|||
private final SimpleAnalyzer simpleAnalyzer;
|
||||
|
||||
@Inject public SimpleAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
this.simpleAnalyzer = new SimpleAnalyzer();
|
||||
super(index, indexSettings, name, settings);
|
||||
this.simpleAnalyzer = new SimpleAnalyzer(version);
|
||||
}
|
||||
|
||||
@Override public SimpleAnalyzer get() {
|
||||
|
|
|
@ -29,7 +29,6 @@ import org.elasticsearch.common.collect.ImmutableSet;
|
|||
import org.elasticsearch.common.collect.MapBuilder;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -63,13 +62,13 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
|
|||
private final SnowballAnalyzer analyzer;
|
||||
|
||||
@Inject public SnowballAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
|
||||
String language = settings.get("language", settings.get("name", "English"));
|
||||
Set<?> defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.<Set<?>>of();
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, defaultStopwords);
|
||||
|
||||
analyzer = new SnowballAnalyzer(Lucene.VERSION, language, stopWords);
|
||||
analyzer = new SnowballAnalyzer(version, language, stopWords);
|
||||
}
|
||||
|
||||
@Override public SnowballAnalyzer get() {
|
||||
|
|
|
@ -38,7 +38,7 @@ public class SnowballTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private String language;
|
||||
|
||||
@Inject public SnowballTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.language = settings.get("language", settings.get("name", "English"));
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,6 @@ import org.apache.lucene.analysis.StopAnalyzer;
|
|||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -38,10 +37,10 @@ public class StandardAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stan
|
|||
private final StandardAnalyzer standardAnalyzer;
|
||||
|
||||
@Inject public StandardAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
standardAnalyzer = new StandardAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
standardAnalyzer = new StandardAnalyzer(version, stopWords);
|
||||
standardAnalyzer.setMaxTokenLength(maxTokenLength);
|
||||
}
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ public class StandardHtmlStripAnalyzerProvider extends AbstractIndexAnalyzerProv
|
|||
private final CustomAnalyzer analyzer;
|
||||
|
||||
@Inject public StandardHtmlStripAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new CustomAnalyzer(new StandardTokenizerFactory(index, indexSettings, name, settings),
|
||||
new CharFilterFactory[]{new HtmlStripCharFilterFactory(index, indexSettings, name, settings)},
|
||||
new TokenFilterFactory[]{
|
||||
|
|
|
@ -34,10 +34,10 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class StandardTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public StandardTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new StandardFilter(tokenStream);
|
||||
return new StandardFilter(version, tokenStream);
|
||||
}
|
||||
}
|
|
@ -24,7 +24,6 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
|||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -32,19 +31,18 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* @author kimchy (Shay Banon)
|
||||
*/
|
||||
public class StandardTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
private final int maxTokenLength;
|
||||
|
||||
@Inject public StandardTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
StandardTokenizer tokenizer = new StandardTokenizer(Lucene.ANALYZER_VERSION, reader);
|
||||
StandardTokenizer tokenizer = new StandardTokenizer(version, reader);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -37,9 +36,9 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider<StopAnal
|
|||
private final StopAnalyzer stopAnalyzer;
|
||||
|
||||
@Inject public StopAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
Set<?> stopWords = Analysis.parseStopWords(settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
this.stopAnalyzer = new StopAnalyzer(Lucene.ANALYZER_VERSION, stopWords);
|
||||
this.stopAnalyzer = new StopAnalyzer(version, stopWords);
|
||||
}
|
||||
|
||||
@Override public StopAnalyzer get() {
|
||||
|
|
|
@ -37,29 +37,22 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
|
||||
private final Set<?> stopWords;
|
||||
|
||||
private final boolean enablePositionIncrements;
|
||||
|
||||
private final boolean ignoreCase;
|
||||
|
||||
@Inject public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.stopWords = Analysis.parseStopWords(settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true);
|
||||
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new StopFilter(enablePositionIncrements, tokenStream, stopWords, ignoreCase);
|
||||
return new StopFilter(version, tokenStream, stopWords, ignoreCase);
|
||||
}
|
||||
|
||||
public Set<?> stopWords() {
|
||||
return stopWords;
|
||||
}
|
||||
|
||||
public boolean enablePositionIncrements() {
|
||||
return enablePositionIncrements;
|
||||
}
|
||||
|
||||
public boolean ignoreCase() {
|
||||
return ignoreCase;
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ package org.elasticsearch.index.analysis;
|
|||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -35,8 +34,8 @@ public class ThaiAnalyzerProvider extends AbstractIndexAnalyzerProvider<ThaiAnal
|
|||
private final ThaiAnalyzer analyzer;
|
||||
|
||||
@Inject public ThaiAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
analyzer = new ThaiAnalyzer(Lucene.ANALYZER_VERSION);
|
||||
super(index, indexSettings, name, settings);
|
||||
analyzer = new ThaiAnalyzer(version);
|
||||
}
|
||||
|
||||
@Override public ThaiAnalyzer get() {
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.inject.assistedinject.Assisted;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
private final int maxTokenLength;
|
||||
|
||||
@Inject public UAX29URLEmailTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name, settings);
|
||||
maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(reader);
|
||||
tokenizer.setMaxTokenLength(maxTokenLength);
|
||||
return tokenizer;
|
||||
}
|
||||
}
|
|
@ -34,8 +34,8 @@ public class WhitespaceAnalyzerProvider extends AbstractIndexAnalyzerProvider<Wh
|
|||
private final WhitespaceAnalyzer analyzer;
|
||||
|
||||
@Inject public WhitespaceAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
this.analyzer = new WhitespaceAnalyzer();
|
||||
super(index, indexSettings, name, settings);
|
||||
this.analyzer = new WhitespaceAnalyzer(version);
|
||||
}
|
||||
|
||||
@Override public WhitespaceAnalyzer get() {
|
||||
|
|
|
@ -35,10 +35,10 @@ import java.io.Reader;
|
|||
public class WhitespaceTokenizerFactory extends AbstractTokenizerFactory {
|
||||
|
||||
@Inject public WhitespaceTokenizerFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public Tokenizer create(Reader reader) {
|
||||
return new WhitespaceTokenizer(reader);
|
||||
return new WhitespaceTokenizer(version, reader);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ public abstract class AbstractCompoundWordTokenFilterFactory extends AbstractTok
|
|||
protected final Set<String> wordList;
|
||||
|
||||
@Inject public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
|
||||
minWordSize = settings.getAsInt("min_word_size", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
|
||||
minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
|
||||
|
|
|
@ -44,7 +44,7 @@ public class DictionaryCompoundWordTokenFilterFactory extends AbstractCompoundWo
|
|||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new DictionaryCompoundWordTokenFilter(tokenStream, wordList,
|
||||
return new DictionaryCompoundWordTokenFilter(version, tokenStream, wordList,
|
||||
minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
|
||||
}
|
||||
}
|
|
@ -65,7 +65,7 @@ public class HyphenationCompoundWordTokenFilterFactory extends AbstractCompoundW
|
|||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new HyphenationCompoundWordTokenFilter(tokenStream,
|
||||
return new HyphenationCompoundWordTokenFilter(version, tokenStream,
|
||||
hyphenationTree, wordList,
|
||||
minWordSize, minSubwordSize, maxSubwordSize, onlyLongestMatch);
|
||||
}
|
||||
|
|
|
@ -42,7 +42,7 @@ public class PhoneticTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final boolean inject;
|
||||
|
||||
@Inject public PhoneticTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.inject = settings.getAsBoolean("replace", true);
|
||||
String encoder = settings.get("encoder");
|
||||
if (encoder == null) {
|
||||
|
|
|
@ -17,19 +17,13 @@
|
|||
* under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: kimchy
|
||||
* Date: 1/21/11
|
||||
* Time: 2:02 AM
|
||||
*/
|
||||
package org.elasticsearch.index.analysis.filter1;
|
||||
|
||||
import org.apache.lucene.analysis.StopAnalyzer;
|
||||
import org.apache.lucene.analysis.StopFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
|
||||
|
@ -38,10 +32,10 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class MyFilterTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public MyFilterTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, String name) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, ImmutableSettings.Builder.EMPTY_SETTINGS);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
return new StopFilter(Lucene.VERSION, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
return new StopFilter(version, tokenStream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||
}
|
||||
}
|
|
@ -53,7 +53,7 @@ public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {
|
|||
private final Collator collator;
|
||||
|
||||
@Inject public IcuCollationTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment environment, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
|
||||
Collator collator;
|
||||
String rules = settings.get("rules");
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.elasticsearch.index.settings.IndexSettings;
|
|||
public class IcuFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
|
||||
|
||||
@Inject public IcuFoldingTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
}
|
||||
|
||||
@Override public TokenStream create(TokenStream tokenStream) {
|
||||
|
|
|
@ -40,7 +40,7 @@ public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory
|
|||
private final String name;
|
||||
|
||||
@Inject public IcuNormalizerTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) {
|
||||
super(index, indexSettings, name);
|
||||
super(index, indexSettings, name, settings);
|
||||
this.name = settings.get("name", "nfkc_cf");
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue