Move a number of language analyzers to the analysis-common module (#31143)

The following analyzers were moved from the server module to the analysis-common module:
`snowball`, `arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`,
`catalan`, `chinese`, `cjk`, `czech`, `danish`, `dutch`, `english`, `finnish`,
`french`, `galician` and `german`.

Relates to #23658
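
For context: after this change, these analyzers are registered through the `AnalysisPlugin#getAnalyzers` extension point rather than through `AnalysisModule` in server. The following is a minimal sketch of that registration shape, not code from this commit — the plugin class name `MyAnalysisPlugin` is invented for illustration; in this commit the real registrations live in `CommonAnalysisPlugin` (see the diff below):

```java
package org.elasticsearch.analysis.common;

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Hypothetical, stripped-down stand-in for CommonAnalysisPlugin, reduced to one entry.
public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
        Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new HashMap<>();
        // The provider constructors become package-private in this commit, so
        // registration has to happen from within org.elasticsearch.analysis.common.
        analyzers.put("arabic", ArabicAnalyzerProvider::new);
        return analyzers;
    }
}
```

Pre-built (no-settings) instances are exposed separately via `getPreBuiltAnalyzerProviderFactories`, which replaces the per-language entries removed from the server's `PreBuiltAnalyzers` enum below.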
Martijn van Groningen authored on 2018-06-08 08:58:46 +02:00, committed by GitHub
parent 435a825a53
commit 07a57cc131
38 changed files with 951 additions and 347 deletions

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArabicAnalyzer> {
private final ArabicAnalyzer arabicAnalyzer;
public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
arabicAnalyzer = new ArabicAnalyzer(
Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArmenianAnalyzer> {
private final ArmenianAnalyzer analyzer;
public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new ArmenianAnalyzer(
Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<BasqueAnalyzer> {
private final BasqueAnalyzer analyzer;
public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BasqueAnalyzer(
Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class BengaliAnalyzerProvider extends AbstractIndexAnalyzerProvider<BengaliAnalyzer> {
private final BengaliAnalyzer analyzer;
public BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BengaliAnalyzer(
Analysis.parseStopWords(env, settings, BengaliAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
private final BrazilianAnalyzer analyzer;
public BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BrazilianAnalyzer(
Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BulgarianAnalyzer> {
private final BulgarianAnalyzer analyzer;
public BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new BulgarianAnalyzer(
Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<CatalanAnalyzer> {
private final CatalanAnalyzer analyzer;
public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new CatalanAnalyzer(
Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()),

View File

@@ -17,12 +17,13 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
/**
* Only for old indexes
@@ -31,16 +32,16 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<Stand
private final StandardAnalyzer analyzer;
public ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
// old index: best effort
analyzer = new StandardAnalyzer();
analyzer.setVersion(version);
}
@Override
public StandardAnalyzer get() {
return this.analyzer;
}
}
}

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyzer> {
private final CJKAnalyzer analyzer;
public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet());

View File

@@ -24,11 +24,17 @@ import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.bn.BengaliNormalizationFilter;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
@@ -40,14 +46,22 @@ import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.UpperCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
@@ -64,6 +78,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -73,6 +88,7 @@ import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.ClassicFilter;
import org.apache.lucene.analysis.standard.ClassicTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
import org.apache.lucene.analysis.th.ThaiTokenizer;
import org.apache.lucene.analysis.tr.ApostropheFilter;
@@ -113,6 +129,24 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);
analyzers.put("arabic", ArabicAnalyzerProvider::new);
analyzers.put("armenian", ArmenianAnalyzerProvider::new);
analyzers.put("basque", BasqueAnalyzerProvider::new);
analyzers.put("bengali", BengaliAnalyzerProvider::new);
analyzers.put("brazilian", BrazilianAnalyzerProvider::new);
analyzers.put("bulgarian", BulgarianAnalyzerProvider::new);
analyzers.put("catalan", CatalanAnalyzerProvider::new);
analyzers.put("chinese", ChineseAnalyzerProvider::new);
analyzers.put("cjk", CjkAnalyzerProvider::new);
analyzers.put("czech", CzechAnalyzerProvider::new);
analyzers.put("danish", DanishAnalyzerProvider::new);
analyzers.put("dutch", DutchAnalyzerProvider::new);
analyzers.put("english", EnglishAnalyzerProvider::new);
analyzers.put("finnish", FinnishAnalyzerProvider::new);
analyzers.put("french", FrenchAnalyzerProvider::new);
analyzers.put("galician", GalicianAnalyzerProvider::new);
analyzers.put("german", GermanAnalyzerProvider::new);
return analyzers;
}
@@ -213,10 +247,108 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE,
version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version ->
new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> {
Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> {
Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
CharArraySet.EMPTY_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> {
Analyzer a = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> {
Analyzer a = new ArabicAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> {
Analyzer a = new ArmenianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> {
Analyzer a = new BasqueAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> {
Analyzer a = new BengaliAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> {
Analyzer a = new BrazilianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> {
Analyzer a = new BulgarianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> {
Analyzer a = new CatalanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> {
// only for old indices, best effort
Analyzer a = new StandardAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> {
Analyzer a = new CJKAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> {
Analyzer a = new CzechAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> {
Analyzer a = new DanishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> {
Analyzer a = new DutchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> {
Analyzer a = new EnglishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> {
Analyzer a = new FinnishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> {
Analyzer a = new FrenchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> {
Analyzer a = new GalicianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> {
Analyzer a = new GermanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}));
return analyzers;
}

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAnalyzer> {
private final CzechAnalyzer analyzer;
public CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new CzechAnalyzer(
Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<DanishAnalyzer> {
private final DanishAnalyzer analyzer;
public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new DanishAnalyzer(
Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
private final DutchAnalyzer analyzer;
public DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new DutchAnalyzer(
Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<EnglishAnalyzer> {
private final EnglishAnalyzer analyzer;
public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new EnglishAnalyzer(
Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<FinnishAnalyzer> {
private final FinnishAnalyzer analyzer;
public FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new FinnishAnalyzer(
Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
private final FrenchAnalyzer analyzer;
public FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new FrenchAnalyzer(
Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<GalicianAnalyzer> {
private final GalicianAnalyzer analyzer;
public GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GalicianAnalyzer(
Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()),

View File

@@ -17,19 +17,21 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
private final GermanAnalyzer analyzer;
public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
analyzer = new GermanAnalyzer(
Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),

View File

@@ -1,4 +1,4 @@
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
/*
* Licensed to Elasticsearch under one or more contributor
@@ -48,12 +48,12 @@ public final class SnowballAnalyzer extends Analyzer {
private CharArraySet stopSet;
/** Builds the named analyzer with no stop words. */
public SnowballAnalyzer(String name) {
SnowballAnalyzer(String name) {
this.name = name;
}
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(String name, CharArraySet stopWords) {
SnowballAnalyzer(String name, CharArraySet stopWords) {
this(name);
stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}

View File

@@ -16,7 +16,7 @@
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.Analysis;
import java.util.HashMap;
import java.util.Map;
@@ -60,7 +62,7 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<Snow
private final SnowballAnalyzer analyzer;
public SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
String language = settings.get("language", settings.get("name", "English"));

View File

@@ -1,4 +1,4 @@
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
/*
* Licensed to Elasticsearch under one or more contributor
@@ -30,14 +30,14 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
}
public void testStopwords() throws Exception {
Analyzer a = new SnowballAnalyzer("English",
StandardAnalyzer.STOP_WORDS_SET);
assertAnalyzesTo(a, "the quick brown fox jumped",
new String[]{"quick", "brown", "fox", "jump"});
}
/**
* Test turkish lowercasing
*/
@@ -48,7 +48,7 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
assertAnalyzesTo(a, "he abhorred accents",
@@ -56,4 +56,4 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
assertAnalyzesTo(a, "she abhorred him",
new String[]{"she", "abhor", "him"});
}
}
}

View File

@@ -38,6 +38,25 @@
- length: { tokens: 1 }
- match: { tokens.0.token: বার }
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: bengali
- do:
indices.analyze:
index: test
body:
text: বাড়ী
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: বার }
---
"fingerprint":
- do:
@@ -69,3 +88,507 @@
- length: { tokens: 2 }
- match: { tokens.0.token: foo }
- match: { tokens.1.token: bar }
---
"snowball":
- do:
indices.analyze:
body:
text: the brown foxes
analyzer: snowball
- length: { tokens: 2 }
- match: { tokens.0.token: brown }
- match: { tokens.1.token: fox }
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_snowball:
type: snowball
language: "Dutch"
- do:
indices.analyze:
index: test
body:
text: de bruine vossen
analyzer: my_snowball
- length: { tokens: 2 }
- match: { tokens.0.token: bruin }
- match: { tokens.1.token: voss }
---
"arabic":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: arabic
- do:
indices.analyze:
body:
text: كبيرة
analyzer: arabic
- length: { tokens: 1 }
- match: { tokens.0.token: كبير }
- do:
indices.analyze:
index: test
body:
text: كبيرة
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: كبير }
---
"armenian":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: armenian
- do:
indices.analyze:
body:
text: արծիվ
analyzer: armenian
- length: { tokens: 1 }
- match: { tokens.0.token: արծ }
- do:
indices.analyze:
index: test
body:
text: արծիվ
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: արծ }
---
"basque":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: basque
- do:
indices.analyze:
body:
text: zaldiak
analyzer: basque
- length: { tokens: 1 }
- match: { tokens.0.token: zaldi }
- do:
indices.analyze:
index: test
body:
text: zaldiak
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: zaldi }
---
"brazilian":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: brazilian
- do:
indices.analyze:
body:
text: boataria
analyzer: brazilian
- length: { tokens: 1 }
- match: { tokens.0.token: boat }
- do:
indices.analyze:
index: test
body:
text: boataria
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: boat }
---
"bulgarian":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: bulgarian
- do:
indices.analyze:
body:
text: градове
analyzer: bulgarian
- length: { tokens: 1 }
- match: { tokens.0.token: град }
- do:
indices.analyze:
index: test
body:
text: градове
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: град }
---
"catalan":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: catalan
- do:
indices.analyze:
body:
text: llengües
analyzer: catalan
- length: { tokens: 1 }
- match: { tokens.0.token: llengu }
- do:
indices.analyze:
index: test
body:
text: llengües
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: llengu }
---
"chinese":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: chinese
- do:
indices.analyze:
body:
text: only for old indices
analyzer: chinese
- length: { tokens: 3 }
- match: { tokens.0.token: only }
- match: { tokens.1.token: old }
- match: { tokens.2.token: indices }
- do:
indices.analyze:
index: test
body:
text: only for old indices
analyzer: my_analyzer
- length: { tokens: 3 }
- match: { tokens.0.token: only }
- match: { tokens.1.token: old }
- match: { tokens.2.token: indices }
---
"cjk":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: cjk
- do:
indices.analyze:
body:
text: 多くの
analyzer: cjk
- length: { tokens: 2 }
- match: { tokens.0.token: 多く }
- match: { tokens.1.token: くの }
- do:
indices.analyze:
index: test
body:
text: 多くの
analyzer: my_analyzer
- length: { tokens: 2 }
- match: { tokens.0.token: 多く }
- match: { tokens.1.token: くの }
---
"czech":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: czech
- do:
indices.analyze:
body:
text: Pokud mluvime o volnem
analyzer: czech
- length: { tokens: 2 }
- match: { tokens.0.token: mluvim }
- match: { tokens.1.token: voln }
- do:
indices.analyze:
index: test
body:
text: Pokud mluvime o volnem
analyzer: my_analyzer
- length: { tokens: 2 }
- match: { tokens.0.token: mluvim }
- match: { tokens.1.token: voln }
---
"danish":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: danish
- do:
indices.analyze:
body:
text: undersøgelse
analyzer: danish
- length: { tokens: 1 }
- match: { tokens.0.token: undersøg }
- do:
indices.analyze:
index: test
body:
text: undersøgelse
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: undersøg }
---
"dutch":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: dutch
- do:
indices.analyze:
body:
text: lidstaten
analyzer: dutch
- length: { tokens: 1 }
- match: { tokens.0.token: lidstat }
- do:
indices.analyze:
index: test
body:
text: lidstaten
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: lidstat }
---
"english":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: english
- do:
indices.analyze:
body:
text: books
analyzer: english
- length: { tokens: 1 }
- match: { tokens.0.token: book }
- do:
indices.analyze:
index: test
body:
text: books
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: book }
---
"finnish":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: finnish
- do:
indices.analyze:
body:
text: edeltäjiinsä
analyzer: finnish
- length: { tokens: 1 }
- match: { tokens.0.token: edeltäj }
- do:
indices.analyze:
index: test
body:
text: edeltäjiinsä
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: edeltäj }
---
"french":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: french
- do:
indices.analyze:
body:
text: sécuritaires
analyzer: french
- length: { tokens: 1 }
- match: { tokens.0.token: securitair }
- do:
indices.analyze:
index: test
body:
text: sécuritaires
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: securitair }
---
"galician":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: galician
- do:
indices.analyze:
body:
text: corresponderá
analyzer: galician
- length: { tokens: 1 }
- match: { tokens.0.token: correspond }
- do:
indices.analyze:
index: test
body:
text: corresponderá
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: correspond }
---
"german":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
my_analyzer:
type: german
- do:
indices.analyze:
body:
text: Tischen
analyzer: german
- length: { tokens: 1 }
- match: { tokens.0.token: tisch }
- do:
indices.analyze:
index: test
body:
text: Tischen
analyzer: my_analyzer
- length: { tokens: 1 }
- match: { tokens.0.token: tisch }

View File

@@ -0,0 +1,58 @@
---
"Test query string with snowball":
- do:
indices.create:
index: test
body:
mappings:
test:
properties:
field:
type: text
number:
type: integer
- do:
index:
index: test
type: test
id: 1
body: { field: foo bar}
- do:
indices.refresh:
index: [test]
- do:
indices.validate_query:
index: test
q: field:bars
analyzer: snowball
- is_true: valid
- do:
search:
index: test
q: field:bars
analyzer: snowball
- match: {hits.total: 1}
- do:
explain:
index: test
type: test
id: 1
q: field:bars
analyzer: snowball
- is_true: matched
- do:
count:
index: test
q: field:bars
analyzer: snowball
- match: {count : 1}

View File

@@ -27,6 +27,7 @@ import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -109,7 +110,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
}
private void init() throws IOException {
prepareCreate("test").addMapping("test", jsonBuilder().startObject()
Settings.Builder settings = Settings.builder();
settings.put(indexSettings());
settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
settings.put("index.analysis.analyzer.mock_english.filter", "stop");
prepareCreate("test")
.setSettings(settings)
.addMapping("test", jsonBuilder().startObject()
.startObject("test")
.startObject("properties")
.startObject("foo")
@@ -133,7 +140,7 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
.endObject()
.startObject("token_count_without_position_increments")
.field("type", "token_count")
.field("analyzer", "english")
.field("analyzer", "mock_english")
.field("enable_position_increments", false)
.field("store", true)
.endObject()
@@ -214,13 +221,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
assertThat(hit.field("foo.token_count"), not(nullValue()));
assertThat(hit.field("foo.token_count").getValues().size(), equalTo(standardTermCounts.length));
for (int i = 0; i < standardTermCounts.length; i++) {
assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
assertThat(hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
}
assertThat(hit.field("foo.token_count_without_position_increments"), not(nullValue()));
assertThat(hit.field("foo.token_count_without_position_increments").getValues().size(), equalTo(englishTermCounts.length));
for (int i = 0; i < englishTermCounts.length; i++) {
assertThat((Integer) hit.field("foo.token_count_without_position_increments").getValues().get(i),
assertThat(hit.field("foo.token_count_without_position_increments").getValues().get(i),
equalTo(englishTermCounts[i]));
}

View File

@@ -8,14 +8,14 @@
place:
properties:
name:
type: text
type: keyword
- do:
index:
index: test
type: place
id: 1
refresh: true
body: { "name": "bob's house" }
body: { "name": "bob! house" }
- do:
indices.put_mapping:
@@ -24,11 +24,10 @@
body:
properties:
name:
type: text
type: keyword
fields:
english:
type: text
analyzer: english
- do:
search:

View File

@@ -44,14 +44,6 @@
- match: {count : 0}
- do:
count:
index: test
q: field:bars
analyzer: snowball
- match: {count : 1}
- do:
count:
index: test

View File

@@ -50,16 +50,6 @@
- is_false: matched
- do:
explain:
index: test
type: test
id: 1
q: field:bars
analyzer: snowball
- is_true: matched
- do:
explain:
index: test

View File

@@ -35,14 +35,6 @@
- is_true: valid
- do:
indices.validate_query:
index: test
q: field:bars
analyzer: snowball
- is_true: valid
- do:
indices.validate_query:
index: test

View File

@@ -44,14 +44,6 @@
- match: {hits.total: 0}
- do:
search:
index: test
q: field:bars
analyzer: snowball
- match: {hits.total: 1}
- do:
search:
index: test

View File

@@ -29,24 +29,7 @@ import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
import org.elasticsearch.index.analysis.BengaliAnalyzerProvider;
import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
@@ -68,7 +51,6 @@ import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
@@ -245,24 +227,6 @@ public final class AnalysisModule {
analyzers.register("stop", StopAnalyzerProvider::new);
analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
analyzers.register("keyword", KeywordAnalyzerProvider::new);
analyzers.register("snowball", SnowballAnalyzerProvider::new);
analyzers.register("arabic", ArabicAnalyzerProvider::new);
analyzers.register("armenian", ArmenianAnalyzerProvider::new);
analyzers.register("basque", BasqueAnalyzerProvider::new);
analyzers.register("bengali", BengaliAnalyzerProvider::new);
analyzers.register("brazilian", BrazilianAnalyzerProvider::new);
analyzers.register("bulgarian", BulgarianAnalyzerProvider::new);
analyzers.register("catalan", CatalanAnalyzerProvider::new);
analyzers.register("chinese", ChineseAnalyzerProvider::new);
analyzers.register("cjk", CjkAnalyzerProvider::new);
analyzers.register("czech", CzechAnalyzerProvider::new);
analyzers.register("danish", DanishAnalyzerProvider::new);
analyzers.register("dutch", DutchAnalyzerProvider::new);
analyzers.register("english", EnglishAnalyzerProvider::new);
analyzers.register("finnish", FinnishAnalyzerProvider::new);
analyzers.register("french", FrenchAnalyzerProvider::new);
analyzers.register("galician", GalicianAnalyzerProvider::new);
analyzers.register("german", GermanAnalyzerProvider::new);
analyzers.register("greek", GreekAnalyzerProvider::new);
analyzers.register("hindi", HindiAnalyzerProvider::new);
analyzers.register("hungarian", HungarianAnalyzerProvider::new);

View File

@@ -20,37 +20,21 @@ package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.ar.ArabicAnalyzer;
import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
import org.apache.lucene.analysis.bn.BengaliAnalyzer;
import org.apache.lucene.analysis.br.BrazilianAnalyzer;
import org.apache.lucene.analysis.ca.CatalanAnalyzer;
import org.apache.lucene.analysis.cjk.CJKAnalyzer;
import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.cz.CzechAnalyzer;
import org.apache.lucene.analysis.da.DanishAnalyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.es.SpanishAnalyzer;
import org.apache.lucene.analysis.eu.BasqueAnalyzer;
import org.apache.lucene.analysis.fa.PersianAnalyzer;
import org.apache.lucene.analysis.fi.FinnishAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.ga.IrishAnalyzer;
import org.apache.lucene.analysis.gl.GalicianAnalyzer;
import org.apache.lucene.analysis.hi.HindiAnalyzer;
import org.apache.lucene.analysis.hu.HungarianAnalyzer;
import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
import org.apache.lucene.analysis.id.IndonesianAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
import org.apache.lucene.analysis.lv.LatvianAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.no.NorwegianAnalyzer;
import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
import org.apache.lucene.analysis.ro.RomanianAnalyzer;
@@ -61,7 +45,6 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
import org.apache.lucene.analysis.th.ThaiAnalyzer;
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.SnowballAnalyzer;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import java.util.Locale;
@@ -129,168 +112,6 @@ public enum PreBuiltAnalyzers {
}
},
SNOWBALL {
@Override
protected Analyzer create(Version version) {
Analyzer analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
analyzer.setVersion(version.luceneVersion);
return analyzer;
}
},
ARABIC {
@Override
protected Analyzer create(Version version) {
Analyzer a = new ArabicAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
ARMENIAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new ArmenianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
BASQUE {
@Override
protected Analyzer create(Version version) {
Analyzer a = new BasqueAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
BENGALI {
@Override
protected Analyzer create(Version version) {
Analyzer a = new BengaliAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
BRAZILIAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new BrazilianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
BULGARIAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new BulgarianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
CATALAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new CatalanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
CHINESE(CachingStrategy.ONE) {
@Override
protected Analyzer create(Version version) {
Analyzer a = new StandardAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
CJK {
@Override
protected Analyzer create(Version version) {
Analyzer a = new CJKAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
CZECH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new CzechAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
DUTCH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new DutchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
DANISH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new DanishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
ENGLISH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new EnglishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
FINNISH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new FinnishAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
FRENCH {
@Override
protected Analyzer create(Version version) {
Analyzer a = new FrenchAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
GALICIAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new GalicianAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
GERMAN {
@Override
protected Analyzer create(Version version) {
Analyzer a = new GermanAnalyzer();
a.setVersion(version.luceneVersion);
return a;
}
},
GREEK {
@Override
protected Analyzer create(Version version) {

View File

@@ -61,14 +61,17 @@ public class PreBuiltAnalyzerTests extends ESSingleNodeTestCase {
}
public void testThatInstancesAreCachedAndReused() {
assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT),
PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT));
// same lucene version should be cached
assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_1),
PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_2));
assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT),
PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT));
// same es version should be cached
assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1),
PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1));
assertNotSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_0),
PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_1));
assertNotSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_0),
PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_1));
// Same Lucene version should be cached:
assertSame(PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_1),
PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_2));
}
public void testThatAnalyzersAreUsedInMapping() throws IOException {

View File

@@ -55,7 +55,6 @@ import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
@@ -87,6 +86,9 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
// Stop filter remains in server as it is part of lucene-core
.put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
.build();
indexService = createIndex("test", settings);
parser = indexService.mapperService().documentMapperParser();
@@ -621,7 +623,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.field("index_options", "offsets")
.endObject().endObject().endObject().endObject());
@@ -637,7 +639,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.field("index_options", "freqs")
.endObject().endObject().endObject().endObject());
@@ -654,7 +656,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.field("index_options", "positions")
.endObject().endObject().endObject().endObject());
@@ -675,7 +677,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.field("term_vector", "with_positions_offsets")
.endObject().endObject().endObject().endObject());
@@ -696,7 +698,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.field("term_vector", "with_positions")
.endObject().endObject().endObject().endObject());
@@ -725,7 +727,7 @@
.startObject("properties")
.startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "my_stop_analyzer")
.field("index_phrases", true)
.endObject()
.startObject("synfield")
@@ -742,20 +744,20 @@
queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
assertThat(q, is(new PhraseQuery("field._index_phrase", "two word")));
assertThat(q, is(new PhraseQuery("field._index_phrase", "two words")));
Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here")));
assertThat(q2, is(new PhraseQuery("field._index_phrase", "three words", "words here")));
Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext);
assertThat(q3, is(new PhraseQuery(1, "field", "two", "word")));
assertThat(q3, is(new PhraseQuery(1, "field", "two", "words")));
Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));
Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
assertThat(q5,
is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build()));
is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build()));
Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
assertThat(q6, is(new MultiPhraseQuery.Builder()
@@ -778,7 +780,7 @@
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
assertTrue(ts.incrementToken());
assertEquals("some english", termAtt.toString());
assertEquals("Some English", termAtt.toString());
}
{
@@ -821,7 +823,7 @@
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 1)
.field("max_chars", 10)
@ -855,7 +857,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes").endObject()
.endObject().endObject()
.endObject().endObject());
@ -880,7 +882,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String illegalMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 1)
.field("max_chars", 10)
@ -903,7 +905,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 11)
.field("max_chars", 10)
@ -920,7 +922,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 0)
.field("max_chars", 10)
@ -937,7 +939,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.startObject("index_prefixes")
.field("min_chars", 1)
.field("max_chars", 25)
@ -954,7 +956,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.field("analyzer", "standard")
.field("index_prefixes", (String) null)
.endObject().endObject()
.endObject().endObject());
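The run of bad-config mappings above exercises the bounds checking on `index_prefixes`. A hypothetical stand-in for those checks (the real validation lives in `TextFieldMapper`; the limits used here — `0 < min_chars <= max_chars < 20` — are inferred from the passing and failing fixtures, not quoted from the server code):

```java
// Hypothetical validator mirroring the constraints these fixtures exercise.
// Assumed bounds, inferred from the tests: 0 < min_chars <= max_chars < 20.
final class IndexPrefixesBounds {
    static void validate(int minChars, int maxChars) {
        if (minChars < 1) {
            throw new IllegalArgumentException("min_chars [" + minChars + "] must be greater than zero");
        }
        if (maxChars >= 20) {
            throw new IllegalArgumentException("max_chars [" + maxChars + "] must be less than 20");
        }
        if (minChars > maxChars) {
            throw new IllegalArgumentException("min_chars [" + minChars + "] must not be greater than max_chars [" + maxChars + "]");
        }
    }
}
```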

View File

@ -1300,7 +1300,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new QueryStringQueryBuilder("the quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.analyzer("stop")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)
@ -1313,7 +1313,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new QueryStringQueryBuilder("the* quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.analyzer("stop")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)

View File

@ -629,7 +629,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new SimpleQueryStringBuilder("the quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.analyzer("stop")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)
@ -642,7 +642,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
Query query = new SimpleQueryStringBuilder("the* quick fox")
.field(STRING_FIELD_NAME)
.analyzer("english")
.analyzer("stop")
.toQuery(createShardContext());
BooleanQuery expected = new BooleanQuery.Builder()
.add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)

View File

@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;
@ -36,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
@ -66,9 +70,11 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
@ -113,7 +119,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class);
return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class);
}
public void testHighlightingWithStoredKeyword() throws IOException {
@ -765,14 +771,19 @@ public class HighlighterSearchIT extends ESIntegTestCase {
}
private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception {
Settings.Builder settings = Settings.builder();
settings.put(indexSettings());
settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
settings.put("index.analysis.analyzer.mock_english.filter", "mock_snowball");
assertAcked(prepareCreate("test")
.setSettings(settings)
.addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("foo")
.field("type", "text")
.field("term_vector", "with_positions_offsets")
.field("store", true)
.field("analyzer", "english")
.field("analyzer", "mock_english")
.startObject("fields")
.startObject("plain")
.field("type", "text")
@ -785,7 +796,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
.field("type", "text")
.field("term_vector", "with_positions_offsets")
.field("store", true)
.field("analyzer", "english")
.field("analyzer", "mock_english")
.startObject("fields")
.startObject("plain")
.field("type", "text")
@ -2819,7 +2830,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertAcked(prepareCreate("test").setSettings(builder.build())
.addMapping("type1", "field1",
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
"analyzer=english,index_options=offsets"));
"analyzer=standard,index_options=offsets"));
ensureGreen();
client().prepareIndex("test", "type1", "0").setSource(
@ -2983,7 +2994,39 @@ public class HighlighterSearchIT extends ESIntegTestCase {
}
}
public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin {
public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
public final class MockSnowBall extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Sole constructor. */
MockSnowBall(TokenStream in) {
super(in);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.buffer();
final int length = termAtt.length();
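// crude stemming: strip a trailing 's', and a trailing "ing" from longer tokens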
if (buffer[length - 1] == 's') {
termAtt.setLength(length - 1);
}
if (length > 3) {
if (buffer[length - 1] == 'g' && buffer[length - 2] == 'n' && buffer[length - 3] == 'i') {
termAtt.setLength(length - 3);

}
}
return true;
} else {
return false;
}
}
}
@Override
public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
}
@Override
public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
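To see what the mock filter does end to end, here is a quick standalone check (assuming `MockSnowBall` is pulled out of the plugin as a top-level class; above it is declared as an inner class):

```java
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockSnowBallDemo {
    public static void main(String[] args) throws IOException {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("some quick running cars"));
        TokenStream ts = new MockSnowBall(tokenizer); // the filter defined above
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            System.out.print(term.toString() + " "); // prints: some quick runn car
        }
        ts.close();
    }
}
```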

View File

@ -19,6 +19,12 @@
package org.elasticsearch.search.query;
import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
@ -28,12 +34,19 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SimpleQueryStringBuilder;
import org.elasticsearch.index.query.SimpleQueryStringFlag;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
@ -42,14 +55,19 @@ import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.function.Function;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery;
@ -72,11 +90,15 @@ import static org.hamcrest.Matchers.equalTo;
public class SimpleQueryStringIT extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(InternalSettingsPlugin.class); // uses index.version.created
return Arrays.asList(MockAnalysisPlugin.class, InternalSettingsPlugin.class); // uses index.version.created
}
public void testSimpleQueryString() throws ExecutionException, InterruptedException {
createIndex("test");
Settings.Builder settings = Settings.builder();
settings.put(indexSettings());
settings.put("index.analysis.analyzer.mock_snowball.tokenizer", "standard");
settings.put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball");
createIndex("test", settings.build());
indexRandom(true, false,
client().prepareIndex("test", "type1", "1").setSource("body", "foo"),
client().prepareIndex("test", "type1", "2").setSource("body", "bar"),
@ -108,7 +130,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
assertSearchHits(searchResponse, "4", "5");
searchResponse = client().prepareSearch().setQuery(
simpleQueryStringQuery("eggplants").analyzer("snowball")).get();
simpleQueryStringQuery("eggplants").analyzer("mock_snowball")).get();
assertHitCount(searchResponse, 1L);
assertFirstHit(searchResponse, hasId("4"));
@ -312,7 +334,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
.startObject("properties")
.startObject("location")
.field("type", "text")
.field("analyzer", "german")
.field("analyzer", "standard")
.endObject()
.endObject()
.endObject()
@ -583,4 +605,33 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
}
assertThat(hitIds, containsInAnyOrder(ids));
}
public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {
public final class MockSnowBall extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** Sole constructor. */
MockSnowBall(TokenStream in) {
super(in);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
char[] buffer = termAtt.buffer();
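// simpler variant: only strip a single trailing 's' (e.g. "eggplants" -> "eggplant")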
if (buffer[termAtt.length() - 1] == 's') {
termAtt.setLength(termAtt.length() - 1);
}
return true;
} else {
return false;
}
}
}
@Override
public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
}
}
}
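Both test plugins wire the filter the same way: `PreConfiguredTokenFilter.singleton(name, useFilterForMultitermQueries, factory)` registers it under a fixed name, with the second argument `false` keeping the mock stemmer out of prefix/wildcard analysis. The index settings then compose it into a custom analyzer of the same name; a minimal sketch of that wiring, matching the two-setting pattern used in the tests:

```java
import org.elasticsearch.common.settings.Settings;

class MockSnowballSettingsSketch {
    static Settings mockSnowballAnalyzer() {
        // standard tokenizer first, then the pre-configured mock stemmer
        return Settings.builder()
            .put("index.analysis.analyzer.mock_snowball.tokenizer", "standard")
            .put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball")
            .build();
    }
}
```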

View File

@ -87,7 +87,7 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
.setSource(XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("foo").field("type", "text").endObject()
.startObject("bar").field("type", "integer").endObject()
.startObject("baz").field("type", "text").field("analyzer", "snowball").endObject()
.startObject("baz").field("type", "text").field("analyzer", "standard").endObject()
.startObject("pin").startObject("properties").startObject("location").field("type", "geo_point").endObject().endObject().endObject()
.endObject().endObject().endObject())
.execute().actionGet();