Move a number of language analyzers to analysis-common module (#31143)
The following analyzers were moved from the server module to the analysis-common module: `snowball`, `arabic`, `armenian`, `basque`, `bengali`, `brazilian`, `bulgarian`, `catalan`, `chinese`, `cjk`, `czech`, `danish`, `dutch`, `english`, `finnish`, `french`, `galician` and `german`. Relates to #23658
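For background: after this change each of these analyzers is contributed through the `AnalysisPlugin#getAnalyzers` extension point instead of being registered directly in the server's `AnalysisModule`. A minimal sketch of that registration pattern, assuming the 6.x `AnalysisPlugin` interface and showing only two of the eighteen moved analyzers (the full list appears in the `CommonAnalysisPlugin` hunks below):

    import java.util.Map;
    import java.util.TreeMap;

    import org.apache.lucene.analysis.Analyzer;
    import org.elasticsearch.index.analysis.AnalyzerProvider;
    import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
    import org.elasticsearch.plugins.AnalysisPlugin;
    import org.elasticsearch.plugins.Plugin;

    public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
            // TreeMap keeps the registrations in a stable, readable order.
            Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
            // Each entry replaces a registration that previously lived in the
            // server's AnalysisModule; the provider constructors are unchanged.
            analyzers.put("arabic", ArabicAnalyzerProvider::new);
            analyzers.put("german", GermanAnalyzerProvider::new);
            return analyzers;
        }
    }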
parent 435a825a53
commit 07a57cc131
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArabicAnalyzer> {
 
     private final ArabicAnalyzer arabicAnalyzer;
 
-    public ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         arabicAnalyzer = new ArabicAnalyzer(
             Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class ArmenianAnalyzerProvider extends AbstractIndexAnalyzerProvider<ArmenianAnalyzer> {
 
     private final ArmenianAnalyzer analyzer;
 
-    public ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new ArmenianAnalyzer(
             Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BasqueAnalyzerProvider extends AbstractIndexAnalyzerProvider<BasqueAnalyzer> {
 
     private final BasqueAnalyzer analyzer;
 
-    public BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BasqueAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BasqueAnalyzer(
             Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.bn.BengaliAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BengaliAnalyzerProvider extends AbstractIndexAnalyzerProvider<BengaliAnalyzer> {
 
     private final BengaliAnalyzer analyzer;
 
-    public BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BengaliAnalyzer(
             Analysis.parseStopWords(env, settings, BengaliAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BrazilianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BrazilianAnalyzer> {
 
     private final BrazilianAnalyzer analyzer;
 
-    public BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BrazilianAnalyzer(
             Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider<BulgarianAnalyzer> {
 
     private final BulgarianAnalyzer analyzer;
 
-    public BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new BulgarianAnalyzer(
             Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.ca.CatalanAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CatalanAnalyzerProvider extends AbstractIndexAnalyzerProvider<CatalanAnalyzer> {
 
     private final CatalanAnalyzer analyzer;
 
-    public CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new CatalanAnalyzer(
             Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()),
@@ -17,12 +17,13 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
 
 /**
  * Only for old indexes
@@ -31,16 +32,16 @@ public class ChineseAnalyzerProvider extends AbstractIndexAnalyzerProvider<StandardAnalyzer> {
 
     private final StandardAnalyzer analyzer;
 
-    public ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    ChineseAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         // old index: best effort
         analyzer = new StandardAnalyzer();
         analyzer.setVersion(version);
     }
 
     @Override
     public StandardAnalyzer get() {
         return this.analyzer;
     }
 }
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CjkAnalyzerProvider extends AbstractIndexAnalyzerProvider<CJKAnalyzer> {
 
     private final CJKAnalyzer analyzer;
 
-    public CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CjkAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet());
@@ -24,11 +24,17 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
+import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
+import org.apache.lucene.analysis.bn.BengaliAnalyzer;
 import org.apache.lucene.analysis.bn.BengaliNormalizationFilter;
+import org.apache.lucene.analysis.br.BrazilianAnalyzer;
 import org.apache.lucene.analysis.br.BrazilianStemFilter;
+import org.apache.lucene.analysis.ca.CatalanAnalyzer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;
+import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
 import org.apache.lucene.analysis.cjk.CJKWidthFilter;
 import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
@@ -40,14 +46,22 @@ import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.apache.lucene.analysis.cz.CzechStemFilter;
+import org.apache.lucene.analysis.da.DanishAnalyzer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.apache.lucene.analysis.de.GermanNormalizationFilter;
 import org.apache.lucene.analysis.de.GermanStemFilter;
+import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.en.KStemFilter;
 import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
+import org.apache.lucene.analysis.fi.FinnishAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
+import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
 import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
@@ -64,6 +78,7 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.apache.lucene.analysis.ngram.NGramTokenizer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
 import org.apache.lucene.analysis.pattern.PatternTokenizer;
 import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
@@ -73,6 +88,7 @@ import org.apache.lucene.analysis.shingle.ShingleFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.ClassicFilter;
 import org.apache.lucene.analysis.standard.ClassicTokenizer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
 import org.apache.lucene.analysis.th.ThaiTokenizer;
 import org.apache.lucene.analysis.tr.ApostropheFilter;
@@ -113,6 +129,24 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);
         analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
         analyzers.put("pattern", PatternAnalyzerProvider::new);
+        analyzers.put("snowball", SnowballAnalyzerProvider::new);
+        analyzers.put("arabic", ArabicAnalyzerProvider::new);
+        analyzers.put("armenian", ArmenianAnalyzerProvider::new);
+        analyzers.put("basque", BasqueAnalyzerProvider::new);
+        analyzers.put("bengali", BengaliAnalyzerProvider::new);
+        analyzers.put("brazilian", BrazilianAnalyzerProvider::new);
+        analyzers.put("bulgarian", BulgarianAnalyzerProvider::new);
+        analyzers.put("catalan", CatalanAnalyzerProvider::new);
+        analyzers.put("chinese", ChineseAnalyzerProvider::new);
+        analyzers.put("cjk", CjkAnalyzerProvider::new);
+        analyzers.put("czech", CzechAnalyzerProvider::new);
+        analyzers.put("danish", DanishAnalyzerProvider::new);
+        analyzers.put("dutch", DutchAnalyzerProvider::new);
+        analyzers.put("english", EnglishAnalyzerProvider::new);
+        analyzers.put("finnish", FinnishAnalyzerProvider::new);
+        analyzers.put("french", FrenchAnalyzerProvider::new);
+        analyzers.put("galician", GalicianAnalyzerProvider::new);
+        analyzers.put("german", GermanAnalyzerProvider::new);
         return analyzers;
     }
 
@@ -213,10 +247,108 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
     @Override
     public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
         List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
-        analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE,
-            version -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
-        analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version ->
-            new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, version -> {
+            Analyzer a = new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true,
+                CharArraySet.EMPTY_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new ArabicAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new ArmenianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BasqueAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BengaliAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BrazilianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new BulgarianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CatalanAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.LUCENE, version -> {
+            // only for old indices, best effort
+            Analyzer a = new StandardAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CJKAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new CzechAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new DanishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new DutchAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new EnglishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new FinnishAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new FrenchAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new GalicianAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
+        analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, version -> {
+            Analyzer a = new GermanAnalyzer();
+            a.setVersion(version.luceneVersion);
+            return a;
+        }));
         return analyzers;
     }
 
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.cz.CzechAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider<CzechAnalyzer> {
 
     private final CzechAnalyzer analyzer;
 
-    public CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new CzechAnalyzer(
             Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.da.DanishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class DanishAnalyzerProvider extends AbstractIndexAnalyzerProvider<DanishAnalyzer> {
 
     private final DanishAnalyzer analyzer;
 
-    public DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    DanishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new DanishAnalyzer(
             Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class DutchAnalyzerProvider extends AbstractIndexAnalyzerProvider<DutchAnalyzer> {
 
     private final DutchAnalyzer analyzer;
 
-    public DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new DutchAnalyzer(
             Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider<EnglishAnalyzer> {
 
     private final EnglishAnalyzer analyzer;
 
-    public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new EnglishAnalyzer(
             Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.fi.FinnishAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider<FinnishAnalyzer> {
 
     private final FinnishAnalyzer analyzer;
 
-    public FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new FinnishAnalyzer(
             Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class FrenchAnalyzerProvider extends AbstractIndexAnalyzerProvider<FrenchAnalyzer> {
 
     private final FrenchAnalyzer analyzer;
 
-    public FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new FrenchAnalyzer(
             Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider<GalicianAnalyzer> {
 
     private final GalicianAnalyzer analyzer;
 
-    public GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    GalicianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new GalicianAnalyzer(
             Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()),
@@ -17,19 +17,21 @@
  * under the License.
  */
 
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 public class GermanAnalyzerProvider extends AbstractIndexAnalyzerProvider<GermanAnalyzer> {
 
     private final GermanAnalyzer analyzer;
 
-    public GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
         analyzer = new GermanAnalyzer(
             Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
@@ -1,4 +1,4 @@
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 /*
  * Licensed to Elasticsearch under one or more contributor
@@ -48,12 +48,12 @@ public final class SnowballAnalyzer extends Analyzer {
   private CharArraySet stopSet;
 
   /** Builds the named analyzer with no stop words. */
-  public SnowballAnalyzer(String name) {
+  SnowballAnalyzer(String name) {
     this.name = name;
   }
 
   /** Builds the named analyzer with the given stop words. */
-  public SnowballAnalyzer(String name, CharArraySet stopWords) {
+  SnowballAnalyzer(String name, CharArraySet stopWords) {
     this(name);
     stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
   }
@@ -16,7 +16,7 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -26,6 +26,8 @@ import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
+import org.elasticsearch.index.analysis.Analysis;
 
 import java.util.HashMap;
 import java.util.Map;
@@ -60,7 +62,7 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider<SnowballAnalyzer> {
 
     private final SnowballAnalyzer analyzer;
 
-    public SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+    SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
         super(indexSettings, name, settings);
 
         String language = settings.get("language", settings.get("name", "English"));
@@ -1,4 +1,4 @@
-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;
 
 /*
  * Licensed to Elasticsearch under one or more contributor
@@ -30,14 +30,14 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
   }
 
   public void testStopwords() throws Exception {
     Analyzer a = new SnowballAnalyzer("English",
         StandardAnalyzer.STOP_WORDS_SET);
     assertAnalyzesTo(a, "the quick brown fox jumped",
         new String[]{"quick", "brown", "fox", "jump"});
   }
 
   /**
    * Test turkish lowercasing
    */
@@ -48,7 +48,7 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "AĞACI", new String[] { "ağaç" });
   }
 
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new SnowballAnalyzer("English");
     assertAnalyzesTo(a, "he abhorred accents",
@@ -56,4 +56,4 @@ public class SnowballAnalyzerTests extends ESTokenStreamTestCase {
     assertAnalyzesTo(a, "she abhorred him",
         new String[]{"she", "abhor", "him"});
   }
 }
@@ -38,6 +38,25 @@
     - length: { tokens: 1 }
     - match: { tokens.0.token: বার }
 
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: bengali
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: বাড়ী
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: বার }
+
 ---
 "fingerprint":
     - do:
@@ -69,3 +88,507 @@
 - length: { tokens: 2 }
 - match: { tokens.0.token: foo }
 - match: { tokens.1.token: bar }
+
+---
+"snowball":
+    - do:
+        indices.analyze:
+            body:
+                text: the brown foxes
+                analyzer: snowball
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: brown }
+    - match: { tokens.1.token: fox }
+
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_snowball:
+                                type: snowball
+                                language: "Dutch"
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: de bruine vossen
+                analyzer: my_snowball
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: bruin }
+    - match: { tokens.1.token: voss }
+
+---
+"arabic":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: arabic
+
+    - do:
+        indices.analyze:
+            body:
+                text: كبيرة
+                analyzer: arabic
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: كبير }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: كبيرة
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: كبير }
+
+---
+"armenian":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: armenian
+
+    - do:
+        indices.analyze:
+            body:
+                text: արծիվ
+                analyzer: armenian
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: արծ }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: արծիվ
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: արծ }
+
+---
+"basque":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: basque
+
+    - do:
+        indices.analyze:
+            body:
+                text: zaldiak
+                analyzer: basque
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: zaldi }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: zaldiak
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: zaldi }
+
+---
+"brazilian":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: brazilian
+
+    - do:
+        indices.analyze:
+            body:
+                text: boataria
+                analyzer: brazilian
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: boat }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: boataria
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: boat }
+
+---
+"bulgarian":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: bulgarian
+
+    - do:
+        indices.analyze:
+            body:
+                text: градове
+                analyzer: bulgarian
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: град }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: градове
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: град }
+
+---
+"catalan":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: catalan
+
+    - do:
+        indices.analyze:
+            body:
+                text: llengües
+                analyzer: catalan
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: llengu }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: llengües
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: llengu }
+
+---
+"chinese":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: chinese
+
+    - do:
+        indices.analyze:
+            body:
+                text: only for old indices
+                analyzer: chinese
+    - length: { tokens: 3 }
+    - match: { tokens.0.token: only }
+    - match: { tokens.1.token: old }
+    - match: { tokens.2.token: indices }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: only for old indices
+                analyzer: my_analyzer
+    - length: { tokens: 3 }
+    - match: { tokens.0.token: only }
+    - match: { tokens.1.token: old }
+    - match: { tokens.2.token: indices }
+
+---
+"cjk":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: cjk
+
+    - do:
+        indices.analyze:
+            body:
+                text: 多くの
+                analyzer: cjk
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: 多く }
+    - match: { tokens.1.token: くの }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: 多くの
+                analyzer: my_analyzer
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: 多く }
+    - match: { tokens.1.token: くの }
+
+---
+"czech":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: czech
+
+    - do:
+        indices.analyze:
+            body:
+                text: Pokud mluvime o volnem
+                analyzer: czech
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: mluvim }
+    - match: { tokens.1.token: voln }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: Pokud mluvime o volnem
+                analyzer: my_analyzer
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: mluvim }
+    - match: { tokens.1.token: voln }
+
+---
+"danish":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: danish
+
+    - do:
+        indices.analyze:
+            body:
+                text: undersøgelse
+                analyzer: danish
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: undersøg }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: undersøgelse
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: undersøg }
+
+---
+"dutch":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: dutch
+
+    - do:
+        indices.analyze:
+            body:
+                text: lidstaten
+                analyzer: dutch
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: lidstat }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: lidstaten
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: lidstat }
+
+---
+"english":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: english
+
+    - do:
+        indices.analyze:
+            body:
+                text: books
+                analyzer: english
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: book }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: books
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: book }
+
+---
+"finnish":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: finnish
+
+    - do:
+        indices.analyze:
+            body:
+                text: edeltäjiinsä
+                analyzer: finnish
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: edeltäj }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: edeltäjiinsä
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: edeltäj }
+
+---
+"french":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: french
+
+    - do:
+        indices.analyze:
+            body:
+                text: sécuritaires
+                analyzer: french
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: securitair }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: sécuritaires
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: securitair }
+
+---
+"galician":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: galician
+
+    - do:
+        indices.analyze:
+            body:
+                text: corresponderá
+                analyzer: galician
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: correspond }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: corresponderá
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: correspond }
+
+---
+"german":
+    - do:
+        indices.create:
+            index: test
+            body:
+                settings:
+                    analysis:
+                        analyzer:
+                            my_analyzer:
+                                type: german
+
+    - do:
+        indices.analyze:
+            body:
+                text: Tischen
+                analyzer: german
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: tisch }
+
+    - do:
+        indices.analyze:
+            index: test
+            body:
+                text: Tischen
+                analyzer: my_analyzer
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: tisch }
@@ -0,0 +1,58 @@
+---
+"Test query string with snowball":
+    - do:
+        indices.create:
+            index: test
+            body:
+                mappings:
+                    test:
+                        properties:
+                            field:
+                                type: text
+                            number:
+                                type: integer
+
+    - do:
+        index:
+            index: test
+            type: test
+            id: 1
+            body: { field: foo bar}
+
+    - do:
+        indices.refresh:
+            index: [test]
+
+    - do:
+        indices.validate_query:
+            index: test
+            q: field:bars
+            analyzer: snowball
+
+    - is_true: valid
+
+    - do:
+        search:
+            index: test
+            q: field:bars
+            analyzer: snowball
+
+    - match: {hits.total: 1}
+
+    - do:
+        explain:
+            index: test
+            type: test
+            id: 1
+            q: field:bars
+            analyzer: snowball
+
+    - is_true: matched
+
+    - do:
+        count:
+            index: test
+            q: field:bars
+            analyzer: snowball
+
+    - match: {count : 1}
@@ -27,6 +27,7 @@ import org.elasticsearch.action.bulk.BulkResponse;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
 import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.search.SearchHit;
 import org.elasticsearch.search.aggregations.AggregationBuilders;
@@ -109,7 +110,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
     }
 
     private void init() throws IOException {
-        prepareCreate("test").addMapping("test", jsonBuilder().startObject()
+        Settings.Builder settings = Settings.builder();
+        settings.put(indexSettings());
+        settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
+        settings.put("index.analysis.analyzer.mock_english.filter", "stop");
+        prepareCreate("test")
+            .setSettings(settings)
+            .addMapping("test", jsonBuilder().startObject()
                 .startObject("test")
                     .startObject("properties")
                         .startObject("foo")
@@ -133,7 +140,7 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
                         .endObject()
                         .startObject("token_count_without_position_increments")
                             .field("type", "token_count")
-                            .field("analyzer", "english")
+                            .field("analyzer", "mock_english")
                             .field("enable_position_increments", false)
                             .field("store", true)
                         .endObject()
@@ -214,13 +221,13 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase {
         assertThat(hit.field("foo.token_count"), not(nullValue()));
         assertThat(hit.field("foo.token_count").getValues().size(), equalTo(standardTermCounts.length));
         for (int i = 0; i < standardTermCounts.length; i++) {
-            assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
+            assertThat(hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i]));
         }
 
         assertThat(hit.field("foo.token_count_without_position_increments"), not(nullValue()));
         assertThat(hit.field("foo.token_count_without_position_increments").getValues().size(), equalTo(englishTermCounts.length));
         for (int i = 0; i < englishTermCounts.length; i++) {
-            assertThat((Integer) hit.field("foo.token_count_without_position_increments").getValues().get(i),
+            assertThat(hit.field("foo.token_count_without_position_increments").getValues().get(i),
                 equalTo(englishTermCounts[i]));
         }
 
@@ -8,14 +8,14 @@
               place:
                 properties:
                   name:
-                    type: text
+                    type: keyword
 - do:
     index:
         index: test
         type: place
         id: 1
         refresh: true
-        body: { "name": "bob's house" }
+        body: { "name": "bob! house" }
 
 - do:
     indices.put_mapping:
@@ -24,11 +24,10 @@
     body:
       properties:
         name:
-          type: text
+          type: keyword
           fields:
             english:
               type: text
              analyzer: english
 
 - do:
     search:
@@ -44,14 +44,6 @@
 
 - match: {count : 0}
 
-- do:
-    count:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-- match: {count : 1}
-
 - do:
     count:
         index: test
@@ -50,16 +50,6 @@
 
 - is_false: matched
 
-- do:
-    explain:
-        index: test
-        type: test
-        id: 1
-        q: field:bars
-        analyzer: snowball
-
-- is_true: matched
-
 - do:
     explain:
         index: test
@@ -35,14 +35,6 @@
 
 - is_true: valid
 
-- do:
-    indices.validate_query:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-- is_true: valid
-
 - do:
     indices.validate_query:
         index: test
@@ -44,14 +44,6 @@
 
 - match: {hits.total: 0}
 
-- do:
-    search:
-        index: test
-        q: field:bars
-        analyzer: snowball
-
-- match: {hits.total: 1}
-
 - do:
     search:
         index: test
@@ -29,24 +29,7 @@ import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.index.analysis.AnalysisRegistry;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
-import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
-import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
-import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
-import org.elasticsearch.index.analysis.BengaliAnalyzerProvider;
-import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
-import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
-import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
-import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
-import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
-import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
-import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
-import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
-import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
-import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
-import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
 import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
 import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
 import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
@@ -68,7 +51,6 @@ import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
 import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
 import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
 import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
-import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
 import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
 import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
 import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
@@ -245,24 +227,6 @@ public final class AnalysisModule {
         analyzers.register("stop", StopAnalyzerProvider::new);
         analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
         analyzers.register("keyword", KeywordAnalyzerProvider::new);
-        analyzers.register("snowball", SnowballAnalyzerProvider::new);
-        analyzers.register("arabic", ArabicAnalyzerProvider::new);
-        analyzers.register("armenian", ArmenianAnalyzerProvider::new);
-        analyzers.register("basque", BasqueAnalyzerProvider::new);
-        analyzers.register("bengali", BengaliAnalyzerProvider::new);
-        analyzers.register("brazilian", BrazilianAnalyzerProvider::new);
-        analyzers.register("bulgarian", BulgarianAnalyzerProvider::new);
-        analyzers.register("catalan", CatalanAnalyzerProvider::new);
-        analyzers.register("chinese", ChineseAnalyzerProvider::new);
-        analyzers.register("cjk", CjkAnalyzerProvider::new);
-        analyzers.register("czech", CzechAnalyzerProvider::new);
-        analyzers.register("danish", DanishAnalyzerProvider::new);
-        analyzers.register("dutch", DutchAnalyzerProvider::new);
-        analyzers.register("english", EnglishAnalyzerProvider::new);
-        analyzers.register("finnish", FinnishAnalyzerProvider::new);
-        analyzers.register("french", FrenchAnalyzerProvider::new);
-        analyzers.register("galician", GalicianAnalyzerProvider::new);
-        analyzers.register("german", GermanAnalyzerProvider::new);
         analyzers.register("greek", GreekAnalyzerProvider::new);
         analyzers.register("hindi", HindiAnalyzerProvider::new);
         analyzers.register("hungarian", HungarianAnalyzerProvider::new);
@@ -20,37 +20,21 @@ package org.elasticsearch.indices.analysis;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.ar.ArabicAnalyzer;
-import org.apache.lucene.analysis.bg.BulgarianAnalyzer;
-import org.apache.lucene.analysis.bn.BengaliAnalyzer;
-import org.apache.lucene.analysis.br.BrazilianAnalyzer;
-import org.apache.lucene.analysis.ca.CatalanAnalyzer;
-import org.apache.lucene.analysis.cjk.CJKAnalyzer;
 import org.apache.lucene.analysis.ckb.SoraniAnalyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.cz.CzechAnalyzer;
-import org.apache.lucene.analysis.da.DanishAnalyzer;
-import org.apache.lucene.analysis.de.GermanAnalyzer;
 import org.apache.lucene.analysis.el.GreekAnalyzer;
-import org.apache.lucene.analysis.en.EnglishAnalyzer;
 import org.apache.lucene.analysis.es.SpanishAnalyzer;
-import org.apache.lucene.analysis.eu.BasqueAnalyzer;
 import org.apache.lucene.analysis.fa.PersianAnalyzer;
-import org.apache.lucene.analysis.fi.FinnishAnalyzer;
-import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.ga.IrishAnalyzer;
-import org.apache.lucene.analysis.gl.GalicianAnalyzer;
 import org.apache.lucene.analysis.hi.HindiAnalyzer;
 import org.apache.lucene.analysis.hu.HungarianAnalyzer;
-import org.apache.lucene.analysis.hy.ArmenianAnalyzer;
 import org.apache.lucene.analysis.id.IndonesianAnalyzer;
 import org.apache.lucene.analysis.it.ItalianAnalyzer;
 import org.apache.lucene.analysis.lt.LithuanianAnalyzer;
 import org.apache.lucene.analysis.lv.LatvianAnalyzer;
-import org.apache.lucene.analysis.nl.DutchAnalyzer;
 import org.apache.lucene.analysis.no.NorwegianAnalyzer;
 import org.apache.lucene.analysis.pt.PortugueseAnalyzer;
 import org.apache.lucene.analysis.ro.RomanianAnalyzer;
|
@ -61,7 +45,6 @@ import org.apache.lucene.analysis.sv.SwedishAnalyzer;
|
|||
import org.apache.lucene.analysis.th.ThaiAnalyzer;
|
||||
import org.apache.lucene.analysis.tr.TurkishAnalyzer;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.index.analysis.SnowballAnalyzer;
|
||||
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
|
||||
|
||||
import java.util.Locale;
|
||||
|
@@ -129,168 +112,6 @@ public enum PreBuiltAnalyzers {
        }
    },

    SNOWBALL {
        @Override
        protected Analyzer create(Version version) {
            Analyzer analyzer = new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET);
            analyzer.setVersion(version.luceneVersion);
            return analyzer;
        }
    },

    ARABIC {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new ArabicAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    ARMENIAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new ArmenianAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    BASQUE {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new BasqueAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    BENGALI {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new BengaliAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    BRAZILIAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new BrazilianAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    BULGARIAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new BulgarianAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    CATALAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new CatalanAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    CHINESE(CachingStrategy.ONE) {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new StandardAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    CJK {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new CJKAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    CZECH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new CzechAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    DUTCH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new DutchAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    DANISH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new DanishAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    ENGLISH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new EnglishAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    FINNISH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new FinnishAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    FRENCH {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new FrenchAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    GALICIAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new GalicianAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    GERMAN {
        @Override
        protected Analyzer create(Version version) {
            Analyzer a = new GermanAnalyzer();
            a.setVersion(version.luceneVersion);
            return a;
        }
    },

    GREEK {
        @Override
        protected Analyzer create(Version version) {
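Every entry deleted above followed the same pattern the surviving ones keep: build one Lucene Analyzer per version and let the pre-built cache reuse it according to the entry's CachingStrategy. A stripped-down sketch of that per-Lucene-version caching idea, with illustrative names rather than the class edited here, which the cache test below then pins down:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.util.Version;

public enum PreBuiltAnalyzerSketch {
    KEYWORD {
        @Override
        protected Analyzer create(Version luceneVersion) {
            Analyzer a = new KeywordAnalyzer();
            a.setVersion(luceneVersion);
            return a;
        }
    };

    private final Map<Version, Analyzer> cache = new ConcurrentHashMap<>();

    protected abstract Analyzer create(Version luceneVersion);

    /** Same Lucene version: same instance. Different version: a fresh analyzer. */
    public Analyzer getAnalyzer(Version luceneVersion) {
        return cache.computeIfAbsent(luceneVersion, this::create);
    }
}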
@@ -61,14 +61,17 @@ public class PreBuiltAnalyzerTests extends ESSingleNodeTestCase {
    }

    public void testThatInstancesAreCachedAndReused() {
        assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT),
                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.CURRENT));
        // same lucene version should be cached
        assertSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_1),
                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_2_2));
        assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT),
                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.CURRENT));
        // same es version should be cached
        assertSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1),
                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_2_1));
        assertNotSame(PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_0),
                PreBuiltAnalyzers.STANDARD.getAnalyzer(Version.V_5_0_1));

        assertNotSame(PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_0),
                PreBuiltAnalyzers.ARABIC.getAnalyzer(Version.V_5_0_1));
        // Same Lucene version should be cached:
        assertSame(PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_1),
                PreBuiltAnalyzers.STOP.getAnalyzer(Version.V_5_2_2));
    }

    public void testThatAnalyzersAreUsedInMapping() throws IOException {
@@ -55,7 +55,6 @@ import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;

@@ -87,6 +86,9 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
                .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
                .put("index.analysis.analyzer.synonym.tokenizer", "standard")
                .put("index.analysis.analyzer.synonym.filter", "mySynonyms")
                // Stop filter remains in server as it is part of lucene-core
                .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
                .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
                .build();
        indexService = createIndex("test", settings);
        parser = indexService.mapperService().documentMapperParser();
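The new my_stop_analyzer keeps this test self-contained: as the inline comment says, the stop filter is backed by lucene-core, so it stays registered in server even after the language analyzers move out. Roughly what those settings lines define, sketched with Lucene's CustomAnalyzer builder rather than the Elasticsearch analysis registry:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class MyStopAnalyzerSketch {
    public static Analyzer build() throws Exception {
        // standard tokenizer plus the core stop filter, the same chain the
        // index.analysis.analyzer.my_stop_analyzer settings describe
        return CustomAnalyzer.builder()
                .withTokenizer("standard")
                .addTokenFilter("stop")
                .build();
    }
}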
@@ -621,7 +623,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .field("index_options", "offsets")
                .endObject().endObject().endObject().endObject());

@@ -637,7 +639,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .field("index_options", "freqs")
                .endObject().endObject().endObject().endObject());

@@ -654,7 +656,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .field("index_options", "positions")
                .endObject().endObject().endObject().endObject());

@@ -675,7 +677,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .field("term_vector", "with_positions_offsets")
                .endObject().endObject().endObject().endObject());

@@ -696,7 +698,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .field("term_vector", "with_positions")
                .endObject().endObject().endObject().endObject());
@@ -725,7 +727,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
                .startObject("properties")
                .startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "my_stop_analyzer")
                .field("index_phrases", true)
                .endObject()
                .startObject("synfield")
@@ -742,20 +744,20 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);

        Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext);
        assertThat(q, is(new PhraseQuery("field._index_phrase", "two word")));
        assertThat(q, is(new PhraseQuery("field._index_phrase", "two words")));

        Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext);
        assertThat(q2, is(new PhraseQuery("field._index_phrase", "three word", "word here")));
        assertThat(q2, is(new PhraseQuery("field._index_phrase", "three words", "words here")));

        Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext);
        assertThat(q3, is(new PhraseQuery(1, "field", "two", "word")));
        assertThat(q3, is(new PhraseQuery(1, "field", "two", "words")));

        Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext);
        assertThat(q4, is(new TermQuery(new Term("field", "singleton"))));

        Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext);
        assertThat(q5,
                is(new PhraseQuery.Builder().add(new Term("field", "sparkl")).add(new Term("field", "stopword"), 2).build()));
                is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build()));

        Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
        assertThat(q6, is(new MultiPhraseQuery.Builder()
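The expected phrase terms change from stemmed pairs like "two word" and "word here" to "two words" and "words here" because the field's analyzer is now the non-stemming my_stop_analyzer, and the field._index_phrase subfield indexes two-word shingles of whatever the analyzer emits. A small sketch of that shingling step with plain Lucene classes, not the mapper's actual wiring:

import java.io.StringReader;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class IndexPhraseShingleSketch {
    public static void main(String[] args) throws Exception {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("three words here"));
        // field._index_phrase indexes 2-shingles of the analyzer output
        ShingleFilter shingles = new ShingleFilter(tokenizer, 2, 2);
        shingles.setOutputUnigrams(false);
        CharTermAttribute term = shingles.addAttribute(CharTermAttribute.class);
        shingles.reset();
        while (shingles.incrementToken()) {
            System.out.println(term); // "three words", then "words here"
        }
        shingles.end();
        shingles.close();
    }
}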
@@ -778,7 +780,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        assertTrue(ts.incrementToken());
        assertEquals("some english", termAtt.toString());
        assertEquals("Some English", termAtt.toString());
    }

    {
@@ -821,7 +823,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes")
                .field("min_chars", 1)
                .field("max_chars", 10)

@@ -855,7 +857,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes").endObject()
                .endObject().endObject()
                .endObject().endObject());

@@ -880,7 +882,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String illegalMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes")
                .field("min_chars", 1)
                .field("max_chars", 10)

@@ -903,7 +905,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes")
                .field("min_chars", 11)
                .field("max_chars", 10)

@@ -920,7 +922,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes")
                .field("min_chars", 0)
                .field("max_chars", 10)

@@ -937,7 +939,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .startObject("index_prefixes")
                .field("min_chars", 1)
                .field("max_chars", 25)

@@ -954,7 +956,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
        String badConfigMapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
                .startObject("properties").startObject("field")
                .field("type", "text")
                .field("analyzer", "english")
                .field("analyzer", "standard")
                .field("index_prefixes", (String) null)
                .endObject().endObject()
                .endObject().endObject());
@@ -1300,7 +1300,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
        Query query = new QueryStringQueryBuilder("the quick fox")
                .field(STRING_FIELD_NAME)
                .analyzer("english")
                .analyzer("stop")
                .toQuery(createShardContext());
        BooleanQuery expected = new BooleanQuery.Builder()
                .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), Occur.SHOULD)

@@ -1313,7 +1313,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase<QueryStr
        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
        Query query = new QueryStringQueryBuilder("the* quick fox")
                .field(STRING_FIELD_NAME)
                .analyzer("english")
                .analyzer("stop")
                .toQuery(createShardContext());
        BooleanQuery expected = new BooleanQuery.Builder()
                .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), Occur.SHOULD)
@@ -629,7 +629,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
        Query query = new SimpleQueryStringBuilder("the quick fox")
                .field(STRING_FIELD_NAME)
                .analyzer("english")
                .analyzer("stop")
                .toQuery(createShardContext());
        BooleanQuery expected = new BooleanQuery.Builder()
                .add(new TermQuery(new Term(STRING_FIELD_NAME, "quick")), BooleanClause.Occur.SHOULD)

@@ -642,7 +642,7 @@ public class SimpleQueryStringBuilderTests extends AbstractQueryTestCase<SimpleQ
        assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0);
        Query query = new SimpleQueryStringBuilder("the* quick fox")
                .field(STRING_FIELD_NAME)
                .analyzer("english")
                .analyzer("stop")
                .toQuery(createShardContext());
        BooleanQuery expected = new BooleanQuery.Builder()
                .add(new PrefixQuery(new Term(STRING_FIELD_NAME, "the")), BooleanClause.Occur.SHOULD)
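Both query-builder test classes swap analyzer("english") for analyzer("stop") for the same reason: the english analyzer now lives in analysis-common, which these server unit tests do not load, while the prebuilt stop analyzer remains in server and still drops "the", so the expected Boolean clauses are unchanged. A rough stand-in for what the stop analyzer does to the query text; the real prebuilt analyzer is Lucene's StopAnalyzer, and this custom chain only approximates it:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StopAnalyzerSketch {
    public static void main(String[] args) throws Exception {
        Analyzer stopLike = CustomAnalyzer.builder()
                .withTokenizer("standard")
                .addTokenFilter("lowercase")
                .addTokenFilter("stop")
                .build();
        try (TokenStream ts = stopLike.tokenStream("body", "the quick fox")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // "quick", then "fox"; "the" is dropped
            }
            ts.end();
        }
    }
}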
@@ -22,6 +22,9 @@ import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.join.ScoreMode;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchRequestBuilder;

@@ -36,6 +39,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.AbstractIndexAnalyzerProvider;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;

@@ -66,9 +70,11 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

@@ -113,7 +119,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockWhitespacePlugin.class);
        return Arrays.asList(InternalSettingsPlugin.class, MockKeywordPlugin.class, MockAnalysisPlugin.class);
    }

    public void testHighlightingWithStoredKeyword() throws IOException {
@@ -765,14 +771,19 @@ public class HighlighterSearchIT extends ESIntegTestCase {
    }

    private void checkMatchedFieldsCase(boolean requireFieldMatch) throws Exception {
        Settings.Builder settings = Settings.builder();
        settings.put(indexSettings());
        settings.put("index.analysis.analyzer.mock_english.tokenizer", "standard");
        settings.put("index.analysis.analyzer.mock_english.filter", "mock_snowball");
        assertAcked(prepareCreate("test")
                .setSettings(settings)
                .addMapping("type1", XContentFactory.jsonBuilder().startObject().startObject("type1")
                        .startObject("properties")
                        .startObject("foo")
                        .field("type", "text")
                        .field("term_vector", "with_positions_offsets")
                        .field("store", true)
                        .field("analyzer", "english")
                        .field("analyzer", "mock_english")
                        .startObject("fields")
                        .startObject("plain")
                        .field("type", "text")

@@ -785,7 +796,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
                        .field("type", "text")
                        .field("term_vector", "with_positions_offsets")
                        .field("store", true)
                        .field("analyzer", "english")
                        .field("analyzer", "mock_english")
                        .startObject("fields")
                        .startObject("plain")
                        .field("type", "text")
@@ -2819,7 +2830,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
        assertAcked(prepareCreate("test").setSettings(builder.build())
                .addMapping("type1", "field1",
                        "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
                        "analyzer=english,index_options=offsets"));
                        "analyzer=standard,index_options=offsets"));
        ensureGreen();

        client().prepareIndex("test", "type1", "0").setSource(
@@ -2983,7 +2994,39 @@ public class HighlighterSearchIT extends ESIntegTestCase {
        }
    }

    public static class MockWhitespacePlugin extends Plugin implements AnalysisPlugin {
    public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {

        public final class MockSnowBall extends TokenFilter {
            private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

            /** Sole constructor. */
            MockSnowBall(TokenStream in) {
                super(in);
            }

            @Override
            public boolean incrementToken() throws IOException {
                if (input.incrementToken()) {
                    final char[] buffer = termAtt.buffer();
                    final int length = termAtt.length();
                    if (buffer[length - 1] == 's') {
                        termAtt.setLength(length - 1);
                    }
                    if (length > 3) {
                        if (buffer[length - 1] == 'g' && buffer[length - 2] == 'n' && buffer[length - 3] == 'i') {
                            termAtt.setLength(length - 3);
                        }
                    }
                    return true;
                } else
                    return false;
            }
        }

        @Override
        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
        }

        @Override
        public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
@@ -19,6 +19,12 @@

package org.elasticsearch.search.query;

import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.analysis.MockLowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;

@@ -28,12 +34,19 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.SimpleQueryStringBuilder;
import org.elasticsearch.index.query.SimpleQueryStringFlag;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;

@@ -42,14 +55,19 @@ import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.function.Function;

import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.simpleQueryStringQuery;
@@ -72,11 +90,15 @@ import static org.hamcrest.Matchers.equalTo;
public class SimpleQueryStringIT extends ESIntegTestCase {
    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return Arrays.asList(InternalSettingsPlugin.class); // uses index.version.created
        return Arrays.asList(MockAnalysisPlugin.class, InternalSettingsPlugin.class); // uses index.version.created
    }

    public void testSimpleQueryString() throws ExecutionException, InterruptedException {
        createIndex("test");
        Settings.Builder settings = Settings.builder();
        settings.put(indexSettings());
        settings.put("index.analysis.analyzer.mock_snowball.tokenizer", "standard");
        settings.put("index.analysis.analyzer.mock_snowball.filter", "mock_snowball");
        createIndex("test", settings.build());
        indexRandom(true, false,
                client().prepareIndex("test", "type1", "1").setSource("body", "foo"),
                client().prepareIndex("test", "type1", "2").setSource("body", "bar"),

@@ -108,7 +130,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
        assertSearchHits(searchResponse, "4", "5");

        searchResponse = client().prepareSearch().setQuery(
                simpleQueryStringQuery("eggplants").analyzer("snowball")).get();
                simpleQueryStringQuery("eggplants").analyzer("mock_snowball")).get();
        assertHitCount(searchResponse, 1L);
        assertFirstHit(searchResponse, hasId("4"));

@@ -312,7 +334,7 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
                .startObject("properties")
                .startObject("location")
                .field("type", "text")
                .field("analyzer", "german")
                .field("analyzer", "standard")
                .endObject()
                .endObject()
                .endObject()
@@ -583,4 +605,33 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
        }
        assertThat(hitIds, containsInAnyOrder(ids));
    }

    public static class MockAnalysisPlugin extends Plugin implements AnalysisPlugin {

        public final class MockSnowBall extends TokenFilter {
            private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

            /** Sole constructor. */
            MockSnowBall(TokenStream in) {
                super(in);
            }

            @Override
            public boolean incrementToken() throws IOException {
                if (input.incrementToken()) {
                    char[] buffer = termAtt.buffer();
                    if (buffer[termAtt.length() - 1] == 's') {
                        termAtt.setLength(termAtt.length() - 1);
                    }
                    return true;
                } else
                    return false;
            }
        }

        @Override
        public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
            return singletonList(PreConfiguredTokenFilter.singleton("mock_snowball", false, MockSnowBall::new));
        }
    }
}
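The MockSnowBall filter defined above is all the "eggplants" assertion needs: it strips a single trailing 's', so the mock_snowball analyzer still matches document 4 without dragging the real snowball filter (now in analysis-common) into a server integration test. A standalone restatement of that filter with a tiny driver; the class names here are illustrative, not part of the commit:

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockSnowBallCheck {

    /** Top-level copy of the test's MockSnowBall: strips one trailing 's'. */
    static final class TrailingSFilter extends TokenFilter {
        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

        TrailingSFilter(TokenStream in) {
            super(in);
        }

        @Override
        public boolean incrementToken() throws IOException {
            if (input.incrementToken() == false) {
                return false;
            }
            if (termAtt.length() > 0 && termAtt.buffer()[termAtt.length() - 1] == 's') {
                termAtt.setLength(termAtt.length() - 1);
            }
            return true;
        }
    }

    public static void main(String[] args) throws Exception {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("eggplants"));
        try (TokenStream ts = new TrailingSFilter(tokenizer)) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // prints "eggplant"
            }
            ts.end();
        }
    }
}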
@@ -87,7 +87,7 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
        .setSource(XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
                .startObject("foo").field("type", "text").endObject()
                .startObject("bar").field("type", "integer").endObject()
                .startObject("baz").field("type", "text").field("analyzer", "snowball").endObject()
                .startObject("baz").field("type", "text").field("analyzer", "standard").endObject()
                .startObject("pin").startObject("properties").startObject("location").field("type", "geo_point").endObject().endObject().endObject()
                .endObject().endObject().endObject())
        .execute().actionGet();
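Here too, snowball moved out with the rest, so this server-side validate test falls back to the always-available standard analyzer, which lowercases but does not stem. A quick illustration of that difference:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class StandardAnalyzerSketch {
    public static void main(String[] args) throws Exception {
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             TokenStream ts = analyzer.tokenStream("baz", "Running Dogs")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();
            while (ts.incrementToken()) {
                System.out.println(term); // "running", "dogs": lowercased, not stemmed
            }
            ts.end();
        }
    }
}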