Moved more token filters to analysis-common module.
The following token filters were moved: `edge_ngram`, `ngram`, `uppercase`, `lowercase`, `length`, `flatten_graph` and `unique`. Relates to #23658.
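With this change, the moved filters are registered through the `AnalysisPlugin#getTokenFilters` extension point in `CommonAnalysisPlugin` instead of directly in `AnalysisModule`. A minimal sketch of that registration pattern, condensed from the `CommonAnalysisPlugin` hunk further down (the import paths and the `TreeMap` choice are assumptions about where these types lived at the time, not part of this diff):

```java
package org.elasticsearch.analysis.common;

import java.util.Map;
import java.util.TreeMap;

import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        // Sketch only: each settings name maps to a factory constructor reference.
        // The camelCase aliases ("nGram", "edgeNGram") stay registered alongside
        // the canonical names for compatibility, mirroring the hunk below.
        Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
        filters.put("unique", UniqueTokenFilterFactory::new);
        filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
        filters.put("length", LengthTokenFilterFactory::new);
        filters.put("lowercase", LowerCaseTokenFilterFactory::new);
        filters.put("uppercase", UpperCaseTokenFilterFactory::new);
        filters.put("nGram", NGramTokenFilterFactory::new);
        filters.put("ngram", NGramTokenFilterFactory::new);
        filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
        filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
        return filters;
    }
}
```

One practical consequence, visible in the test changes below: indices that configure these filters (for example an `ngram` filter under `analysis.filter`) now depend on the `analysis-common` module, so the affected core integration tests either moved into the module or dropped their ngram-based analyzers.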
parent 2a78b0a19f
commit 428e70758a
@@ -571,7 +571,6 @@
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
-<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]NGramTokenizerFactoryTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
 <suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />
@@ -54,14 +54,12 @@ import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
 import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
 import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
 import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
 import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
 import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
 import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
 import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
@@ -83,14 +81,11 @@ import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
 import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
 import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
@@ -133,8 +128,6 @@ import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
 import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
 import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
 import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
 import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
@@ -209,25 +202,16 @@ public final class AnalysisModule {
         NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
         tokenFilters.register("stop", StopTokenFilterFactory::new);
         tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
-        tokenFilters.register("length", LengthTokenFilterFactory::new);
-        tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
-        tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
         tokenFilters.register("kstem", KStemTokenFilterFactory::new);
         tokenFilters.register("standard", StandardTokenFilterFactory::new);
-        tokenFilters.register("nGram", NGramTokenFilterFactory::new);
-        tokenFilters.register("ngram", NGramTokenFilterFactory::new);
-        tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
-        tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
         tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
         tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
-        tokenFilters.register("unique", UniqueTokenFilterFactory::new);
         tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
         tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
         tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
         tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
         tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
         tokenFilters.register("elision", ElisionTokenFilterFactory::new);
-        tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
         tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
         tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
         tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
@@ -19,7 +19,6 @@
 package org.elasticsearch.search.fetch.subphase.highlight;

 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
-
 import org.apache.lucene.search.join.ScoreMode;
 import org.elasticsearch.action.index.IndexRequestBuilder;
 import org.elasticsearch.action.search.SearchRequestBuilder;
@@ -214,54 +213,6 @@ public class HighlighterSearchIT extends ESIntegTestCase {
         assertHighlight(search, 0, "name", 0, startsWith("<em>abc</em> <em>abc</em> <em>abc</em> <em>abc</em>"));
     }

-    public void testNgramHighlighting() throws IOException {
-        assertAcked(prepareCreate("test")
-            .addMapping("test",
-                "name", "type=text,analyzer=name_index_analyzer,search_analyzer=name_search_analyzer,"
-                    + "term_vector=with_positions_offsets",
-                "name2", "type=text,analyzer=name2_index_analyzer,search_analyzer=name_search_analyzer,"
-                    + "term_vector=with_positions_offsets")
-            .setSettings(Settings.builder()
-                .put(indexSettings())
-                .put("analysis.filter.my_ngram.max_gram", 20)
-                .put("analysis.filter.my_ngram.min_gram", 1)
-                .put("analysis.filter.my_ngram.type", "ngram")
-                .put("analysis.tokenizer.my_ngramt.max_gram", 20)
-                .put("analysis.tokenizer.my_ngramt.min_gram", 1)
-                .put("analysis.tokenizer.my_ngramt.token_chars", "letter,digit")
-                .put("analysis.tokenizer.my_ngramt.type", "ngram")
-                .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt")
-                .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace")
-                .put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram")
-                .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")));
-        client().prepareIndex("test", "test", "1")
-            .setSource("name", "logicacmg ehemals avinci - the know how company",
-                "name2", "logicacmg ehemals avinci - the know how company").get();
-        refresh();
-        ensureGreen();
-        SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m"))
-            .highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0,
-            equalTo("<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name", "logica")).highlighter(new HighlightBuilder().field("name")).get();
-        assertHighlight(search, 0, "name", 0, equalTo("<em>logica</em>cmg ehemals avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).highlighter(new HighlightBuilder().field("name2"))
-            .get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).highlighter(new HighlightBuilder().field("name2"))
-            .get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"));
-
-        search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).highlighter(new HighlightBuilder().field("name2")).get();
-        assertHighlight(search, 0, "name2", 0, equalTo("<em>logicacmg</em> ehemals avinci - the know how company"));
-    }
-
     public void testEnsureNoNegativeOffsets() throws Exception {
         assertAcked(prepareCreate("test")
             .addMapping("type1",
@@ -19,16 +19,6 @@

 package org.elasticsearch.search.query;

-import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
-import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
-import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.containsString;
-import static org.hamcrest.Matchers.equalTo;
-
 import org.apache.lucene.util.LuceneTestCase;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
@@ -56,6 +46,16 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;

+import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
+import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.containsString;
+import static org.hamcrest.Matchers.equalTo;
+
 public class QueryStringIT extends ESIntegTestCase {
     @Override
     protected Collection<Class<? extends Plugin>> nodePlugins() {
@@ -91,10 +91,6 @@ public class QueryStringIT extends ESIntegTestCase {
         resp = client().prepareSearch("test").setQuery(queryStringQuery("Bar")).get();
         assertHitCount(resp, 3L);
         assertHits(resp.getHits(), "1", "2", "3");
-
-        resp = client().prepareSearch("test").setQuery(queryStringQuery("foa")).get();
-        assertHitCount(resp, 1L);
-        assertHits(resp.getHits(), "3");
     }

     public void testWithDate() throws Exception {
@@ -161,8 +157,6 @@
         assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(queryStringQuery("Baz")).get();
         assertHits(resp.getHits(), "1");
-        resp = client().prepareSearch("test").setQuery(queryStringQuery("sbaz")).get();
-        assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(queryStringQuery("19")).get();
         assertHits(resp.getHits(), "1");
         // nested doesn't match because it's hidden
@@ -223,11 +217,11 @@
         indexRandom(true, false, reqs);

         SearchResponse resp = client().prepareSearch("test2").setQuery(
-                queryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+                queryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
         assertHitCount(resp, 0L);

         resp = client().prepareSearch("test2").setQuery(
-                queryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+                queryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
         assertHits(resp.getHits(), "1");
         assertHitCount(resp, 1L);

@@ -398,10 +398,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Bar")).get();
         assertHitCount(resp, 3L);
         assertHits(resp.getHits(), "1", "2", "3");
-
-        resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("foa")).get();
-        assertHitCount(resp, 1L);
-        assertHits(resp.getHits(), "3");
     }

     public void testWithDate() throws Exception {
@@ -480,8 +476,6 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("Baz")).get();
         assertHits(resp.getHits(), "1");
-        resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("sbaz")).get();
-        assertHits(resp.getHits(), "1");
         resp = client().prepareSearch("test").setQuery(simpleQueryStringQuery("19")).get();
         assertHits(resp.getHits(), "1");
         // nested doesn't match because it's hidden
@@ -547,11 +541,11 @@ public class SimpleQueryStringIT extends ESIntegTestCase {
         indexRandom(true, false, reqs);

         SearchResponse resp = client().prepareSearch("test").setQuery(
-                simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND)).get();
+                simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND)).get();
         assertHitCount(resp, 0L);

         resp = client().prepareSearch("test").setQuery(
-                simpleQueryStringQuery("foo eggplent").defaultOperator(Operator.AND).useAllFields(true)).get();
+                simpleQueryStringQuery("foo eggplant").defaultOperator(Operator.AND).useAllFields(true)).get();
         assertHits(resp.getHits(), "1");
         assertHitCount(resp, 1L);

@@ -6,22 +6,7 @@
     "version": {
       "created": "5000099"
     },
-    "analysis": {
-      "analyzer": {
-        "my_ngrams": {
-          "type": "custom",
-          "tokenizer": "standard",
-          "filter": ["my_ngrams"]
-        }
-      },
-      "filter": {
-        "my_ngrams": {
-          "type": "ngram",
-          "min_gram": 2,
-          "max_gram": 2
-        }
-      }
-    }
+    "query.default_field": "f1"
   }
 },
 "mappings": {
@@ -31,7 +16,7 @@
     },
     "properties": {
       "f1": {"type": "text"},
-      "f2": {"type": "text", "analyzer": "my_ngrams"}
+      "f2": {"type": "text"}
     }
   }
 }
@@ -2,23 +2,7 @@
   "settings": {
     "index": {
       "number_of_shards": 1,
-      "number_of_replicas": 0,
-      "analysis": {
-        "analyzer": {
-          "my_ngrams": {
-            "type": "custom",
-            "tokenizer": "standard",
-            "filter": ["my_ngrams"]
-          }
-        },
-        "filter": {
-          "my_ngrams": {
-            "type": "ngram",
-            "min_gram": 2,
-            "max_gram": 2
-          }
-        }
-      }
+      "number_of_replicas": 0
     }
   },
 "mappings": {
@@ -26,7 +10,7 @@
     "properties": {
       "f1": {"type": "text"},
       "f2": {"type": "keyword"},
-      "f3": {"type": "text", "analyzer": "my_ngrams"},
+      "f3": {"type": "text"},
       "f4": {
         "type": "text",
         "index_options": "docs"
@@ -52,7 +52,6 @@ import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
 import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.TrimFilter;
 import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilter;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -98,6 +97,15 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.put("trim", TrimTokenFilterFactory::new);
         filters.put("word_delimiter", WordDelimiterTokenFilterFactory::new);
         filters.put("word_delimiter_graph", WordDelimiterGraphTokenFilterFactory::new);
+        filters.put("unique", UniqueTokenFilterFactory::new);
+        filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
+        filters.put("length", LengthTokenFilterFactory::new);
+        filters.put("lowercase", LowerCaseTokenFilterFactory::new);
+        filters.put("uppercase", UpperCaseTokenFilterFactory::new);
+        filters.put("nGram", NGramTokenFilterFactory::new);
+        filters.put("ngram", NGramTokenFilterFactory::new);
+        filters.put("edgeNGram", EdgeNGramTokenFilterFactory::new);
+        filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
         return filters;
     }

@@ -172,7 +180,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.add(PreConfiguredTokenFilter.singleton("nGram", false, NGramTokenFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("porter_stem", false, PorterStemFilter::new));
-        filters.add(PreConfiguredTokenFilter.singleton("reverse", false, input -> new ReverseStringFilter(input)));
+        filters.add(PreConfiguredTokenFilter.singleton("reverse", false, ReverseStringFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("russian_stem", false, input -> new SnowballFilter(input, "Russian")));
         filters.add(PreConfiguredTokenFilter.singleton("scandinavian_folding", true, ScandinavianFoldingFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("scandinavian_normalization", true, ScandinavianNormalizationFilter::new));
@@ -185,7 +193,7 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
         filters.add(PreConfiguredTokenFilter.singleton("trim", false, TrimFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("truncate", false, input -> new TruncateTokenFilter(input, 10)));
         filters.add(PreConfiguredTokenFilter.singleton("type_as_payload", false, TypeAsPayloadTokenFilter::new));
-        filters.add(PreConfiguredTokenFilter.singleton("unique", false, input -> new UniqueTokenFilter(input)));
+        filters.add(PreConfiguredTokenFilter.singleton("unique", false, UniqueTokenFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("uppercase", true, UpperCaseFilter::new));
         filters.add(PreConfiguredTokenFilter.singleton("word_delimiter", false, input ->
             new WordDelimiterFilter(input,
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.reverse.ReverseStringFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;


 public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -38,7 +39,7 @@ public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {
     public static final int SIDE_BACK = 2;
     private final int side;

-    public EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    EdgeNGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
@@ -17,17 +17,18 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.FlattenGraphFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class FlattenGraphTokenFilterFactory extends AbstractTokenFilterFactory {

-    public FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    FlattenGraphTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
     }

@@ -17,13 +17,14 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.miscellaneous.LengthFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {

@@ -33,7 +34,7 @@ public class LengthTokenFilterFactory extends AbstractTokenFilterFactory {
     // ancient unsupported option
     private static final String ENABLE_POS_INC_KEY = "enable_position_increments";

-    public LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    LengthTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         min = settings.getAsInt("min", 0);
         max = settings.getAsInt("max", Integer.MAX_VALUE);
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -27,6 +27,8 @@ import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;

 /**
  * Factory for {@link LowerCaseFilter} and some language-specific variants
@@ -41,7 +43,7 @@ public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

     private final String lang;

-    public LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    LowerCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.lang = settings.get("language", null);
     }
@@ -17,13 +17,14 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.ngram.NGramTokenFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;


 public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
@@ -33,7 +34,7 @@ public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {
     private final int maxGram;


-    public NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    NGramTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenFilter.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenFilter.DEFAULT_MAX_NGRAM_SIZE);
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
@@ -31,7 +31,7 @@ import java.io.IOException;
  * A token filter that generates unique tokens. Can remove unique tokens only on the same
  * position increments as well.
  */
-public class UniqueTokenFilter extends TokenFilter {
+class UniqueTokenFilter extends TokenFilter {

     private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
     private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@@ -39,11 +39,11 @@ public class UniqueTokenFilter extends TokenFilter {
     private final CharArraySet previous = new CharArraySet(8, false);
     private final boolean onlyOnSamePosition;

-    public UniqueTokenFilter(TokenStream in) {
+    UniqueTokenFilter(TokenStream in) {
         this(in, false);
     }

-    public UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
+    UniqueTokenFilter(TokenStream in, boolean onlyOnSamePosition) {
         super(in);
         this.onlyOnSamePosition = onlyOnSamePosition;
     }
@@ -17,19 +17,19 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;

 public class UniqueTokenFilterFactory extends AbstractTokenFilterFactory {

     private final boolean onlyOnSamePosition;

-    public UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
+    UniqueTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
         super(indexSettings, name, settings);
         this.onlyOnSamePosition = settings.getAsBooleanLenientForPreEs6Indices(
             indexSettings.getIndexVersionCreated(), "only_on_same_position", false, deprecationLogger);
@@ -17,13 +17,15 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.UpperCaseFilter;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.env.Environment;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;

 public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

@@ -58,6 +58,15 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
         filters.put("trim", TrimTokenFilterFactory.class);
         filters.put("worddelimiter", WordDelimiterTokenFilterFactory.class);
         filters.put("worddelimitergraph", WordDelimiterGraphTokenFilterFactory.class);
+        filters.put("flattengraph", FlattenGraphTokenFilterFactory.class);
+        filters.put("length", LengthTokenFilterFactory.class);
+        filters.put("greeklowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("irishlowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("lowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("turkishlowercase", LowerCaseTokenFilterFactory.class);
+        filters.put("uppercase", UpperCaseTokenFilterFactory.class);
+        filters.put("ngram", NGramTokenFilterFactory.class);
+        filters.put("edgengram", EdgeNGramTokenFilterFactory.class);
         return filters;
     }

@@ -17,9 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

-import java.io.IOException;
-
 import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
@@ -30,6 +28,8 @@ import org.elasticsearch.index.IndexSettings;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 import org.elasticsearch.test.IndexSettingsModule;

+import java.io.IOException;
+
 public class FlattenGraphTokenFilterFactoryTests extends ESTokenStreamTestCase {

     public void testBasic() throws IOException {
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.elasticsearch.index.analysis;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
@@ -30,6 +30,8 @@ import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.settings.Settings.Builder;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexSettings;
+import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
+import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.test.ESTokenStreamTestCase;
 import org.elasticsearch.test.IndexSettingsModule;

@@ -52,7 +54,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
         for (String tokenChars : Arrays.asList("letters", "number", "DIRECTIONALITY_UNDEFINED")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+                .put("token_chars", tokenChars).build();
             try {
                 new NGramTokenizerFactory(indexProperties, null, name, settings).create();
                 fail();
@@ -61,7 +64,8 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
             }
         }
         for (String tokenChars : Arrays.asList("letter", " digit ", "punctuation", "DIGIT", "CoNtRoL", "dash_punctuation")) {
-            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", tokenChars).build();
+            final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+                .put("token_chars", tokenChars).build();
             indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);

             new NGramTokenizerFactory(indexProperties, null, name, settings).create();
@@ -73,8 +77,10 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Index index = new Index("test", "_na_");
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4).putArray("token_chars", new String[0]).build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
+            .putArray("token_chars", new String[0]).build();
+        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader("1.34"));
         assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
     }
@@ -84,12 +90,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final Index index = new Index("test", "_na_");
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
-        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit").build();
+        Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
             new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
         tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
@@ -102,12 +111,15 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
         final String name = "ngr";
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
-        Tokenizer tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        Tokenizer tokenizer =
+            new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
             new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
-        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
-        tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
+            .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
+        tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
+            .create();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
             new String[] {" a", " a!"});
@ -128,7 +140,9 @@ public class NGramTokenizerFactoryTests extends ESTokenStreamTestCase {
|
||||||
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
Settings indexSettings = newAnalysisSettingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, v.id).build();
|
||||||
Tokenizer tokenizer = new MockTokenizer();
|
Tokenizer tokenizer = new MockTokenizer();
|
||||||
tokenizer.setReader(new StringReader("foo bar"));
|
tokenizer.setReader(new StringReader("foo bar"));
|
||||||
TokenStream edgeNGramTokenFilter = new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(tokenizer);
|
TokenStream edgeNGramTokenFilter =
|
||||||
|
new EdgeNGramTokenFilterFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
|
||||||
|
.create(tokenizer);
|
||||||
if (reverse) {
|
if (reverse) {
|
||||||
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
|
assertThat(edgeNGramTokenFilter, instanceOf(ReverseStringFilter.class));
|
||||||
} else {
|
} else {
|
|
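
For context on what these assertions exercise: the factories above are thin wrappers around Lucene's n-gram tokenizers (the token_chars splitting is layered on by the Elasticsearch factory). A minimal plain-Lucene sketch of the edge n-gram case, using the min_gram=2/max_gram=3 settings from the test; the class name is invented for illustration:

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Mirrors the min_gram=2/max_gram=3 settings used in the test above,
// without the Elasticsearch factory layer.
public class EdgeNGramSketch {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new EdgeNGramTokenizer(2, 3);
        tokenizer.setReader(new StringReader("Åbc"));
        CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
        tokenizer.reset();
        while (tokenizer.incrementToken()) {
            System.out.println(term); // prints "Åb", then "Åbc"
        }
        tokenizer.end();
        tokenizer.close();
    }
}
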
@@ -17,7 +17,7 @@
  * under the License.
  */

-package org.apache.lucene.analysis.miscellaneous;
+package org.elasticsearch.analysis.common;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -210,3 +210,185 @@
 - match: { detail.tokenfilters.0.tokens.5.start_offset: 16 }
 - match: { detail.tokenfilters.0.tokens.5.end_offset: 19 }
 - match: { detail.tokenfilters.0.tokens.5.position: 5 }
+
+---
+"unique":
+    - do:
+        indices.analyze:
+          body:
+            text: Foo Foo Bar!
+            tokenizer: whitespace
+            filter: [unique]
+    - length: { tokens: 2 }
+    - match: { tokens.0.token: Foo }
+    - match: { tokens.1.token: Bar! }
+
+---
+"synonym_graph and flatten_graph":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_synonym_graph:
+                    type: synonym_graph
+                    synonyms: ["automatic teller machine,atm,cash point"]
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: this automatic teller machine is down
+            tokenizer: whitespace
+            filter: [my_synonym_graph]
+    - length: { tokens: 9 }
+    - match: { tokens.0.token: this }
+    - match: { tokens.0.position: 0 }
+    - is_false: tokens.0.positionLength
+    - match: { tokens.1.token: atm }
+    - match: { tokens.1.position: 1 }
+    - match: { tokens.1.positionLength: 4 }
+    - match: { tokens.2.token: cash }
+    - match: { tokens.2.position: 1 }
+    - is_false: tokens.2.positionLength
+    - match: { tokens.3.token: automatic }
+    - match: { tokens.3.position: 1 }
+    - match: { tokens.3.positionLength: 2 }
+    - match: { tokens.4.token: point }
+    - match: { tokens.4.position: 2 }
+    - match: { tokens.4.positionLength: 3 }
+    - match: { tokens.5.token: teller }
+    - match: { tokens.5.position: 3 }
+    - is_false: tokens.5.positionLength
+    - match: { tokens.6.token: machine }
+    - match: { tokens.6.position: 4 }
+    - is_false: tokens.6.positionLength
+    - match: { tokens.7.token: is }
+    - match: { tokens.7.position: 5 }
+    - is_false: tokens.7.positionLength
+    - match: { tokens.8.token: down }
+    - match: { tokens.8.position: 6 }
+    - is_false: tokens.8.positionLength
+
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: this automatic teller machine is down
+            tokenizer: whitespace
+            filter: [my_synonym_graph,flatten_graph]
+    - length: { tokens: 9 }
+    - match: { tokens.0.token: this }
+    - match: { tokens.0.position: 0 }
+    - is_false: tokens.0.positionLength
+    - match: { tokens.1.token: atm }
+    - match: { tokens.1.position: 1 }
+    - match: { tokens.1.positionLength: 3 }
+    - match: { tokens.2.token: cash }
+    - match: { tokens.2.position: 1 }
+    - is_false: tokens.2.positionLength
+    - match: { tokens.3.token: automatic }
+    - match: { tokens.3.position: 1 }
+    - is_false: tokens.3.positionLength
+    - match: { tokens.4.token: point }
+    - match: { tokens.4.position: 2 }
+    - match: { tokens.4.positionLength: 2 }
+    - match: { tokens.5.token: teller }
+    - match: { tokens.5.position: 2 }
+    - is_false: tokens.5.positionLength
+    - match: { tokens.6.token: machine }
+    - match: { tokens.6.position: 3 }
+    - is_false: tokens.6.positionLength
+    - match: { tokens.7.token: is }
+    - match: { tokens.7.position: 4 }
+    - is_false: tokens.7.positionLength
+    - match: { tokens.8.token: down }
+    - match: { tokens.8.position: 5 }
+    - is_false: tokens.8.positionLength
+
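
What these two blocks assert, in Lucene terms: synonym_graph (SynonymGraphFilter) emits multi-word synonyms as a token graph carrying positionLength, and flatten_graph (FlattenGraphFilter) squashes that graph into the linear positions an index can actually store. A rough plain-Lucene sketch follows; the class name is invented, and the pairwise add() calls only approximate the full equivalence-class expansion Elasticsearch performs for "automatic teller machine,atm,cash point":

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class FlattenGraphSketch {
    public static void main(String[] args) throws Exception {
        // Pairwise approximation of "automatic teller machine,atm,cash point".
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        CharsRef atm = new CharsRef("atm");
        CharsRef machine = CharsRef.deepCopyOf(SynonymMap.Builder.join(
                new String[] {"automatic", "teller", "machine"}, new CharsRefBuilder()));
        CharsRef cashPoint = CharsRef.deepCopyOf(SynonymMap.Builder.join(
                new String[] {"cash", "point"}, new CharsRefBuilder()));
        builder.add(machine, atm, true);
        builder.add(machine, cashPoint, true);
        SynonymMap synonyms = builder.build();

        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("this automatic teller machine is down"));
        // Drop the FlattenGraphFilter wrapper to see the unflattened
        // positionLength values from the first assertion block.
        TokenStream stream = new FlattenGraphFilter(new SynonymGraphFilter(tokenizer, synonyms, true));
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
        stream.reset();
        int position = -1;
        while (stream.incrementToken()) {
            position += posInc.getPositionIncrement();
            System.out.println(term + " position=" + position
                    + " positionLength=" + posLen.getPositionLength());
        }
        stream.end();
        stream.close();
    }
}
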
+---
+"length":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_length:
+                    type: length
+                    min: 6
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: foo bar foobar
+            tokenizer: whitespace
+            filter: [my_length]
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: foobar }
+
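
The length filter corresponds to Lucene's LengthFilter. A minimal sketch of the min: 6 case above, with an illustrative class name; the unbounded max mirrors the filter's default:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class LengthFilterSketch {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("foo bar foobar"));
        // Keep only tokens of at least 6 characters; "foo" and "bar" are dropped.
        TokenStream stream = new LengthFilter(tokenizer, 6, Integer.MAX_VALUE);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term); // prints "foobar"
        }
        stream.end();
        stream.close();
    }
}
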
+---
+"uppercase":
+    - do:
+        indices.analyze:
+          body:
+            text: foobar
+            tokenizer: keyword
+            filter: [uppercase]
+    - length: { tokens: 1 }
+    - match: { tokens.0.token: FOOBAR }
+
+---
+"ngram":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_ngram:
+                    type: ngram
+                    min_gram: 3
+                    max_gram: 3
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: foobar
+            tokenizer: keyword
+            filter: [my_ngram]
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: foo }
+    - match: { tokens.1.token: oob }
+    - match: { tokens.2.token: oba }
+    - match: { tokens.3.token: bar }
+
+---
+"edge_ngram":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              analysis:
+                filter:
+                  my_edge_ngram:
+                    type: edge_ngram
+                    min_gram: 3
+                    max_gram: 6
+    - do:
+        indices.analyze:
+          index: test
+          body:
+            text: foobar
+            tokenizer: keyword
+            filter: [my_edge_ngram]
+    - length: { tokens: 4 }
+    - match: { tokens.0.token: foo }
+    - match: { tokens.1.token: foob }
+    - match: { tokens.2.token: fooba }
+    - match: { tokens.3.token: foobar }
+
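
Both filters wrap their Lucene counterparts, NGramTokenFilter and EdgeNGramTokenFilter. A combined sketch of the two keyword-tokenized cases above; the class name is invented, and the two-int constructors are assumed per the Lucene version current at the time of this change:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class GramFilterSketch {
    private static void dump(TokenStream stream) throws Exception {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            System.out.println(term);
        }
        stream.end();
        stream.close();
    }

    public static void main(String[] args) throws Exception {
        Tokenizer keyword = new KeywordTokenizer();
        keyword.setReader(new StringReader("foobar"));
        // min_gram=3/max_gram=3: foo, oob, oba, bar
        dump(new NGramTokenFilter(keyword, 3, 3));

        Tokenizer keyword2 = new KeywordTokenizer();
        keyword2.setReader(new StringReader("foobar"));
        // min_gram=3/max_gram=6: foo, foob, fooba, foobar
        dump(new EdgeNGramTokenFilter(keyword2, 3, 6));
    }
}
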
@@ -0,0 +1,41 @@
+"ngram search":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 0
+              analysis:
+                analyzer:
+                  my_analyzer:
+                    tokenizer: standard
+                    filter: [my_ngram]
+                filter:
+                  my_ngram:
+                    type: ngram
+                    min_gram: 2
+                    max_gram: 2
+            mappings:
+              doc:
+                properties:
+                  text:
+                    type: text
+                    analyzer: my_analyzer
+
+    - do:
+        index:
+          index: test
+          type: doc
+          id: 1
+          body: { "text": "foo bar baz" }
+          refresh: true
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                text:
+                  query: foa
+    - match: {hits.total: 1}
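
Why "foa" finds "foo bar baz" here: both the index and query sides run through the 2-gram analyzer, so "foo" is indexed as the bigrams "fo" and "oo" while the query analyzes to "fo" and "oa"; the match query ORs its analyzed terms, and the shared bigram "fo" produces the hit. A small plain-Lucene sketch of the query side, with an illustrative class name:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class BigramSketch {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new KeywordTokenizer();
        tokenizer.setReader(new StringReader("foa"));
        TokenStream bigrams = new NGramTokenFilter(tokenizer, 2, 2);
        CharTermAttribute term = bigrams.addAttribute(CharTermAttribute.class);
        bigrams.reset();
        while (bigrams.incrementToken()) {
            // prints "fo" and "oa"; "fo" is also an indexed bigram of "foo"
            System.out.println(term);
        }
        bigrams.end();
        bigrams.close();
    }
}
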
@@ -0,0 +1,129 @@
+"ngram highlighting":
+    - do:
+        indices.create:
+          index: test
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 0
+              analysis:
+                tokenizer:
+                  my_ngramt:
+                    type: ngram
+                    min_gram: 1
+                    max_gram: 20
+                    token_chars: letter,digit
+                filter:
+                  my_ngram:
+                    type: ngram
+                    min_gram: 1
+                    max_gram: 20
+                analyzer:
+                  name2_index_analyzer:
+                    tokenizer: whitespace
+                    filter: [my_ngram]
+                  name_index_analyzer:
+                    tokenizer: my_ngramt
+                  name_search_analyzer:
+                    tokenizer: whitespace
+            mappings:
+              doc:
+                properties:
+                  name:
+                    type: text
+                    term_vector: with_positions_offsets
+                    analyzer: name_index_analyzer
+                    search_analyzer: name_search_analyzer
+                  name2:
+                    type: text
+                    term_vector: with_positions_offsets
+                    analyzer: name2_index_analyzer
+                    search_analyzer: name_search_analyzer
+
+    - do:
+        index:
+          index: test
+          type: doc
+          id: 1
+          refresh: true
+          body:
+            name: logicacmg ehemals avinci - the know how company
+            name2: logicacmg ehemals avinci - the know how company
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name:
+                  query: logica m
+            highlight:
+              fields:
+                - name: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name.0: "<em>logica</em>c<em>m</em>g ehe<em>m</em>als avinci - the know how co<em>m</em>pany"}
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name:
+                  query: logica ma
+            highlight:
+              fields:
+                - name: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehe<em>ma</em>ls avinci - the know how company"}
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name:
+                  query: logica
+            highlight:
+              fields:
+                - name: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name.0: "<em>logica</em>cmg ehemals avinci - the know how company"}
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name2:
+                  query: logica m
+            highlight:
+              fields:
+                - name2: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how <em>company</em>"}
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name2:
+                  query: logica ma
+            highlight:
+              fields:
+                - name2: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> <em>ehemals</em> avinci - the know how company"}
+
+    - do:
+        search:
+          body:
+            query:
+              match:
+                name2:
+                  query: logica
+            highlight:
+              fields:
+                - name2: {}
+    - match: {hits.total: 1}
+    - match: {hits.hits.0.highlight.name2.0: "<em>logicacmg</em> ehemals avinci - the know how company"}
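
The asymmetry between the name and name2 highlights comes down to offsets: NGramTokenizer (used for name) gives each gram its own offsets, so individual grams like "m" get wrapped, while NGramTokenFilter (used for name2) keeps the offsets of the token it splits, so the highlighter wraps whole words. A sketch that makes the filter's offset behaviour visible, with an illustrative class name:

import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public class GramOffsetSketch {
    public static void main(String[] args) throws Exception {
        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("logicacmg"));
        TokenStream grams = new NGramTokenFilter(tokenizer, 2, 2);
        CharTermAttribute term = grams.addAttribute(CharTermAttribute.class);
        OffsetAttribute offsets = grams.addAttribute(OffsetAttribute.class);
        grams.reset();
        while (grams.incrementToken()) {
            // every gram reports [0,9), the offsets of the whole original token,
            // which is why the highlighter wraps the complete word for name2
            System.out.println(term + " [" + offsets.startOffset() + "," + offsets.endOffset() + ")");
        }
        grams.end();
        grams.close();
    }
}
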
@@ -22,7 +22,6 @@ package org.elasticsearch.indices.analysis;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.elasticsearch.Version;
 import org.elasticsearch.common.collect.MapBuilder;
 import org.elasticsearch.index.analysis.ApostropheFilterFactory;
 import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
@@ -36,10 +35,8 @@ import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
 import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
 import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
-import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
 import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
-import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
 import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
@@ -49,14 +46,11 @@ import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
 import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
 import org.elasticsearch.index.analysis.KeepWordFilterFactory;
 import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
-import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
 import org.elasticsearch.index.analysis.LetterTokenizerFactory;
 import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
-import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
 import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
 import org.elasticsearch.index.analysis.MultiTermAwareComponent;
-import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
 import org.elasticsearch.index.analysis.NGramTokenizerFactory;
 import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
 import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
@@ -82,7 +76,6 @@ import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
 import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
 import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
 import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
-import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
 import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
 import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
 import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
@@ -90,7 +83,6 @@ import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.test.ESTestCase;

 import java.util.Collection;
-import java.util.EnumMap;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Locale;
@@ -165,7 +157,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 .put("decimaldigit", DecimalDigitFilterFactory.class)
                 .put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class)
                 .put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class)
-                .put("edgengram", EdgeNGramTokenFilterFactory.class)
+                .put("edgengram", MovedToAnalysisCommon.class)
                 .put("elision", ElisionTokenFilterFactory.class)
                 .put("englishminimalstem", StemmerTokenFilterFactory.class)
                 .put("englishpossessive", StemmerTokenFilterFactory.class)
@@ -178,7 +170,7 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 .put("germanlightstem", StemmerTokenFilterFactory.class)
                 .put("germanminimalstem", StemmerTokenFilterFactory.class)
                 .put("germannormalization", GermanNormalizationFilterFactory.class)
-                .put("greeklowercase", LowerCaseTokenFilterFactory.class)
+                .put("greeklowercase", MovedToAnalysisCommon.class)
                 .put("greekstem", StemmerTokenFilterFactory.class)
                 .put("hindinormalization", HindiNormalizationFilterFactory.class)
                 .put("hindistem", StemmerTokenFilterFactory.class)
@@ -186,17 +178,17 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 .put("hunspellstem", HunspellTokenFilterFactory.class)
                 .put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class)
                 .put("indicnormalization", IndicNormalizationFilterFactory.class)
-                .put("irishlowercase", LowerCaseTokenFilterFactory.class)
+                .put("irishlowercase", MovedToAnalysisCommon.class)
                 .put("indonesianstem", StemmerTokenFilterFactory.class)
                 .put("italianlightstem", StemmerTokenFilterFactory.class)
                 .put("keepword", KeepWordFilterFactory.class)
                 .put("keywordmarker", MovedToAnalysisCommon.class)
                 .put("kstem", KStemTokenFilterFactory.class)
                 .put("latvianstem", StemmerTokenFilterFactory.class)
-                .put("length", LengthTokenFilterFactory.class)
+                .put("length", MovedToAnalysisCommon.class)
                 .put("limittokencount", LimitTokenCountFilterFactory.class)
-                .put("lowercase", LowerCaseTokenFilterFactory.class)
-                .put("ngram", NGramTokenFilterFactory.class)
+                .put("lowercase", MovedToAnalysisCommon.class)
+                .put("ngram", MovedToAnalysisCommon.class)
                 .put("norwegianlightstem", StemmerTokenFilterFactory.class)
                 .put("norwegianminimalstem", StemmerTokenFilterFactory.class)
                 .put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class)
@@ -225,12 +217,12 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
                 .put("synonymgraph", SynonymGraphTokenFilterFactory.class)
                 .put("trim", MovedToAnalysisCommon.class)
                 .put("truncate", TruncateTokenFilterFactory.class)
-                .put("turkishlowercase", LowerCaseTokenFilterFactory.class)
+                .put("turkishlowercase", MovedToAnalysisCommon.class)
                 .put("type", KeepTypesFilterFactory.class)
-                .put("uppercase", UpperCaseTokenFilterFactory.class)
+                .put("uppercase", MovedToAnalysisCommon.class)
                 .put("worddelimiter", MovedToAnalysisCommon.class)
                 .put("worddelimitergraph", MovedToAnalysisCommon.class)
-                .put("flattengraph", FlattenGraphTokenFilterFactory.class)
+                .put("flattengraph", MovedToAnalysisCommon.class)

                 // TODO: these tokenfilters are not yet exposed: useful?
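
MovedToAnalysisCommon marks factories whose wiring is now owned by the analysis-common module through AnalysisPlugin#getTokenFilters. The plugin class itself is not part of this hunk, so the following registration sketch is an assumption: it presumes the moved factory classes keep their names in org.elasticsearch.analysis.common and the usual (IndexSettings, Environment, String, Settings) constructor that AnalysisProvider expects.

package org.elasticsearch.analysis.common;

import java.util.Map;
import java.util.TreeMap;

import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

// Sketch only: approximates how analysis-common exposes the moved filters.
public class CommonAnalysisPluginSketch extends Plugin implements AnalysisPlugin {
    @Override
    public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        Map<String, AnalysisProvider<TokenFilterFactory>> filters = new TreeMap<>();
        // The token filters this change moves out of core (factory class names assumed).
        filters.put("edge_ngram", EdgeNGramTokenFilterFactory::new);
        filters.put("ngram", NGramTokenFilterFactory::new);
        filters.put("uppercase", UpperCaseTokenFilterFactory::new);
        filters.put("lowercase", LowerCaseTokenFilterFactory::new);
        filters.put("length", LengthTokenFilterFactory::new);
        filters.put("flatten_graph", FlattenGraphTokenFilterFactory::new);
        filters.put("unique", UniqueTokenFilterFactory::new);
        return filters;
    }
}
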