From f243d75f5920620697906c4d7e923c8dada09141 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 28 Sep 2018 09:02:47 +0100 Subject: [PATCH] Remove special-casing of Synonym filters in AnalysisRegistry (#34034) The synonym filters no longer need access to the AnalysisRegistry in their constructors, so we can remove the special-case code and move them to the common analysis module. This commit means that synonyms are no longer available for `server` integration tests, so several of these are either rewritten or migrated to the common analysis module as rest-spec-api tests --- .../analysis/common/CommonAnalysisPlugin.java | 2 + .../analysis/common}/ESSolrSynonymParser.java | 2 +- .../common}/ESWordnetSynonymParser.java | 2 +- .../SynonymGraphTokenFilterFactory.java | 12 +- .../common}/SynonymTokenFilterFactory.java | 27 +- .../common/CommonAnalysisFactoryTests.java | 2 +- .../common}/ESSolrSynonymParserTests.java | 2 +- .../common}/ESWordnetSynonymParserTests.java | 2 +- .../common/HighlighterWithAnalyzersTests.java | 171 ++++++++++ .../test/analysis-common/40_token_filters.yml | 62 ++++ .../test/indices.analyze/10_synonyms.yml | 47 +++ .../indices/validate_query/10_synonyms.yml | 82 +++++ .../search.query/50_queries_with_synonyms.yml | 307 ++++++++++++++++++ .../test/search.query/60_synonym_graph.yml | 205 ++++++++++++ .../test/search.suggest/30_synonyms.yml | 44 +++ .../AnnotatedTextFieldMapperTests.java | 77 ++--- .../index/analysis/AnalysisRegistry.java | 38 +-- .../index/mapper/TextFieldMapperTests.java | 16 +- .../index/search/MatchQueryIT.java | 220 ------------- .../index/search/MultiMatchQueryTests.java | 36 +- .../indices/analyze/AnalyzeActionIT.java | 48 --- .../highlight/HighlighterSearchIT.java | 152 --------- .../search/functionscore/QueryRescorerIT.java | 18 +- .../search/query/QueryStringIT.java | 90 ----- .../search/query/SearchQueryIT.java | 156 --------- .../suggest/CompletionSuggestSearchIT.java | 22 -- .../SharedClusterSnapshotRestoreIT.java | 15 +- .../validate/SimpleValidateQueryIT.java | 36 -- .../analysis/AnalysisFactoryTestCase.java | 6 +- 29 files changed, 1023 insertions(+), 876 deletions(-) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/ESSolrSynonymParser.java (98%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/ESWordnetSynonymParser.java (98%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/SynonymGraphTokenFilterFactory.java (87%) rename {server/src/main/java/org/elasticsearch/index/analysis => modules/analysis-common/src/main/java/org/elasticsearch/analysis/common}/SynonymTokenFilterFactory.java (84%) rename {server/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/ESSolrSynonymParserTests.java (98%) rename {server/src/test/java/org/elasticsearch/index/analysis => modules/analysis-common/src/test/java/org/elasticsearch/analysis/common}/ESWordnetSynonymParserTests.java (98%) create mode 100644 modules/analysis-common/src/test/resources/rest-api-spec/test/indices/validate_query/10_synonyms.yml create mode 100644 modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml create mode 100644 
modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/60_synonym_graph.yml create mode 100644 modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/30_synonyms.yml delete mode 100644 server/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 3bd59357240..59ecde8cf37 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -275,6 +275,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new); filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new)); filters.put("stemmer", StemmerTokenFilterFactory::new); + filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new)); + filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new)); filters.put("trim", TrimTokenFilterFactory::new); filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new)); filters.put("unique", UniqueTokenFilterFactory::new); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java index 006973dd9b6..256e05982c6 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ESSolrSynonymParser.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESSolrSynonymParser.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java similarity index 98% rename from server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java index ebcd84e39d7..1e09011af67 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/ESWordnetSynonymParser.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ESWordnetSynonymParser.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.LogManager; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java similarity index 87% rename from server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java index 200e426fbd4..cccfdc8d7b7 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SynonymGraphTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymGraphTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -26,16 +26,18 @@ import org.apache.lucene.analysis.synonym.SynonymMap; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; -import java.io.IOException; import java.util.List; import java.util.function.Function; public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory { - public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, - String name, Settings settings) throws IOException { - super(indexSettings, env, analysisRegistry, name, settings); + SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, + String name, Settings settings) { + super(indexSettings, env, name, settings); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java similarity index 84% rename from server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java rename to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java index c18e8c94310..99810432cd8 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SynonymTokenFilterFactory.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; @@ -26,8 +26,13 @@ import org.apache.lucene.analysis.synonym.SynonymMap; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.AbstractTokenFilterFactory; +import org.elasticsearch.index.analysis.Analysis; +import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.CustomAnalyzer; +import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; -import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.List; @@ -35,14 +40,14 @@ import java.util.function.Function; public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { - protected final String format; - protected final boolean expand; - protected final boolean lenient; + private final String format; + private final boolean expand; + private final boolean lenient; protected final Settings settings; protected final Environment environment; - public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, - String name, Settings settings) throws IOException { + SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, + String name, Settings settings) { super(indexSettings, name, settings); this.settings = settings; @@ -83,15 +88,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { }; } - protected Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List charFilters, - List tokenFilters) { + Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List charFilters, + List tokenFilters) { return new CustomAnalyzer("synonyms", tokenizer, charFilters.toArray(new CharFilterFactory[0]), tokenFilters.stream() .map(TokenFilterFactory::getSynonymFilter) .toArray(TokenFilterFactory[]::new)); } - protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) { + SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) { try { SynonymMap.Builder parser; if ("wordnet".equalsIgnoreCase(format)) { @@ -107,7 +112,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { } } - protected Reader getRulesFromSettings(Environment env) { + Reader getRulesFromSettings(Environment env) { Reader rulesReader; if (settings.getAsList("synonyms", null) != null) { List rulesList = Analysis.getWordList(env, settings, "synonyms"); diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index baa55395828..f9fca66cc54 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -24,7 +24,6 @@ import org.apache.lucene.analysis.en.PorterStemFilterFactory; import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory; import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory; import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory; -import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import 
org.elasticsearch.indices.analysis.AnalysisFactoryTestCase; import java.util.List; @@ -106,6 +105,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase { filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class); filters.put("kstem", KStemTokenFilterFactory.class); filters.put("synonym", SynonymTokenFilterFactory.class); + filters.put("synonymgraph", SynonymGraphTokenFilterFactory.class); filters.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class); filters.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class); filters.put("reversestring", ReverseTokenFilterFactory.class); diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java index 31aa1a9be25..e6ed9b03855 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ESSolrSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESSolrSynonymParserTests.java @@ -17,7 +17,7 @@ * under the License. */ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopFilter; diff --git a/server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java similarity index 98% rename from server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java rename to modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java index 6d0fd8944d4..18eaaedb5d0 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/ESWordnetSynonymParserTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/ESWordnetSynonymParserTests.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.elasticsearch.index.analysis; +package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.StopFilter; diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index 96e8043570d..db39a27d5c1 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -21,9 +21,12 @@ package org.elasticsearch.analysis.common; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.query.Operator; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; import org.elasticsearch.test.ESIntegTestCase; @@ -31,11 +34,18 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import static org.elasticsearch.client.Requests.searchRequest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.QueryBuilders.boolQuery; +import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery; import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; +import static org.elasticsearch.index.query.QueryBuilders.termQuery; +import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight; +import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight; +import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.startsWith; @@ -153,4 +163,165 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase { + "http://www.facebook.com http://elasticsearch.org " + "http://xing.com http://cnn.com http://quora.com")); } + + public void testSynonyms() throws IOException { + Settings.Builder builder = Settings.builder() + .put(indexSettings()) + .put("index.analysis.analyzer.synonym.tokenizer", "standard") + .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase") + .put("index.analysis.filter.synonym.type", "synonym") + .putList("index.analysis.filter.synonym.synonyms", "fast,quick"); + + assertAcked(prepareCreate("test").setSettings(builder.build()) + .addMapping("type1", "field1", + "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + + "analyzer=standard,index_options=offsets")); + ensureGreen(); + + client().prepareIndex("test", "type1", "0").setSource( + "field1", "The quick brown fox jumps over the lazy dog").get(); + refresh(); + for (String highlighterType : new String[] {"plain", "fvh", "unified"}) { + logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1"); + SearchSourceBuilder source = searchSource() + 
.query(matchQuery("field1", "quick brown fox").operator(Operator.AND)) + .highlighter( + highlight() + .field("field1") + .order("score") + .preTags("") + .postTags("") + .highlighterType(highlighterType)); + SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field1", 0, 1, + equalTo("The quick brown fox jumps over the lazy dog")); + + source = searchSource() + .query(matchQuery("field1", "fast brown fox").operator(Operator.AND)) + .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); + searchResponse = client().search(searchRequest("test").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field1", 0, 1, + equalTo("The quick brown fox jumps over the lazy dog")); + } + } + + public void testPhrasePrefix() throws IOException { + Settings.Builder builder = Settings.builder() + .put(indexSettings()) + .put("index.analysis.analyzer.synonym.tokenizer", "standard") + .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase") + .put("index.analysis.filter.synonym.type", "synonym") + .putList("index.analysis.filter.synonym.synonyms", "quick => fast"); + + assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).addMapping("type1", type1TermVectorMapping())); + + ensureGreen(); + + client().prepareIndex("first_test_index", "type1", "0").setSource( + "field0", "The quick brown fox jumps over the lazy dog", + "field1", "The quick brown fox jumps over the lazy dog").get(); + client().prepareIndex("first_test_index", "type1", "1").setSource("field1", + "The quick browse button is a fancy thing, right bro?").get(); + refresh(); + logger.info("--> highlighting and searching on field0"); + + SearchSourceBuilder source = searchSource() + .query(matchPhrasePrefixQuery("field0", "bro")) + .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); + SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + + source = searchSource() + .query(matchPhrasePrefixQuery("field0", "quick bro")) + .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); + + searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); + assertHighlight(searchResponse, 0, "field0", 0, 1, + equalTo("The quick brown fox jumps over the lazy dog")); + + logger.info("--> highlighting and searching on field1"); + source = searchSource() + .query(boolQuery() + .should(matchPhrasePrefixQuery("field1", "test")) + .should(matchPhrasePrefixQuery("field1", "bro")) + ) + .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); + + searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); + assertThat(searchResponse.getHits().totalHits, equalTo(2L)); + for (int i = 0; i < 2; i++) { + assertHighlight(searchResponse, i, "field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + } + + source = searchSource() + .query(matchPhrasePrefixQuery("field1", "quick bro")) + .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); + + searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, 
"field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + + assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc", + "field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym", + "field3", "type=text,analyzer=synonym")); + // with synonyms + client().prepareIndex("second_test_index", "doc", "0").setSource( + "type", "type2", + "field4", "The quick brown fox jumps over the lazy dog", + "field3", "The quick brown fox jumps over the lazy dog").get(); + client().prepareIndex("second_test_index", "doc", "1").setSource( + "type", "type2", + "field4", "The quick browse button is a fancy thing, right bro?").get(); + client().prepareIndex("second_test_index", "doc", "2").setSource( + "type", "type2", + "field4", "a quick fast blue car").get(); + refresh(); + + source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) + .highlighter(highlight().field("field3").order("score").preTags("").postTags("")); + + searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, "field3", 0, 1, + equalTo("The quick brown fox jumps over the lazy dog")); + + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); + searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog"))); + + logger.info("--> highlighting and searching on field4"); + source = searchSource().postFilter(termQuery("type", "type2")) + .query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) + .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); + searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); + + assertHighlight(searchResponse, 0, "field4", 0, 1, + anyOf(equalTo("a quick fast blue car"), + equalTo("a quick fast blue car"))); + } + + public static XContentBuilder type1TermVectorMapping() throws IOException { + return XContentFactory.jsonBuilder().startObject().startObject("type1") + .startObject("properties") + .startObject("field1").field("type", "text").field("term_vector", "with_positions_offsets").endObject() + .startObject("field2").field("type", "text").field("term_vector", "with_positions_offsets").endObject() + .endObject() + .endObject().endObject(); + } } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml index 150fa39dcb9..4106237f2cc 100644 --- 
a/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/analysis-common/40_token_filters.yml @@ -223,6 +223,68 @@ - match: { tokens.0.token: Foo } - match: { tokens.1.token: Bar! } +--- +"synonym": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_synonym: + type: synonym + synonyms: ["car,auto"] + + - do: + indices.analyze: + index: test + body: + text: what car magazine + tokenizer: whitespace + filter: [ my_synonym ] + - length: { tokens: 4 } + - match: { tokens.0.token: what } + - match: { tokens.0.position: 0 } + - match: { tokens.1.token: car } + - match: { tokens.1.position: 1 } + - match: { tokens.2.token: auto } + - match: { tokens.2.position: 1 } + - match: { tokens.3.token: magazine } + - match: { tokens.3.position: 2 } + +--- +"synonym_graph": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + my_graph_synonym: + type: synonym_graph + synonyms: [ "guinea pig,cavy" ] + + - do: + indices.analyze: + index: test + body: + text: my guinea pig snores + tokenizer: whitespace + filter: [ my_graph_synonym ] + - length: { tokens: 5 } + - match: { tokens.0.token: my } + - match: { tokens.1.token: cavy } + - match: { tokens.1.position: 1 } + - match: { tokens.1.positionLength: 2 } + - match: { tokens.2.token: guinea } + - match: { tokens.2.position: 1 } + - match: { tokens.3.token: pig } + - match: { tokens.3.position: 2 } + - match: { tokens.4.token: snores } + - match: { tokens.4.position: 3 } + --- "synonym_graph and flatten_graph": - do: diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_synonyms.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_synonyms.yml index 774d30b0b04..840d836b13c 100644 --- a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_synonyms.yml +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_synonyms.yml @@ -1,3 +1,4 @@ +--- "Synonym filter with char_filter": # Tests analyze with synonym and char_filter. This is in the analysis-common module # because there are no char filters in core. @@ -30,3 +31,49 @@ - match: { tokens.2.token: the } - match: { tokens.3.token: elasticsearch } - match: { tokens.4.token: man! 
} + +--- +"Non-standard position length": + - do: + indices.create: + index: test + body: + settings: + index: + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf,what the fudge" ] + analyzer: + custom_syns: + tokenizer: standard + filter: [ lowercase, syns ] + + - do: + indices.analyze: + index: test + body: + analyzer: custom_syns + text: "say what the fudge dude" + + - length: { tokens: 6 } + - match: { tokens.0.token: say } + - match: { tokens.0.position: 0 } + - match: { tokens.0.positionLength: null } + - match: { tokens.1.token: what } + - match: { tokens.1.position: 1 } + - match: { tokens.1.positionLength: null } + - match: { tokens.2.token: wtf } + - match: { tokens.2.position: 1 } + - match: { tokens.2.positionLength: 3 } + - match: { tokens.3.token: the } + - match: { tokens.3.position: 2 } + - match: { tokens.3.positionLength: null } + - match: { tokens.4.token: fudge } + - match: { tokens.4.position: 3 } + - match: { tokens.4.positionLength: null } + - match: { tokens.5.token: dude } + - match: { tokens.5.position: 4 } + - match: { tokens.5.positionLength: null } + diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices/validate_query/10_synonyms.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices/validate_query/10_synonyms.yml new file mode 100644 index 00000000000..a0ef4463f21 --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices/validate_query/10_synonyms.yml @@ -0,0 +1,82 @@ +--- +"validate query with synonyms": + - do: + indices.create: + index: test + body: + settings: + index: + analysis: + filter: + syns: + type: synonym + synonyms: [ "one,two" ] + analyzer: + syns: + tokenizer: standard + filter: [ syns ] + mappings: + test: + properties: + field: + type: text + analyzer: syns + + - do: + indices.validate_query: + index: test + explain: true + body: + query: + match_phrase_prefix: + field: + query: foo + + - is_true: valid + - length: { explanations: 1 } + - match: { explanations.0.explanation: "/field:\"foo\\*\"/" } + + - do: + indices.validate_query: + index: test + explain: true + body: + query: + match_phrase_prefix: + field: + query: foo bar + + - is_true: valid + - length: { explanations: 1 } + - match: { explanations.0.explanation: "field:\"foo bar*\"" } + + - do: + indices.validate_query: + index: test + explain: true + body: + query: + match_phrase_prefix: + field: + query: one bar + + - is_true: valid + - length: { explanations: 1 } + - match: { explanations.0.explanation: "field:\"(one two) bar*\"" } + + - do: + indices.validate_query: + index: test + explain: true + body: + query: + match_phrase_prefix: + field: + query: foo one + + - is_true: valid + - length: { explanations: 1 } + - match: { explanations.0.explanation: "field:\"foo (one* two*)\"" } + + + diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml new file mode 100644 index 00000000000..c7a8122337e --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/50_queries_with_synonyms.yml @@ -0,0 +1,307 @@ +--- +"Test common terms query with stacked tokens": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + syns: + tokenizer: standard + filter: [ "syns" ] + mappings: + test: + properties: + field1: + 
type: text + analyzer: syns + field2: + type: text + analyzer: syns + + - do: + index: + index: test + type: test + id: 3 + body: + field1: quick lazy huge brown pidgin + field2: the quick lazy huge brown fox jumps over the tree + + - do: + index: + index: test + type: test + id: 1 + body: + field1: the quick brown fox + + - do: + index: + index: test + type: test + id: 2 + body: + field1: the quick lazy huge brown fox jumps over the tree + refresh: true + + - do: + search: + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + search: + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + search: + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + search: + body: + query: + common: + field1: + query: the fast huge fox + minimum_should_match: + low_freq: 3 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + search: + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 5 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + + - do: + search: + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 6 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + search: + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + search: + body: + query: + common: + field1: + query: the quick brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + search: + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + search: + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + search: + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + minimum_should_match: 3 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + search: + body: + query: + multi_match: + query: the fast brown + fields: [ "field1", "field2" ] + cutoff_frequency: 3 + operator: and + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.2._id: "2" } + +--- +"Test match query with synonyms - see #3881 for extensive description of the issue": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + synonym: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + index: + type: custom + tokenizer: standard + filter: 
lowercase + search: + type: custom + tokenizer: standard + filter: [ lowercase, synonym ] + mappings: + test: + properties: + text: + type: text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + type: test + id: 1 + body: + text: quick brown fox + refresh: true + + - do: + search: + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 1 } + + - do: + search: + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 1 } + + - do: + search: + body: + query: + match: + text: + query: fast + operator: and + - match: { hits.total: 1 } + + - do: + index: + index: test + type: test + id: 2 + body: + text: fast brown fox + refresh: true + + - do: + search: + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 2 } + + - do: + search: + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 2 } + + diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/60_synonym_graph.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/60_synonym_graph.yml new file mode 100644 index 00000000000..5fd1fed6ffa --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.query/60_synonym_graph.yml @@ -0,0 +1,205 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + graph_syns: + type: synonym_graph + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + analyzer: + lower_syns: + type: custom + tokenizer: standard + filter: [ lowercase, syns ] + lower_graph_syns: + type: custom + tokenizer: standard + filter: [ lowercase, graph_syns ] + mappings: + test: + properties: + field: + type: text + + - do: + index: + index: test + type: test + id: 1 + body: + text: say wtf happened foo + - do: + index: + index: test + type: test + id: 2 + body: + text: bar baz what the fudge man + + - do: + index: + index: test + type: test + id: 3 + body: + text: wtf + + - do: + index: + index: test + type: test + id: 4 + body: + text: what is the name for fudge + + - do: + index: + index: test + type: test + id: 5 + body: + text: bar two three + + - do: + index: + index: test + type: test + id: 6 + body: + text: bar baz two three + refresh: true + +--- +"simple multiterm phrase": + - do: + search: + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms + + - do: + search: + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_graph_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms + +--- +"simple multiterm and": + - do: + search: + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here + + - do: + search: + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_graph_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + +--- +"minimum should match": + - do: + search: + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + 
auto_generate_synonyms_phrase_query: false + - match: { hits.total: 6 } + + - do: + search: + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + minimum_should_match: 80% + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + +--- +"multiterm synonyms phrase": + - do: + search: + body: + query: + match: + text: + query: wtf + operator: and + analyzer: lower_graph_syns + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"phrase prefix": + - do: + index: + index: test + type: test + id: 7 + body: + text: "WTFD!" + + - do: + index: + index: test + type: test + id: 8 + body: + text: "Weird Al's WHAT THE FUDGESICLE" + refresh: true + + - do: + search: + body: + query: + match_phrase_prefix: + text: + query: wtf + analyzer: lower_graph_syns + - match: { hits.total: 5 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "7" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.3._id: "8" } + - match: { hits.hits.4._id: "2" } diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/30_synonyms.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/30_synonyms.yml new file mode 100644 index 00000000000..85bc348fa41 --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/search.suggest/30_synonyms.yml @@ -0,0 +1,44 @@ +--- +"suggestions with synonyms": + - do: + indices.create: + index: test + body: + settings: + analysis: + analyzer: + suggest_analyzer_synonyms: + type: custom + tokenizer: standard + filter: [ lowercase, my_synonyms ] + filter: + my_synonyms: + type: synonym + synonyms: [ "foo,renamed"] + mappings: + test: + properties: + field: + type: completion + analyzer: suggest_analyzer_synonyms + + - do: + index: + index: test + type: test + id: 1 + body: + field: + input: [ "Foo Fighters" ] + refresh: true + + - do: + search: + index: test + body: + suggest: + text: r + test: + completion: + field: field + - match: {suggest.test.0.options.0.text: Foo Fighters} diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java index 8a51b9a494b..06f4b728c8c 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapperTests.java @@ -82,20 +82,15 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { @Before public void setup() { Settings settings = Settings.builder() - .put("index.analysis.filter.mySynonyms.type", "synonym") - .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto")) - .put("index.analysis.analyzer.synonym.tokenizer", "standard") - .put("index.analysis.analyzer.synonym.filter", "mySynonyms") - // Stop filter remains in server as it is part of lucene-core .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard") .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop") .build(); indexService = createIndex("test", settings); parser = 
indexService.mapperService().documentMapperParser(); - } - - - + } + + + @Override protected Collection> getPlugins() { List> classpathPlugins = new ArrayList<>(); @@ -107,16 +102,16 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { protected String getFieldType() { return "annotated_text"; - } - + } + public void testAnnotationInjection() throws IOException { - + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject() .endObject().endObject()); DocumentMapper mapper = indexService.mapperService().merge("type", - new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); // Use example of typed and untyped annotations String annotatedText = "He paid [Stormy Daniels](Stephanie+Clifford&Payee) hush money"; @@ -140,12 +135,12 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { try (Engine.Searcher searcher = shard.acquireSearcher("test")) { LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader(); TermsEnum terms = leaf.terms("field").iterator(); - + assertTrue(terms.seekExact(new BytesRef("stormy"))); PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); - assertEquals(2, postings.nextPosition()); - + assertEquals(2, postings.nextPosition()); + assertTrue(terms.seekExact(new BytesRef("Stephanie Clifford"))); postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); @@ -156,23 +151,23 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { assertEquals(0, postings.nextDoc()); assertEquals(2, postings.nextPosition()); - + assertTrue(terms.seekExact(new BytesRef("hush"))); postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); - assertEquals(4, postings.nextPosition()); - + assertEquals(4, postings.nextPosition()); + } - } - + } + public void testToleranceForBadAnnotationMarkup() throws IOException { - + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject() .endObject().endObject()); DocumentMapper mapper = indexService.mapperService().merge("type", - new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); String annotatedText = "foo [bar](MissingEndBracket baz"; SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference @@ -195,12 +190,12 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { try (Engine.Searcher searcher = shard.acquireSearcher("test")) { LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader(); TermsEnum terms = leaf.terms("field").iterator(); - + assertTrue(terms.seekExact(new BytesRef("foo"))); PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); - assertEquals(0, postings.nextPosition()); - + assertEquals(0, postings.nextPosition()); + assertTrue(terms.seekExact(new BytesRef("bar"))); postings = terms.postings(null, PostingsEnum.POSITIONS); assertEquals(0, postings.nextDoc()); @@ -209,18 +204,18 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { assertFalse(terms.seekExact(new 
BytesRef("MissingEndBracket"))); // Bad markup means value is treated as plain text and fed through tokenisation assertTrue(terms.seekExact(new BytesRef("missingendbracket"))); - + } - } - + } + public void testAgainstTermVectorsAPI() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties").startObject("tvfield").field("type", getFieldType()) .field("term_vector", "with_positions_offsets_payloads") .endObject().endObject() - .endObject().endObject()); - indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); - + .endObject().endObject()); + indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + int max = between(3, 10); BulkRequestBuilder bulk = client().prepareBulk(); @@ -231,13 +226,13 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { bulk.get(); TermVectorsRequest request = new TermVectorsRequest("test", "type", "0").termStatistics(true); - + IndicesService indicesService = getInstanceFromNode(IndicesService.class); IndexService test = indicesService.indexService(resolveIndex("test")); IndexShard shard = test.getShardOrNull(0); assertThat(shard, notNullValue()); - TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request); - assertEquals(1, response.getFields().size()); + TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request); + assertEquals(1, response.getFields().size()); Terms terms = response.getFields().terms("tvfield"); TermsEnum iterator = terms.iterator(); @@ -245,14 +240,14 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { Set foundTerms = new HashSet<>(); while ((term = iterator.next()) != null) { foundTerms.add(term.utf8ToString()); - } + } //Check we have both text and annotation tokens assertTrue(foundTerms.contains("brown")); assertTrue(foundTerms.contains("Color")); assertTrue(foundTerms.contains("fox")); - - } - + + } + // ===== Code below copied from TextFieldMapperTests ======== public void testDefaults() throws IOException { @@ -616,7 +611,7 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPositions(), equalTo(true)); assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPayloads(), equalTo(true)); } - + public void testNullConfigValuesFail() throws MapperParsingException, IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject() .startObject("type") @@ -677,5 +672,5 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase { } - + } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index d7a283f3158..86b9c5a65c5 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -158,16 +158,8 @@ public final class AnalysisRegistry implements Closeable { public Map buildTokenFilterFactories(IndexSettings indexSettings) throws IOException { final Map tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER); - Map> tokenFilters = new HashMap<>(this.tokenFilters); - /* - * synonym and synonym_graph are different than everything else since they need access to the tokenizer 
factories for the index. - * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and - * hide internal data-structures as much as possible. - */ - tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); - tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); - - return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); + return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, + Collections.unmodifiableMap(this.tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); } public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { @@ -222,18 +214,7 @@ public final class AnalysisRegistry implements Closeable { if (tokenFilterSettings.containsKey(tokenFilter)) { Settings currentSettings = tokenFilterSettings.get(tokenFilter); String typeName = currentSettings.get("type"); - /* - * synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index. - * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and - * hide internal data-structures as much as possible. - */ - if ("synonym".equals(typeName)) { - return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); - } else if ("synonym_graph".equals(typeName)) { - return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)); - } else { - return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); - } + return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName); } else { return getTokenFilterProvider(tokenFilter); } @@ -257,19 +238,6 @@ public final class AnalysisRegistry implements Closeable { } } - private static AnalysisModule.AnalysisProvider requiresAnalysisSettings(AnalysisModule.AnalysisProvider provider) { - return new AnalysisModule.AnalysisProvider() { - @Override - public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { - return provider.get(indexSettings, environment, name, settings); - } - @Override - public boolean requiresAnalysisSettings() { - return true; - } - }; - } - enum Component { ANALYZER { @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 1211488e466..f4856d51a2d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.analysis.MockSynonymAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.FieldType; @@ -55,6 +56,7 @@ import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; 
import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -82,10 +84,6 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { @Before public void setup() { Settings settings = Settings.builder() - .put("index.analysis.filter.mySynonyms.type", "synonym") - .putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto")) - .put("index.analysis.analyzer.synonym.tokenizer", "standard") - .put("index.analysis.analyzer.synonym.filter", "mySynonyms") // Stop filter remains in server as it is part of lucene-core .put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard") .put("index.analysis.analyzer.my_stop_analyzer.filter", "stop") @@ -734,7 +732,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { .endObject() .startObject("synfield") .field("type", "text") - .field("analyzer", "synonym") + .field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer .field("index_phrases", true) .endObject() .endObject() @@ -761,11 +759,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase { assertThat(q5, is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build())); - Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext); + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs"); assertThat(q6, is(new MultiPhraseQuery.Builder() .add(new Term[]{ - new Term("synfield._index_phrase", "motor car"), - new Term("synfield._index_phrase", "motor auto")}) + new Term("synfield._index_phrase", "motor dogs"), + new Term("synfield._index_phrase", "motor dog")}) .build())); ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference diff --git a/server/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java b/server/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java deleted file mode 100644 index aa154d93925..00000000000 --- a/server/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.search; - -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; -import org.elasticsearch.action.index.IndexRequestBuilder; -import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.query.Operator; -import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.test.ESIntegTestCase; -import org.junit.Before; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.ExecutionException; - -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; - -public class MatchQueryIT extends ESIntegTestCase { - private static final String INDEX = "test"; - - /** - * Test setup. - */ - @Before - public void setUp() throws Exception { - super.setUp(); - CreateIndexRequestBuilder builder = prepareCreate(INDEX).setSettings( - Settings.builder() - .put(indexSettings()) - .put("index.analysis.filter.syns.type", "synonym") - .putList("index.analysis.filter.syns.synonyms", "wtf, what the fudge", "foo, bar baz") - .put("index.analysis.analyzer.lower_syns.type", "custom") - .put("index.analysis.analyzer.lower_syns.tokenizer", "standard") - .putList("index.analysis.analyzer.lower_syns.filter", "lowercase", "syns") - .put("index.analysis.filter.graphsyns.type", "synonym_graph") - .putList("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz") - .put("index.analysis.analyzer.lower_graphsyns.type", "custom") - .put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard") - .putList("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns") - ); - - assertAcked(builder.addMapping(INDEX, createMapping())); - ensureGreen(); - } - - private List getDocs() { - List builders = new ArrayList<>(); - builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo")); - builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man")); - builders.add(client().prepareIndex("test", "test", "3").setSource("field", "wtf")); - builders.add(client().prepareIndex("test", "test", "4").setSource("field", "what is the name for fudge")); - builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three")); - builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three")); - - return builders; - } - - /** - * Setup the index mappings for the test index. 
- * - * @return the json builder with the index mappings - * @throws IOException on error creating mapping json - */ - private XContentBuilder createMapping() throws IOException { - return XContentFactory.jsonBuilder() - .startObject() - .startObject(INDEX) - .startObject("properties") - .startObject("field") - .field("type", "text") - .endObject() - .endObject() - .endObject() - .endObject(); - } - - public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException { - indexRandom(true, false, getDocs()); - - // first search using regular synonym field using phrase - SearchResponse searchResponse = client().prepareSearch(INDEX) - .setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get(); - - // because foo -> "bar baz" where "foo" and "bar" at position 0, "baz" and "two" at position 1. - // "bar two three", "bar baz three", "foo two three", "foo baz three" - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "5"); // we should not match this but we do - - // same query using graph should find correct result - searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three") - .analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "6"); - } - - public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException { - indexRandom(true, false, getDocs()); - - // first search using regular synonym field using phrase - SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") - .operator(Operator.AND).analyzer("lower_syns")).get(); - - // Old synonyms work fine in that case, but it is coincidental - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); - - // same query using graph should find correct result - searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") - .operator(Operator.AND).analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); - } - - public void testMinShouldMatch() throws ExecutionException, InterruptedException { - indexRandom(true, false, getDocs()); - - // no min should match - SearchResponse searchResponse = client().prepareSearch(INDEX) - .setQuery( - QueryBuilders.matchQuery("field", "three what the fudge foo") - .operator(Operator.OR).analyzer("lower_graphsyns").autoGenerateSynonymsPhraseQuery(false) - ) - .get(); - - assertHitCount(searchResponse, 6L); - assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6"); - - // same query, with min_should_match of 2 - searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo") - .operator(Operator.OR).analyzer("lower_graphsyns").minimumShouldMatch("80%")).get(); - - // three wtf foo = 2 terms, match #1 - // three wtf bar baz = 3 terms, match #6 - // three what the fudge foo = 4 terms, no match - // three what the fudge bar baz = 4 terms, match #2 - assertHitCount(searchResponse, 3L); - assertSearchHits(searchResponse, "1", "2", "6"); - } - - public void testMultiTermsSynonymsPhrase() throws ExecutionException, InterruptedException { - List builders = getDocs(); - indexRandom(true, false, builders); - SearchResponse searchResponse = client().prepareSearch(INDEX) - .setQuery( - QueryBuilders.matchQuery("field", "wtf") - .analyzer("lower_graphsyns") - 
.operator(Operator.AND)) - .get(); - assertHitCount(searchResponse, 3L); - assertSearchHits(searchResponse, "1", "2", "3"); - } - - public void testPhrasePrefix() throws ExecutionException, InterruptedException { - List builders = getDocs(); - builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!")); - builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE")); - indexRandom(true, false, builders); - - SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf") - .analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 5L); - assertSearchHits(searchResponse, "1", "2", "3", "7", "8"); - } - - public void testCommonTerms() throws ExecutionException, InterruptedException { - String route = "commonTermsTest"; - List builders = getDocs(); - for (IndexRequestBuilder indexRequet : builders) { - // route all docs to same shard for this test - indexRequet.setRouting(route); - } - indexRandom(true, false, builders); - - // do a search with no cutoff frequency to show which docs should match - SearchResponse searchResponse = client().prepareSearch(INDEX) - .setRouting(route) - .setQuery(QueryBuilders.matchQuery("field", "bar three happened") - .operator(Operator.OR)).get(); - - assertHitCount(searchResponse, 4L); - assertSearchHits(searchResponse, "1", "2", "5", "6"); - - // do same search with cutoff and see less documents match - // in this case, essentially everything but "happened" gets excluded - searchResponse = client().prepareSearch(INDEX) - .setRouting(route) - .setQuery(QueryBuilders.matchQuery("field", "bar three happened") - .operator(Operator.OR).cutoffFrequency(1f)).get(); - - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); - } -} diff --git a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 184d54f43b8..1087bbbf9fd 100644 --- a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -73,11 +73,7 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { @Before public void setup() throws IOException { - Settings settings = Settings.builder() - .put("index.analysis.filter.syns.type","synonym") - .putList("index.analysis.filter.syns.synonyms","quick,fast") - .put("index.analysis.analyzer.syns.tokenizer","standard") - .put("index.analysis.analyzer.syns.filter","syns").build(); + Settings settings = Settings.builder().build(); IndexService indexService = createIndex("test", settings); MapperService mapperService = indexService.mapperService(); String mapping = "{\n" + @@ -87,11 +83,11 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { " \"properties\":{\n" + " \"first\": {\n" + " \"type\":\"text\",\n" + - " \"analyzer\":\"syns\"\n" + + " \"analyzer\":\"standard\"\n" + " }," + " \"last\": {\n" + " \"type\":\"text\",\n" + - " \"analyzer\":\"syns\"\n" + + " \"analyzer\":\"standard\"\n" + " }" + " }" + " }\n" + @@ -221,25 +217,27 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { QueryShardContext queryShardContext = indexService.newQueryShardContext( randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null); + MultiMatchQuery parser = new MultiMatchQuery(queryShardContext); + parser.setAnalyzer(new MockSynonymAnalyzer()); + 
Map<String, Float> fieldNames = new HashMap<>(); + fieldNames.put("name.first", 1.0f); + // check that synonym query is used for a single field - Query parsedQuery = - multiMatchQuery("quick").field("name.first") - .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + Query parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null); Term[] terms = new Term[2]; - terms[0] = new Term("name.first", "quick"); - terms[1] = new Term("name.first", "fast"); + terms[0] = new Term("name.first", "dog"); + terms[1] = new Term("name.first", "dogs"); Query expectedQuery = new SynonymQuery(terms); assertThat(parsedQuery, equalTo(expectedQuery)); // check that blended term query is used for multiple fields - parsedQuery = - multiMatchQuery("quick").field("name.first").field("name.last") - .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + fieldNames.put("name.last", 1.0f); + parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null); terms = new Term[4]; - terms[0] = new Term("name.first", "quick"); - terms[1] = new Term("name.first", "fast"); - terms[2] = new Term("name.last", "quick"); - terms[3] = new Term("name.last", "fast"); + terms[0] = new Term("name.first", "dog"); + terms[1] = new Term("name.first", "dogs"); + terms[2] = new Term("name.last", "dog"); + terms[3] = new Term("name.last", "dogs"); float[] boosts = new float[4]; Arrays.fill(boosts, 1.0f); expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); diff --git a/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index f814f4c227a..2094c20c890 100644 --- a/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/server/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -121,54 +121,6 @@ public class AnalyzeActionIT extends ESIntegTestCase { assertThat(analyzeResponse.getTokens().get(0).getPositionLength(), equalTo(1)); } - public void testAnalyzeWithNonDefaultPostionLength() throws Exception { - assertAcked(prepareCreate("test").addAlias(new Alias("alias")) - .setSettings(Settings.builder().put(indexSettings()) - .put("index.analysis.filter.syns.type", "synonym") - .putList("index.analysis.filter.syns.synonyms", "wtf, what the fudge") - .put("index.analysis.analyzer.custom_syns.tokenizer", "standard") - .putList("index.analysis.analyzer.custom_syns.filter", "lowercase", "syns"))); - ensureGreen(); - - AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("say what the fudge").setIndex("test").setAnalyzer("custom_syns").get(); - assertThat(analyzeResponse.getTokens().size(), equalTo(5)); - - AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); - assertThat(token.getTerm(), equalTo("say")); - assertThat(token.getPosition(), equalTo(0)); - assertThat(token.getStartOffset(), equalTo(0)); - assertThat(token.getEndOffset(), equalTo(3)); - assertThat(token.getPositionLength(), equalTo(1)); - - token = analyzeResponse.getTokens().get(1); - assertThat(token.getTerm(), equalTo("what")); - assertThat(token.getPosition(), equalTo(1)); - assertThat(token.getStartOffset(), equalTo(4)); - assertThat(token.getEndOffset(), equalTo(8)); - assertThat(token.getPositionLength(), equalTo(1)); - - token = analyzeResponse.getTokens().get(2); - assertThat(token.getTerm(), equalTo("wtf")); - assertThat(token.getPosition(), equalTo(1)); -
assertThat(token.getStartOffset(), equalTo(4)); - assertThat(token.getEndOffset(), equalTo(18)); - assertThat(token.getPositionLength(), equalTo(3)); - - token = analyzeResponse.getTokens().get(3); - assertThat(token.getTerm(), equalTo("the")); - assertThat(token.getPosition(), equalTo(2)); - assertThat(token.getStartOffset(), equalTo(9)); - assertThat(token.getEndOffset(), equalTo(12)); - assertThat(token.getPositionLength(), equalTo(1)); - - token = analyzeResponse.getTokens().get(4); - assertThat(token.getTerm(), equalTo("fudge")); - assertThat(token.getPosition(), equalTo(3)); - assertThat(token.getStartOffset(), equalTo(13)); - assertThat(token.getEndOffset(), equalTo(18)); - assertThat(token.getPositionLength(), equalTo(1)); - } - public void testAnalyzerWithFieldOrTypeTests() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); ensureGreen(); diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 069c72c10b4..5120abfbdc6 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -44,7 +44,6 @@ import org.elasticsearch.index.query.AbstractQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; -import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; @@ -84,7 +83,6 @@ import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery; import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery; import static org.elasticsearch.index.query.QueryBuilders.existsQuery; import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery; -import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery; import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery; @@ -1475,117 +1473,6 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); } - public void testPhrasePrefix() throws IOException { - Builder builder = Settings.builder() - .put(indexSettings()) - .put("index.analysis.analyzer.synonym.tokenizer", "standard") - .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase") - .put("index.analysis.filter.synonym.type", "synonym") - .putList("index.analysis.filter.synonym.synonyms", "quick => fast"); - - assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).addMapping("type1", type1TermVectorMapping())); - - ensureGreen(); - - client().prepareIndex("first_test_index", "type1", "0").setSource( - "field0", "The quick brown fox jumps over the lazy dog", - "field1", "The quick brown fox jumps over the lazy dog").get(); - client().prepareIndex("first_test_index", "type1", "1").setSource("field1", - "The quick browse button is a fancy thing, right bro?").get(); - refresh(); - logger.info("--> highlighting and 
searching on field0"); - - SearchSourceBuilder source = searchSource() - .query(matchPhrasePrefixQuery("field0", "bro")) - .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); - SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); - - assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); - - source = searchSource() - .query(matchPhrasePrefixQuery("field0", "quick bro")) - .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); - - searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field0", 0, 1, - equalTo("The quick brown fox jumps over the lazy dog")); - - logger.info("--> highlighting and searching on field1"); - source = searchSource() - .query(boolQuery() - .should(matchPhrasePrefixQuery("field1", "test")) - .should(matchPhrasePrefixQuery("field1", "bro")) - ) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); - - searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); - assertThat(searchResponse.getHits().totalHits, equalTo(2L)); - for (int i = 0; i < 2; i++) { - assertHighlight(searchResponse, i, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - } - - source = searchSource() - .query(matchPhrasePrefixQuery("field1", "quick bro")) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); - - searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet(); - - assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - - assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc", - "field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym", - "field3", "type=text,analyzer=synonym")); - // with synonyms - client().prepareIndex("second_test_index", "doc", "0").setSource( - "type", "type2", - "field4", "The quick brown fox jumps over the lazy dog", - "field3", "The quick brown fox jumps over the lazy dog").get(); - client().prepareIndex("second_test_index", "doc", "1").setSource( - "type", "type2", - "field4", "The quick browse button is a fancy thing, right bro?").get(); - client().prepareIndex("second_test_index", "doc", "2").setSource( - "type", "type2", - "field4", "a quick fast blue car").get(); - refresh(); - - source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro")) - .highlighter(highlight().field("field3").order("score").preTags("").postTags("")); - - searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); - - assertHighlight(searchResponse, 0, "field3", 0, 1, - equalTo("The quick brown fox jumps over the lazy dog")); - - logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro")) - 
.highlighter(highlight().field("field4").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); - - assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog"))); - - logger.info("--> highlighting and searching on field4"); - source = searchSource().postFilter(termQuery("type", "type2")) - .query(matchPhrasePrefixQuery("field4", "a fast quick blue ca")) - .highlighter(highlight().field("field4").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet(); - - assertHighlight(searchResponse, 0, "field4", 0, 1, - anyOf(equalTo("a quick fast blue car"), - equalTo("a quick fast blue car"))); - } - public void testPlainHighlightDifferentFragmenter() throws Exception { assertAcked(prepareCreate("test") .addMapping("type1", "tags", "type=text")); @@ -2919,46 +2806,7 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertThat(field.getFragments()[0].string(), equalTo("brown")); } - public void testSynonyms() throws IOException { - Builder builder = Settings.builder() - .put(indexSettings()) - .put("index.analysis.analyzer.synonym.tokenizer", "standard") - .putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase") - .put("index.analysis.filter.synonym.type", "synonym") - .putList("index.analysis.filter.synonym.synonyms", "fast,quick"); - assertAcked(prepareCreate("test").setSettings(builder.build()) - .addMapping("type1", "field1", - "type=text,term_vector=with_positions_offsets,search_analyzer=synonym," + - "analyzer=standard,index_options=offsets")); - ensureGreen(); - - client().prepareIndex("test", "type1", "0").setSource( - "field1", "The quick brown fox jumps over the lazy dog").get(); - refresh(); - for (String highlighterType : ALL_TYPES) { - logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1"); - SearchSourceBuilder source = searchSource() - .query(matchQuery("field1", "quick brown fox").operator(Operator.AND)) - .highlighter( - highlight() - .field("field1") - .order("score") - .preTags("") - .postTags("") - .highlighterType(highlighterType)); - SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, - equalTo("The quick brown fox jumps over the lazy dog")); - - source = searchSource() - .query(matchQuery("field1", "fast brown fox").operator(Operator.AND)) - .highlighter(highlight().field("field1").order("score").preTags("").postTags("")); - searchResponse = client().search(searchRequest("test").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field1", 0, 1, - equalTo("The quick brown fox jumps over the lazy dog")); - } - } public void testHighlightQueryRewriteDatesWithNow() throws Exception { assertAcked(client().admin().indices().prepareCreate("index-1").addMapping("type", "d", "type=date", diff --git a/server/src/test/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java b/server/src/test/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java index fe50aaf9b73..aa9d9c4b87e 100644 --- 
a/server/src/test/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java +++ b/server/src/test/java/org/elasticsearch/search/functionscore/QueryRescorerIT.java @@ -156,13 +156,9 @@ public class QueryRescorerIT extends ESIntegTestCase { public void testMoreDocs() throws Exception { Builder builder = Settings.builder(); - builder.put("index.analysis.analyzer.synonym.tokenizer", "standard"); - builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase"); - builder.put("index.analysis.filter.synonym.type", "synonym"); - builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street"); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties") - .startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym") + .startObject("field1").field("type", "text").field("analyzer", "whitespace") .endObject().endObject().endObject().endObject(); assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping) @@ -234,13 +230,9 @@ public class QueryRescorerIT extends ESIntegTestCase { // Tests a rescore window smaller than number of hits: public void testSmallRescoreWindow() throws Exception { Builder builder = Settings.builder(); - builder.put("index.analysis.analyzer.synonym.tokenizer", "standard"); - builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase"); - builder.put("index.analysis.filter.synonym.type", "synonym"); - builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street"); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties") - .startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym") + .startObject("field1").field("type", "text").field("analyzer", "whitespace") .endObject().endObject().endObject().endObject(); assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping) @@ -306,13 +298,9 @@ public class QueryRescorerIT extends ESIntegTestCase { // Tests a rescorer that penalizes the scores: public void testRescorerMadeScoresWorse() throws Exception { Builder builder = Settings.builder(); - builder.put("index.analysis.analyzer.synonym.tokenizer", "standard"); - builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase"); - builder.put("index.analysis.filter.synonym.type", "synonym"); - builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street"); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties") - .startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym") + .startObject("field1").field("type", "text").field("analyzer", "whitespace") .endObject().endObject().endObject().endObject(); assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping) diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java b/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java index f2d69fc377d..8a09e5a919a 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryStringIT.java @@ -20,16 +20,13 @@ package org.elasticsearch.search.query; import 
org.elasticsearch.ExceptionsHelper; -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.Operator; -import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.QueryStringQueryBuilder; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.SearchHits; @@ -48,7 +45,6 @@ import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; -import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -252,92 +248,6 @@ public class QueryStringIT extends ESIntegTestCase { containsString("unit [D] not supported for date math [-2D]")); } - private void setupIndexWithGraph(String index) throws Exception { - CreateIndexRequestBuilder builder = prepareCreate(index).setSettings( - Settings.builder() - .put(indexSettings()) - .put("index.analysis.filter.graphsyns.type", "synonym_graph") - .putList("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz") - .put("index.analysis.analyzer.lower_graphsyns.type", "custom") - .put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard") - .putList("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns") - ); - - XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject(index).startObject("properties") - .startObject("field").field("type", "text").endObject().endObject().endObject().endObject(); - - assertAcked(builder.addMapping(index, mapping)); - ensureGreen(); - - List builders = new ArrayList<>(); - builders.add(client().prepareIndex(index, index, "1").setSource("field", "say wtf happened foo")); - builders.add(client().prepareIndex(index, index, "2").setSource("field", "bar baz what the fudge man")); - builders.add(client().prepareIndex(index, index, "3").setSource("field", "wtf")); - builders.add(client().prepareIndex(index, index, "4").setSource("field", "what is the name for fudge")); - builders.add(client().prepareIndex(index, index, "5").setSource("field", "bar two three")); - builders.add(client().prepareIndex(index, index, "6").setSource("field", "bar baz two three")); - - indexRandom(true, false, builders); - } - - public void testGraphQueries() throws Exception { - String index = "graph_test_index"; - setupIndexWithGraph(index); - - // phrase - SearchResponse searchResponse = client().prepareSearch(index).setQuery( - QueryBuilders.queryStringQuery("\"foo two three\"") - .defaultField("field") - .analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "6"); - - // and - searchResponse = client().prepareSearch(index).setQuery( - QueryBuilders.queryStringQuery("say what the fudge") - 
.defaultField("field") - .defaultOperator(Operator.AND) - .autoGenerateSynonymsPhraseQuery(false) - .analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 1L); - assertSearchHits(searchResponse, "1"); - - // or - searchResponse = client().prepareSearch(index).setQuery( - QueryBuilders.queryStringQuery("three what the fudge foo") - .defaultField("field") - .defaultOperator(Operator.OR) - .autoGenerateSynonymsPhraseQuery(false) - .analyzer("lower_graphsyns")).get(); - - assertHitCount(searchResponse, 6L); - assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6"); - - // min should match - searchResponse = client().prepareSearch(index).setQuery( - QueryBuilders.queryStringQuery("three what the fudge foo") - .defaultField("field") - .defaultOperator(Operator.OR) - .autoGenerateSynonymsPhraseQuery(false) - .analyzer("lower_graphsyns") - .minimumShouldMatch("80%")).get(); - - assertHitCount(searchResponse, 3L); - assertSearchHits(searchResponse, "1", "2", "6"); - - // multi terms synonyms phrase - searchResponse = client().prepareSearch(index).setQuery( - QueryBuilders.queryStringQuery("what the fudge") - .defaultField("field") - .defaultOperator(Operator.AND) - .analyzer("lower_graphsyns")) - .get(); - assertHitCount(searchResponse, 3L); - assertSearchHits(searchResponse, "1", "2", "3"); - } - public void testLimitOnExpandedFields() throws Exception { XContentBuilder builder = jsonBuilder(); builder.startObject(); diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index a861cc76655..6068f890259 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -21,7 +21,6 @@ package org.elasticsearch.search.query; import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.util.English; -import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; @@ -349,98 +348,6 @@ public class SearchQueryIT extends ESIntegTestCase { assertThirdHit(searchResponse, hasId("2")); } - public void testCommonTermsQueryStackedTokens() throws Exception { - assertAcked(prepareCreate("test") - .setSettings(Settings.builder() - .put(indexSettings()) - .put(SETTING_NUMBER_OF_SHARDS,1) - .put("index.analysis.filter.syns.type","synonym") - .putList("index.analysis.filter.syns.synonyms","quick,fast") - .put("index.analysis.analyzer.syns.tokenizer","standard") - .put("index.analysis.analyzer.syns.filter","syns") - ) - .addMapping("type1", "field1", "type=text,analyzer=syns", "field2", "type=text,analyzer=syns")); - - indexRandom(true, client().prepareIndex("test", "type1", "3").setSource("field1", "quick lazy huge brown pidgin", "field2", "the quick lazy huge brown fox jumps over the tree"), - client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox"), - client().prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree") ); - - SearchResponse searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.OR)).get(); - assertHitCount(searchResponse, 3L); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, 
hasId("2")); - assertThirdHit(searchResponse, hasId("3")); - - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.AND)).get(); - assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L)); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("2")); - - // Default - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3)).get(); - assertHitCount(searchResponse, 3L); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("2")); - assertThirdHit(searchResponse, hasId("3")); - - - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast huge fox").lowFreqMinimumShouldMatch("3")).get(); - assertHitCount(searchResponse, 1L); - assertFirstHit(searchResponse, hasId("2")); - - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("5")).get(); - assertHitCount(searchResponse, 2L); - assertFirstHit(searchResponse, hasId("2")); - assertSecondHit(searchResponse, hasId("1")); - - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("6")).get(); - assertHitCount(searchResponse, 1L); - assertFirstHit(searchResponse, hasId("2")); - - // Default - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1)).get(); - assertHitCount(searchResponse, 1L); - assertFirstHit(searchResponse, hasId("2")); - - searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the quick brown").cutoffFrequency(3).analyzer("stop")).get(); - assertHitCount(searchResponse, 3L); - // stop drops "the" since its a stopword - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("3")); - assertThirdHit(searchResponse, hasId("2")); - - // try the same with match query - searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.AND)).get(); - assertHitCount(searchResponse, 2L); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("2")); - - searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.OR)).get(); - assertHitCount(searchResponse, 3L); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("2")); - assertThirdHit(searchResponse, hasId("3")); - - searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.AND).analyzer("stop")).get(); - assertHitCount(searchResponse, 3L); - // stop drops "the" since its a stopword - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("3")); - assertThirdHit(searchResponse, hasId("2")); - - searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).minimumShouldMatch("3")).get(); - assertHitCount(searchResponse, 2L); - assertFirstHit(searchResponse, hasId("1")); - assertSecondHit(searchResponse, hasId("2")); - - // try the same with multi match query - searchResponse = client().prepareSearch().setQuery(multiMatchQuery("the fast brown", "field1", 
"field2").cutoffFrequency(3).operator(Operator.AND)).get(); - assertHitCount(searchResponse, 3L); - assertFirstHit(searchResponse, hasId("3")); - assertSecondHit(searchResponse, hasId("1")); - assertThirdHit(searchResponse, hasId("2")); - } - public void testQueryStringAnalyzedWildcard() throws Exception { createIndex("test"); @@ -1535,69 +1442,6 @@ public class SearchQueryIT extends ESIntegTestCase { assertHitCount(client().prepareSearch("test").setQuery(queryStringQuery("field\\*:/value[01]/")).get(), 1); } - // see #3881 - for extensive description of the issue - public void testMatchQueryWithSynonyms() throws IOException { - CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder() - .put(indexSettings()) - .put("index.analysis.analyzer.index.type", "custom") - .put("index.analysis.analyzer.index.tokenizer", "standard") - .put("index.analysis.analyzer.index.filter", "lowercase") - .put("index.analysis.analyzer.search.type", "custom") - .put("index.analysis.analyzer.search.tokenizer", "standard") - .putList("index.analysis.analyzer.search.filter", "lowercase", "synonym") - .put("index.analysis.filter.synonym.type", "synonym") - .putList("index.analysis.filter.synonym.synonyms", "fast, quick")); - assertAcked(builder.addMapping("test", "text", "type=text,analyzer=index,search_analyzer=search")); - - client().prepareIndex("test", "test", "1").setSource("text", "quick brown fox").get(); - refresh(); - SearchResponse searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick").operator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick brown").operator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "fast").operator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - - client().prepareIndex("test", "test", "2").setSource("text", "fast brown fox").get(); - refresh(); - searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick").operator(Operator.AND)).get(); - assertHitCount(searchResponse, 2); - searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick brown").operator(Operator.AND)).get(); - assertHitCount(searchResponse, 2); - } - - public void testQueryStringWithSynonyms() throws IOException { - CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder() - .put(indexSettings()) - .put("index.analysis.analyzer.index.type", "custom") - .put("index.analysis.analyzer.index.tokenizer", "standard") - .put("index.analysis.analyzer.index.filter", "lowercase") - .put("index.analysis.analyzer.search.type", "custom") - .put("index.analysis.analyzer.search.tokenizer", "standard") - .putList("index.analysis.analyzer.search.filter", "lowercase", "synonym") - .put("index.analysis.filter.synonym.type", "synonym") - .putList("index.analysis.filter.synonym.synonyms", "fast, quick")); - assertAcked(builder.addMapping("test", "text", "type=text,analyzer=index,search_analyzer=search")); - - client().prepareIndex("test", "test", "1").setSource("text", "quick brown fox").get(); - refresh(); - - SearchResponse searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick").defaultField("text").defaultOperator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick 
brown").defaultField("text").defaultOperator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - searchResponse = client().prepareSearch().setQuery(queryStringQuery("fast").defaultField("text").defaultOperator(Operator.AND)).get(); - assertHitCount(searchResponse, 1); - - client().prepareIndex("test", "test", "2").setSource("text", "fast brown fox").get(); - refresh(); - - searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick").defaultField("text").defaultOperator(Operator.AND)).get(); - assertHitCount(searchResponse, 2); - searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick brown").defaultField("text").defaultOperator(Operator.AND)).get(); - assertHitCount(searchResponse, 2); - } - // see #3797 public void testMultiMatchLenientIssue3797() { createIndex("test"); diff --git a/server/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java b/server/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java index ca21cbc86ca..52893a3c032 100644 --- a/server/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java +++ b/server/src/test/java/org/elasticsearch/search/suggest/CompletionSuggestSearchIT.java @@ -522,28 +522,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase { assertSuggestions("b", "The Beatles"); } - public void testThatSynonymsWork() throws Exception { - Settings.Builder settingsBuilder = Settings.builder() - .put("analysis.analyzer.suggest_analyzer_synonyms.type", "custom") - .put("analysis.analyzer.suggest_analyzer_synonyms.tokenizer", "standard") - .putList("analysis.analyzer.suggest_analyzer_synonyms.filter", "lowercase", "my_synonyms") - .put("analysis.filter.my_synonyms.type", "synonym") - .putList("analysis.filter.my_synonyms.synonyms", "foo,renamed"); - completionMappingBuilder.searchAnalyzer("suggest_analyzer_synonyms").indexAnalyzer("suggest_analyzer_synonyms"); - createIndexAndMappingAndSettings(settingsBuilder.build(), completionMappingBuilder); - - client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder() - .startObject().startObject(FIELD) - .startArray("input").value("Foo Fighters").endArray() - .endObject().endObject() - ).get(); - - refresh(); - - // get suggestions for renamed - assertSuggestions("r", "Foo Fighters"); - } - public void testThatUpgradeToMultiFieldsWorks() throws Exception { final XContentBuilder mapping = jsonBuilder() .startObject() diff --git a/server/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/server/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 8e907fe8a1a..96ddfc44dba 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/server/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -2118,17 +2118,14 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas .put("compress", randomBoolean()) .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES))); - logger.info("--> create test index with synonyms search analyzer"); + logger.info("--> create test index with case-preserving search analyzer"); Settings.Builder indexSettings = Settings.builder() .put(indexSettings()) .put(SETTING_NUMBER_OF_REPLICAS, between(0, 1)) .put(INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s") .put("index.analysis.analyzer.my_analyzer.type", "custom") - .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard") - 
.putList("index.analysis.analyzer.my_analyzer.filter", "lowercase", "my_synonym") - .put("index.analysis.filter.my_synonym.type", "synonym") - .put("index.analysis.filter.my_synonym.synonyms", "foo => bar"); + .put("index.analysis.analyzer.my_analyzer.tokenizer", "standard"); assertAcked(prepareCreate("test-idx", 2, indexSettings)); @@ -2137,12 +2134,13 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas final int numdocs = randomIntBetween(10, 100); IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs]; for (int i = 0; i < builders.length; i++) { - builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "bar " + i); + builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "Foo bar " + i); } indexRandom(true, builders); flushAndRefresh(); assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), numdocs); + assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), 0); assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs); logger.info("--> snapshot it"); @@ -2195,9 +2193,8 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas // Make sure that number of shards didn't change assertThat(getSettingsResponse.getSetting("test-idx", SETTING_NUMBER_OF_SHARDS), equalTo("" + numberOfShards)); assertThat(getSettingsResponse.getSetting("test-idx", "index.analysis.analyzer.my_analyzer.type"), equalTo("standard")); - assertThat(getSettingsResponse.getSetting("test-idx", "index.analysis.filter.my_synonym.type"), nullValue()); - assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), 0); + assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), numdocs); assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs); logger.info("--> delete the index and recreate it while deleting all index settings"); @@ -2217,7 +2214,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas // Make sure that number of shards didn't change assertThat(getSettingsResponse.getSetting("test-idx", SETTING_NUMBER_OF_SHARDS), equalTo("" + numberOfShards)); - assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), 0); + assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), numdocs); assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs); } diff --git a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java index cdbc2c702d8..9ef47af29cd 100644 --- a/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java +++ b/server/src/test/java/org/elasticsearch/validate/SimpleValidateQueryIT.java @@ -180,42 +180,6 @@ public class SimpleValidateQueryIT extends ESIntegTestCase { assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:value1")); } - public void testExplainMatchPhrasePrefix() { - assertAcked(prepareCreate("test").setSettings( - Settings.builder().put(indexSettings()) - .put("index.analysis.filter.syns.type", "synonym") - 
.putList("index.analysis.filter.syns.synonyms", "one,two") - .put("index.analysis.analyzer.syns.tokenizer", "standard") - .putList("index.analysis.analyzer.syns.filter", "syns") - ).addMapping("test", "field","type=text,analyzer=syns")); - ensureGreen(); - - ValidateQueryResponse validateQueryResponse = client().admin().indices().prepareValidateQuery("test") - .setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo")).setExplain(true).get(); - assertThat(validateQueryResponse.isValid(), equalTo(true)); - assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1)); - assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo*\"")); - - validateQueryResponse = client().admin().indices().prepareValidateQuery("test") - .setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo bar")).setExplain(true).get(); - assertThat(validateQueryResponse.isValid(), equalTo(true)); - assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1)); - assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo bar*\"")); - - // Stacked tokens - validateQueryResponse = client().admin().indices().prepareValidateQuery("test") - .setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "one bar")).setExplain(true).get(); - assertThat(validateQueryResponse.isValid(), equalTo(true)); - assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1)); - assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"(one two) bar*\"")); - - validateQueryResponse = client().admin().indices().prepareValidateQuery("test") - .setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo one")).setExplain(true).get(); - assertThat(validateQueryResponse.isValid(), equalTo(true)); - assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1)); - assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo (one* two*)\"")); - } - public void testExplainWithRewriteValidateQuery() throws Exception { client().admin().indices().prepareCreate("test") .addMapping("type1", "field", "type=text,analyzer=whitespace") diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index 2164fe32a39..70a42032ea4 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -31,8 +31,6 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.StopTokenFilterFactory; -import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory; -import org.elasticsearch.index.analysis.SynonymTokenFilterFactory; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -169,8 +167,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase { .put("stemmeroverride", MovedToAnalysisCommon.class) .put("stop", StopTokenFilterFactory.class) .put("swedishlightstem", MovedToAnalysisCommon.class) - .put("synonym", SynonymTokenFilterFactory.class) - .put("synonymgraph", SynonymGraphTokenFilterFactory.class) + 
.put("synonym", MovedToAnalysisCommon.class) + .put("synonymgraph", MovedToAnalysisCommon.class) .put("trim", MovedToAnalysisCommon.class) .put("truncate", MovedToAnalysisCommon.class) .put("turkishlowercase", MovedToAnalysisCommon.class)