Remove special-casing of Synonym filters in AnalysisRegistry (#34034)

The synonym filters no longer need access to the AnalysisRegistry in their
constructors, so we can remove the special-case code and move them to the
common analysis module.

This commit means that synonyms are no longer available to `server` integration tests,
so several of those tests are either rewritten or migrated to the common analysis module
as rest-api-spec tests.
Alan Woodward 2018-09-28 09:02:47 +01:00 committed by GitHub
parent 9129948f60
commit f243d75f59
29 changed files with 1023 additions and 876 deletions
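For orientation before the diff: both synonym factories previously took the `AnalysisRegistry` as a constructor argument so they could look up the tokenizer, char filters, and preceding token filters needed to analyze their rule files. After this change the chain is handed to the factory while the index analyzers are being assembled. A trimmed sketch of the chain-aware hook this relies on (the parameter list is simplified from the real `TokenFilterFactory` interface):

```java
public interface TokenFilterFactory {
    TokenStream create(TokenStream tokenStream);

    // A factory that needs to see its own analysis chain, as the synonym
    // factories do in order to parse their rule files, overrides this hook
    // and is given the tokenizer and the filters preceding it, instead of
    // reaching back into the AnalysisRegistry.
    default TokenFilterFactory getChainAwareTokenFilterFactory(
            TokenizerFactory tokenizer,
            List<CharFilterFactory> charFilters,
            List<TokenFilterFactory> previousTokenFilters) {
        return this;
    }
}
```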

View File

@ -275,6 +275,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin, Scri
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -26,16 +26,18 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import java.io.IOException;
import java.util.List;
import java.util.function.Function;
public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {
public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
super(indexSettings, env, analysisRegistry, name, settings);
SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
super(indexSettings, env, name, settings);
}
@Override

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -26,8 +26,13 @@ import org.apache.lucene.analysis.synonym.SynonymMap;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
@ -35,14 +40,14 @@ import java.util.function.Function;
public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final String format;
protected final boolean expand;
protected final boolean lenient;
private final String format;
private final boolean expand;
private final boolean lenient;
protected final Settings settings;
protected final Environment environment;
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
super(indexSettings, name, settings);
this.settings = settings;
@ -83,15 +88,15 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
};
}
protected Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters) {
Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
List<TokenFilterFactory> tokenFilters) {
return new CustomAnalyzer("synonyms", tokenizer, charFilters.toArray(new CharFilterFactory[0]),
tokenFilters.stream()
.map(TokenFilterFactory::getSynonymFilter)
.toArray(TokenFilterFactory[]::new));
}
protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
try {
SynonymMap.Builder parser;
if ("wordnet".equalsIgnoreCase(format)) {
@ -107,7 +112,7 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
}
}
protected Reader getRulesFromSettings(Environment env) {
Reader getRulesFromSettings(Environment env) {
Reader rulesReader;
if (settings.getAsList("synonyms", null) != null) {
List<String> rulesList = Analysis.getWordList(env, settings, "synonyms");
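The hunk above elides the parser selection inside `buildSynonyms`. For reference, a standalone version on stock Lucene parsers looks like the sketch below; the real factory additionally honours the `lenient` flag through Elasticsearch-specific parser subclasses:

```java
// Sketch: pick a rules parser by format, feed it the rules Reader, and
// build the immutable SynonymMap used by the filter.
SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) throws IOException, ParseException {
    SynonymMap.Parser parser = "wordnet".equalsIgnoreCase(format)
            ? new WordnetSynonymParser(true, expand, analyzer)  // dedup = true
            : new SolrSynonymParser(true, expand, analyzer);    // default "solr" format
    parser.parse(rules);
    return parser.build();
}
```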

View File

@ -24,7 +24,6 @@ import org.apache.lucene.analysis.en.PorterStemFilterFactory;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory;
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;
import java.util.List;
@ -106,6 +105,7 @@ public class CommonAnalysisFactoryTests extends AnalysisFactoryTestCase {
filters.put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
filters.put("kstem", KStemTokenFilterFactory.class);
filters.put("synonym", SynonymTokenFilterFactory.class);
filters.put("synonymgraph", SynonymGraphTokenFilterFactory.class);
filters.put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
filters.put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
filters.put("reversestring", ReverseTokenFilterFactory.class);

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;

View File

@ -17,7 +17,7 @@
* under the License.
*/
package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;

View File

@ -21,9 +21,12 @@ package org.elasticsearch.analysis.common;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.test.ESIntegTestCase;
@ -31,11 +34,18 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
@ -153,4 +163,165 @@ public class HighlighterWithAnalyzersTests extends ESIntegTestCase {
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
}
public void testSynonyms() throws IOException {
Settings.Builder builder = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "fast,quick");
assertAcked(prepareCreate("test").setSettings(builder.build())
.addMapping("type1", "field1",
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
"analyzer=standard,index_options=offsets"));
ensureGreen();
client().prepareIndex("test", "type1", "0").setSource(
"field1", "The quick brown fox jumps over the lazy dog").get();
refresh();
for (String highlighterType : new String[] {"plain", "fvh", "unified"}) {
logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1");
SearchSourceBuilder source = searchSource()
.query(matchQuery("field1", "quick brown fox").operator(Operator.AND))
.highlighter(
highlight()
.field("field1")
.order("score")
.preTags("<x>")
.postTags("</x>")
.highlighterType(highlighterType));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
source = searchSource()
.query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
}
}
public void testPhrasePrefix() throws IOException {
Settings.Builder builder = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "quick => fast");
assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).addMapping("type1", type1TermVectorMapping()));
ensureGreen();
client().prepareIndex("first_test_index", "type1", "0").setSource(
"field0", "The quick brown fox jumps over the lazy dog",
"field1", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("first_test_index", "type1", "1").setSource("field1",
"The quick browse button is a fancy thing, right bro?").get();
refresh();
logger.info("--> highlighting and searching on field0");
SearchSourceBuilder source = searchSource()
.query(matchPhrasePrefixQuery("field0", "bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));
source = searchSource()
.query(matchPhrasePrefixQuery("field0", "quick bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field1");
source = searchSource()
.query(boolQuery()
.should(matchPhrasePrefixQuery("field1", "test"))
.should(matchPhrasePrefixQuery("field1", "bro"))
)
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertThat(searchResponse.getHits().totalHits, equalTo(2L));
for (int i = 0; i < 2; i++) {
assertHighlight(searchResponse, i, "field1", 0, 1, anyOf(
equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
equalTo("The quick <x>brown</x> fox jumps over the lazy dog")));
}
source = searchSource()
.query(matchPhrasePrefixQuery("field1", "quick bro"))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc",
"field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym",
"field3", "type=text,analyzer=synonym"));
// with synonyms
client().prepareIndex("second_test_index", "doc", "0").setSource(
"type", "type2",
"field4", "The quick brown fox jumps over the lazy dog",
"field3", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("second_test_index", "doc", "1").setSource(
"type", "type2",
"field4", "The quick browse button is a fancy thing, right bro?").get();
client().prepareIndex("second_test_index", "doc", "2").setSource(
"type", "type2",
"field4", "a quick fast blue car").get();
refresh();
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro"))
.highlighter(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field3", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2"))
.query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1,
anyOf(equalTo("<x>a quick fast blue car</x>"),
equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>")));
}
public static XContentBuilder type1TermVectorMapping() throws IOException {
return XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("properties")
.startObject("field1").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
.startObject("field2").field("type", "text").field("term_vector", "with_positions_offsets").endObject()
.endObject()
.endObject().endObject();
}
}

View File

@ -223,6 +223,68 @@
- match: { tokens.0.token: Foo }
- match: { tokens.1.token: Bar! }
---
"synonym":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_synonym:
type: synonym
synonyms: ["car,auto"]
- do:
indices.analyze:
index: test
body:
text: what car magazine
tokenizer: whitespace
filter: [ my_synonym ]
- length: { tokens: 4 }
- match: { tokens.0.token: what }
- match: { tokens.0.position: 0 }
- match: { tokens.1.token: car }
- match: { tokens.1.position: 1 }
- match: { tokens.2.token: auto }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: magazine }
- match: { tokens.3.position: 2 }
---
"synonym_graph":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_graph_synonym:
type: synonym_graph
synonyms: [ "guinea pig,cavy" ]
- do:
indices.analyze:
index: test
body:
text: my guinea pig snores
tokenizer: whitespace
filter: [ my_graph_synonym ]
- length: { tokens: 5 }
- match: { tokens.0.token: my }
- match: { tokens.1.token: cavy }
- match: { tokens.1.position: 1 }
- match: { tokens.1.positionLength: 2 }
- match: { tokens.2.token: guinea }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: pig }
- match: { tokens.3.position: 2 }
- match: { tokens.4.token: snores }
- match: { tokens.4.position: 3 }
---
"synonym_graph and flatten_graph":
- do:

View File

@ -1,3 +1,4 @@
---
"Synonym filter with char_filter":
# Tests analyze with synonym and char_filter. This is in the analysis-common module
# because there are no char filters in core.
@ -30,3 +31,49 @@
- match: { tokens.2.token: the }
- match: { tokens.3.token: elasticsearch }
- match: { tokens.4.token: man! }
---
"Non-standard position length":
- do:
indices.create:
index: test
body:
settings:
index:
analysis:
filter:
syns:
type: synonym
synonyms: [ "wtf,what the fudge" ]
analyzer:
custom_syns:
tokenizer: standard
filter: [ lowercase, syns ]
- do:
indices.analyze:
index: test
body:
analyzer: custom_syns
text: "say what the fudge dude"
- length: { tokens: 6 }
- match: { tokens.0.token: say }
- match: { tokens.0.position: 0 }
- match: { tokens.0.positionLength: null }
- match: { tokens.1.token: what }
- match: { tokens.1.position: 1 }
- match: { tokens.1.positionLength: null }
- match: { tokens.2.token: wtf }
- match: { tokens.2.position: 1 }
- match: { tokens.2.positionLength: 3 }
- match: { tokens.3.token: the }
- match: { tokens.3.position: 2 }
- match: { tokens.3.positionLength: null }
- match: { tokens.4.token: fudge }
- match: { tokens.4.position: 3 }
- match: { tokens.4.positionLength: null }
- match: { tokens.5.token: dude }
- match: { tokens.5.position: 4 }
- match: { tokens.5.positionLength: null }
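The `positionLength` assertions are the point of this test: the single token `wtf` spans the three positions of `what the fudge`. A self-contained Lucene sketch that reproduces the same token stream (it uses `SynonymGraphFilter`; the class and field names are illustrative, not part of the PR):

```java
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class PositionLengthDemo {
    public static void main(String[] args) throws Exception {
        // wtf <-> "what the fudge", both directions, originals kept
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        CharsRef phrase = SynonymMap.Builder.join(
                new String[] { "what", "the", "fudge" }, new CharsRefBuilder());
        builder.add(new CharsRef("wtf"), phrase, true);
        builder.add(phrase, new CharsRef("wtf"), true);
        SynonymMap synonyms = builder.build();

        Analyzer analyzer = new Analyzer() {
            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source = new WhitespaceTokenizer();
                return new TokenStreamComponents(source,
                        new SynonymGraphFilter(source, synonyms, true));
            }
        };

        try (TokenStream ts = analyzer.tokenStream("f", "say what the fudge dude")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posInc = ts.addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute posLen = ts.addAttribute(PositionLengthAttribute.class);
            ts.reset();
            int pos = -1;
            while (ts.incrementToken()) {
                pos += posInc.getPositionIncrement();
                // "wtf" comes out at position 1 with positionLength 3,
                // matching the tokens.2 assertions above
                System.out.printf("%s pos=%d len=%d%n", term, pos, posLen.getPositionLength());
            }
            ts.end();
        }
    }
}
```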

View File

@ -0,0 +1,82 @@
---
"validate query with synonyms":
- do:
indices.create:
index: test
body:
settings:
index:
analysis:
filter:
syns:
type: synonym
synonyms: [ "one,two" ]
analyzer:
syns:
tokenizer: standard
filter: [ syns ]
mappings:
test:
properties:
field:
type: text
analyzer: syns
- do:
indices.validate_query:
index: test
explain: true
body:
query:
match_phrase_prefix:
field:
query: foo
- is_true: valid
- length: { explanations: 1 }
- match: { explanations.0.explanation: "/field:\"foo\\*\"/" }
- do:
indices.validate_query:
index: test
explain: true
body:
query:
match_phrase_prefix:
field:
query: foo bar
- is_true: valid
- length: { explanations: 1 }
- match: { explanations.0.explanation: "field:\"foo bar*\"" }
- do:
indices.validate_query:
index: test
explain: true
body:
query:
match_phrase_prefix:
field:
query: one bar
- is_true: valid
- length: { explanations: 1 }
- match: { explanations.0.explanation: "field:\"(one two) bar*\"" }
- do:
indices.validate_query:
index: test
explain: true
body:
query:
match_phrase_prefix:
field:
query: foo one
- is_true: valid
- length: { explanations: 1 }
- match: { explanations.0.explanation: "field:\"foo (one* two*)\"" }
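The explanation strings here are worth decoding: for `one bar`, the synonym-expanded first position gives `field:"(one two) bar*"`, i.e. a phrase whose first slot accepts either term and whose final term is expanded as a prefix at query time. A rough hand-built Lucene analogue of the non-prefix part (illustrative only, not what Elasticsearch literally constructs):

```java
// Phrase with a synonym-expanded first position: matches "one bar ..."
// or "two bar ...". The trailing "*" in the explanation is a query-time
// prefix expansion of the final position, which plain MultiPhraseQuery
// does not perform, so it is only noted in this comment.
MultiPhraseQuery.Builder phrase = new MultiPhraseQuery.Builder();
phrase.add(new Term[] { new Term("field", "one"), new Term("field", "two") });
phrase.add(new Term("field", "bar"));
MultiPhraseQuery query = phrase.build();
```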

View File

@ -0,0 +1,307 @@
---
"Test common terms query with stacked tokens":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
syns:
type: synonym
synonyms: [ "quick,fast" ]
analyzer:
syns:
tokenizer: standard
filter: [ "syns" ]
mappings:
test:
properties:
field1:
type: text
analyzer: syns
field2:
type: text
analyzer: syns
- do:
index:
index: test
type: test
id: 3
body:
field1: quick lazy huge brown pidgin
field2: the quick lazy huge brown fox jumps over the tree
- do:
index:
index: test
type: test
id: 1
body:
field1: the quick brown fox
- do:
index:
index: test
type: test
id: 2
body:
field1: the quick lazy huge brown fox jumps over the tree
refresh: true
- do:
search:
body:
query:
common:
field1:
query: the fast brown
cutoff_frequency: 3
low_freq_operator: or
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.2._id: "3" }
- do:
search:
body:
query:
common:
field1:
query: the fast brown
cutoff_frequency: 3
low_freq_operator: and
- match: { hits.total: 2 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- do:
search:
body:
query:
common:
field1:
query: the fast brown
cutoff_frequency: 3
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.2._id: "3" }
- do:
search:
body:
query:
common:
field1:
query: the fast huge fox
minimum_should_match:
low_freq: 3
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "2" }
- do:
search:
body:
query:
common:
field1:
query: the fast lazy fox brown
cutoff_frequency: 1
minimum_should_match:
high_freq: 5
- match: { hits.total: 2 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.1._id: "1" }
- do:
search:
body:
query:
common:
field1:
query: the fast lazy fox brown
cutoff_frequency: 1
minimum_should_match:
high_freq: 6
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "2" }
- do:
search:
body:
query:
common:
field1:
query: the fast lazy fox brown
cutoff_frequency: 1
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "2" }
- do:
search:
body:
query:
common:
field1:
query: the quick brown
cutoff_frequency: 3
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.2._id: "3" }
- do:
search:
body:
query:
match:
field1:
query: the fast brown
cutoff_frequency: 3
operator: and
- match: { hits.total: 2 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- do:
search:
body:
query:
match:
field1:
query: the fast brown
cutoff_frequency: 3
operator: or
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- match: { hits.hits.2._id: "3" }
- do:
search:
body:
query:
match:
field1:
query: the fast brown
cutoff_frequency: 3
minimum_should_match: 3
- match: { hits.total: 2 }
- match: { hits.hits.0._id: "1" }
- match: { hits.hits.1._id: "2" }
- do:
search:
body:
query:
multi_match:
query: the fast brown
fields: [ "field1", "field2" ]
cutoff_frequency: 3
operator: and
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.1._id: "1" }
- match: { hits.hits.2._id: "2" }
---
"Test match query with synonyms - see #3881 for extensive description of the issue":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
synonym:
type: synonym
synonyms: [ "quick,fast" ]
analyzer:
index:
type: custom
tokenizer: standard
filter: lowercase
search:
type: custom
tokenizer: standard
filter: [ lowercase, synonym ]
mappings:
test:
properties:
text:
type: text
analyzer: index
search_analyzer: search
- do:
index:
index: test
type: test
id: 1
body:
text: quick brown fox
refresh: true
- do:
search:
body:
query:
match:
text:
query: quick
operator: and
- match: { hits.total: 1 }
- do:
search:
body:
query:
match:
text:
query: quick brown
operator: and
- match: { hits.total: 1 }
- do:
search:
body:
query:
match:
text:
query: fast
operator: and
- match: { hits.total: 1 }
- do:
index:
index: test
type: test
id: 2
body:
text: fast brown fox
refresh: true
- do:
search:
body:
query:
match:
text:
query: quick
operator: and
- match: { hits.total: 2 }
- do:
search:
body:
query:
match:
text:
query: quick brown
operator: and
- match: { hits.total: 2 }
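For context on the first test in this file: `cutoff_frequency` maps onto Lucene's `CommonTermsQuery`, which buckets each term by document frequency and demotes frequent terms (like `the` here) into a separate, optional clause. A minimal sketch of the underlying query, with field and terms borrowed from the test and the wiring illustrative:

```java
// Terms whose doc frequency exceeds the cutoff (3) become optional
// high-frequency clauses; the rest stay as ordinary low-frequency clauses.
CommonTermsQuery query = new CommonTermsQuery(
        BooleanClause.Occur.SHOULD,  // occur for high-frequency clauses
        BooleanClause.Occur.SHOULD,  // occur for low-frequency clauses (low_freq_operator: or)
        3f);                         // cutoff_frequency
query.add(new Term("field1", "the"));
query.add(new Term("field1", "fast"));
query.add(new Term("field1", "brown"));
```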

View File

@ -0,0 +1,205 @@
setup:
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
syns:
type: synonym
synonyms: [ "wtf, what the fudge", "foo, bar baz" ]
graph_syns:
type: synonym_graph
synonyms: [ "wtf, what the fudge", "foo, bar baz" ]
analyzer:
lower_syns:
type: custom
tokenizer: standard
filter: [ lowercase, syns ]
lower_graph_syns:
type: custom
tokenizer: standard
filter: [ lowercase, graph_syns ]
mappings:
test:
properties:
field:
type: text
- do:
index:
index: test
type: test
id: 1
body:
text: say wtf happened foo
- do:
index:
index: test
type: test
id: 2
body:
text: bar baz what the fudge man
- do:
index:
index: test
type: test
id: 3
body:
text: wtf
- do:
index:
index: test
type: test
id: 4
body:
text: what is the name for fudge
- do:
index:
index: test
type: test
id: 5
body:
text: bar two three
- do:
index:
index: test
type: test
id: 6
body:
text: bar baz two three
refresh: true
---
"simple multiterm phrase":
- do:
search:
body:
query:
match_phrase:
text:
query: foo two three
analyzer: lower_syns
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms
- do:
search:
body:
query:
match_phrase:
text:
query: foo two three
analyzer: lower_graph_syns
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms
---
"simple multiterm and":
- do:
search:
body:
query:
match:
text:
query: say what the fudge
analyzer: lower_syns
operator: and
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here
- do:
search:
body:
query:
match:
text:
query: say what the fudge
analyzer: lower_graph_syns
operator: and
- match: { hits.total: 1 }
- match: { hits.hits.0._id: "1" }
---
"minimum should match":
- do:
search:
body:
query:
match:
text:
query: three what the fudge foo
operator: or
analyzer: lower_graph_syns
auto_generate_synonyms_phrase_query: false
- match: { hits.total: 6 }
- do:
search:
body:
query:
match:
text:
query: three what the fudge foo
operator: or
analyzer: lower_graph_syns
minimum_should_match: 80%
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.1._id: "6" }
- match: { hits.hits.2._id: "1" }
---
"multiterm synonyms phrase":
- do:
search:
body:
query:
match:
text:
query: wtf
operator: and
analyzer: lower_graph_syns
- match: { hits.total: 3 }
- match: { hits.hits.0._id: "2" }
- match: { hits.hits.1._id: "3" }
- match: { hits.hits.2._id: "1" }
---
"phrase prefix":
- do:
index:
index: test
type: test
id: 7
body:
text: "WTFD!"
- do:
index:
index: test
type: test
id: 8
body:
text: "Weird Al's WHAT THE FUDGESICLE"
refresh: true
- do:
search:
body:
query:
match_phrase_prefix:
text:
query: wtf
analyzer: lower_graph_syns
- match: { hits.total: 5 }
- match: { hits.hits.0._id: "3" }
- match: { hits.hits.1._id: "7" }
- match: { hits.hits.2._id: "1" }
- match: { hits.hits.3._id: "8" }
- match: { hits.hits.4._id: "2" }
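The `incorrect match` and `correct match` comments in the first test above are the crux of flat versus graph synonyms. With the flat `synonym` filter, the query text `foo two three` analyzes to `foo`/`bar` stacked at position 0 and `baz`/`two` stacked at position 1, so the resulting phrase wrongly matches doc 5 (`bar two three`); the graph filter keeps both expansion paths intact and matches doc 6. A sketch comparing the two query shapes, assuming `synonyms` is a `SynonymMap` with `foo <-> bar baz` built as in the earlier `SynonymMap.Builder` sketch:

```java
// Flat filter: the token graph is squashed, so the phrase built from the
// query becomes (foo|bar) (baz|two) (three) and matches "bar two three".
Analyzer flat = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer t = new WhitespaceTokenizer();
        return new TokenStreamComponents(t, new SynonymFilter(t, synonyms, true));
    }
};
// Graph filter: both paths survive, so the phrase query is equivalent to
// "foo two three" OR "bar baz two three" and matches doc 6 only.
Analyzer graph = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer t = new WhitespaceTokenizer();
        return new TokenStreamComponents(t, new SynonymGraphFilter(t, synonyms, true));
    }
};
System.out.println(new QueryBuilder(flat).createPhraseQuery("text", "foo two three"));
System.out.println(new QueryBuilder(graph).createPhraseQuery("text", "foo two three"));
```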

View File

@ -0,0 +1,44 @@
---
"suggestions with synonyms":
- do:
indices.create:
index: test
body:
settings:
analysis:
analyzer:
suggest_analyzer_synonyms:
type: custom
tokenizer: standard
filter: [ lowercase, my_synonyms ]
filter:
my_synonyms:
type: synonym
synonyms: [ "foo,renamed"]
mappings:
test:
properties:
field:
type: completion
analyzer: suggest_analyzer_synonyms
- do:
index:
index: test
type: test
id: 1
body:
field:
input: [ "Foo Fighters" ]
refresh: true
- do:
search:
index: test
body:
suggest:
text: r
test:
completion:
field: field
- match: {suggest.test.0.options.0.text: Foo Fighters}

View File

@ -82,20 +82,15 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
@Before
public void setup() {
Settings settings = Settings.builder()
.put("index.analysis.filter.mySynonyms.type", "synonym")
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
// Stop filter remains in server as it is part of lucene-core
.put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
.build();
indexService = createIndex("test", settings);
parser = indexService.mapperService().documentMapperParser();
}
}
@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
List<Class<? extends Plugin>> classpathPlugins = new ArrayList<>();
@ -107,16 +102,16 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
protected String getFieldType() {
return "annotated_text";
}
}
public void testAnnotationInjection() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
.endObject().endObject());
DocumentMapper mapper = indexService.mapperService().merge("type",
new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
// Use example of typed and untyped annotations
String annotatedText = "He paid [Stormy Daniels](Stephanie+Clifford&Payee) hush money";
@ -140,12 +135,12 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
TermsEnum terms = leaf.terms("field").iterator();
assertTrue(terms.seekExact(new BytesRef("stormy")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.nextPosition());
assertEquals(2, postings.nextPosition());
assertTrue(terms.seekExact(new BytesRef("Stephanie Clifford")));
postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
@ -156,23 +151,23 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.nextPosition());
assertTrue(terms.seekExact(new BytesRef("hush")));
postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(4, postings.nextPosition());
assertEquals(4, postings.nextPosition());
}
}
}
public void testToleranceForBadAnnotationMarkup() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field").field("type", getFieldType()).endObject().endObject()
.endObject().endObject());
DocumentMapper mapper = indexService.mapperService().merge("type",
new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
String annotatedText = "foo [bar](MissingEndBracket baz";
SourceToParse sourceToParse = SourceToParse.source("test", "type", "1", BytesReference
@ -195,12 +190,12 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
try (Engine.Searcher searcher = shard.acquireSearcher("test")) {
LeafReader leaf = searcher.getDirectoryReader().leaves().get(0).reader();
TermsEnum terms = leaf.terms("field").iterator();
assertTrue(terms.seekExact(new BytesRef("foo")));
PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
assertEquals(0, postings.nextPosition());
assertEquals(0, postings.nextPosition());
assertTrue(terms.seekExact(new BytesRef("bar")));
postings = terms.postings(null, PostingsEnum.POSITIONS);
assertEquals(0, postings.nextDoc());
@ -209,18 +204,18 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
assertFalse(terms.seekExact(new BytesRef("MissingEndBracket")));
// Bad markup means value is treated as plain text and fed through tokenisation
assertTrue(terms.seekExact(new BytesRef("missingendbracket")));
}
}
}
public void testAgainstTermVectorsAPI() throws IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("tvfield").field("type", getFieldType())
.field("term_vector", "with_positions_offsets_payloads")
.endObject().endObject()
.endObject().endObject());
indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
.endObject().endObject());
indexService.mapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE);
int max = between(3, 10);
BulkRequestBuilder bulk = client().prepareBulk();
@ -231,13 +226,13 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
bulk.get();
TermVectorsRequest request = new TermVectorsRequest("test", "type", "0").termStatistics(true);
IndicesService indicesService = getInstanceFromNode(IndicesService.class);
IndexService test = indicesService.indexService(resolveIndex("test"));
IndexShard shard = test.getShardOrNull(0);
assertThat(shard, notNullValue());
TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request);
assertEquals(1, response.getFields().size());
TermVectorsResponse response = TermVectorsService.getTermVectors(shard, request);
assertEquals(1, response.getFields().size());
Terms terms = response.getFields().terms("tvfield");
TermsEnum iterator = terms.iterator();
@ -245,14 +240,14 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
Set<String> foundTerms = new HashSet<>();
while ((term = iterator.next()) != null) {
foundTerms.add(term.utf8ToString());
}
}
//Check we have both text and annotation tokens
assertTrue(foundTerms.contains("brown"));
assertTrue(foundTerms.contains("Color"));
assertTrue(foundTerms.contains("fox"));
}
}
// ===== Code below copied from TextFieldMapperTests ========
public void testDefaults() throws IOException {
@ -616,7 +611,7 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPositions(), equalTo(true));
assertThat(doc.rootDoc().getField("field6").fieldType().storeTermVectorPayloads(), equalTo(true));
}
public void testNullConfigValuesFail() throws MapperParsingException, IOException {
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject()
.startObject("type")
@ -677,5 +672,5 @@ public class AnnotatedTextFieldMapperTests extends ESSingleNodeTestCase {
}
}

View File

@ -158,16 +158,8 @@ public final class AnalysisRegistry implements Closeable {
public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters = new HashMap<>(this.tokenFilters);
/*
* synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index.
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters);
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings,
Collections.unmodifiableMap(this.tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters);
}
public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
@ -222,18 +214,7 @@ public final class AnalysisRegistry implements Closeable {
if (tokenFilterSettings.containsKey(tokenFilter)) {
Settings currentSettings = tokenFilterSettings.get(tokenFilter);
String typeName = currentSettings.get("type");
/*
* synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index.
* instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and
* hide internal data-structures as much as possible.
*/
if ("synonym".equals(typeName)) {
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
} else if ("synonym_graph".equals(typeName)) {
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
} else {
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
}
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
} else {
return getTokenFilterProvider(tokenFilter);
}
@ -257,19 +238,6 @@ public final class AnalysisRegistry implements Closeable {
}
}
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
return new AnalysisModule.AnalysisProvider<T>() {
@Override
public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
return provider.get(indexSettings, environment, name, settings);
}
@Override
public boolean requiresAnalysisSettings() {
return true;
}
};
}
enum Component {
ANALYZER {
@Override

View File

@ -19,6 +19,7 @@
package org.elasticsearch.index.mapper;
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.FieldType;
@ -55,6 +56,7 @@ import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
@ -82,10 +84,6 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
@Before
public void setup() {
Settings settings = Settings.builder()
.put("index.analysis.filter.mySynonyms.type", "synonym")
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
// Stop filter remains in server as it is part of lucene-core
.put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
@ -734,7 +732,7 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
.endObject()
.startObject("synfield")
.field("type", "text")
.field("analyzer", "synonym")
.field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer
.field("index_phrases", true)
.endObject()
.endObject()
@ -761,11 +759,13 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
assertThat(q5,
is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build()));
Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
MatchQuery matchQuery = new MatchQuery(queryShardContext);
matchQuery.setAnalyzer(new MockSynonymAnalyzer());
Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs");
assertThat(q6, is(new MultiPhraseQuery.Builder()
.add(new Term[]{
new Term("synfield._index_phrase", "motor car"),
new Term("synfield._index_phrase", "motor auto")})
new Term("synfield._index_phrase", "motor dogs"),
new Term("synfield._index_phrase", "motor dog")})
.build()));
ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
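The swap to `MockSynonymAnalyzer` is how `server` tests cope now that real synonym filters live in a module: Lucene's test framework stacks a fixed `dog`/`dogs` synonym, which is why the expected phrase terms above become `motor dogs` and `motor dog`. A simplified sketch of what such a mock filter does (not the Lucene source):

```java
// Whenever the wrapped stream emits "dogs", also emit "dog" stacked at
// the same position; Lucene's real MockSynonymFilter is slightly richer.
final class SimpleMockSynonymFilter extends TokenFilter {
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncAtt =
            addAttribute(PositionIncrementAttribute.class);
    private State pending; // saved state for a synonym still to be emitted

    SimpleMockSynonymFilter(TokenStream in) {
        super(in);
    }

    @Override
    public boolean incrementToken() throws IOException {
        if (pending != null) {
            restoreState(pending);
            pending = null;
            termAtt.setEmpty().append("dog");   // the stacked synonym
            posIncAtt.setPositionIncrement(0);  // same position as "dogs"
            return true;
        }
        if (input.incrementToken() == false) {
            return false;
        }
        if ("dogs".contentEquals(termAtt)) {
            pending = captureState();
        }
        return true;
    }

    @Override
    public void reset() throws IOException {
        super.reset();
        pending = null;
    }
}
```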

View File

@ -1,220 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
public class MatchQueryIT extends ESIntegTestCase {
private static final String INDEX = "test";
/**
* Test setup.
*/
@Before
public void setUp() throws Exception {
super.setUp();
CreateIndexRequestBuilder builder = prepareCreate(INDEX).setSettings(
Settings.builder()
.put(indexSettings())
.put("index.analysis.filter.syns.type", "synonym")
.putList("index.analysis.filter.syns.synonyms", "wtf, what the fudge", "foo, bar baz")
.put("index.analysis.analyzer.lower_syns.type", "custom")
.put("index.analysis.analyzer.lower_syns.tokenizer", "standard")
.putList("index.analysis.analyzer.lower_syns.filter", "lowercase", "syns")
.put("index.analysis.filter.graphsyns.type", "synonym_graph")
.putList("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz")
.put("index.analysis.analyzer.lower_graphsyns.type", "custom")
.put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard")
.putList("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns")
);
assertAcked(builder.addMapping(INDEX, createMapping()));
ensureGreen();
}
private List<IndexRequestBuilder> getDocs() {
List<IndexRequestBuilder> builders = new ArrayList<>();
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
builders.add(client().prepareIndex("test", "test", "3").setSource("field", "wtf"));
builders.add(client().prepareIndex("test", "test", "4").setSource("field", "what is the name for fudge"));
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
return builders;
}
/**
* Setup the index mappings for the test index.
*
* @return the json builder with the index mappings
* @throws IOException on error creating mapping json
*/
private XContentBuilder createMapping() throws IOException {
return XContentFactory.jsonBuilder()
.startObject()
.startObject(INDEX)
.startObject("properties")
.startObject("field")
.field("type", "text")
.endObject()
.endObject()
.endObject()
.endObject();
}
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
// because foo -> "bar baz" where "foo" and "bar" at position 0, "baz" and "two" at position 1.
// "bar two three", "bar baz three", "foo two three", "foo baz three"
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "5"); // we should not match this but we do
// same query using graph should find correct result
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three")
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "6");
}
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
.operator(Operator.AND).analyzer("lower_syns")).get();
// Old synonyms work fine in that case, but it is coincidental
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
// same query using graph should find correct result
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
.operator(Operator.AND).analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
}
public void testMinShouldMatch() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// no min should match
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(
QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns").autoGenerateSynonymsPhraseQuery(false)
)
.get();
assertHitCount(searchResponse, 6L);
assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6");
// same query, with min_should_match of 2
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns").minimumShouldMatch("80%")).get();
// three wtf foo = 2 terms, match #1
// three wtf bar baz = 3 terms, match #6
// three what the fudge foo = 4 terms, no match
// three what the fudge bar baz = 4 terms, match #2
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "6");
}
public void testMultiTermsSynonymsPhrase() throws ExecutionException, InterruptedException {
List<IndexRequestBuilder> builders = getDocs();
indexRandom(true, false, builders);
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(
QueryBuilders.matchQuery("field", "wtf")
.analyzer("lower_graphsyns")
.operator(Operator.AND))
.get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "3");
}
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
List<IndexRequestBuilder> builders = getDocs();
builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));
builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE"));
indexRandom(true, false, builders);
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf")
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 5L);
assertSearchHits(searchResponse, "1", "2", "3", "7", "8");
}
public void testCommonTerms() throws ExecutionException, InterruptedException {
String route = "commonTermsTest";
List<IndexRequestBuilder> builders = getDocs();
for (IndexRequestBuilder indexRequet : builders) {
// route all docs to same shard for this test
indexRequet.setRouting(route);
}
indexRandom(true, false, builders);
// do a search with no cutoff frequency to show which docs should match
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR)).get();
assertHitCount(searchResponse, 4L);
assertSearchHits(searchResponse, "1", "2", "5", "6");
// do same search with cutoff and see less documents match
// in this case, essentially everything but "happened" gets excluded
searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR).cutoffFrequency(1f)).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
}
}

View File

@ -73,11 +73,7 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
@Before
public void setup() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.syns.type","synonym")
.putList("index.analysis.filter.syns.synonyms","quick,fast")
.put("index.analysis.analyzer.syns.tokenizer","standard")
.put("index.analysis.analyzer.syns.filter","syns").build();
Settings settings = Settings.builder().build();
IndexService indexService = createIndex("test", settings);
MapperService mapperService = indexService.mapperService();
String mapping = "{\n" +
@ -87,11 +83,11 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
" \"properties\":{\n" +
" \"first\": {\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" \"analyzer\":\"standard\"\n" +
" }," +
" \"last\": {\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" \"analyzer\":\"standard\"\n" +
" }" +
" }" +
" }\n" +
@ -221,25 +217,27 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase {
QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null);
MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
parser.setAnalyzer(new MockSynonymAnalyzer());
Map<String, Float> fieldNames = new HashMap<>();
fieldNames.put("name.first", 1.0f);
// check that synonym query is used for a single field
Query parsedQuery =
multiMatchQuery("quick").field("name.first")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
Query parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null);
Term[] terms = new Term[2];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[0] = new Term("name.first", "dog");
terms[1] = new Term("name.first", "dogs");
Query expectedQuery = new SynonymQuery(terms);
assertThat(parsedQuery, equalTo(expectedQuery));
// check that blended term query is used for multiple fields
parsedQuery =
multiMatchQuery("quick").field("name.first").field("name.last")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
fieldNames.put("name.last", 1.0f);
parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null);
terms = new Term[4];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[2] = new Term("name.last", "quick");
terms[3] = new Term("name.last", "fast");
terms[0] = new Term("name.first", "dog");
terms[1] = new Term("name.first", "dogs");
terms[2] = new Term("name.last", "dog");
terms[3] = new Term("name.last", "dogs");
float[] boosts = new float[4];
Arrays.fill(boosts, 1.0f);
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);

View File

@ -121,54 +121,6 @@ public class AnalyzeActionIT extends ESIntegTestCase {
assertThat(analyzeResponse.getTokens().get(0).getPositionLength(), equalTo(1));
}
public void testAnalyzeWithNonDefaultPostionLength() throws Exception {
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
.setSettings(Settings.builder().put(indexSettings())
.put("index.analysis.filter.syns.type", "synonym")
.putList("index.analysis.filter.syns.synonyms", "wtf, what the fudge")
.put("index.analysis.analyzer.custom_syns.tokenizer", "standard")
.putList("index.analysis.analyzer.custom_syns.filter", "lowercase", "syns")));
ensureGreen();
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("say what the fudge").setIndex("test").setAnalyzer("custom_syns").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(5));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
assertThat(token.getTerm(), equalTo("say"));
assertThat(token.getPosition(), equalTo(0));
assertThat(token.getStartOffset(), equalTo(0));
assertThat(token.getEndOffset(), equalTo(3));
assertThat(token.getPositionLength(), equalTo(1));
token = analyzeResponse.getTokens().get(1);
assertThat(token.getTerm(), equalTo("what"));
assertThat(token.getPosition(), equalTo(1));
assertThat(token.getStartOffset(), equalTo(4));
assertThat(token.getEndOffset(), equalTo(8));
assertThat(token.getPositionLength(), equalTo(1));
token = analyzeResponse.getTokens().get(2);
assertThat(token.getTerm(), equalTo("wtf"));
assertThat(token.getPosition(), equalTo(1));
assertThat(token.getStartOffset(), equalTo(4));
assertThat(token.getEndOffset(), equalTo(18));
assertThat(token.getPositionLength(), equalTo(3));
token = analyzeResponse.getTokens().get(3);
assertThat(token.getTerm(), equalTo("the"));
assertThat(token.getPosition(), equalTo(2));
assertThat(token.getStartOffset(), equalTo(9));
assertThat(token.getEndOffset(), equalTo(12));
assertThat(token.getPositionLength(), equalTo(1));
token = analyzeResponse.getTokens().get(4);
assertThat(token.getTerm(), equalTo("fudge"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(13));
assertThat(token.getEndOffset(), equalTo(18));
assertThat(token.getPositionLength(), equalTo(1));
}
public void testAnalyzerWithFieldOrTypeTests() throws Exception {
assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
ensureGreen();

View File

@ -44,7 +44,6 @@ import org.elasticsearch.index.query.AbstractQueryBuilder;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
@ -84,7 +83,6 @@ import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery;
import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.existsQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
@ -1475,117 +1473,6 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
}
public void testPhrasePrefix() throws IOException {
Builder builder = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "quick => fast");
assertAcked(prepareCreate("first_test_index").setSettings(builder.build()).addMapping("type1", type1TermVectorMapping()));
ensureGreen();
client().prepareIndex("first_test_index", "type1", "0").setSource(
"field0", "The quick brown fox jumps over the lazy dog",
"field1", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("first_test_index", "type1", "1").setSource("field1",
"The quick browse button is a fancy thing, right bro?").get();
refresh();
logger.info("--> highlighting and searching on field0");
SearchSourceBuilder source = searchSource()
.query(matchPhrasePrefixQuery("field0", "bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
SearchResponse searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick <x>brown</x> fox jumps over the lazy dog"));
source = searchSource()
.query(matchPhrasePrefixQuery("field0", "quick bro"))
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field1");
source = searchSource()
.query(boolQuery()
.should(matchPhrasePrefixQuery("field1", "test"))
.should(matchPhrasePrefixQuery("field1", "bro"))
)
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertThat(searchResponse.getHits().totalHits, equalTo(2L));
for (int i = 0; i < 2; i++) {
assertHighlight(searchResponse, i, "field1", 0, 1, anyOf(
equalTo("The quick <x>browse</x> button is a fancy thing, right <x>bro</x>?"),
equalTo("The quick <x>brown</x> fox jumps over the lazy dog")));
}
source = searchSource()
.query(matchPhrasePrefixQuery("field1", "quick bro"))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field1", 0, 1, anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertAcked(prepareCreate("second_test_index").setSettings(builder.build()).addMapping("doc",
"field4", "type=text,term_vector=with_positions_offsets,analyzer=synonym",
"field3", "type=text,analyzer=synonym"));
// with synonyms
client().prepareIndex("second_test_index", "doc", "0").setSource(
"type", "type2",
"field4", "The quick brown fox jumps over the lazy dog",
"field3", "The quick brown fox jumps over the lazy dog").get();
client().prepareIndex("second_test_index", "doc", "1").setSource(
"type", "type2",
"field4", "The quick browse button is a fancy thing, right bro?").get();
client().prepareIndex("second_test_index", "doc", "2").setSource(
"type", "type2",
"field4", "a quick fast blue car").get();
refresh();
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field3", "fast bro"))
.highlighter(highlight().field("field3").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field3", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2")).query(matchPhrasePrefixQuery("field4", "the fast bro"))
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
assertHighlight(searchResponse, 1, "field4", 0, 1, anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")));
logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2"))
.query(matchPhrasePrefixQuery("field4", "a fast quick blue ca"))
.highlighter(highlight().field("field4").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("second_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field4", 0, 1,
anyOf(equalTo("<x>a quick fast blue car</x>"),
equalTo("<x>a</x> <x>quick</x> <x>fast</x> <x>blue</x> <x>car</x>")));
}

public void testPlainHighlightDifferentFragmenter() throws Exception {
assertAcked(prepareCreate("test")
.addMapping("type1", "tags", "type=text"));
@ -2919,46 +2806,7 @@ public class HighlighterSearchIT extends ESIntegTestCase {
assertThat(field.getFragments()[0].string(), equalTo("<em>brown</em>"));
}

public void testSynonyms() throws IOException {
Builder builder = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "fast,quick");
assertAcked(prepareCreate("test").setSettings(builder.build())
.addMapping("type1", "field1",
"type=text,term_vector=with_positions_offsets,search_analyzer=synonym," +
"analyzer=standard,index_options=offsets"));
ensureGreen();
client().prepareIndex("test", "type1", "0").setSource(
"field1", "The quick brown fox jumps over the lazy dog").get();
refresh();
for (String highlighterType : ALL_TYPES) {
logger.info("--> highlighting (type=" + highlighterType + ") and searching on field1");
SearchSourceBuilder source = searchSource()
.query(matchQuery("field1", "quick brown fox").operator(Operator.AND))
.highlighter(
highlight()
.field("field1")
.order("score")
.preTags("<x>")
.postTags("</x>")
.highlighterType(highlighterType));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
source = searchSource()
.query(matchQuery("field1", "fast brown fox").operator(Operator.AND))
.highlighter(highlight().field("field1").order("score").preTags("<x>").postTags("</x>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1,
equalTo("The <x>quick</x> <x>brown</x> <x>fox</x> jumps over the lazy dog"));
}
}

public void testHighlightQueryRewriteDatesWithNow() throws Exception {
assertAcked(client().admin().indices().prepareCreate("index-1").addMapping("type", "d", "type=date",


@ -156,13 +156,9 @@ public class QueryRescorerIT extends ESIntegTestCase {
public void testMoreDocs() throws Exception {
Builder builder = Settings.builder();
builder.put("index.analysis.analyzer.synonym.tokenizer", "standard");
builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase");
builder.put("index.analysis.filter.synonym.type", "synonym");
builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street");
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym")
.startObject("field1").field("type", "text").field("analyzer", "whitespace")
.endObject().endObject().endObject().endObject();
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping)
@ -234,13 +230,9 @@ public class QueryRescorerIT extends ESIntegTestCase {
// Tests a rescore window smaller than number of hits:
public void testSmallRescoreWindow() throws Exception {
Builder builder = Settings.builder();
builder.put("index.analysis.analyzer.synonym.tokenizer", "standard");
builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase");
builder.put("index.analysis.filter.synonym.type", "synonym");
builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street");
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym")
.startObject("field1").field("type", "text").field("analyzer", "whitespace")
.endObject().endObject().endObject().endObject();
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping)
@ -306,13 +298,9 @@ public class QueryRescorerIT extends ESIntegTestCase {
// Tests a rescorer that penalizes the scores:
public void testRescorerMadeScoresWorse() throws Exception {
Builder builder = Settings.builder();
builder.put("index.analysis.analyzer.synonym.tokenizer", "standard");
builder.putList("index.analysis.analyzer.synonym.filter", "synonym", "lowercase");
builder.put("index.analysis.filter.synonym.type", "synonym");
builder.putList("index.analysis.filter.synonym.synonyms", "ave => ave, avenue", "street => str, street");
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("field1").field("type", "text").field("analyzer", "whitespace").field("search_analyzer", "synonym")
.startObject("field1").field("type", "text").field("analyzer", "whitespace")
.endObject().endObject().endObject().endObject();
assertAcked(client().admin().indices().prepareCreate("test").addMapping("type1", mapping)


@ -20,16 +20,13 @@
package org.elasticsearch.search.query;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.QueryStringQueryBuilder;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
@ -48,7 +45,6 @@ import static org.elasticsearch.test.StreamsUtils.copyToStringFromClasspath;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
@ -252,92 +248,6 @@ public class QueryStringIT extends ESIntegTestCase {
containsString("unit [D] not supported for date math [-2D]"));
}

private void setupIndexWithGraph(String index) throws Exception {
CreateIndexRequestBuilder builder = prepareCreate(index).setSettings(
Settings.builder()
.put(indexSettings())
.put("index.analysis.filter.graphsyns.type", "synonym_graph")
.putList("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz")
.put("index.analysis.analyzer.lower_graphsyns.type", "custom")
.put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard")
.putList("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns")
);
XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject(index).startObject("properties")
.startObject("field").field("type", "text").endObject().endObject().endObject().endObject();
assertAcked(builder.addMapping(index, mapping));
ensureGreen();
List<IndexRequestBuilder> builders = new ArrayList<>();
builders.add(client().prepareIndex(index, index, "1").setSource("field", "say wtf happened foo"));
builders.add(client().prepareIndex(index, index, "2").setSource("field", "bar baz what the fudge man"));
builders.add(client().prepareIndex(index, index, "3").setSource("field", "wtf"));
builders.add(client().prepareIndex(index, index, "4").setSource("field", "what is the name for fudge"));
builders.add(client().prepareIndex(index, index, "5").setSource("field", "bar two three"));
builders.add(client().prepareIndex(index, index, "6").setSource("field", "bar baz two three"));
indexRandom(true, false, builders);
}

public void testGraphQueries() throws Exception {
String index = "graph_test_index";
setupIndexWithGraph(index);
// phrase
SearchResponse searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("\"foo two three\"")
.defaultField("field")
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "6");
// and
searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("say what the fudge")
.defaultField("field")
.defaultOperator(Operator.AND)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
// or
searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("three what the fudge foo")
.defaultField("field")
.defaultOperator(Operator.OR)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 6L);
assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6");
// min should match
searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("three what the fudge foo")
.defaultField("field")
.defaultOperator(Operator.OR)
.autoGenerateSynonymsPhraseQuery(false)
.analyzer("lower_graphsyns")
.minimumShouldMatch("80%")).get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "6");
// multi terms synonyms phrase
searchResponse = client().prepareSearch(index).setQuery(
QueryBuilders.queryStringQuery("what the fudge")
.defaultField("field")
.defaultOperator(Operator.AND)
.analyzer("lower_graphsyns"))
.get();
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "3");
}

public void testLimitOnExpandedFields() throws Exception {
XContentBuilder builder = jsonBuilder();
builder.startObject();


@ -21,7 +21,6 @@ package org.elasticsearch.search.query;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.util.English;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
@ -349,98 +348,6 @@ public class SearchQueryIT extends ESIntegTestCase {
assertThirdHit(searchResponse, hasId("2"));
}

public void testCommonTermsQueryStackedTokens() throws Exception {
assertAcked(prepareCreate("test")
.setSettings(Settings.builder()
.put(indexSettings())
.put(SETTING_NUMBER_OF_SHARDS, 1)
.put("index.analysis.filter.syns.type", "synonym")
.putList("index.analysis.filter.syns.synonyms", "quick,fast")
.put("index.analysis.analyzer.syns.tokenizer", "standard")
.put("index.analysis.analyzer.syns.filter", "syns")
)
.addMapping("type1", "field1", "type=text,analyzer=syns", "field2", "type=text,analyzer=syns"));
indexRandom(true, client().prepareIndex("test", "type1", "3").setSource("field1", "quick lazy huge brown pidgin", "field2", "the quick lazy huge brown fox jumps over the tree"),
client().prepareIndex("test", "type1", "1").setSource("field1", "the quick brown fox"),
client().prepareIndex("test", "type1", "2").setSource("field1", "the quick lazy huge brown fox jumps over the tree") );
SearchResponse searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.OR)).get();
assertHitCount(searchResponse, 3L);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3).lowFreqOperator(Operator.AND)).get();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(2L));
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
// Default
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast brown").cutoffFrequency(3)).get();
assertHitCount(searchResponse, 3L);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast huge fox").lowFreqMinimumShouldMatch("3")).get();
assertHitCount(searchResponse, 1L);
assertFirstHit(searchResponse, hasId("2"));
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("5")).get();
assertHitCount(searchResponse, 2L);
assertFirstHit(searchResponse, hasId("2"));
assertSecondHit(searchResponse, hasId("1"));
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1).highFreqMinimumShouldMatch("6")).get();
assertHitCount(searchResponse, 1L);
assertFirstHit(searchResponse, hasId("2"));
// Default
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the fast lazy fox brown").cutoffFrequency(1)).get();
assertHitCount(searchResponse, 1L);
assertFirstHit(searchResponse, hasId("2"));
searchResponse = client().prepareSearch().setQuery(commonTermsQuery("field1", "the quick brown").cutoffFrequency(3).analyzer("stop")).get();
assertHitCount(searchResponse, 3L);
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));
// try the same with match query
searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.AND)).get();
assertHitCount(searchResponse, 2L);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.OR)).get();
assertHitCount(searchResponse, 3L);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
assertThirdHit(searchResponse, hasId("3"));
searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).operator(Operator.AND).analyzer("stop")).get();
assertHitCount(searchResponse, 3L);
// stop drops "the" since its a stopword
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("3"));
assertThirdHit(searchResponse, hasId("2"));
searchResponse = client().prepareSearch().setQuery(matchQuery("field1", "the fast brown").cutoffFrequency(3).minimumShouldMatch("3")).get();
assertHitCount(searchResponse, 2L);
assertFirstHit(searchResponse, hasId("1"));
assertSecondHit(searchResponse, hasId("2"));
// try the same with multi match query
searchResponse = client().prepareSearch().setQuery(multiMatchQuery("the fast brown", "field1", "field2").cutoffFrequency(3).operator(Operator.AND)).get();
assertHitCount(searchResponse, 3L);
assertFirstHit(searchResponse, hasId("3"));
assertSecondHit(searchResponse, hasId("1"));
assertThirdHit(searchResponse, hasId("2"));
}

public void testQueryStringAnalyzedWildcard() throws Exception {
createIndex("test");
@ -1535,69 +1442,6 @@ public class SearchQueryIT extends ESIntegTestCase {
assertHitCount(client().prepareSearch("test").setQuery(queryStringQuery("field\\*:/value[01]/")).get(), 1);
}

// see #3881 - for extensive description of the issue
public void testMatchQueryWithSynonyms() throws IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.index.type", "custom")
.put("index.analysis.analyzer.index.tokenizer", "standard")
.put("index.analysis.analyzer.index.filter", "lowercase")
.put("index.analysis.analyzer.search.type", "custom")
.put("index.analysis.analyzer.search.tokenizer", "standard")
.putList("index.analysis.analyzer.search.filter", "lowercase", "synonym")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "fast, quick"));
assertAcked(builder.addMapping("test", "text", "type=text,analyzer=index,search_analyzer=search"));
client().prepareIndex("test", "test", "1").setSource("text", "quick brown fox").get();
refresh();
SearchResponse searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick").operator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick brown").operator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "fast").operator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
client().prepareIndex("test", "test", "2").setSource("text", "fast brown fox").get();
refresh();
searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick").operator(Operator.AND)).get();
assertHitCount(searchResponse, 2);
searchResponse = client().prepareSearch("test").setQuery(matchQuery("text", "quick brown").operator(Operator.AND)).get();
assertHitCount(searchResponse, 2);
}

public void testQueryStringWithSynonyms() throws IOException {
CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.index.type", "custom")
.put("index.analysis.analyzer.index.tokenizer", "standard")
.put("index.analysis.analyzer.index.filter", "lowercase")
.put("index.analysis.analyzer.search.type", "custom")
.put("index.analysis.analyzer.search.tokenizer", "standard")
.putList("index.analysis.analyzer.search.filter", "lowercase", "synonym")
.put("index.analysis.filter.synonym.type", "synonym")
.putList("index.analysis.filter.synonym.synonyms", "fast, quick"));
assertAcked(builder.addMapping("test", "text", "type=text,analyzer=index,search_analyzer=search"));
client().prepareIndex("test", "test", "1").setSource("text", "quick brown fox").get();
refresh();
SearchResponse searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick").defaultField("text").defaultOperator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick brown").defaultField("text").defaultOperator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
searchResponse = client().prepareSearch().setQuery(queryStringQuery("fast").defaultField("text").defaultOperator(Operator.AND)).get();
assertHitCount(searchResponse, 1);
client().prepareIndex("test", "test", "2").setSource("text", "fast brown fox").get();
refresh();
searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick").defaultField("text").defaultOperator(Operator.AND)).get();
assertHitCount(searchResponse, 2);
searchResponse = client().prepareSearch("test").setQuery(queryStringQuery("quick brown").defaultField("text").defaultOperator(Operator.AND)).get();
assertHitCount(searchResponse, 2);
}

// see #3797
public void testMultiMatchLenientIssue3797() {
createIndex("test");


@ -522,28 +522,6 @@ public class CompletionSuggestSearchIT extends ESIntegTestCase {
assertSuggestions("b", "The Beatles");
}

public void testThatSynonymsWork() throws Exception {
Settings.Builder settingsBuilder = Settings.builder()
.put("analysis.analyzer.suggest_analyzer_synonyms.type", "custom")
.put("analysis.analyzer.suggest_analyzer_synonyms.tokenizer", "standard")
.putList("analysis.analyzer.suggest_analyzer_synonyms.filter", "lowercase", "my_synonyms")
.put("analysis.filter.my_synonyms.type", "synonym")
.putList("analysis.filter.my_synonyms.synonyms", "foo,renamed");
completionMappingBuilder.searchAnalyzer("suggest_analyzer_synonyms").indexAnalyzer("suggest_analyzer_synonyms");
createIndexAndMappingAndSettings(settingsBuilder.build(), completionMappingBuilder);
client().prepareIndex(INDEX, TYPE, "1").setSource(jsonBuilder()
.startObject().startObject(FIELD)
.startArray("input").value("Foo Fighters").endArray()
.endObject().endObject()
).get();
refresh();
// get suggestions for renamed
assertSuggestions("r", "Foo Fighters");
}

public void testThatUpgradeToMultiFieldsWorks() throws Exception {
final XContentBuilder mapping = jsonBuilder()
.startObject()


@ -2118,17 +2118,14 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
.put("compress", randomBoolean())
.put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES)));
logger.info("--> create test index with synonyms search analyzer");
logger.info("--> create test index with case-preserving search analyzer");
Settings.Builder indexSettings = Settings.builder()
.put(indexSettings())
.put(SETTING_NUMBER_OF_REPLICAS, between(0, 1))
.put(INDEX_REFRESH_INTERVAL_SETTING.getKey(), "10s")
.put("index.analysis.analyzer.my_analyzer.type", "custom")
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
.putList("index.analysis.analyzer.my_analyzer.filter", "lowercase", "my_synonym")
.put("index.analysis.filter.my_synonym.type", "synonym")
.put("index.analysis.filter.my_synonym.synonyms", "foo => bar");
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard");
assertAcked(prepareCreate("test-idx", 2, indexSettings));
@ -2137,12 +2134,13 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
final int numdocs = randomIntBetween(10, 100);
IndexRequestBuilder[] builders = new IndexRequestBuilder[numdocs];
for (int i = 0; i < builders.length; i++) {
builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "bar " + i);
builders[i] = client().prepareIndex("test-idx", "type1", Integer.toString(i)).setSource("field1", "Foo bar " + i);
}
indexRandom(true, builders);
flushAndRefresh();
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), numdocs);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), 0);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs);
logger.info("--> snapshot it");
@ -2195,9 +2193,8 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
// Make sure that number of shards didn't change
assertThat(getSettingsResponse.getSetting("test-idx", SETTING_NUMBER_OF_SHARDS), equalTo("" + numberOfShards));
assertThat(getSettingsResponse.getSetting("test-idx", "index.analysis.analyzer.my_analyzer.type"), equalTo("standard"));
assertThat(getSettingsResponse.getSetting("test-idx", "index.analysis.filter.my_synonym.type"), nullValue());
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), 0);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), numdocs);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs);
logger.info("--> delete the index and recreate it while deleting all index settings");
@ -2217,7 +2214,7 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas
// Make sure that number of shards didn't change
assertThat(getSettingsResponse.getSetting("test-idx", SETTING_NUMBER_OF_SHARDS), equalTo("" + numberOfShards));
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "foo")).get(), 0);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "Foo")).get(), numdocs);
assertHitCount(client.prepareSearch("test-idx").setSize(0).setQuery(matchQuery("field1", "bar")).get(), numdocs);
}


@ -180,42 +180,6 @@ public class SimpleValidateQueryIT extends ESIntegTestCase {
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:value1"));
}

public void testExplainMatchPhrasePrefix() {
assertAcked(prepareCreate("test").setSettings(
Settings.builder().put(indexSettings())
.put("index.analysis.filter.syns.type", "synonym")
.putList("index.analysis.filter.syns.synonyms", "one,two")
.put("index.analysis.analyzer.syns.tokenizer", "standard")
.putList("index.analysis.analyzer.syns.filter", "syns")
).addMapping("test", "field","type=text,analyzer=syns"));
ensureGreen();
ValidateQueryResponse validateQueryResponse = client().admin().indices().prepareValidateQuery("test")
.setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo")).setExplain(true).get();
assertThat(validateQueryResponse.isValid(), equalTo(true));
assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1));
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo*\""));
validateQueryResponse = client().admin().indices().prepareValidateQuery("test")
.setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo bar")).setExplain(true).get();
assertThat(validateQueryResponse.isValid(), equalTo(true));
assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1));
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo bar*\""));
// Stacked tokens
validateQueryResponse = client().admin().indices().prepareValidateQuery("test")
.setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "one bar")).setExplain(true).get();
assertThat(validateQueryResponse.isValid(), equalTo(true));
assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1));
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"(one two) bar*\""));
validateQueryResponse = client().admin().indices().prepareValidateQuery("test")
.setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "foo one")).setExplain(true).get();
assertThat(validateQueryResponse.isValid(), equalTo(true));
assertThat(validateQueryResponse.getQueryExplanation().size(), equalTo(1));
assertThat(validateQueryResponse.getQueryExplanation().get(0).getExplanation(), containsString("field:\"foo (one* two*)\""));
}

public void testExplainWithRewriteValidateQuery() throws Exception {
client().admin().indices().prepareCreate("test")
.addMapping("type1", "field", "type=text,analyzer=whitespace")


@ -31,8 +31,6 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
@ -169,8 +167,8 @@ public abstract class AnalysisFactoryTestCase extends ESTestCase {
.put("stemmeroverride", MovedToAnalysisCommon.class)
.put("stop", StopTokenFilterFactory.class)
.put("swedishlightstem", MovedToAnalysisCommon.class)
.put("synonym", SynonymTokenFilterFactory.class)
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("synonym", MovedToAnalysisCommon.class)
.put("synonymgraph", MovedToAnalysisCommon.class)
.put("trim", MovedToAnalysisCommon.class)
.put("truncate", MovedToAnalysisCommon.class)
.put("turkishlowercase", MovedToAnalysisCommon.class)