Fallback to extract terms if MultiPhraseQuery is large

Currently if MPQ is very large highlighing can take down a node
or cause high CPU / RAM consumption. If the query grows > 16 terms
we just extract the terms and do term by term highlighting.

Closes  #3142 #3128
This commit is contained in:
Simon Willnauer 2013-06-06 11:09:36 +02:00
parent f995c9c130
commit 1c513bc262
2 changed files with 66 additions and 0 deletions

View File

@ -104,6 +104,21 @@ public class CustomFieldQuery extends FieldQuery {
} }
private void convertMultiPhraseQuery(int currentPos, int[] termsIdx, MultiPhraseQuery orig, List<Term[]> terms, int[] pos, IndexReader reader, Collection<Query> flatQueries) throws IOException { private void convertMultiPhraseQuery(int currentPos, int[] termsIdx, MultiPhraseQuery orig, List<Term[]> terms, int[] pos, IndexReader reader, Collection<Query> flatQueries) throws IOException {
if (currentPos == 0) {
// if we have more than 16 terms
int numTerms = 0;
for (Term[] currentPosTerm : terms) {
numTerms += currentPosTerm.length;
}
if (numTerms > 16) {
for (Term[] currentPosTerm : terms) {
for (Term term : currentPosTerm) {
super.flatten(new TermQuery(term), reader, flatQueries);
}
}
return;
}
}
/* /*
* we walk all possible ways and for each path down the MPQ we create a PhraseQuery this is what FieldQuery supports. * we walk all possible ways and for each path down the MPQ we create a PhraseQuery this is what FieldQuery supports.
* It seems expensive but most queries will pretty small. * It seems expensive but most queries will pretty small.

View File

@ -54,6 +54,7 @@ import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
import static org.testng.Assert.fail; import static org.testng.Assert.fail;
/** /**
@ -125,6 +126,56 @@ public class HighlighterSearchTests extends AbstractSharedClusterTest {
assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland")); assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
} }
@Test
public void testMultiPhraseCutoff() throws ElasticSearchException, IOException {
/*
* MultiPhraseQuery can literally kill an entire node if there are too many terms in the
* query. We cut off and extract terms if there are more than 16 terms in the query
*/
prepareCreate("test")
.addMapping("test", jsonBuilder()
.startObject()
.startObject("test")
.startObject("properties")
.startObject("body")
.field("type", "string")
.field("index_analyzer", "custom_analyzer")
.field("search_analyzer", "custom_analyzer")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject())
.setSettings(ImmutableSettings.settingsBuilder()
.put("index.number_of_shards", 1)
.put("index.number_of_replicas", 0)
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", true)
.put("analysis.filter.wordDelimiter.catenate_words", true)
.put("analysis.filter.wordDelimiter.catenate_numbers", true)
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putArray("analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter"))
.execute().actionGet();
ensureGreen();
client().prepareIndex("test", "test", "1")
.setSource(XContentFactory.jsonBuilder()
.startObject()
.field("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature")
.endObject())
.execute().actionGet();
refresh();
SearchResponse search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com ").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));
search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http</em>://<em>www</em>.<em>facebook</em>.<em>com</em> <em>http</em>://<em>elasticsearch</em>.<em>org</em> <em>http</em>://<em>xing</em>.<em>com</em> <em>http</em>://<em>cnn</em>.<em>com</em> <em>http</em>://<em>quora</em>.com"));
}
@Test @Test
public void testNgramHighlightingPreLucene42() throws ElasticSearchException, IOException { public void testNgramHighlightingPreLucene42() throws ElasticSearchException, IOException {
boolean[] doStore = {true, false}; boolean[] doStore = {true, false};