Merge pull request #13188 from nik9000/limit_on_size

Limit the size of the result window to a dynamic property
This commit is contained in:
Nik Everett 2015-09-10 15:40:22 -04:00
commit 766a25e3cb
7 changed files with 146 additions and 50 deletions

View File

@ -90,6 +90,7 @@ import org.elasticsearch.indices.recovery.RecoverySettings;
import org.elasticsearch.indices.store.IndicesStore;
import org.elasticsearch.indices.ttl.IndicesTTLService;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
@ -275,6 +276,7 @@ public class ClusterModule extends AbstractModule {
registerIndexDynamicSetting(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
registerIndexDynamicSetting(IndicesRequestCache.DEPRECATED_INDEX_CACHE_REQUEST_ENABLED, Validator.BOOLEAN);
registerIndexDynamicSetting(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING, Validator.TIME);
registerIndexDynamicSetting(DefaultSearchContext.MAX_RESULT_WINDOW, Validator.POSITIVE_INTEGER);
}
public void registerIndexDynamicSetting(String setting, Validator validator) {

View File

@ -77,6 +77,20 @@ import java.util.Map;
*
*/
public class DefaultSearchContext extends SearchContext {
/**
* Index setting describing the maximum value of from + size on a query.
*/
public static final String MAX_RESULT_WINDOW = "index.max_result_window";
public static class Defaults {
/**
* Default maximum value of from + size on a query. 10,000 was chosen as
* a conservative default as it is sure to not cause trouble. Users can
* certainly profile their cluster and decide to set it to 100,000
* safely. 1,000,000 is probably way to high for any cluster to set
* safely.
*/
public static final int MAX_RESULT_WINDOW = 10000;
}
private final long id;
private final ShardSearchRequest request;
@ -168,12 +182,20 @@ public class DefaultSearchContext extends SearchContext {
*/
@Override
public void preProcess() {
if (!(from() == -1 && size() == -1)) {
// from and size have been set.
int numHits = from() + size();
if (numHits < 0) {
String msg = "Result window is too large, from + size must be less than or equal to: [" + Integer.MAX_VALUE + "] but was [" + (((long) from()) + ((long) size())) + "]";
throw new QueryPhaseExecutionException(this, msg);
if (scrollContext == null) {
long from = from() == -1 ? 0 : from();
long size = size() == -1 ? 10 : size();
long resultWindow = from + size;
// We need settingsService's view of the settings because its dynamic.
// indexService's isn't.
int maxResultWindow = indexService.settingsService().getSettings().getAsInt(MAX_RESULT_WINDOW, Defaults.MAX_RESULT_WINDOW);
if (resultWindow > maxResultWindow) {
throw new QueryPhaseExecutionException(this,
"Result window is too large, from + size must be less than or equal to: [" + maxResultWindow + "] but was ["
+ resultWindow + "]. See the scroll api for a more efficient way to request large data sets. "
+ "This limit can be set by changing the [" + DefaultSearchContext.MAX_RESULT_WINDOW
+ "] index level parameter.");
}
}

View File

@ -74,27 +74,27 @@ public class SearchScrollIT extends ESIntegTestCase {
.execute().actionGet();
try {
long counter = 0;
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(35));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(35));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(30));
for (SearchHit hit : searchResponse.getHits()) {
@ -133,47 +133,47 @@ public class SearchScrollIT extends ESIntegTestCase {
.execute().actionGet();
try {
long counter = 0;
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(3));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
for (int i = 0; i < 32; i++) {
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(3));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
}
// and now, the last one is one
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(1));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
// a the last is zero
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId())
.setScroll(TimeValue.timeValueMinutes(2))
.execute().actionGet();
assertThat(searchResponse.getHits().getTotalHits(), equalTo(100l));
assertThat(searchResponse.getHits().hits().length, equalTo(0));
for (SearchHit hit : searchResponse.getHits()) {
assertThat(((Number) hit.sortValues()[0]).longValue(), equalTo(counter++));
}
} finally {
clearScroll(searchResponse.getScrollId());
}
@ -212,7 +212,7 @@ public class SearchScrollIT extends ESIntegTestCase {
}
searchResponse = client().prepareSearchScroll(searchResponse.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)).execute().actionGet();
} while (searchResponse.getHits().hits().length > 0);
client().admin().indices().prepareRefresh().execute().actionGet();
assertThat(client().prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(), equalTo(500l));
assertThat(client().prepareCount().setQuery(termQuery("message", "test")).execute().actionGet().getCount(), equalTo(0l));
@ -410,9 +410,7 @@ public class SearchScrollIT extends ESIntegTestCase {
assertThrows(internalCluster().transportClient().prepareSearchScroll(searchResponse2.getScrollId()).setScroll(TimeValue.timeValueMinutes(2)), RestStatus.NOT_FOUND);
}
@Test
// https://github.com/elasticsearch/elasticsearch/issues/4156
public void testDeepPaginationWithOneDocIndexAndDoNotBlowUp() throws Exception {
public void testDeepScrollingDoesNotBlowUp() throws Exception {
client().prepareIndex("index", "type", "1")
.setSource("field", "value")
.setRefresh(true)
@ -422,11 +420,8 @@ public class SearchScrollIT extends ESIntegTestCase {
SearchRequestBuilder builder = client().prepareSearch("index")
.setSearchType(searchType)
.setQuery(QueryBuilders.matchAllQuery())
.setSize(Integer.MAX_VALUE);
if (randomBoolean()) {
builder.setScroll("1m");
}
.setSize(Integer.MAX_VALUE)
.setScroll("1m");
SearchResponse response = builder.execute().actionGet();
try {

View File

@ -22,11 +22,13 @@ package org.elasticsearch.search.simple;
import org.apache.lucene.util.Constants;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.internal.DefaultSearchContext;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
@ -38,12 +40,12 @@ import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.hamcrest.Matchers.containsString;
public class SimpleSearchIT extends ESIntegTestCase {
@Test
public void testSearchNullIndex() {
try {
client().prepareSearch((String) null).setQuery(QueryBuilders.termQuery("_id", "XXX1")).execute().actionGet();
@ -60,7 +62,6 @@ public class SimpleSearchIT extends ESIntegTestCase {
}
}
@Test
public void testSearchRandomPreference() throws InterruptedException, ExecutionException {
createIndex("test");
indexRandom(true, client().prepareIndex("test", "type", "1").setSource("field", "value"),
@ -84,8 +85,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
}
}
@Test
public void simpleIpTests() throws Exception {
public void testSimpleIp() throws Exception {
createIndex("test");
client().admin().indices().preparePutMapping("test").setType("type1")
@ -104,8 +104,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
assertHitCount(search, 1l);
}
@Test
public void simpleIdTests() {
public void testSimpleId() {
createIndex("test");
client().prepareIndex("test", "type", "XXX1").setSource("field", "value").setRefresh(true).execute().actionGet();
@ -124,8 +123,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
assertHitCount(searchResponse, 1l);
}
@Test
public void simpleDateRangeTests() throws Exception {
public void testSimpleDateRange() throws Exception {
createIndex("test");
client().prepareIndex("test", "type1", "1").setSource("field", "2010-01-05T02:00").execute().actionGet();
client().prepareIndex("test", "type1", "2").setSource("field", "2010-01-06T02:00").execute().actionGet();
@ -150,9 +148,8 @@ public class SimpleSearchIT extends ESIntegTestCase {
searchResponse = client().prepareSearch("test").setQuery(QueryBuilders.queryStringQuery("field:[2010-01-03||+2d TO 2010-01-04||+2d/d]")).execute().actionGet();
assertHitCount(searchResponse, 2l);
}
@Test
public void localeDependentDateTests() throws Exception {
public void testLocaleDependentDate() throws Exception {
assumeFalse("Locals are buggy on JDK9EA", Constants.JRE_IS_MINIMUM_JAVA9 && systemPropertyAsBoolean("tests.security.manager", false));
assertAcked(prepareCreate("test")
.addMapping("type1",
@ -189,8 +186,7 @@ public class SimpleSearchIT extends ESIntegTestCase {
}
}
@Test
public void simpleTerminateAfterCountTests() throws Exception {
public void testSimpleTerminateAfterCount() throws Exception {
prepareCreate("test").setSettings(
SETTING_NUMBER_OF_SHARDS, 1,
SETTING_NUMBER_OF_REPLICAS, 0).get();
@ -225,16 +221,76 @@ public class SimpleSearchIT extends ESIntegTestCase {
assertFalse(searchResponse.isTerminatedEarly());
}
@Test
public void testInsaneFrom() throws Exception {
public void testInsaneFromAndSize() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertWindowFails(client().prepareSearch("idx").setFrom(Integer.MAX_VALUE));
assertWindowFails(client().prepareSearch("idx").setSize(Integer.MAX_VALUE));
}
public void testTooLargeFromAndSize() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertWindowFails(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1));
assertWindowFails(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
}
public void testLargeFromAndSizeSucceeds() throws Exception {
createIndex("idx");
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW - 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW / 2 - 1).get(), 1);
}
public void testTooLargeFromAndSizeOkBySetting() throws Exception {
prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2).get();
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
}
public void testTooLargeFromAndSizeOkByDynamicSetting() throws Exception {
createIndex("idx");
assertAcked(client().admin().indices().prepareUpdateSettings("idx")
.setSettings(
Settings.builder().put(DefaultSearchContext.MAX_RESULT_WINDOW, DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 2))
.get());
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW + 1).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW).get(), 1);
}
public void testTooLargeFromAndSizeBackwardsCompatibilityRecommendation() throws Exception {
prepareCreate("idx").setSettings(DefaultSearchContext.MAX_RESULT_WINDOW, Integer.MAX_VALUE).get();
indexRandom(true, client().prepareIndex("idx", "type").setSource("{}"));
assertHitCount(client().prepareSearch("idx").setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
assertHitCount(client().prepareSearch("idx").setSize(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10)
.setFrom(DefaultSearchContext.Defaults.MAX_RESULT_WINDOW * 10).get(), 1);
}
private void assertWindowFails(SearchRequestBuilder search) {
try {
client().prepareSearch("idx").setFrom(Integer.MAX_VALUE).get();
search.get();
fail();
} catch (SearchPhaseExecutionException e) {
assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to:"));
assertThat(e.toString(), containsString("Result window is too large, from + size must be less than or equal to: ["
+ DefaultSearchContext.Defaults.MAX_RESULT_WINDOW));
assertThat(e.toString(), containsString("See the scroll api for a more efficient way to request large data sets"));
}
}
}

View File

@ -98,6 +98,14 @@ specific index module:
index visible to search. Defaults to `1s`. Can be set to `-1` to disable
refresh.
`index.max_result_window`::
The maximum value of `from + size` for searches to this index. Defaults to
`10000`. Search requests take heap memory and time proportional to
`from + size` and this limits that memory. See
{ref}/search-request-scroll.html[Scroll] for a more efficient alternative
to raising this.
`index.blocks.read_only`::
Set to `true` to make the index and index metadata read only, `false` to
@ -184,5 +192,3 @@ include::index-modules/slowlog.asciidoc[]
include::index-modules/store.asciidoc[]
include::index-modules/translog.asciidoc[]

View File

@ -26,6 +26,16 @@ Scroll requests sorted by `_doc` have been optimized to more efficiently resume
from where the previous request stopped, so this will have the same performance
characteristics as the former `scan` search type.
==== from + size limits
Elasticsearch will now return an error message if a query's `from` + `size` is
more than the `index.max_result_window` parameter. This parameter defaults to
10,000 which is safe for almost all clusters. Values higher than can consume
significant chunks of heap memory per search and per shard executing the
search. It's safest to leave this value as it is an use the scroll api for any
deep scrolling but this setting is dynamic so it can raised or lowered as
needed.
=== Update changes
==== Updates now `detect_noop` by default

View File

@ -19,3 +19,8 @@ defaults to `10`.
}
}
--------------------------------------------------
Note that `from` + `size` can not be more than the `index.max_result_window`
index setting which defaults to 10,000. See the
{ref}/search-request-scroll.html[Scroll] api for more efficient ways to do deep
scrolling.