Add a soft limit for the number of requested doc-value fields (#26574)

Requesting to many docvalue_fields in a search request can potentially be costly
because it might incur a per-field per-document seek. This change introduces a
soft limit on the number of fields that can be retrieved. The setting can be
changed per index using the `index.max_docvalue_fields_search` setting.

Relates to #26390
This commit is contained in:
Christoph Büscher 2017-09-13 11:57:06 +02:00 committed by GitHub
parent a34db4e09f
commit e00db235bc
7 changed files with 102 additions and 2 deletions

View File

@ -111,6 +111,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.INDEX_REFRESH_INTERVAL_SETTING, IndexSettings.INDEX_REFRESH_INTERVAL_SETTING,
IndexSettings.MAX_RESULT_WINDOW_SETTING, IndexSettings.MAX_RESULT_WINDOW_SETTING,
IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING, IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING,
IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING,
IndexSettings.MAX_RESCORE_WINDOW_SETTING, IndexSettings.MAX_RESCORE_WINDOW_SETTING,
IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING, IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING, IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,

View File

@ -98,6 +98,13 @@ public final class IndexSettings {
*/ */
public static final Setting<Integer> MAX_INNER_RESULT_WINDOW_SETTING = public static final Setting<Integer> MAX_INNER_RESULT_WINDOW_SETTING =
Setting.intSetting("index.max_inner_result_window", 100, 1, Property.Dynamic, Property.IndexScope); Setting.intSetting("index.max_inner_result_window", 100, 1, Property.Dynamic, Property.IndexScope);
/**
* Index setting describing the maximum value of allowed `docvalue_fields`that can be retrieved
* per search request. The default maximum of 100 is defensive for the reason that retrieving
* doc values might incur a per-field per-document seek.
*/
public static final Setting<Integer> MAX_DOCVALUE_FIELDS_SEARCH_SETTING =
Setting.intSetting("index.max_docvalue_fields_search", 100, 0, Property.Dynamic, Property.IndexScope);
/** /**
* Index setting describing the maximum size of the rescore window. Defaults to {@link #MAX_RESULT_WINDOW_SETTING} * Index setting describing the maximum size of the rescore window. Defaults to {@link #MAX_RESULT_WINDOW_SETTING}
* because they both do the same thing: control the size of the heap of hits. * because they both do the same thing: control the size of the heap of hits.
@ -221,6 +228,7 @@ public final class IndexSettings {
private volatile int maxInnerResultWindow; private volatile int maxInnerResultWindow;
private volatile int maxAdjacencyMatrixFilters; private volatile int maxAdjacencyMatrixFilters;
private volatile int maxRescoreWindow; private volatile int maxRescoreWindow;
private volatile int maxDocvalueFields;
private volatile boolean TTLPurgeDisabled; private volatile boolean TTLPurgeDisabled;
/** /**
* The maximum number of refresh listeners allows on this shard. * The maximum number of refresh listeners allows on this shard.
@ -322,6 +330,7 @@ public final class IndexSettings {
maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING); maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING);
maxAdjacencyMatrixFilters = scopedSettings.get(MAX_ADJACENCY_MATRIX_FILTERS_SETTING); maxAdjacencyMatrixFilters = scopedSettings.get(MAX_ADJACENCY_MATRIX_FILTERS_SETTING);
maxRescoreWindow = scopedSettings.get(MAX_RESCORE_WINDOW_SETTING); maxRescoreWindow = scopedSettings.get(MAX_RESCORE_WINDOW_SETTING);
maxDocvalueFields = scopedSettings.get(MAX_DOCVALUE_FIELDS_SEARCH_SETTING);
TTLPurgeDisabled = scopedSettings.get(INDEX_TTL_DISABLE_PURGE_SETTING); TTLPurgeDisabled = scopedSettings.get(INDEX_TTL_DISABLE_PURGE_SETTING);
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD); maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL); maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
@ -351,6 +360,7 @@ public final class IndexSettings {
scopedSettings.addSettingsUpdateConsumer(MAX_INNER_RESULT_WINDOW_SETTING, this::setMaxInnerResultWindow); scopedSettings.addSettingsUpdateConsumer(MAX_INNER_RESULT_WINDOW_SETTING, this::setMaxInnerResultWindow);
scopedSettings.addSettingsUpdateConsumer(MAX_ADJACENCY_MATRIX_FILTERS_SETTING, this::setMaxAdjacencyMatrixFilters); scopedSettings.addSettingsUpdateConsumer(MAX_ADJACENCY_MATRIX_FILTERS_SETTING, this::setMaxAdjacencyMatrixFilters);
scopedSettings.addSettingsUpdateConsumer(MAX_RESCORE_WINDOW_SETTING, this::setMaxRescoreWindow); scopedSettings.addSettingsUpdateConsumer(MAX_RESCORE_WINDOW_SETTING, this::setMaxRescoreWindow);
scopedSettings.addSettingsUpdateConsumer(MAX_DOCVALUE_FIELDS_SEARCH_SETTING, this::setMaxDocvalueFields);
scopedSettings.addSettingsUpdateConsumer(INDEX_WARMER_ENABLED_SETTING, this::setEnableWarmer); scopedSettings.addSettingsUpdateConsumer(INDEX_WARMER_ENABLED_SETTING, this::setEnableWarmer);
scopedSettings.addSettingsUpdateConsumer(INDEX_GC_DELETES_SETTING, this::setGCDeletes); scopedSettings.addSettingsUpdateConsumer(INDEX_GC_DELETES_SETTING, this::setGCDeletes);
scopedSettings.addSettingsUpdateConsumer(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING, this::setTranslogFlushThresholdSize); scopedSettings.addSettingsUpdateConsumer(INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING, this::setTranslogFlushThresholdSize);
@ -607,6 +617,17 @@ public final class IndexSettings {
this.maxRescoreWindow = maxRescoreWindow; this.maxRescoreWindow = maxRescoreWindow;
} }
/**
* Returns the maximum number of allowed docvalue_fields to retrieve in a search request
*/
public int getMaxDocvalueFields() {
return this.maxDocvalueFields;
}
private void setMaxDocvalueFields(int maxDocvalueFields) {
this.maxDocvalueFields = maxDocvalueFields;
}
/** /**
* Returns the GC deletes cycle in milliseconds. * Returns the GC deletes cycle in milliseconds.
*/ */

View File

@ -43,7 +43,6 @@ import org.elasticsearch.common.util.concurrent.ConcurrentMapLong;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.MergeSchedulerConfig;
import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.Engine;
import org.elasticsearch.index.query.InnerHitContextBuilder; import org.elasticsearch.index.query.InnerHitContextBuilder;
import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchAllQueryBuilder;
@ -771,6 +770,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
context.fetchSourceContext(source.fetchSource()); context.fetchSourceContext(source.fetchSource());
} }
if (source.docValueFields() != null) { if (source.docValueFields() != null) {
int maxAllowedDocvalueFields = context.mapperService().getIndexSettings().getMaxDocvalueFields();
if (source.docValueFields().size() > maxAllowedDocvalueFields) {
throw new IllegalArgumentException(
"Trying to retrieve too many docvalue_fields. Must be less than or equal to: [" + maxAllowedDocvalueFields
+ "] but was [" + source.docValueFields().size() + "]. This limit can be set by changing the ["
+ IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING.getKey() + "] index level setting.");
}
context.docValueFieldsContext(new DocValueFieldsContext(source.docValueFields())); context.docValueFieldsContext(new DocValueFieldsContext(source.docValueFields()));
} }
if (source.highlighter() != null) { if (source.highlighter() != null) {

View File

@ -310,6 +310,22 @@ public class IndexSettingsTests extends ESTestCase {
assertEquals(IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxInnerResultWindow()); assertEquals(IndexSettings.MAX_INNER_RESULT_WINDOW_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxInnerResultWindow());
} }
public void testMaxDocvalueFields() {
IndexMetaData metaData = newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING.getKey(), 200).build());
IndexSettings settings = new IndexSettings(metaData, Settings.EMPTY);
assertEquals(200, settings.getMaxDocvalueFields());
settings.updateIndexMetaData(
newIndexMeta("index", Settings.builder().put(IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING.getKey(), 50).build()));
assertEquals(50, settings.getMaxDocvalueFields());
settings.updateIndexMetaData(newIndexMeta("index", Settings.EMPTY));
assertEquals(IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxDocvalueFields());
metaData = newIndexMeta("index", Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build());
settings = new IndexSettings(metaData, Settings.EMPTY);
assertEquals(IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING.get(Settings.EMPTY).intValue(), settings.getMaxDocvalueFields());
}
public void testMaxAdjacencyMatrixFiltersSetting() { public void testMaxAdjacencyMatrixFiltersSetting() {
IndexMetaData metaData = newIndexMeta("index", Settings.builder() IndexMetaData metaData = newIndexMeta("index", Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)

View File

@ -25,7 +25,6 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.action.index.IndexResponse;
import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.action.search.SearchTask;
import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.search.SearchType;
@ -262,6 +261,35 @@ public class SearchServiceTests extends ESSingleNodeTestCase {
} }
/**
* test that getting more than the allowed number of docvalue_fields throws an exception
*/
public void testMaxDocvalueFieldsSearch() throws IOException {
createIndex("index");
final SearchService service = getInstanceFromNode(SearchService.class);
final IndicesService indicesService = getInstanceFromNode(IndicesService.class);
final IndexService indexService = indicesService.indexServiceSafe(resolveIndex("index"));
final IndexShard indexShard = indexService.getShard(0);
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// adding the maximum allowed number of docvalue_fields to retrieve
for (int i = 0; i < indexService.getIndexSettings().getMaxDocvalueFields(); i++) {
searchSourceBuilder.docValueField("field" + i);
}
try (SearchContext context = service.createContext(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.DEFAULT,
searchSourceBuilder, new String[0], false, new AliasFilter(null, Strings.EMPTY_ARRAY), 1.0f), null)) {
assertNotNull(context);
searchSourceBuilder.docValueField("one_field_too_much");
IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
() -> service.createContext(new ShardSearchLocalRequest(indexShard.shardId(), 1, SearchType.DEFAULT,
searchSourceBuilder, new String[0], false, new AliasFilter(null, Strings.EMPTY_ARRAY), 1.0f), null));
assertEquals(
"Trying to retrieve too many docvalue_fields. Must be less than or equal to: [100] but was [101]. "
+ "This limit can be set by changing the [index.max_docvalue_fields_search] index level setting.",
ex.getMessage());
}
}
public static class FailOnRewriteQueryPlugin extends Plugin implements SearchPlugin { public static class FailOnRewriteQueryPlugin extends Plugin implements SearchPlugin {
@Override @Override
public List<QuerySpec<?>> getQueries() { public List<QuerySpec<?>> getQueries() {

View File

@ -133,6 +133,12 @@ specific index module:
requests take heap memory and time proportional to requests take heap memory and time proportional to
`max(window_size, from + size)` and this limits that memory. `max(window_size, from + size)` and this limits that memory.
`index.max_docvalue_fields_search`::
The maximum number of `docvalue_fields` that are allowed in a query.
Defaults to `100`. Doc-value fields are costly since they might incur
a per-field per-document seek.
`index.blocks.read_only`:: `index.blocks.read_only`::
Set to `true` to make the index and index metadata read only, `false` to Set to `true` to make the index and index metadata read only, `false` to

View File

@ -50,3 +50,25 @@ setup:
match_all: {} match_all: {}
query_weight: 1 query_weight: 1
rescore_query_weight: 2 rescore_query_weight: 2
---
"Docvalues_fields size limit":
- skip:
version: " - 6.99.99"
reason: soft limit for docvalue_fields only available as of 7.0.0
- do:
indices.create:
index: test_2
body:
settings:
index.max_docvalue_fields_search: 2
- do:
catch: /Trying to retrieve too many docvalue_fields\. Must be less than or equal to[:] \[2\] but was \[3\]\. This limit can be set by changing the \[index.max_docvalue_fields_search\] index level setting\./
search:
index: test_2
body:
query:
match_all: {}
docvalue_fields: ["one", "two", "three"]