Introduce limit to the number of terms in Terms Query (#27968)

- Introduce index level settings to control the maximum number of terms
    that can be used in a Terms Query
- Throw an error if a request exceeds this max number

Closes #18829
This commit is contained in:
Mayya Sharipova 2017-12-28 17:36:29 -05:00 committed by GitHub
parent da0ed578b2
commit dcde895f49
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 105 additions and 0 deletions

View File

@ -119,6 +119,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.MAX_RESCORE_WINDOW_SETTING,
IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
IndexSettings.MAX_TERMS_COUNT_SETTING,
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
IndexSettings.DEFAULT_FIELD_SETTING,
IndexSettings.QUERY_STRING_LENIENT_SETTING,

View File

@ -129,6 +129,15 @@ public final class IndexSettings {
public static final Setting<Integer> MAX_ANALYZED_OFFSET_SETTING =
Setting.intSetting("index.highlight.max_analyzed_offset", 10000, 1, Property.Dynamic, Property.IndexScope);
/**
* Index setting describing the maximum number of terms that can be used in a Terms Query.
* The default maximum of 65536 terms is defensive: extra processing and memory are involved
* for each additional term, and a large number of terms degrades cluster performance.
* The setting is declared with {@code Property.Dynamic} and {@code Property.IndexScope}
* under the key {@code index.max_terms_count}.
*/
public static final Setting<Integer> MAX_TERMS_COUNT_SETTING =
Setting.intSetting("index.max_terms_count", 65536, 1, Property.Dynamic, Property.IndexScope);
/**
* Index setting describing for NGramTokenizer and NGramTokenFilter
* the maximum difference between
@ -287,6 +296,7 @@ public final class IndexSettings {
private volatile boolean TTLPurgeDisabled;
private volatile TimeValue searchIdleAfter;
private volatile int maxAnalyzedOffset;
private volatile int maxTermsCount;
/**
* The maximum number of refresh listeners allowed on this shard.
@ -397,6 +407,7 @@ public final class IndexSettings {
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
this.indexSortConfig = new IndexSortConfig(this);
searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
@ -440,6 +451,7 @@ public final class IndexSettings {
scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
scopedSettings.addSettingsUpdateConsumer(INDEX_SEARCH_IDLE_AFTER, this::setSearchIdleAfter);
@ -734,6 +746,13 @@ public final class IndexSettings {
// Settings-update callback for MAX_ANALYZED_OFFSET_SETTING: stores the new highlight offset limit.
private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) {
    this.maxAnalyzedOffset = maxAnalyzedOffset;
}
/**
 * Gets the limit on how many terms a single Terms Query request may contain for this index.
 *
 * @return the current value of the {@code maxTermsCount} field
 */
public int getMaxTermsCount() {
    return maxTermsCount;
}
// Settings-update callback for MAX_TERMS_COUNT_SETTING: stores the new Terms Query term limit.
private void setMaxTermsCount(int maxTermsCount) {
    this.maxTermsCount = maxTermsCount;
}
/**
* Returns the maximum number of allowed script_fields to retrieve in a search request
*/

View File

@ -39,6 +39,7 @@ import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.indices.TermsLookup;
@ -416,6 +417,13 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
if (values == null || values.isEmpty()) {
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
}
// Enforce the per-index cap on the number of terms before any query is built.
int maxTermsCount = context.getIndexSettings().getMaxTermsCount();
if (values.size() > maxTermsCount) {
    // Point the user at the setting that actually controls this limit (index.max_terms_count).
    throw new IllegalArgumentException(
        "The number of terms [" + values.size() + "] used in the Terms Query request has exceeded " +
        "the allowed maximum of [" + maxTermsCount + "]. " + "This maximum can be set by changing the [" +
        IndexSettings.MAX_TERMS_COUNT_SETTING.getKey() + "] index level setting.");
}
MappedFieldType fieldType = context.fieldMapper(fieldName);
if (fieldType != null) {

View File

@ -204,6 +204,11 @@ specific index module:
This setting is only applicable when highlighting is requested on a text that was indexed without offsets or term vectors.
Defaults to `10000`.
`index.max_terms_count`::
The maximum number of terms that can be used in a Terms Query.
Defaults to `65536`.
[float]
=== Settings in other index modules

View File

@ -49,3 +49,12 @@ removed.
* `levenstein` - replaced by `levenshtein`
* `jarowinkler` - replaced by `jaro_winkler`
==== Limiting the number of terms that can be used in a Terms Query request
Executing a Terms Query with a lot of terms may degrade the cluster performance,
as each additional term demands extra processing and memory.
To safeguard against this, the maximum number of terms that can be used in a
Terms Query request has been limited to 65536. This default maximum can be changed
for a particular index with the index setting `index.max_terms_count`.

View File

@ -57,6 +57,14 @@ across all nodes if the "reference" terms data is not large. The lookup
terms filter will prefer to execute the get request on a local node if
possible, reducing the need for networking.
[WARNING]
Executing a Terms Query request with a lot of terms can be quite slow,
as each additional term demands extra processing and memory.
To safeguard against this, the maximum number of terms that can be used
in a Terms Query, either directly or through a lookup, has been limited to `65536`.
This default maximum can be changed for a particular index with the index setting
`index.max_terms_count`.
[float]
===== Terms lookup twitter example
At first we index the information for user with id 2, specifically, its

View File

@ -0,0 +1,55 @@
---
# Verifies that a Terms Query is rejected once the number of terms (given directly
# or resolved through a terms lookup) exceeds the index's index.max_terms_count.
"Terms Query with No.of terms exceeding index.max_terms_count should FAIL":
- skip:
version: " - 6.99.99"
reason: index.max_terms_count setting has been added in 7.0.0
# Create the test index with the limit lowered to 2 terms so it is easy to exceed.
- do:
indices.create:
index: test_index
body:
settings:
number_of_shards: 1
index.max_terms_count: 2
mappings:
test_type:
properties:
user:
type: keyword
followers:
type: keyword
# Index four users; u1 has 2 followers (within the limit), u2 has 3 (over the limit).
- do:
bulk:
refresh: true
body:
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u1"}}'
- '{"user": "u1", "followers": ["u2", "u3"]}'
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u2"}}'
- '{"user": "u2", "followers": ["u1", "u3", "u4"]}'
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u3"}}'
- '{"user": "u3", "followers": ["u1"]}'
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u4"}}'
- '{"user": "u4", "followers": ["u3"]}'
# Direct terms: exactly 2 terms equals the limit, so the query runs and matches u1 and u2.
- do:
search:
index: test_index
body: {"query" : {"terms" : {"user" : ["u1", "u2"]}}}
- match: { hits.total: 2 }
# Direct terms: 3 terms exceeds the limit of 2, so the request must fail.
- do:
catch: bad_request
search:
index: test_index
body: {"query" : {"terms" : {"user" : ["u1", "u2", "u3"]}}}
# Terms lookup: u1's followers field holds 2 terms (u2, u3), which is within the limit.
- do:
search:
index: test_index
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u1", "path" : "followers"}}}}
- match: { hits.total: 2 }
# Terms lookup: u2's followers field holds 3 terms, exceeding the limit, so the request must fail.
- do:
catch: bad_request
search:
index: test_index
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u2", "path" : "followers"}}}}