Introduce limit to the number of terms in Terms Query (#27968)
- Introduce index level settings to control the maximum number of terms that can be used in a Terms Query - Throw an error if a request exceeds this max number Closes #18829
This commit is contained in:
parent
da0ed578b2
commit
dcde895f49
|
@ -119,6 +119,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
|
|||
IndexSettings.MAX_RESCORE_WINDOW_SETTING,
|
||||
IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING,
|
||||
IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
|
||||
IndexSettings.MAX_TERMS_COUNT_SETTING,
|
||||
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
|
||||
IndexSettings.DEFAULT_FIELD_SETTING,
|
||||
IndexSettings.QUERY_STRING_LENIENT_SETTING,
|
||||
|
|
|
@ -129,6 +129,15 @@ public final class IndexSettings {
|
|||
public static final Setting<Integer> MAX_ANALYZED_OFFSET_SETTING =
|
||||
Setting.intSetting("index.highlight.max_analyzed_offset", 10000, 1, Property.Dynamic, Property.IndexScope);
|
||||
|
||||
|
||||
/**
|
||||
* Index setting describing the maximum number of terms that can be used in Terms Query.
|
||||
* The default maximum of 65536 terms is defensive, as extra processing and memory are involved
|
||||
* for each additional term, and a large number of terms degrade the cluster performance.
|
||||
*/
|
||||
public static final Setting<Integer> MAX_TERMS_COUNT_SETTING =
|
||||
Setting.intSetting("index.max_terms_count", 65536, 1, Property.Dynamic, Property.IndexScope);
|
||||
|
||||
/**
|
||||
* Index setting describing for NGramTokenizer and NGramTokenFilter
|
||||
* the maximum difference between
|
||||
|
@ -287,6 +296,7 @@ public final class IndexSettings {
|
|||
private volatile boolean TTLPurgeDisabled;
|
||||
private volatile TimeValue searchIdleAfter;
|
||||
private volatile int maxAnalyzedOffset;
|
||||
private volatile int maxTermsCount;
|
||||
|
||||
/**
|
||||
* The maximum number of refresh listeners allowed on this shard.
|
||||
|
@ -397,6 +407,7 @@ public final class IndexSettings {
|
|||
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
|
||||
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
|
||||
maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
|
||||
maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
|
||||
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
|
||||
this.indexSortConfig = new IndexSortConfig(this);
|
||||
searchIdleAfter = scopedSettings.get(INDEX_SEARCH_IDLE_AFTER);
|
||||
|
@ -440,6 +451,7 @@ public final class IndexSettings {
|
|||
scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
|
||||
scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
|
||||
scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
|
||||
scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
|
||||
scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
|
||||
scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
|
||||
scopedSettings.addSettingsUpdateConsumer(INDEX_SEARCH_IDLE_AFTER, this::setSearchIdleAfter);
|
||||
|
@ -734,6 +746,13 @@ public final class IndexSettings {
|
|||
|
||||
/**
 * Stores an updated highlight max-analyzed-offset value. Registered as the settings
 * update consumer for {@code MAX_ANALYZED_OFFSET_SETTING}.
 */
private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) {
    this.maxAnalyzedOffset = maxAnalyzedOffset;
}
|
||||
|
||||
/**
|
||||
* Returns the maximum number of terms that can be used in a Terms Query request
|
||||
*/
|
||||
public int getMaxTermsCount() {
    return maxTermsCount;
}
|
||||
|
||||
/**
 * Stores an updated terms-count limit. Registered as the settings update consumer
 * for {@code MAX_TERMS_COUNT_SETTING}.
 */
private void setMaxTermsCount(int maxTermsCount) {
    this.maxTermsCount = maxTermsCount;
}
|
||||
|
||||
/**
|
||||
* Returns the maximum number of allowed script_fields to retrieve in a search request
|
||||
*/
|
||||
|
|
|
@ -39,6 +39,7 @@ import org.elasticsearch.common.lucene.search.Queries;
|
|||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.support.XContentMapValues;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.indices.TermsLookup;
|
||||
|
||||
|
@ -416,6 +417,13 @@ public class TermsQueryBuilder extends AbstractQueryBuilder<TermsQueryBuilder> {
|
|||
if (values == null || values.isEmpty()) {
|
||||
return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query.");
|
||||
}
|
||||
int maxTermsCount = context.getIndexSettings().getMaxTermsCount();
|
||||
if (values.size() > maxTermsCount){
|
||||
throw new IllegalArgumentException(
|
||||
"The number of terms [" + values.size() + "] used in the Terms Query request has exceeded " +
|
||||
"the allowed maximum of [" + maxTermsCount + "]. " + "This maximum can be set by changing the [" +
|
||||
// Name the setting that actually enforces this limit (index.max_terms_count).
IndexSettings.MAX_TERMS_COUNT_SETTING.getKey() + "] index level setting.");
|
||||
}
|
||||
MappedFieldType fieldType = context.fieldMapper(fieldName);
|
||||
|
||||
if (fieldType != null) {
|
||||
|
|
|
@ -204,6 +204,11 @@ specific index module:
|
|||
This setting is only applicable when highlighting is requested on a text that was indexed without offsets or term vectors.
|
||||
Defaults to `10000`.
|
||||
|
||||
`index.max_terms_count`::
|
||||
|
||||
The maximum number of terms that can be used in Terms Query.
|
||||
Defaults to `65536`.
|
||||
|
||||
|
||||
[float]
|
||||
=== Settings in other index modules
|
||||
|
|
|
@ -49,3 +49,12 @@ removed.
|
|||
|
||||
* `levenstein` - replaced by `levenshtein`
|
||||
* `jarowinkler` - replaced by `jaro_winkler`
|
||||
|
||||
|
||||
==== Limiting the number of terms that can be used in a Terms Query request
|
||||
|
||||
Executing a Terms Query with a lot of terms may degrade the cluster performance,
|
||||
as each additional term demands extra processing and memory.
|
||||
To safeguard against this, the maximum number of terms that can be used in a
|
||||
Terms Query request has been limited to 65536. This default maximum can be changed
|
||||
for a particular index with the index setting `index.max_terms_count`.
|
||||
|
|
|
@ -57,6 +57,14 @@ across all nodes if the "reference" terms data is not large. The lookup
|
|||
terms filter will prefer to execute the get request on a local node if
|
||||
possible, reducing the need for networking.
|
||||
|
||||
[WARNING]
|
||||
Executing a Terms Query request with a lot of terms can be quite slow,
|
||||
as each additional term demands extra processing and memory.
|
||||
To safeguard against this, the maximum number of terms that can be used
|
||||
in a Terms Query, either directly or through lookup, has been limited to `65536`.
|
||||
This default maximum can be changed for a particular index with the index setting
|
||||
`index.max_terms_count`.
|
||||
|
||||
[float]
|
||||
===== Terms lookup twitter example
|
||||
At first we index the information for user with id 2, specifically, its
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
---
|
||||
"Terms Query with No.of terms exceeding index.max_terms_count should FAIL":
|
||||
- skip:
|
||||
version: " - 6.99.99"
|
||||
reason: index.max_terms_count setting has been added in 7.0.0
|
||||
- do:
|
||||
indices.create:
|
||||
index: test_index
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
index.max_terms_count: 2
|
||||
mappings:
|
||||
test_type:
|
||||
properties:
|
||||
user:
|
||||
type: keyword
|
||||
followers:
|
||||
type: keyword
|
||||
- do:
|
||||
bulk:
|
||||
refresh: true
|
||||
body:
|
||||
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u1"}}'
|
||||
- '{"user": "u1", "followers": ["u2", "u3"]}'
|
||||
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u2"}}'
|
||||
- '{"user": "u2", "followers": ["u1", "u3", "u4"]}'
|
||||
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u3"}}'
|
||||
- '{"user": "u3", "followers": ["u1"]}'
|
||||
- '{"index": {"_index": "test_index", "_type": "test_type", "_id": "u4"}}'
|
||||
- '{"user": "u4", "followers": ["u3"]}'
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_index
|
||||
body: {"query" : {"terms" : {"user" : ["u1", "u2"]}}}
|
||||
- match: { hits.total: 2 }
|
||||
|
||||
- do:
|
||||
catch: bad_request
|
||||
search:
|
||||
index: test_index
|
||||
body: {"query" : {"terms" : {"user" : ["u1", "u2", "u3"]}}}
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test_index
|
||||
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u1", "path" : "followers"}}}}
|
||||
- match: { hits.total: 2 }
|
||||
|
||||
- do:
|
||||
catch: bad_request
|
||||
search:
|
||||
index: test_index
|
||||
body: {"query" : {"terms" : {"user" : {"index" : "test_index", "type" : "test_type", "id" : "u2", "path" : "followers"}}}}
|
Loading…
Reference in New Issue