From 3812d3cb4334e8b64983e671d6131099b868e21e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 31 Oct 2017 09:59:06 +0100 Subject: [PATCH 01/17] TopHitsAggregator must propagate calls to `setScorer`. (#27138) It is required in order to work correctly with bulk scorer implementations that change the scorer during the collection process. Otherwise sub collectors might call `Scorer.score()` on the wrong scorer. Closes #27131 --- .../metrics/tophits/TopHitsAggregator.java | 5 ++ .../tophits/TopHitsAggregatorTests.java | 51 +++++++++++++++++++ .../aggregations/AggregatorTestCase.java | 4 +- 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java index 0f42118683a..700acdf797a 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java @@ -20,6 +20,8 @@ package org.elasticsearch.search.aggregations.metrics.tophits; import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.ObjectCursor; + import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.LeafCollector; @@ -93,6 +95,9 @@ public class TopHitsAggregator extends MetricsAggregator { public void setScorer(Scorer scorer) throws IOException { this.scorer = scorer; super.setScorer(scorer); + for (ObjectCursor cursor : leafCollectors.values()) { + cursor.value.setScorer(scorer); + } } @Override diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java index 43686a1465e..5555e987ec4 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java @@ -21,15 +21,22 @@ package org.elasticsearch.search.aggregations.metrics.tophits; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -39,6 +46,7 @@ import org.elasticsearch.index.mapper.UidFieldMapper; import org.elasticsearch.search.SearchHits; import org.elasticsearch.search.aggregations.Aggregation; import org.elasticsearch.search.aggregations.AggregationBuilder; 
+import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.AggregatorTestCase; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.sort.SortOrder; @@ -148,4 +156,47 @@ public class TopHitsAggregatorTests extends AggregatorTestCase { } return document; } + + public void testSetScorer() throws Exception { + Directory directory = newDirectory(); + IndexWriter w = new IndexWriter(directory, newIndexWriterConfig() + // only merge adjacent segments + .setMergePolicy(newLogMergePolicy())); + // first window (see BooleanScorer) has matches on one clause only + for (int i = 0; i < 2048; ++i) { + Document doc = new Document(); + doc.add(new StringField("_id", Uid.encodeId(Integer.toString(i)), Store.YES)); + if (i == 1000) { // any doc in 0..2048 + doc.add(new StringField("string", "bar", Store.NO)); + } + w.addDocument(doc); + } + // second window has matches in two clauses + for (int i = 0; i < 2048; ++i) { + Document doc = new Document(); + doc.add(new StringField("_id", Uid.encodeId(Integer.toString(2048 + i)), Store.YES)); + if (i == 500) { // any doc in 0..2048 + doc.add(new StringField("string", "baz", Store.NO)); + } else if (i == 1500) { + doc.add(new StringField("string", "bar", Store.NO)); + } + w.addDocument(doc); + } + + w.forceMerge(1); // we need all docs to be in the same segment + + IndexReader reader = DirectoryReader.open(w); + w.close(); + + IndexSearcher searcher = new IndexSearcher(reader); + Query query = new BooleanQuery.Builder() + .add(new TermQuery(new Term("string", "bar")), Occur.SHOULD) + .add(new TermQuery(new Term("string", "baz")), Occur.SHOULD) + .build(); + AggregationBuilder agg = AggregationBuilders.topHits("top_hits"); + TopHits result = searchAndReduce(searcher, query, agg, STRING_FIELD_TYPE); + assertEquals(3, result.getHits().totalHits); + reader.close(); + directory.close(); + } } diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index bc9de3e06aa..d3e83f03d3a 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -91,6 +91,7 @@ import static org.mockito.Mockito.when; public abstract class AggregatorTestCase extends ESTestCase { private static final String NESTEDFIELD_PREFIX = "nested_"; private List releasables = new ArrayList<>(); + private static final String TYPE_NAME = "type"; /** Create a factory for the given aggregation builder. 
*/ protected AggregatorFactory createAggregatorFactory(AggregationBuilder aggregationBuilder, @@ -104,6 +105,7 @@ public abstract class AggregatorTestCase extends ESTestCase { MapperService mapperService = mapperServiceMock(); when(mapperService.getIndexSettings()).thenReturn(indexSettings); when(mapperService.hasNested()).thenReturn(false); + when(mapperService.types()).thenReturn(Collections.singleton(TYPE_NAME)); when(searchContext.mapperService()).thenReturn(mapperService); IndexFieldDataService ifds = new IndexFieldDataService(indexSettings, new IndicesFieldDataCache(Settings.EMPTY, new IndexFieldDataCache.Listener() { @@ -115,7 +117,7 @@ public abstract class AggregatorTestCase extends ESTestCase { } }); - SearchLookup searchLookup = new SearchLookup(mapperService, ifds::getForField, new String[]{"type"}); + SearchLookup searchLookup = new SearchLookup(mapperService, ifds::getForField, new String[]{TYPE_NAME}); when(searchContext.lookup()).thenReturn(searchLookup); QueryShardContext queryShardContext = queryShardContextMock(mapperService, fieldTypes, circuitBreakerService); From a4c159e91eda56b6b75e44708dc55100a69e6c7d Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Tue, 31 Oct 2017 10:01:33 +0100 Subject: [PATCH 02/17] prevent duplicate fields when mixing parent and root nested includes (#27072) Closes #26990 --- .../index/mapper/RootObjectMapper.java | 32 ++++++++++ .../index/mapper/NestedObjectMapperTests.java | 64 +++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java index 4b2f3265323..42341bfb96b 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java @@ -74,6 +74,38 @@ public class RootObjectMapper extends ObjectMapper { return this; } + @Override + public RootObjectMapper build(BuilderContext context) { + fixRedundantIncludes(this, true); + return super.build(context); + } + + /** + * Removes redundant root includes in {@link ObjectMapper.Nested} trees to avoid duplicate + * fields on the root mapper when {@code isIncludeInRoot} is {@code true} for a node that is + * itself included into a parent node, for which either {@code isIncludeInRoot} is + * {@code true} or which is transitively included in root by a chain of nodes with + * {@code isIncludeInParent} returning {@code true}. + * @param omb Builder whose children to check. 
+ * @param parentIncluded True iff node is a child of root or a node that is included in + * root + */ + private static void fixRedundantIncludes(ObjectMapper.Builder omb, boolean parentIncluded) { + for (Object mapper : omb.mappersBuilders) { + if (mapper instanceof ObjectMapper.Builder) { + ObjectMapper.Builder child = (ObjectMapper.Builder) mapper; + Nested nested = child.nested; + boolean isNested = nested.isNested(); + boolean includeInRootViaParent = parentIncluded && isNested && nested.isIncludeInParent(); + boolean includedInRoot = isNested && nested.isIncludeInRoot(); + if (includeInRootViaParent && includedInRoot) { + child.nested = Nested.newNested(true, false); + } + fixRedundantIncludes(child, includeInRootViaParent || includedInRoot); + } + } + } + @Override protected ObjectMapper createMapper(String name, String fullPath, boolean enabled, Nested nested, Dynamic dynamic, Map mappers, @Nullable Settings settings) { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java index a3b477a4b6f..39d4de2359e 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.mapper; +import java.util.HashMap; +import java.util.HashSet; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.Version; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -333,6 +336,67 @@ public class NestedObjectMapperTests extends ESSingleNodeTestCase { assertThat(doc.docs().get(6).getFields("nested1.nested2.field2").length, equalTo(4)); } + /** + * Checks that multiple levels of nested includes where a node is both directly and transitively + * included in root by {@code include_in_root} and a chain of {@code include_in_parent} does not + * lead to duplicate fields on the root document. + */ + public void testMultipleLevelsIncludeRoot1() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject().startObject("type").startObject("properties") + .startObject("nested1").field("type", "nested").field("include_in_root", true).field("include_in_parent", true).startObject("properties") + .startObject("nested2").field("type", "nested").field("include_in_root", true).field("include_in_parent", true) + .endObject().endObject().endObject() + .endObject().endObject().endObject().string(); + + DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + + ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() + .startObject().startArray("nested1") + .startObject().startArray("nested2").startObject().field("foo", "bar") + .endObject().endArray().endObject().endArray() + .endObject() + .bytes(), + XContentType.JSON)); + + final Collection fields = doc.rootDoc().getFields(); + assertThat(fields.size(), equalTo(new HashSet<>(fields).size())); + } + + /** + * Same as {@link NestedObjectMapperTests#testMultipleLevelsIncludeRoot1()} but tests for the + * case where the transitive {@code include_in_parent} and redundant {@code include_in_root} + * happen on a chain of nodes that starts from a parent node that is not directly connected to + * root by a chain of {@code include_in_parent}, i.e. 
that has {@code include_in_parent} set to + * {@code false} and {@code include_in_root} set to {@code true}. + */ + public void testMultipleLevelsIncludeRoot2() throws Exception { + String mapping = XContentFactory.jsonBuilder() + .startObject().startObject("type").startObject("properties") + .startObject("nested1").field("type", "nested") + .field("include_in_root", true).field("include_in_parent", true).startObject("properties") + .startObject("nested2").field("type", "nested") + .field("include_in_root", true).field("include_in_parent", false).startObject("properties") + .startObject("nested3").field("type", "nested") + .field("include_in_root", true).field("include_in_parent", true) + .endObject().endObject().endObject().endObject().endObject() + .endObject().endObject().endObject().string(); + + DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); + + ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() + .startObject().startArray("nested1") + .startObject().startArray("nested2") + .startObject().startArray("nested3").startObject().field("foo", "bar") + .endObject().endArray().endObject().endArray().endObject().endArray() + .endObject() + .bytes(), + XContentType.JSON)); + + final Collection fields = doc.rootDoc().getFields(); + assertThat(fields.size(), equalTo(new HashSet<>(fields).size())); + } + public void testNestedArrayStrict() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties") .startObject("nested1").field("type", "nested").field("dynamic", "strict").startObject("properties") From c3e2bdf20c05b3c4f5be381fddaac1449b5f8bcb Mon Sep 17 00:00:00 2001 From: kel Date: Tue, 31 Oct 2017 06:17:27 -0500 Subject: [PATCH 03/17] Raise IllegalArgumentException if query validation failed (#26811) Closes #26799 --- .../search/DefaultSearchContext.java | 13 +- .../elasticsearch/search/SearchService.java | 2 +- .../AdjacencyMatrixAggregationBuilder.java | 2 +- .../search/DefaultSearchContextTests.java | 178 ++++++++++++++++++ ...djacencyMatrixAggregationBuilderTests.java | 84 +++++++++ .../search/scroll/SearchScrollIT.java | 18 +- .../migration/migrate_7_0/search.asciidoc | 4 + .../rest-api-spec/test/scroll/12_slices.yml | 6 +- .../elasticsearch/test/TestSearchContext.java | 3 - 9 files changed, 286 insertions(+), 24 deletions(-) create mode 100644 core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java create mode 100644 core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java diff --git a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java index 2c707a2b9a2..34c3c03f758 100644 --- a/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java +++ b/core/src/main/java/org/elasticsearch/search/DefaultSearchContext.java @@ -24,7 +24,6 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Collector; import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.Query; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.action.search.SearchType; @@ -81,7 +80,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; -import 
java.util.concurrent.ExecutorService; final class DefaultSearchContext extends SearchContext { @@ -200,29 +198,28 @@ final class DefaultSearchContext extends SearchContext { if (resultWindow > maxResultWindow) { if (scrollContext == null) { - throw new QueryPhaseExecutionException(this, + throw new IllegalArgumentException( "Result window is too large, from + size must be less than or equal to: [" + maxResultWindow + "] but was [" + resultWindow + "]. See the scroll api for a more efficient way to request large data sets. " + "This limit can be set by changing the [" + IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey() + "] index level setting."); } - throw new QueryPhaseExecutionException(this, + throw new IllegalArgumentException( "Batch size is too large, size must be less than or equal to: [" + maxResultWindow + "] but was [" + resultWindow + "]. Scroll batch sizes cost as much memory as result windows so they are controlled by the [" + IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey() + "] index level setting."); } if (rescore != null) { if (sort != null) { - throw new QueryPhaseExecutionException(this, "Cannot use [sort] option in conjunction with [rescore]."); + throw new IllegalArgumentException("Cannot use [sort] option in conjunction with [rescore]."); } int maxWindow = indexService.getIndexSettings().getMaxRescoreWindow(); for (RescoreContext rescoreContext: rescore) { if (rescoreContext.getWindowSize() > maxWindow) { - throw new QueryPhaseExecutionException(this, "Rescore window [" + rescoreContext.getWindowSize() + "] is too large. " + throw new IllegalArgumentException("Rescore window [" + rescoreContext.getWindowSize() + "] is too large. " + "It must be less than [" + maxWindow + "]. This prevents allocating massive heaps for storing the results " + "to be rescored. This limit can be set by changing the [" + IndexSettings.MAX_RESCORE_WINDOW_SETTING.getKey() + "] index level setting."); - } } } @@ -231,7 +228,7 @@ final class DefaultSearchContext extends SearchContext { int sliceLimit = indexService.getIndexSettings().getMaxSlicesPerScroll(); int numSlices = sliceBuilder.getMax(); if (numSlices > sliceLimit) { - throw new QueryPhaseExecutionException(this, "The number of slices [" + numSlices + "] is too large. It must " + throw new IllegalArgumentException("The number of slices [" + numSlices + "] is too large. It must " + "be less than [" + sliceLimit + "]. This limit can be set by changing the [" + IndexSettings.MAX_SLICES_PER_SCROLL.getKey() + "] index level setting."); } diff --git a/core/src/main/java/org/elasticsearch/search/SearchService.java b/core/src/main/java/org/elasticsearch/search/SearchService.java index 4ff3a65553a..8277f2733a4 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchService.java +++ b/core/src/main/java/org/elasticsearch/search/SearchService.java @@ -650,7 +650,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private void contextScrollKeepAlive(SearchContext context, long keepAlive) throws IOException { if (keepAlive > maxKeepAlive) { - throw new QueryPhaseExecutionException(context, + throw new IllegalArgumentException( "Keep alive for scroll (" + TimeValue.timeValueMillis(keepAlive).format() + ") is too large. " + "It must be less than (" + TimeValue.timeValueMillis(maxKeepAlive).format() + "). 
" + "This limit can be set by changing the [" + MAX_KEEPALIVE_SETTING.getKey() + "] cluster level setting."); diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilder.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilder.java index 7d969e4067e..325e8b07ca6 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilder.java @@ -189,7 +189,7 @@ public class AdjacencyMatrixAggregationBuilder extends AbstractAggregationBuilde throws IOException { int maxFilters = context.indexShard().indexSettings().getMaxAdjacencyMatrixFilters(); if (filters.size() > maxFilters){ - throw new QueryPhaseExecutionException(context, + throw new IllegalArgumentException( "Number of filters is too large, must be less than or equal to: [" + maxFilters + "] but was [" + filters.size() + "]." + "This limit can be set by changing the [" + IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING.getKey() diff --git a/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java b/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java new file mode 100644 index 00000000000..c20724b8a92 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/DefaultSearchContextTests.java @@ -0,0 +1,178 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.QueryCachingPolicy; +import org.apache.lucene.search.Sort; +import org.apache.lucene.store.Directory; +import org.elasticsearch.Version; +import org.elasticsearch.action.search.SearchType; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.unit.TimeValue; +import org.elasticsearch.common.util.BigArrays; +import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.cache.IndexCache; +import org.elasticsearch.index.cache.query.QueryCache; +import org.elasticsearch.index.engine.Engine; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.AbstractQueryBuilder; +import org.elasticsearch.index.query.ParsedQuery; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.internal.AliasFilter; +import org.elasticsearch.search.internal.ScrollContext; +import org.elasticsearch.search.internal.ShardSearchRequest; +import org.elasticsearch.search.rescore.RescoreContext; +import org.elasticsearch.search.slice.SliceBuilder; +import org.elasticsearch.search.sort.SortAndFormats; +import org.elasticsearch.test.ESTestCase; + +import java.util.UUID; + +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Matchers.anyObject; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + + +public class DefaultSearchContextTests extends ESTestCase { + + public void testPreProcess() throws Exception { + TimeValue timeout = new TimeValue(randomIntBetween(1, 100)); + ShardSearchRequest shardSearchRequest = mock(ShardSearchRequest.class); + when(shardSearchRequest.searchType()).thenReturn(SearchType.DEFAULT); + ShardId shardId = new ShardId("index", UUID.randomUUID().toString(), 1); + when(shardSearchRequest.shardId()).thenReturn(shardId); + when(shardSearchRequest.types()).thenReturn(new String[]{}); + + IndexShard indexShard = mock(IndexShard.class); + QueryCachingPolicy queryCachingPolicy = mock(QueryCachingPolicy.class); + when(indexShard.getQueryCachingPolicy()).thenReturn(queryCachingPolicy); + + int maxResultWindow = randomIntBetween(50, 100); + int maxRescoreWindow = randomIntBetween(50, 100); + int maxSlicesPerScroll = randomIntBetween(50, 100); + Settings settings = Settings.builder() + .put("index.max_result_window", maxResultWindow) + .put("index.max_slices_per_scroll", maxSlicesPerScroll) + .put("index.max_rescore_window", maxRescoreWindow) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2) + .build(); + + IndexService indexService = mock(IndexService.class); + IndexCache indexCache = mock(IndexCache.class); + QueryCache queryCache = mock(QueryCache.class); + when(indexCache.query()).thenReturn(queryCache); + when(indexService.cache()).thenReturn(indexCache); + QueryShardContext queryShardContext = 
mock(QueryShardContext.class); + when(indexService.newQueryShardContext(eq(shardId.id()), anyObject(), anyObject(), anyString())).thenReturn(queryShardContext); + MapperService mapperService = mock(MapperService.class); + when(mapperService.hasNested()).thenReturn(randomBoolean()); + when(indexService.mapperService()).thenReturn(mapperService); + + IndexMetaData indexMetaData = IndexMetaData.builder("index").settings(settings).build(); + IndexSettings indexSettings = new IndexSettings(indexMetaData, Settings.EMPTY); + when(indexService.getIndexSettings()).thenReturn(indexSettings); + + BigArrays bigArrays = new MockBigArrays(Settings.EMPTY, new NoneCircuitBreakerService()); + + try (Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + IndexReader reader = w.getReader(); + Engine.Searcher searcher = new Engine.Searcher("test", new IndexSearcher(reader))) { + + DefaultSearchContext context1 = new DefaultSearchContext(1L, shardSearchRequest, null, searcher, indexService, + indexShard, bigArrays, null, timeout, null, null); + context1.from(300); + + // resultWindow greater than maxResultWindow and scrollContext is null + IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> context1.preProcess(false)); + assertThat(exception.getMessage(), equalTo("Result window is too large, from + size must be less than or equal to:" + + " [" + maxResultWindow + "] but was [310]. See the scroll api for a more efficient way to request large data sets. " + + "This limit can be set by changing the [" + IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey() + + "] index level setting.")); + + // resultWindow greater than maxResultWindow and scrollContext isn't null + context1.scrollContext(new ScrollContext()); + exception = expectThrows(IllegalArgumentException.class, () -> context1.preProcess(false)); + assertThat(exception.getMessage(), equalTo("Batch size is too large, size must be less than or equal to: [" + + maxResultWindow + "] but was [310]. Scroll batch sizes cost as much memory as result windows so they are " + + "controlled by the [" + IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey() + "] index level setting.")); + + // resultWindow not greater than maxResultWindow and both rescore and sort are not null + context1.from(0); + DocValueFormat docValueFormat = mock(DocValueFormat.class); + SortAndFormats sortAndFormats = new SortAndFormats(new Sort(), new DocValueFormat[]{docValueFormat}); + context1.sort(sortAndFormats); + + RescoreContext rescoreContext = mock(RescoreContext.class); + when(rescoreContext.getWindowSize()).thenReturn(500); + context1.addRescore(rescoreContext); + + exception = expectThrows(IllegalArgumentException.class, () -> context1.preProcess(false)); + assertThat(exception.getMessage(), equalTo("Cannot use [sort] option in conjunction with [rescore].")); + + // rescore is null but sort is not null and rescoreContext.getWindowSize() exceeds maxResultWindow + context1.sort(null); + exception = expectThrows(IllegalArgumentException.class, () -> context1.preProcess(false)); + + assertThat(exception.getMessage(), equalTo("Rescore window [" + rescoreContext.getWindowSize() + "] is too large. " + + "It must be less than [" + maxRescoreWindow + "]. This prevents allocating massive heaps for storing the results " + + "to be rescored. 
This limit can be set by changing the [" + IndexSettings.MAX_RESCORE_WINDOW_SETTING.getKey() + + "] index level setting.")); + + // rescore is null but sliceBuilder is not null + DefaultSearchContext context2 = new DefaultSearchContext(2L, shardSearchRequest, null, searcher, indexService, + indexShard, bigArrays, null, timeout, null, null); + + SliceBuilder sliceBuilder = mock(SliceBuilder.class); + int numSlices = maxSlicesPerScroll + randomIntBetween(1, 100); + when(sliceBuilder.getMax()).thenReturn(numSlices); + context2.sliceBuilder(sliceBuilder); + + exception = expectThrows(IllegalArgumentException.class, () -> context2.preProcess(false)); + assertThat(exception.getMessage(), equalTo("The number of slices [" + numSlices + "] is too large. It must " + + "be less than [" + maxSlicesPerScroll + "]. This limit can be set by changing the [" + + IndexSettings.MAX_SLICES_PER_SCROLL.getKey() + "] index level setting.")); + + // No exceptions should be thrown + when(shardSearchRequest.getAliasFilter()).thenReturn(AliasFilter.EMPTY); + when(shardSearchRequest.indexBoost()).thenReturn(AbstractQueryBuilder.DEFAULT_BOOST); + + DefaultSearchContext context3 = new DefaultSearchContext(3L, shardSearchRequest, null, searcher, indexService, + indexShard, bigArrays, null, timeout, null, null); + ParsedQuery parsedQuery = ParsedQuery.parsedMatchAllQuery(); + context3.sliceBuilder(null).parsedQuery(parsedQuery).preProcess(false); + assertEquals(context3.query(), context3.buildFilteredQuery(parsedQuery.query())); + } + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java new file mode 100644 index 00000000000..e62a38a5651 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java @@ -0,0 +1,84 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.search.aggregations.bucket.adjacency; + +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.query.QueryBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.TestSearchContext; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.is; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class AdjacencyMatrixAggregationBuilderTests extends ESTestCase { + + + public void testFilterSizeLimitation() throws Exception { + // filter size grater than max size should thrown a exception + QueryShardContext queryShardContext = mock(QueryShardContext.class); + IndexShard indexShard = mock(IndexShard.class); + Settings settings = Settings.builder() + .put("index.max_adjacency_matrix_filters", 2) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 2) + .build(); + IndexMetaData indexMetaData = IndexMetaData.builder("index").settings(settings).build(); + IndexSettings indexSettings = new IndexSettings(indexMetaData, Settings.EMPTY); + when(indexShard.indexSettings()).thenReturn(indexSettings); + SearchContext context = new TestSearchContext(queryShardContext, indexShard); + + Map filters = new HashMap<>(3); + for (int i = 0; i < 3; i++) { + QueryBuilder queryBuilder = mock(QueryBuilder.class); + // return builder itself to skip rewrite + when(queryBuilder.rewrite(queryShardContext)).thenReturn(queryBuilder); + filters.put("filter" + i, queryBuilder); + } + AdjacencyMatrixAggregationBuilder builder = new AdjacencyMatrixAggregationBuilder("dummy", filters); + IllegalArgumentException ex + = expectThrows(IllegalArgumentException.class, () -> builder.doBuild(context, null, new AggregatorFactories.Builder())); + assertThat(ex.getMessage(), equalTo("Number of filters is too large, must be less than or equal to: [2] but was [3]." 
+ + "This limit can be set by changing the [" + IndexSettings.MAX_ADJACENCY_MATRIX_FILTERS_SETTING.getKey() + + "] index level setting.")); + + // filter size not grater than max size should return an instance of AdjacencyMatrixAggregatorFactory + Map emptyFilters = Collections.emptyMap(); + + AdjacencyMatrixAggregationBuilder aggregationBuilder = new AdjacencyMatrixAggregationBuilder("dummy", emptyFilters); + AggregatorFactory factory = aggregationBuilder.doBuild(context, null, new AggregatorFactories.Builder()); + assertThat(factory instanceof AdjacencyMatrixAggregatorFactory, is(true)); + assertThat(factory.name(), equalTo("dummy")); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/scroll/SearchScrollIT.java b/core/src/test/java/org/elasticsearch/search/scroll/SearchScrollIT.java index a32db632e50..b030043faf7 100644 --- a/core/src/test/java/org/elasticsearch/search/scroll/SearchScrollIT.java +++ b/core/src/test/java/org/elasticsearch/search/scroll/SearchScrollIT.java @@ -19,7 +19,6 @@ package org.elasticsearch.search.scroll; -import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.action.search.ClearScrollResponse; import org.elasticsearch.action.search.SearchRequestBuilder; @@ -37,7 +36,6 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; -import org.elasticsearch.search.query.QueryPhaseExecutionException; import org.elasticsearch.search.sort.FieldSortBuilder; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.ESIntegTestCase; @@ -575,10 +573,10 @@ public class SearchScrollIT extends ESIntegTestCase { .setSize(1) .setScroll(TimeValue.timeValueHours(2)) .execute().actionGet()); - QueryPhaseExecutionException queryPhaseExecutionException = - (QueryPhaseExecutionException) ExceptionsHelper.unwrap(exc, QueryPhaseExecutionException.class); - assertNotNull(queryPhaseExecutionException); - assertThat(queryPhaseExecutionException.getMessage(), containsString("Keep alive for scroll (2 hours) is too large")); + IllegalArgumentException illegalArgumentException = + (IllegalArgumentException) ExceptionsHelper.unwrap(exc, IllegalArgumentException.class); + assertNotNull(illegalArgumentException); + assertThat(illegalArgumentException.getMessage(), containsString("Keep alive for scroll (2 hours) is too large")); SearchResponse searchResponse = client().prepareSearch() .setQuery(matchAllQuery()) @@ -592,10 +590,10 @@ public class SearchScrollIT extends ESIntegTestCase { exc = expectThrows(Exception.class, () -> client().prepareSearchScroll(searchResponse.getScrollId()) .setScroll(TimeValue.timeValueHours(3)).get()); - queryPhaseExecutionException = - (QueryPhaseExecutionException) ExceptionsHelper.unwrap(exc, QueryPhaseExecutionException.class); - assertNotNull(queryPhaseExecutionException); - assertThat(queryPhaseExecutionException.getMessage(), containsString("Keep alive for scroll (3 hours) is too large")); + illegalArgumentException = + (IllegalArgumentException) ExceptionsHelper.unwrap(exc, IllegalArgumentException.class); + assertNotNull(illegalArgumentException); + assertThat(illegalArgumentException.getMessage(), containsString("Keep alive for scroll (3 hours) is too large")); } private void assertToXContentResponse(ClearScrollResponse response, boolean succeed, int numFreed) throws IOException { diff --git 
a/docs/reference/migration/migrate_7_0/search.asciidoc b/docs/reference/migration/migrate_7_0/search.asciidoc index 5811af3ba6b..e33a172ca27 100644 --- a/docs/reference/migration/migrate_7_0/search.asciidoc +++ b/docs/reference/migration/migrate_7_0/search.asciidoc @@ -22,3 +22,7 @@ PUT /_cluster/settings -------------------------------------------------- // CONSOLE +=== `_search/scroll` returns `400` for invalid requests + +The `/_search/scroll` endpoint returns `400 - Bad request` when the request invalid, while it would previously +return `500 - Internal Server Error` in such case. diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/12_slices.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/12_slices.yml index ac66af0095e..4acc4d13232 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/12_slices.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/scroll/12_slices.yml @@ -103,8 +103,12 @@ setup: --- "Sliced scroll with invalid arguments": + - skip: + version: " - 6.99.99" + reason: Prior versions return 500 rather than 404 + - do: - catch: /query_phase_execution_exception.*The number of slices.*index.max_slices_per_scroll/ + catch: bad_request search: index: test_sliced_scroll size: 1 diff --git a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java index 64f190f402c..9d033835616 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java +++ b/test/framework/src/main/java/org/elasticsearch/test/TestSearchContext.java @@ -24,15 +24,12 @@ import org.apache.lucene.search.Query; import org.apache.lucene.util.Counter; import org.elasticsearch.action.search.SearchTask; import org.elasticsearch.action.search.SearchType; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.BigArrays; -import org.elasticsearch.common.util.concurrent.ThreadContext; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.cache.bitset.BitsetFilterCache; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.fielddata.IndexFieldData; -import org.elasticsearch.index.fielddata.IndexFieldDataService; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.ObjectMapper; From 34666844b3d2a2e36f0681e982a8a30b0c1a8666 Mon Sep 17 00:00:00 2001 From: javanna Date: Tue, 31 Oct 2017 12:36:00 +0100 Subject: [PATCH 04/17] [DOCS] Clarify migrate guide and search request validation Relates to #26811 --- .../AdjacencyMatrixAggregationBuilderTests.java | 1 - .../reference/migration/migrate_7_0/search.asciidoc | 13 ++++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java index e62a38a5651..643344bb3bb 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/adjacency/AdjacencyMatrixAggregationBuilderTests.java @@ -43,7 +43,6 @@ import static org.mockito.Mockito.when; public class AdjacencyMatrixAggregationBuilderTests extends ESTestCase { 
- public void testFilterSizeLimitation() throws Exception { // filter size grater than max size should thrown a exception QueryShardContext queryShardContext = mock(QueryShardContext.class); diff --git a/docs/reference/migration/migrate_7_0/search.asciidoc b/docs/reference/migration/migrate_7_0/search.asciidoc index e33a172ca27..a2e5d1ccf85 100644 --- a/docs/reference/migration/migrate_7_0/search.asciidoc +++ b/docs/reference/migration/migrate_7_0/search.asciidoc @@ -22,7 +22,14 @@ PUT /_cluster/settings -------------------------------------------------- // CONSOLE -=== `_search/scroll` returns `400` for invalid requests +==== Search API returns `400` for invalid requests -The `/_search/scroll` endpoint returns `400 - Bad request` when the request invalid, while it would previously -return `500 - Internal Server Error` in such case. +The Search API returns `400 - Bad request` while it would previously return +`500 - Internal Server Error` in the following cases of invalid request: + +* the result window is too large +* sort is used in combination with rescore +* the rescore window is too large +* the number of slices is too large +* keep alive for scroll is too large +* number of filters in the adjacency matrix aggregation is too large From bd0261916c19a73da351d53d465d2394f6bde408 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Tue, 31 Oct 2017 14:08:44 +0200 Subject: [PATCH 05/17] Fix Laplace scorer to multiply by alpha (and not add) (#27125) --- .../search/suggest/phrase/CandidateScorer.java | 2 +- .../search/suggest/phrase/LaplaceScorer.java | 9 +++++++-- .../search/suggest/phrase/WordScorer.java | 11 ++++++----- .../search/suggesters/phrase-suggest.asciidoc | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java index d24ce6b3c29..3928a16b7c9 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/CandidateScorer.java @@ -93,7 +93,7 @@ final class CandidateScorer { private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue corrections, double cutoffScore, double score) throws IOException { score = Math.exp(score); - assert Math.abs(score - score(path, candidates)) < 0.00001; + assert Math.abs(score - score(path, candidates)) < 0.00001 : "cur_score=" + score + ", path_score=" + score(path,candidates); if (score > cutoffScore) { if (corrections.size() < maxNumCorrections) { Candidate[] c = new Candidate[candidates.length]; diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java index 562da448466..d9797a4207e 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/LaplaceScorer.java @@ -38,10 +38,15 @@ final class LaplaceScorer extends WordScorer { return this.alpha; } + @Override + protected double scoreUnigram(Candidate word) throws IOException { + return (alpha + frequency(word.term)) / (vocabluarySize + alpha * numTerms); + } + @Override protected double scoreBigram(Candidate word, Candidate w_1) throws IOException { join(separator, spare, w_1.term, word.term); - return (alpha + frequency(spare.get())) / (alpha + w_1.frequency + vocabluarySize); + return (alpha + 
frequency(spare.get())) / (w_1.frequency + alpha * numTerms); } @Override @@ -49,7 +54,7 @@ final class LaplaceScorer extends WordScorer { join(separator, spare, w_2.term, w_1.term, word.term); long trigramCount = frequency(spare.get()); join(separator, spare, w_1.term, word.term); - return (alpha + trigramCount) / (alpha + frequency(spare.get()) + vocabluarySize); + return (alpha + trigramCount) / (frequency(spare.get()) + alpha * numTerms); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java index a1c41e40151..22515489ee2 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/WordScorer.java @@ -40,8 +40,8 @@ public abstract class WordScorer { protected final double realWordLikelyhood; protected final BytesRefBuilder spare = new BytesRefBuilder(); protected final BytesRef separator; + protected final long numTerms; private final TermsEnum termsEnum; - private final long numTerms; private final boolean useTotalTermFreq; public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException { @@ -57,10 +57,11 @@ public abstract class WordScorer { final long vocSize = terms.getSumTotalTermFreq(); this.vocabluarySize = vocSize == -1 ? reader.maxDoc() : vocSize; this.useTotalTermFreq = vocSize != -1; - long numTerms = terms.size(); - // -1 cannot be used as value, because scoreUnigram(...) can then divide by 0 if vocabluarySize is 1. - // -1 is returned when terms is a MultiTerms instance. - this.numTerms = vocabluarySize + numTerms > 1 ? numTerms : 0; + // terms.size() might be -1 if it's a MultiTerms instance. In that case, + // use reader.maxDoc() as an approximation. This also protects from + // division by zero, by scoreUnigram. + final long nTerms = terms.size(); + this.numTerms = nTerms == -1 ? reader.maxDoc() : nTerms; this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now this.reader = reader; this.realWordLikelyhood = realWordLikelyHood; diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc index 92138e7ecdf..cba299e97cb 100644 --- a/docs/reference/search/suggesters/phrase-suggest.asciidoc +++ b/docs/reference/search/suggesters/phrase-suggest.asciidoc @@ -126,7 +126,7 @@ The response contains suggestions scored by the most likely spell correction fir "options" : [ { "text" : "nobel prize", "highlighted": "nobel prize", - "score" : 0.5962314 + "score" : 0.48614594 }] } ] From 506a2c276d905c84254e3cc2a90436e0fa21e83d Mon Sep 17 00:00:00 2001 From: javanna Date: Tue, 31 Oct 2017 15:24:46 +0100 Subject: [PATCH 06/17] [DOCS] Link remote info API in Cross Cluster Search docs page Closes #26327 --- docs/reference/modules/cross-cluster-search.asciidoc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/reference/modules/cross-cluster-search.asciidoc b/docs/reference/modules/cross-cluster-search.asciidoc index eb755564ed5..315149941f4 100644 --- a/docs/reference/modules/cross-cluster-search.asciidoc +++ b/docs/reference/modules/cross-cluster-search.asciidoc @@ -204,3 +204,11 @@ will be prefixed with their remote cluster name: to `false` (defaults to `true`) to prevent certain nodes from connecting to remote clusters. 
Cross-cluster search requests must be sent to a node that is allowed to act as a cross-cluster client. + +[float] +[[retrieve-remote-clusters-info]] +=== Retrieving remote clusters info + +The <> allows to retrieve +information about the configured remote clusters, as well as the remote +nodes that the Cross Cluster Search node is connected to. From 13cd08b1e6bc014b39b3d8a03ede3962c98a0b88 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Tue, 31 Oct 2017 16:11:18 +0100 Subject: [PATCH 07/17] Convert index blocks to cluster block exceptions (#27050) --- .../template/delete/TransportDeleteIndexTemplateAction.java | 2 +- .../indices/template/put/TransportPutIndexTemplateAction.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/template/delete/TransportDeleteIndexTemplateAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/template/delete/TransportDeleteIndexTemplateAction.java index bb18b57fa93..ad9f73b55b0 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/template/delete/TransportDeleteIndexTemplateAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/template/delete/TransportDeleteIndexTemplateAction.java @@ -62,7 +62,7 @@ public class TransportDeleteIndexTemplateAction extends TransportMasterNodeActio @Override protected ClusterBlockException checkBlock(DeleteIndexTemplateRequest request, ClusterState state) { - return state.blocks().indexBlockedException(ClusterBlockLevel.METADATA_WRITE, ""); + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); } @Override diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutIndexTemplateAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutIndexTemplateAction.java index 342b2397773..7d9897b112e 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutIndexTemplateAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/template/put/TransportPutIndexTemplateAction.java @@ -66,7 +66,7 @@ public class TransportPutIndexTemplateAction extends TransportMasterNodeAction

Date: Tue, 31 Oct 2017 20:04:00 -0400 Subject: [PATCH 08/17] Docs: restore now fails if it encounters incompatible settings (#26933) This change was introduced in 5.0.0, but the documentation wasn't updated to reflect it. Closes #26453 --- docs/reference/modules/snapshots.asciidoc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/reference/modules/snapshots.asciidoc b/docs/reference/modules/snapshots.asciidoc index d269ce04567..59d951f6aea 100644 --- a/docs/reference/modules/snapshots.asciidoc +++ b/docs/reference/modules/snapshots.asciidoc @@ -505,16 +505,18 @@ same size or topology. However, the version of the new cluster should be the sa If the new cluster has a smaller size additional considerations should be made. First of all it's necessary to make sure that new cluster have enough capacity to store all indices in the snapshot. It's possible to change indices settings during restore to reduce the number of replicas, which can help with restoring snapshots into smaller cluster. It's also -possible to select only subset of the indices using the `indices` parameter. Prior to version 1.5.0 elasticsearch -didn't check restored persistent settings making it possible to accidentally restore an incompatible -`discovery.zen.minimum_master_nodes` setting, and as a result disable a smaller cluster until the required number of -master eligible nodes is added. Starting with version 1.5.0 incompatible settings are ignored. +possible to select only subset of the indices using the `indices` parameter. If indices in the original cluster were assigned to particular nodes using <>, the same rules will be enforced in the new cluster. Therefore if the new cluster doesn't contain nodes with appropriate attributes that a restored index can be allocated on, such index will not be successfully restored unless these index allocation settings are changed during restore operation. +The restore operation also checks that restored persistent settings are compatible with the current cluster to avoid accidentally +restoring an incompatible settings such as `discovery.zen.minimum_master_nodes` and as a result disable a smaller cluster until the +required number of master eligible nodes is added. If you need to restore a snapshot with incompatible persistent settings, try +restoring it without the global cluster state. + [float] === Snapshot status @@ -596,7 +598,7 @@ state. Once recovery of primary shards is completed Elasticsearch is switching t creates the required number of replicas at this moment cluster switches to the `yellow` state. Once all required replicas are created, the cluster switches to the `green` states. -The cluster health operation provides only a high level status of the restore process. It’s possible to get more +The cluster health operation provides only a high level status of the restore process. It's possible to get more detailed insight into the current state of the recovery process by using <> and <> APIs. 
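The reworded snapshot documentation above points at a practical escape hatch: a snapshot whose persistent settings would be rejected can still be restored by skipping the global cluster state. A minimal sketch of such a request, assuming a repository named `my_backup` and a snapshot named `snapshot_1` (both hypothetical names):

["source","sh"]
--------------------------------------------
# Restore only the indices; skip the global cluster state so that
# persistent settings stored in the snapshot are not applied.
curl -XPOST 'localhost:9200/_snapshot/my_backup/snapshot_1/_restore' \
     -H 'Content-Type: application/json' -d '
{
  "include_global_state": false
}'
--------------------------------------------

With `include_global_state` set to `false`, a setting such as `discovery.zen.minimum_master_nodes` recorded from a larger source cluster is not carried over, so it cannot disable a smaller target cluster.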
From fd73e5fa41a8e50e1ea1103ba67e3e23f483d358 Mon Sep 17 00:00:00 2001 From: Jack Conradson Date: Tue, 31 Oct 2017 17:49:52 -0700 Subject: [PATCH 09/17] Add version 6.0.0 --- core/src/main/java/org/elasticsearch/Version.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/core/src/main/java/org/elasticsearch/Version.java b/core/src/main/java/org/elasticsearch/Version.java index dbb7bb13734..666ef0e59e9 100644 --- a/core/src/main/java/org/elasticsearch/Version.java +++ b/core/src/main/java/org/elasticsearch/Version.java @@ -118,6 +118,9 @@ public class Version implements Comparable { public static final int V_6_0_0_rc2_ID = 6000052; public static final Version V_6_0_0_rc2 = new Version(V_6_0_0_rc2_ID, org.apache.lucene.util.Version.LUCENE_7_0_1); + public static final int V_6_0_0_ID = 6000099; + public static final Version V_6_0_0 = + new Version(V_6_0_0_ID, org.apache.lucene.util.Version.LUCENE_7_0_1); public static final int V_6_1_0_ID = 6010099; public static final Version V_6_1_0 = new Version(V_6_1_0_ID, org.apache.lucene.util.Version.LUCENE_7_1_0); @@ -143,6 +146,8 @@ public class Version implements Comparable { return V_7_0_0_alpha1; case V_6_1_0_ID: return V_6_1_0; + case V_6_0_0_ID: + return V_6_0_0; case V_6_0_0_rc2_ID: return V_6_0_0_rc2; case V_6_0_0_beta2_ID: From b71f7d355941f1a9c548abad4b77c0c9958e6aa2 Mon Sep 17 00:00:00 2001 From: Toby McLaughlin Date: Wed, 1 Nov 2017 14:24:30 +1100 Subject: [PATCH 10/17] Update Docker docs for 6.0.0-rc2 (#27166) * Update Docker docs for 6.0.0-rc2 * Update the docs to match the new Docker "image flavours" of "basic", "platinum", and "oss". * Clarifications for Openshift and bind-mounts * Bump docker-compose 2.x format to 2.2 * Combine Docker Toolbox instructions for setting vm.max_map_count for both macOS + Windows * devicemapper is not the default storage driver any more on RHEL --- docs/Versions.asciidoc | 3 +- docs/reference/setup/install.asciidoc | 4 +- docs/reference/setup/install/docker.asciidoc | 125 ++++++++++++------- 3 files changed, 87 insertions(+), 45 deletions(-) diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index b6a6f91d232..6127d599acf 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -14,7 +14,8 @@ release-state can be: released | prerelease | unreleased :issue: https://github.com/elastic/elasticsearch/issues/ :pull: https://github.com/elastic/elasticsearch/pull/ -:docker-image: docker.elastic.co/elasticsearch/elasticsearch:{version} +:docker-repo: docker.elastic.co/elasticsearch/elasticsearch +:docker-image: {docker-repo}:{version} :plugin_url: https://artifacts.elastic.co/downloads/elasticsearch-plugins /////// diff --git a/docs/reference/setup/install.asciidoc b/docs/reference/setup/install.asciidoc index 484d9dea970..babdccc2d95 100644 --- a/docs/reference/setup/install.asciidoc +++ b/docs/reference/setup/install.asciidoc @@ -37,7 +37,9 @@ Elasticsearch on Windows. MSIs may be downloaded from the Elasticsearch website. `docker`:: -An image is available for running Elasticsearch as a Docker container. It ships with {xpack-ref}/index.html[X-Pack] pre-installed and may be downloaded from the Elastic Docker Registry. +Images are available for running Elasticsearch as Docker containers. They may be +downloaded from the Elastic Docker Registry. The default image ships with +{xpack-ref}/index.html[X-Pack] pre-installed. 
+ <> diff --git a/docs/reference/setup/install/docker.asciidoc b/docs/reference/setup/install/docker.asciidoc index db5518694ab..1bcdefc5bc2 100644 --- a/docs/reference/setup/install/docker.asciidoc +++ b/docs/reference/setup/install/docker.asciidoc @@ -1,32 +1,54 @@ [[docker]] === Install Elasticsearch with Docker -Elasticsearch is also available as a Docker image. -The image is built with {xpack-ref}/index.html[X-Pack] and uses https://hub.docker.com/_/centos/[centos:7] as the base image. -The source code can be found on https://github.com/elastic/elasticsearch-docker/tree/{branch}[GitHub]. +Elasticsearch is also available as Docker images. +The images use https://hub.docker.com/_/centos/[centos:7] as the base image and +are available with {xpack-ref}/xpack-introduction.html[X-Pack]. -==== Security note +A list of all published Docker images and tags can be found in https://www.docker.elastic.co[www.docker.elastic.co]. The source code can be found +on https://github.com/elastic/elasticsearch-docker/tree/{branch}[GitHub]. -NOTE: {xpack-ref}/index.html[X-Pack] is preinstalled in this image. -Please take a few minutes to familiarize yourself with {xpack-ref}/security-getting-started.html[X-Pack Security] and how to change default passwords. The default password for the `elastic` user is `changeme`. +==== Image types -NOTE: X-Pack includes a trial license for 30 days. After that, you can obtain one of the https://www.elastic.co/subscriptions[available subscriptions] or {ref}/security-settings.html[disable Security]. The Basic license is free and includes the https://www.elastic.co/products/x-pack/monitoring[Monitoring] extension. +The images are available in three different configurations or "flavors". The +`basic` flavor, which is the default, ships with X-Pack Basic features +pre-installed and automatically activated with a free licence. The `platinum` +flavor features all X-Pack functionally under a 30-day trial licence. The `oss` +flavor does not include X-Pack, and contains only open-source Elasticsearch. + +NOTE: {xpack-ref}/xpack-security.html[X-Pack Security] is enabled in the `platinum` +image. To access your cluster, it's necessary to set an initial password for the +`elastic` user. The initial password can be set at start up time via the +`ELASTIC_PASSWORD` environment variable: + +["source","txt",subs="attributes"] +-------------------------------------------- +docker run -e ELASTIC_PASSWORD=MagicWord {docker-repo}-platinum:{version} +-------------------------------------------- + +NOTE: The `platinum` image includes a trial license for 30 days. After that, you +can obtain one of the https://www.elastic.co/subscriptions[available +subscriptions] or revert to a Basic licence. The Basic license is free and +includes a selection of X-Pack features. Obtaining Elasticsearch for Docker is as simple as issuing a +docker pull+ command against the Elastic Docker registry. ifeval::["{release-state}"=="unreleased"] -WARNING: Version {version} of Elasticsearch has not yet been released, so no Docker image is currently available for this version. +WARNING: Version {version} of Elasticsearch has not yet been released, so no +Docker image is currently available for this version. 
endif::[] ifeval::["{release-state}"!="unreleased"] -The Docker image can be retrieved with the following command: +Docker images can be retrieved with the following commands: ["source","sh",subs="attributes"] -------------------------------------------- -docker pull {docker-image} +docker pull {docker-repo}:{version} +docker pull {docker-repo}-platinum:{version} +docker pull {docker-repo}-oss:{version} -------------------------------------------- endif::[] @@ -76,7 +98,7 @@ vm.max_map_count=262144 + To apply the setting on a live system type: `sysctl -w vm.max_map_count=262144` + -* OSX with https://docs.docker.com/engine/installation/mac/#/docker-for-mac[Docker for Mac] +* macOS with https://docs.docker.com/engine/installation/mac/#/docker-for-mac[Docker for Mac] + The `vm.max_map_count` setting must be set within the xhyve virtual machine: + @@ -93,11 +115,11 @@ Then configure the `sysctl` setting as you would for Linux: sysctl -w vm.max_map_count=262144 -------------------------------------------- + -* OSX with https://docs.docker.com/engine/installation/mac/#docker-toolbox[Docker Toolbox] +* Windows and macOS with https://www.docker.com/products/docker-toolbox[Docker Toolbox] + The `vm.max_map_count` setting must be set via docker-machine: + -["source","sh"] +["source","txt"] -------------------------------------------- docker-machine ssh sudo sysctl -w vm.max_map_count=262144 @@ -109,7 +131,8 @@ To bring up the cluster, use the < bin/elasticsearch -Ecluster.name=mynewclusternam ==== Notes for production use and defaults We have collected a number of best practices for production use. +Any Docker parameters mentioned below assume the use of `docker run`. -NOTE: Any Docker parameters mentioned below assume the use of `docker run`. - -. Elasticsearch runs inside the container as user `elasticsearch` using uid:gid `1000:1000`. If you are bind-mounting a local directory or file, ensure it is readable by this user, while the <> additionally require write access. +. By default, Elasticsearch runs inside the container as user `elasticsearch` using uid:gid `1000:1000`. ++ +CAUTION: One exception is https://docs.openshift.com/container-platform/3.6/creating_images/guidelines.html#openshift-specific-guidelines[Openshift] which runs containers using an arbitrarily assigned user ID. Openshift will present persistent volumes with the gid set to `0` which will work without any adjustments. ++ +If you are bind-mounting a local directory or file, ensure it is readable by this user, while the <> additionally require write access. A good strategy is to grant group access to gid `1000` or `0` for the local directory. As an example, to prepare a local directory for storing data through a bind-mount: ++ + mkdir esdatadir + chmod g+rwx esdatadir + chgrp 1000 esdatadir ++ +As a last resort, you can also force the container to mutate the ownership of any bind-mounts used for the <> through the environment variable `TAKE_FILE_OWNERSHIP`; in this case they will be owned by uid:gid `1000:0` providing read/write access to the elasticsearch process as required. + . It is important to ensure increased ulimits for <> and <> are available for the Elasticsearch containers. 
Verify the https://github.com/moby/moby/tree/ea4d1243953e6b652082305a9c3cda8656edab26/contrib/init[init system] for the Docker daemon is already setting those to acceptable values and, if needed, adjust them in the Daemon, or override them per container, for example using `docker run`: + @@ -273,13 +300,22 @@ NOTE: One way of checking the Docker daemon defaults for the aforementioned ulim + docker run --rm centos:7 /bin/bash -c 'ulimit -Hn && ulimit -Sn && ulimit -Hu && ulimit -Su' + -. Swapping needs to be disabled for performance and node stability. This can be achieved through any of the methods mentioned in the <>. If you opt for the `bootstrap.memory_lock: true` approach, apart from defining it through any of the <>, you will additionally need the `memlock: true` ulimit, either defined in the https://docs.docker.com/engine/reference/commandline/dockerd/#default-ulimits[Docker Daemon] or specifically set for the container. This has been demonstrated earlier in the <>, or using `docker run`: +. Swapping needs to be disabled for performance and node stability. This can be +achieved through any of the methods mentioned in the +<>. If you opt for the +`bootstrap.memory_lock: true` approach, apart from defining it through any of +the <>, you will +additionally need the `memlock: true` ulimit, either defined in the +https://docs.docker.com/engine/reference/commandline/dockerd/#default-ulimits[Docker +Daemon] or specifically set for the container. This is demonstrated above in the +<>. If using `docker run`: + -e "bootstrap.memory_lock=true" --ulimit memlock=-1:-1 + . The image https://docs.docker.com/engine/reference/builder/#/expose[exposes] TCP ports 9200 and 9300. For clusters it is recommended to randomize the published ports with `--publish-all`, unless you are pinning one container per host. + -. Use the `ES_JAVA_OPTS` environment variable to set heap size, e.g. to use 16GB use `-e ES_JAVA_OPTS="-Xms16g -Xmx16g"` with `docker run`. It is also recommended to set a https://docs.docker.com/engine/reference/run/#user-memory-constraints[memory limit] for the container. +. Use the `ES_JAVA_OPTS` environment variable to set heap size, e.g. to use 16GB +use `-e ES_JAVA_OPTS="-Xms16g -Xmx16g"` with `docker run`. + . Pin your deployments to a specific version of the Elasticsearch Docker image, e.g. +docker.elastic.co/elasticsearch/elasticsearch:{version}+. + @@ -289,7 +325,10 @@ NOTE: One way of checking the Docker daemon defaults for the aforementioned ulim .. Elasticsearch is I/O sensitive and the Docker storage driver is not ideal for fast I/O .. It allows the use of advanced https://docs.docker.com/engine/extend/plugins/#volume-plugins[Docker volume plugins] + -. If you are using the devicemapper storage driver (default on at least RedHat (rpm) based distributions) make sure you are not using the default `loop-lvm` mode. Configure docker-engine to use https://docs.docker.com/engine/userguide/storagedriver/device-mapper-driver/#configure-docker-with-devicemapper[direct-lvm] instead. +. If you are using the devicemapper storage driver, make sure you are not using +the default `loop-lvm` mode. Configure docker-engine to use +https://docs.docker.com/engine/userguide/storagedriver/device-mapper-driver/#configure-docker-with-devicemapper[direct-lvm] +instead. + . Consider centralizing your logs by using a different https://docs.docker.com/engine/admin/logging/overview/[logging driver]. Also note that the default json-file logging driver is not ideally suited for production use. 
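The production notes above can be pulled together into a single `docker run` invocation. The sketch below is illustrative only and is not part of the patch: it assumes the image's default data directory of `/usr/share/elasticsearch/data`, an illustrative 16GB heap, and a typical `nofile` limit of 65536; the bind-mount preparation follows the gid `1000` strategy described above, and the image tag is pinned as recommended.

["source","sh",subs="attributes"]
--------------------------------------------
# Prepare a local directory for the data path so that it is readable and
# writable by gid 1000, as described in the bind-mount notes above.
mkdir esdatadir
chmod g+rwx esdatadir
chgrp 1000 esdatadir

# Single-node example combining the recommendations above: pinned image tag,
# swapping disabled via bootstrap.memory_lock and the memlock ulimit, an
# explicit heap size, a raised open-file limit (65536 is an assumed value),
# published HTTP/transport ports, and a bind-mounted data path (the target
# path inside the container is assumed to be the image default).
docker run -d --name elasticsearch \
  -p 9200:9200 -p 9300:9300 \
  -e "bootstrap.memory_lock=true" --ulimit memlock=-1:-1 \
  --ulimit nofile=65536:65536 \
  -e ES_JAVA_OPTS="-Xms16g -Xmx16g" \
  -v "$PWD"/esdatadir:/usr/share/elasticsearch/data \
  {docker-repo}:{version}
--------------------------------------------

For the `platinum` flavor, the same command can additionally pass `-e ELASTIC_PASSWORD=MagicWord` and use the +{docker-repo}-platinum:{version}+ tag, as shown earlier in the image type notes.
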
From 354862c26eff441ae1c7e8d1c82f9d2c4cff4312 Mon Sep 17 00:00:00 2001 From: olcbean Date: Wed, 1 Nov 2017 10:54:43 +0100 Subject: [PATCH 11/17] Set request body to required to reflect the code base (#27188) Update API, Cluster Update Settings API and Put Index Template API didn't have the request body set to required in their spec, hence this commit updates the spec to align them with reality. --- .../api/cluster.put_settings.json | 3 ++- .../rest-api-spec/api/search_template.json | 23 ++++++++++--------- .../resources/rest-api-spec/api/update.json | 3 ++- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_settings.json b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_settings.json index 393d1350dd3..5fcf0310283 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_settings.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/cluster.put_settings.json @@ -22,7 +22,8 @@ } }, "body": { - "description": "The settings to be updated. Can be either `transient` or `persistent` (survives cluster restart)." + "description": "The settings to be updated. Can be either `transient` or `persistent` (survives cluster restart).", + "required" : true } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/search_template.json b/rest-api-spec/src/main/resources/rest-api-spec/api/search_template.json index c58cf219911..a78295dd4f5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/search_template.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/search_template.json @@ -7,8 +7,8 @@ "paths": ["/_search/template", "/{index}/_search/template", "/{index}/{type}/_search/template"], "parts": { "index": { - "type" : "list", - "description" : "A comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices" + "type" : "list", + "description" : "A comma-separated list of index names to search; use `_all` or empty string to perform the operation on all indices" }, "type": { "type" : "list", @@ -17,18 +17,18 @@ }, "params" : { "ignore_unavailable": { - "type" : "boolean", - "description" : "Whether specified concrete indices should be ignored when unavailable (missing or closed)" + "type" : "boolean", + "description" : "Whether specified concrete indices should be ignored when unavailable (missing or closed)" }, "allow_no_indices": { - "type" : "boolean", - "description" : "Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified)" + "type" : "boolean", + "description" : "Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified)" }, "expand_wildcards": { - "type" : "enum", - "options" : ["open","closed","none","all"], - "default" : "open", - "description" : "Whether to expand wildcard expression to concrete indices that are open, closed or both." + "type" : "enum", + "options" : ["open","closed","none","all"], + "default" : "open", + "description" : "Whether to expand wildcard expression to concrete indices that are open, closed or both." 
}, "preference": { "type" : "string", @@ -62,7 +62,8 @@ } }, "body": { - "description": "The search definition template and its params" + "description": "The search definition template and its params", + "required" : true } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/update.json b/rest-api-spec/src/main/resources/rest-api-spec/api/update.json index 7cacda722f5..97725917e1e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/update.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/update.json @@ -88,7 +88,8 @@ } }, "body": { - "description": "The request definition using either `script` or partial `doc`" + "description": "The request definition requires either `script` or partial `doc`", + "required": true } } } From d805c41b289b9c5594e4de7f8a09a02fa607b4f7 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 27 Oct 2017 13:14:44 +0200 Subject: [PATCH 12/17] Added new terms_set query This query returns documents that match with at least one ore more of the provided terms. The number of terms that must match varies per document and is either controlled by a minimum should match field or computed per document in a minimum should match script. Closes #26915 --- .../index/query/TermsQueryBuilder.java | 2 +- .../index/query/TermsSetQueryBuilder.java | 369 ++++++++++++++++++ .../elasticsearch/search/SearchModule.java | 2 + .../query/TermsSetQueryBuilderTests.java | 248 ++++++++++++ .../search/SearchModuleTests.java | 1 + .../query-dsl/term-level-queries.asciidoc | 8 + .../query-dsl/terms-set-query.asciidoc | 122 ++++++ 7 files changed, 751 insertions(+), 1 deletion(-) create mode 100644 core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java create mode 100644 core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java create mode 100644 docs/reference/query-dsl/terms-set-query.asciidoc diff --git a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java index 1eefbb158a6..ffb7e9d607f 100644 --- a/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/TermsQueryBuilder.java @@ -391,7 +391,7 @@ public class TermsQueryBuilder extends AbstractQueryBuilder { .queryName(queryName); } - private static List parseValues(XContentParser parser) throws IOException { + static List parseValues(XContentParser parser) throws IOException { List values = new ArrayList<>(); while (parser.nextToken() != XContentParser.Token.END_ARRAY) { Object value = parser.objectBytes(); diff --git a/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java new file mode 100644 index 00000000000..0947a67212d --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/query/TermsSetQueryBuilder.java @@ -0,0 +1,369 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.query; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CoveringQuery; +import org.apache.lucene.search.DoubleValues; +import org.apache.lucene.search.LongValues; +import org.apache.lucene.search.LongValuesSource; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.fielddata.IndexNumericFieldData; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.SearchScript; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public final class TermsSetQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "terms_set"; + + static final ParseField TERMS_FIELD = new ParseField("terms"); + static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match_field"); + static final ParseField MINIMUM_SHOULD_MATCH_SCRIPT = new ParseField("minimum_should_match_script"); + + private final String fieldName; + private final List values; + + private String minimumShouldMatchField; + private Script minimumShouldMatchScript; + + public TermsSetQueryBuilder(String fieldName, List values) { + this.fieldName = Objects.requireNonNull(fieldName); + this.values = TermsQueryBuilder.convert(Objects.requireNonNull(values)); + } + + public TermsSetQueryBuilder(StreamInput in) throws IOException { + super(in); + this.fieldName = in.readString(); + this.values = (List) in.readGenericValue(); + this.minimumShouldMatchField = in.readOptionalString(); + this.minimumShouldMatchScript = in.readOptionalWriteable(Script::new); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(fieldName); + out.writeGenericValue(values); + out.writeOptionalString(minimumShouldMatchField); + out.writeOptionalWriteable(minimumShouldMatchScript); + } + + public List getValues() { + return values; + } + + public String getMinimumShouldMatchField() { + return minimumShouldMatchField; + } + + public TermsSetQueryBuilder setMinimumShouldMatchField(String minimumShouldMatchField) { + if (minimumShouldMatchScript != null) { + throw new IllegalArgumentException("A script has already been specified. 
Cannot specify both a field and script"); + } + this.minimumShouldMatchField = minimumShouldMatchField; + return this; + } + + public Script getMinimumShouldMatchScript() { + return minimumShouldMatchScript; + } + + public TermsSetQueryBuilder setMinimumShouldMatchScript(Script minimumShouldMatchScript) { + if (minimumShouldMatchField != null) { + throw new IllegalArgumentException("A field has already been specified. Cannot specify both a field and script"); + } + this.minimumShouldMatchScript = minimumShouldMatchScript; + return this; + } + + @Override + protected boolean doEquals(TermsSetQueryBuilder other) { + return Objects.equals(fieldName, this.fieldName) && Objects.equals(values, this.values) && + Objects.equals(minimumShouldMatchField, this.minimumShouldMatchField) && + Objects.equals(minimumShouldMatchScript, this.minimumShouldMatchScript); + } + + @Override + protected int doHashCode() { + return Objects.hash(fieldName, values, minimumShouldMatchField, minimumShouldMatchScript); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.startObject(fieldName); + builder.field(TERMS_FIELD.getPreferredName(), TermsQueryBuilder.convertBack(values)); + if (minimumShouldMatchField != null) { + builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatchField); + } + if (minimumShouldMatchScript != null) { + builder.field(MINIMUM_SHOULD_MATCH_SCRIPT.getPreferredName(), minimumShouldMatchScript); + } + printBoostAndQueryName(builder); + builder.endObject(); + builder.endObject(); + } + + public static TermsSetQueryBuilder fromXContent(XContentParser parser) throws IOException { + XContentParser.Token token = parser.nextToken(); + if (token != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + String currentFieldName = parser.currentName(); + String fieldName = currentFieldName; + + token = parser.nextToken(); + if (token != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + + List values = new ArrayList<>(); + String minimumShouldMatchField = null; + Script minimumShouldMatchScript = null; + String queryName = null; + float boost = AbstractQueryBuilder.DEFAULT_BOOST; + + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.START_ARRAY) { + if (TERMS_FIELD.match(currentFieldName)) { + values = TermsQueryBuilder.parseValues(parser); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else if (token == XContentParser.Token.START_OBJECT) { + if (MINIMUM_SHOULD_MATCH_SCRIPT.match(currentFieldName)) { + minimumShouldMatchScript = Script.parse(parser); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else if (token.isValue()) { + if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName)) { + minimumShouldMatchField = parser.text(); + } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName)) { + boost = parser.floatValue(); + } else if 
(AbstractQueryBuilder.NAME_FIELD.match(currentFieldName)) { + queryName = parser.text(); + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] query does not support [" + + currentFieldName + "]"); + } + } else { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + + "] after [" + currentFieldName + "]"); + } + } + + token = parser.nextToken(); + if (token != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "]"); + } + + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(fieldName, values) + .queryName(queryName).boost(boost); + if (minimumShouldMatchField != null) { + queryBuilder.setMinimumShouldMatchField(minimumShouldMatchField); + } + if (minimumShouldMatchScript != null) { + queryBuilder.setMinimumShouldMatchScript(minimumShouldMatchScript); + } + return queryBuilder; + } + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + if (values.isEmpty()) { + return Queries.newMatchNoDocsQuery("No terms supplied for \"" + getName() + "\" query."); + } + // Fail before we attempt to create the term queries: + if (values.size() > BooleanQuery.getMaxClauseCount()) { + throw new BooleanQuery.TooManyClauses(); + } + + final MappedFieldType fieldType = context.fieldMapper(fieldName); + final List queries = new ArrayList<>(values.size()); + for (Object value : values) { + if (fieldType != null) { + queries.add(fieldType.termQuery(value, context)); + } else { + queries.add(new TermQuery(new Term(fieldName, BytesRefs.toBytesRef(value)))); + } + } + final LongValuesSource longValuesSource; + if (minimumShouldMatchField != null) { + MappedFieldType msmFieldType = context.fieldMapper(minimumShouldMatchField); + if (msmFieldType == null) { + throw new QueryShardException(context, "failed to find minimum_should_match field [" + minimumShouldMatchField + "]"); + } + + IndexNumericFieldData fieldData = context.getForField(msmFieldType); + longValuesSource = new FieldValuesSource(fieldData); + } else if (minimumShouldMatchScript != null) { + SearchScript.Factory factory = context.getScriptService().compile(minimumShouldMatchScript, SearchScript.CONTEXT); + Map params = new HashMap<>(); + params.putAll(minimumShouldMatchScript.getParams()); + params.put("num_terms", queries.size()); + SearchScript.LeafFactory leafFactory = factory.newFactory(params, context.lookup()); + longValuesSource = new ScriptLongValueSource(minimumShouldMatchScript, leafFactory); + } else { + throw new IllegalStateException("No minimum should match has been specified"); + } + return new CoveringQuery(queries, longValuesSource); + } + + static final class ScriptLongValueSource extends LongValuesSource { + + private final Script script; + private final SearchScript.LeafFactory leafFactory; + + ScriptLongValueSource(Script script, SearchScript.LeafFactory leafFactory) { + this.script = script; + this.leafFactory = leafFactory; + } + + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + SearchScript searchScript = leafFactory.newInstance(ctx); + return new LongValues() { + @Override + public long longValue() throws IOException { + return searchScript.runAsLong(); + } + + @Override + public boolean advanceExact(int doc) throws IOException { + searchScript.setDocument(doc); + return searchScript.run() != null; + } + }; + } + + @Override + public boolean needsScores() { + return false; 
+ } + + @Override + public int hashCode() { + // CoveringQuery with this field value source cannot be cachable + return System.identityHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return this == obj; + } + + @Override + public String toString() { + return "script(" + script.toString() + ")"; + } + + } + + // Forked from LongValuesSource.FieldValuesSource and changed getValues() method to always use sorted numeric + // doc values, because that is what is being used in NumberFieldMapper. + static class FieldValuesSource extends LongValuesSource { + + private final IndexNumericFieldData field; + + FieldValuesSource(IndexNumericFieldData field) { + this.field = field; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + FieldValuesSource that = (FieldValuesSource) o; + return Objects.equals(field, that.field); + } + + @Override + public String toString() { + return "long(" + field + ")"; + } + + @Override + public int hashCode() { + return Objects.hash(field); + } + + @Override + public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { + SortedNumericDocValues values = field.load(ctx).getLongValues(); + return new LongValues() { + + long current = -1; + + @Override + public long longValue() throws IOException { + return current; + } + + @Override + public boolean advanceExact(int doc) throws IOException { + boolean hasValue = values.advanceExact(doc); + if (hasValue) { + assert values.docValueCount() == 1; + current = values.nextValue(); + return true; + } else { + return false; + } + } + }; + } + + @Override + public boolean needsScores() { + return false; + } + } + +} diff --git a/core/src/main/java/org/elasticsearch/search/SearchModule.java b/core/src/main/java/org/elasticsearch/search/SearchModule.java index 7f47d11403a..53f8840f8bf 100644 --- a/core/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/core/src/main/java/org/elasticsearch/search/SearchModule.java @@ -69,6 +69,7 @@ import org.elasticsearch.index.query.SpanTermQueryBuilder; import org.elasticsearch.index.query.SpanWithinQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.TermsQueryBuilder; +import org.elasticsearch.index.query.TermsSetQueryBuilder; import org.elasticsearch.index.query.TypeQueryBuilder; import org.elasticsearch.index.query.WildcardQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; @@ -748,6 +749,7 @@ public class SearchModule { registerQuery(new QuerySpec<>(GeoPolygonQueryBuilder.NAME, GeoPolygonQueryBuilder::new, GeoPolygonQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent)); if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); diff --git a/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java new file mode 100644 index 00000000000..f3226acc2ea --- /dev/null +++ 
b/core/src/test/java/org/elasticsearch/index/query/TermsSetQueryBuilderTests.java @@ -0,0 +1,248 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.core.WhitespaceAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.NoMergePolicy; +import org.apache.lucene.search.CoveringQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest; +import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.index.fielddata.ScriptDocValues; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.script.MockScriptEngine; +import org.elasticsearch.script.MockScriptPlugin; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptType; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; +import org.elasticsearch.test.rest.yaml.ObjectPath; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Function; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +public class TermsSetQueryBuilderTests extends AbstractQueryTestCase { + + @Override + protected Collection> getPlugins() { + return Collections.singleton(CustomScriptPlugin.class); + } + + @Override + protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { + String docType = "doc"; + mapperService.merge(docType, new CompressedXContent(PutMappingRequest.buildFromSimplifiedDef(docType, + "m_s_m", "type=long" + ).string()), MapperService.MergeReason.MAPPING_UPDATE, false); + } + + @Override + protected TermsSetQueryBuilder doCreateTestQueryBuilder() { + String fieldName; + do { + fieldName 
= randomFrom(MAPPED_FIELD_NAMES); + } while (fieldName.equals(GEO_POINT_FIELD_NAME) || fieldName.equals(GEO_SHAPE_FIELD_NAME)); + int numValues = randomIntBetween(0, 10); + List randomTerms = new ArrayList<>(numValues); + for (int i = 0; i < numValues; i++) { + randomTerms.add(getRandomValueForFieldName(fieldName)); + } + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder(STRING_FIELD_NAME, randomTerms); + if (randomBoolean()) { + queryBuilder.setMinimumShouldMatchField("m_s_m"); + } else { + queryBuilder.setMinimumShouldMatchScript( + new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap())); + } + return queryBuilder; + } + + @Override + protected void doAssertLuceneQuery(TermsSetQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + if (queryBuilder.getValues().isEmpty()) { + assertThat(query, instanceOf(MatchNoDocsQuery.class)); + MatchNoDocsQuery matchNoDocsQuery = (MatchNoDocsQuery) query; + assertThat(matchNoDocsQuery.toString(), containsString("No terms supplied for \"terms_set\" query.")); + } else { + assertThat(query, instanceOf(CoveringQuery.class)); + } + } + + @Override + protected boolean isCachable(TermsSetQueryBuilder queryBuilder) { + return queryBuilder.getMinimumShouldMatchField() != null || + (queryBuilder.getMinimumShouldMatchScript() != null && queryBuilder.getValues().isEmpty()); + } + + @Override + protected boolean builderGeneratesCacheableQueries() { + return false; + } + + public void testBothFieldAndScriptSpecified() { + TermsSetQueryBuilder queryBuilder = new TermsSetQueryBuilder("_field", Collections.emptyList()); + queryBuilder.setMinimumShouldMatchScript(new Script("")); + expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchField("_field")); + + queryBuilder.setMinimumShouldMatchScript(null); + queryBuilder.setMinimumShouldMatchField("_field"); + expectThrows(IllegalArgumentException.class, () -> queryBuilder.setMinimumShouldMatchScript(new Script(""))); + } + + public void testDoToQuery() throws Exception { + try (Directory directory = newDirectory()) { + IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); + config.setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter iw = new IndexWriter(directory, config)) { + Document document = new Document(); + document.add(new TextField("message", "a b", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 2)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 1)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 2)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 3)); + iw.addDocument(document); + } + + try (IndexReader ir = DirectoryReader.open(directory)) { + QueryShardContext context = 
createShardContext(); + Query query = new TermsSetQueryBuilder("message", Arrays.asList("c", "d")) + .setMinimumShouldMatchField("m_s_m").doToQuery(context); + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC)); + assertThat(topDocs.totalHits, equalTo(3L)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(1)); + assertThat(topDocs.scoreDocs[1].doc, equalTo(3)); + assertThat(topDocs.scoreDocs[2].doc, equalTo(4)); + } + } + } + + public void testDoToQuery_msmScriptField() throws Exception { + try (Directory directory = newDirectory()) { + IndexWriterConfig config = new IndexWriterConfig(new WhitespaceAnalyzer()); + config.setMergePolicy(NoMergePolicy.INSTANCE); + try (IndexWriter iw = new IndexWriter(directory, config)) { + Document document = new Document(); + document.add(new TextField("message", "a b x y", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 50)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b x y", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 75)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c x", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 75)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c x", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 100)); + iw.addDocument(document); + + document = new Document(); + document.add(new TextField("message", "a b c d", Field.Store.NO)); + document.add(new SortedNumericDocValuesField("m_s_m", 100)); + iw.addDocument(document); + } + + try (IndexReader ir = DirectoryReader.open(directory)) { + QueryShardContext context = createShardContext(); + Script script = new Script(ScriptType.INLINE, MockScriptEngine.NAME, "_script", Collections.emptyMap()); + Query query = new TermsSetQueryBuilder("message", Arrays.asList("a", "b", "c", "d")) + .setMinimumShouldMatchScript(script).doToQuery(context); + IndexSearcher searcher = new IndexSearcher(ir); + TopDocs topDocs = searcher.search(query, 10, new Sort(SortField.FIELD_DOC)); + assertThat(topDocs.totalHits, equalTo(3L)); + assertThat(topDocs.scoreDocs[0].doc, equalTo(0)); + assertThat(topDocs.scoreDocs[1].doc, equalTo(2)); + assertThat(topDocs.scoreDocs[2].doc, equalTo(4)); + } + } + } + + public static class CustomScriptPlugin extends MockScriptPlugin { + + @Override + protected Map, Object>> pluginScripts() { + return Collections.singletonMap("_script", args -> { + try { + int clauseCount = ObjectPath.evaluate(args, "params.num_terms"); + long msm = ((ScriptDocValues.Longs) ObjectPath.evaluate(args, "doc.m_s_m")).getValue(); + return clauseCount * (msm / 100d); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }); + } + } + +} + diff --git a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java index 4988d75ca41..fccec4ed468 100644 --- a/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/core/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -323,6 +323,7 @@ public class SearchModuleTests extends ModuleTestCase { "span_within", "term", "terms", + "terms_set", "type", "wildcard", "wrapper" diff --git a/docs/reference/query-dsl/term-level-queries.asciidoc 
b/docs/reference/query-dsl/term-level-queries.asciidoc index a6aae489668..883fd4c36b5 100644 --- a/docs/reference/query-dsl/term-level-queries.asciidoc +++ b/docs/reference/query-dsl/term-level-queries.asciidoc @@ -21,6 +21,12 @@ The queries in this group are: Find documents which contain any of the exact terms specified in the field specified. +<>:: + + Find documents which match with one or more of the specified terms. The + number of terms that must match depend on the specified minimum should + match field or script. + <>:: Find documents where the field specified contains values (dates, numbers, @@ -66,6 +72,8 @@ include::term-query.asciidoc[] include::terms-query.asciidoc[] +include::terms-set-query.asciidoc[] + include::range-query.asciidoc[] include::exists-query.asciidoc[] diff --git a/docs/reference/query-dsl/terms-set-query.asciidoc b/docs/reference/query-dsl/terms-set-query.asciidoc new file mode 100644 index 00000000000..659f840cccb --- /dev/null +++ b/docs/reference/query-dsl/terms-set-query.asciidoc @@ -0,0 +1,122 @@ +[[query-dsl-terms-set-query]] +=== Terms Set Query + +experimental[The terms_set query is a new query and its syntax may change in the future] + +Returns any documents that match with at least one or more of the +provided terms. The terms are not analyzed and thus must match exactly. +The number of terms that must match varies per document and is either +controlled by a minimum should match field or computed per document in +a minimum should match script. + +The field that controls the number of required terms that must match must +be a number field: + +[source,js] +-------------------------------------------------- +PUT /my-index +{ + "mappings": { + "doc": { + "properties": { + "required_matches": { + "type": "long" + } + } + } + } +} + +PUT /my-index/doc/1?refresh +{ + "codes": ["ghi", "jkl"], + "required_matches": 2 +} + +PUT /my-index/doc/2?refresh +{ + "codes": ["def", "ghi"], + "required_matches": 2 +} +-------------------------------------------------- +// CONSOLE +// TESTSETUP + +An example that uses the minimum should match field: + +[source,js] +-------------------------------------------------- +GET /my-index/_search +{ + "query": { + "terms_set": { + "codes" : { + "terms" : ["abc", "def", "ghi"], + "minimum_should_match_field": "required_matches" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +Response: + +[source,js] +-------------------------------------------------- +{ + "took": 13, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped" : 0, + "failed": 0 + }, + "hits": { + "total": 1, + "max_score": 0.5753642, + "hits": [ + { + "_index": "my-index", + "_type": "doc", + "_id": "2", + "_score": 0.5753642, + "_source": { + "codes": ["def", "ghi"], + "required_matches": 2 + } + } + ] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took": 13,/"took": "$body.took",/] + +Scripts can also be used to control how many terms are required to match +in a more dynamic way. For example a create date or a popularity field +can be used as basis for the number of required terms to match. + +Also the `params.num_terms` parameter is available in the script to indicate the +number of terms that have been specified. 
+ +An example that always limits the number of required terms to match to never +become larger than the number of terms specified: + +[source,js] +-------------------------------------------------- +GET /my-index/_search +{ + "query": { + "terms_set": { + "codes" : { + "terms" : ["abc", "def", "ghi"], + "minimum_should_match_script": { + "source": "Math.min(params.num_terms, doc['required_matches'].value)" + } + } + } + } +} +-------------------------------------------------- +// CONSOLE From 99aca9cdfc1e207eaeb26004ee532349c7644cf4 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Wed, 1 Nov 2017 10:46:59 +0000 Subject: [PATCH 13/17] Enhances exists queries to reduce need for `_field_names` (#26930) * Enhances exists queries to reduce need for `_field_names` Before this change we wrote the name all the fields in a document to a `_field_names` field and then implemented exists queries as a term query on this field. The problem with this approach is that it bloats the index and also affects indexing performance. This change adds a new method `existsQuery()` to `MappedFieldType` which is implemented by each sub-class. For most field types if doc values are available a `DocValuesFieldExistsQuery` is used, falling back to using `_field_names` if doc values are disabled. Note that only fields where no doc values are available are written to `_field_names`. Closes #26770 * Addresses review comments * Addresses more review comments * implements existsQuery explicitly on every mapper * Reinstates ability to perform term query on `_field_names` * Added bwc depending on index created version * Review Comments * Skips tests that are not supported in 6.1.0 These values will need to be changed after backporting this PR to 6.x --- .../index/mapper/BinaryFieldMapper.java | 18 + .../index/mapper/BooleanFieldMapper.java | 14 + .../index/mapper/CompletionFieldMapper.java | 14 + .../index/mapper/DateFieldMapper.java | 14 + .../index/mapper/FieldMapper.java | 12 + .../index/mapper/FieldNamesFieldMapper.java | 20 +- .../index/mapper/GeoPointFieldMapper.java | 19 + .../index/mapper/GeoShapeFieldMapper.java | 25 +- .../index/mapper/IdFieldMapper.java | 8 +- .../index/mapper/IndexFieldMapper.java | 7 +- .../index/mapper/IpFieldMapper.java | 14 + .../index/mapper/KeywordFieldMapper.java | 15 + .../index/mapper/MappedFieldType.java | 2 + .../index/mapper/NumberFieldMapper.java | 15 + .../index/mapper/ParentFieldMapper.java | 6 + .../index/mapper/RoutingFieldMapper.java | 10 + .../index/mapper/SeqNoFieldMapper.java | 6 + .../index/mapper/SourceFieldMapper.java | 6 +- .../index/mapper/TextFieldMapper.java | 14 + .../index/mapper/TypeFieldMapper.java | 5 + .../index/mapper/UidFieldMapper.java | 6 + .../index/mapper/VersionFieldMapper.java | 6 + .../index/query/ExistsQueryBuilder.java | 57 +- .../mapper/DocumentFieldMapperTests.java | 15 +- .../index/mapper/ExternalMapper.java | 16 +- .../index/mapper/FakeStringFieldMapper.java | 19 +- .../mapper/FieldNamesFieldMapperTests.java | 144 +- .../mapper/FieldNamesFieldTypeTests.java | 39 +- .../index/mapper/FieldTypeLookupTests.java | 14 + .../index/query/ExistsQueryBuilderTests.java | 58 +- .../query/QueryStringQueryBuilderTests.java | 4 +- .../index/query/RangeQueryBuilderTests.java | 20 +- .../search/collapse/CollapseBuilderTests.java | 4 + .../search/slice/SliceBuilderTests.java | 12 + .../index/mapper/RangeFieldMapper.java | 15 + .../index/mapper/ScaledFloatFieldMapper.java | 15 + .../join/mapper/MetaJoinFieldMapper.java | 7 + 
.../join/mapper/ParentIdFieldMapper.java | 7 + .../join/mapper/ParentJoinFieldMapper.java | 8 + .../percolator/PercolatorFieldMapper.java | 16 + .../ICUCollationKeywordFieldMapper.java | 15 + .../mapper/murmur3/Murmur3FieldMapper.java | 14 +- .../test/search/160_exists_query.yml | 1289 +++++++++++++++++ .../index/mapper/FieldTypeTestCase.java | 6 + .../index/mapper/MockFieldMapper.java | 20 +- 45 files changed, 1880 insertions(+), 190 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml diff --git a/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java index 024e0439ac5..1838b60050e 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java @@ -20,10 +20,14 @@ package org.elasticsearch.index.mapper; import com.carrotsearch.hppc.ObjectArrayList; + import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; @@ -126,6 +130,15 @@ public class BinaryFieldMapper extends FieldMapper { return new BytesBinaryDVIndexFieldData.Builder(); } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Binary fields do not support searching"); @@ -165,6 +178,11 @@ public class BinaryFieldMapper extends FieldMapper { } else { field.add(value); } + } else { + // Only add an entry to the field names field if the field is stored + // but has no doc values so exists query will work on a field with + // no doc values + createFieldNamesField(context, fields); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index 6fe8a37a46e..45cd9e17ad1 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -23,7 +23,10 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; @@ -136,6 +139,15 @@ public class BooleanFieldMapper extends FieldMapper { return CONTENT_TYPE; } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Boolean nullValue() { 
return (Boolean)super.nullValue(); @@ -253,6 +265,8 @@ public class BooleanFieldMapper extends FieldMapper { } if (fieldType().hasDocValues()) { fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0)); + } else { + createFieldNamesField(context, fields); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java index 1ab84eda639..1c92150676c 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java @@ -21,6 +21,8 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.suggest.document.Completion50PostingsFormat; import org.apache.lucene.search.suggest.document.CompletionAnalyzer; import org.apache.lucene.search.suggest.document.CompletionQuery; @@ -40,11 +42,13 @@ import org.elasticsearch.common.xcontent.XContentParser.NumberType; import org.elasticsearch.common.xcontent.XContentParser.Token; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.suggest.completion.CompletionSuggester; import org.elasticsearch.search.suggest.completion.context.ContextMapping; import org.elasticsearch.search.suggest.completion.context.ContextMappings; import java.io.IOException; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; @@ -257,6 +261,11 @@ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapp return postingsFormat; } + @Override + public Query existsQuery(QueryShardContext context) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + /** * Completion prefix query */ @@ -456,6 +465,11 @@ public class CompletionFieldMapper extends FieldMapper implements ArrayValueMapp context.doc().add(new SuggestField(fieldType().name(), input, metaData.weight)); } } + List fields = new ArrayList<>(1); + createFieldNamesField(context, fields); + for (IndexableField field : fields) { + context.doc().add(field); + } multiFields.parse(this, context); return null; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 6de6e860a8f..36e7a73aa9a 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -26,9 +26,12 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; @@ -245,6 +248,15 @@ public class DateFieldMapper extends FieldMapper { return 
dateTimeFormatter().parser().parseMillis(value); } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { Query query = rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context); @@ -451,6 +463,8 @@ public class DateFieldMapper extends FieldMapper { } if (fieldType().hasDocValues()) { fields.add(new SortedNumericDocValuesField(fieldType().name(), timestamp)); + } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) { + createFieldNamesField(context, fields); } if (fieldType().stored()) { fields.add(new StoredField(fieldType().name(), timestamp)); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 589670fcccd..c6e0dd9c00b 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.carrotsearch.hppc.cursors.ObjectObjectCursor; +import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; @@ -33,6 +34,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityService; @@ -285,6 +287,16 @@ public abstract class FieldMapper extends Mapper implements Cloneable { */ protected abstract void parseCreateField(ParseContext context, List fields) throws IOException; + protected void createFieldNamesField(ParseContext context, List fields) { + FieldNamesFieldType fieldNamesFieldType = (FieldNamesFieldMapper.FieldNamesFieldType) context.docMapper() + .metadataMapper(FieldNamesFieldMapper.class).fieldType(); + if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) { + for (String fieldName : FieldNamesFieldMapper.extractFieldNames(fieldType().name())) { + fields.add(new Field(FieldNamesFieldMapper.NAME, fieldName, fieldNamesFieldType)); + } + } + } + @Override public Iterator iterator() { return multiFields.iterator(); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java index c2923be4c74..8482a94cfc7 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java @@ -23,6 +23,10 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.ESLoggerFactory; import 
org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -44,6 +48,9 @@ import java.util.Objects; */ public class FieldNamesFieldMapper extends MetadataFieldMapper { + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger( + ESLoggerFactory.getLogger(FieldNamesFieldMapper.class)); + public static final String NAME = "_field_names"; public static final String CONTENT_TYPE = "_field_names"; @@ -178,11 +185,18 @@ public class FieldNamesFieldMapper extends MetadataFieldMapper { return enabled; } + @Override + public Query existsQuery(QueryShardContext context) { + throw new UnsupportedOperationException("Cannot run exists query on _field_names"); + } + @Override public Query termQuery(Object value, QueryShardContext context) { if (isEnabled() == false) { throw new IllegalStateException("Cannot run [exists] queries if the [_field_names] field is disabled"); } + DEPRECATION_LOGGER.deprecated( + "terms query on the _field_names field is deprecated and will be removed, use exists query instead"); return super.termQuery(value, context); } } @@ -206,12 +220,14 @@ public class FieldNamesFieldMapper extends MetadataFieldMapper { @Override public void postParse(ParseContext context) throws IOException { - super.parse(context); + if (context.indexSettings().getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).before(Version.V_6_1_0)) { + super.parse(context); + } } @Override public Mapper parse(ParseContext context) throws IOException { - // we parse in post parse + // Adding values to the _field_names field is handled by the mappers for each field type return null; } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java index 700b44a204e..45237eb572d 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java @@ -23,7 +23,10 @@ import org.apache.lucene.document.LatLonPoint; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.GeoPoint; @@ -37,6 +40,7 @@ import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -180,6 +184,15 @@ public class GeoPointFieldMapper extends FieldMapper implements ArrayValueMapper return new AbstractLatLonPointDVIndexFieldData.Builder(); } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: [" @@ -207,6 +220,12 @@ public class GeoPointFieldMapper extends FieldMapper implements 
ArrayValueMapper } if (fieldType.hasDocValues()) { context.doc().add(new LatLonDocValuesField(fieldType().name(), point.lat(), point.lon())); + } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) { + List fields = new ArrayList<>(1); + createFieldNamesField(context, fields); + for (IndexableField field : fields) { + context.doc().add(field); + } } // if the mapping contains multifields then use the geohash string if (multiFields.iterator().hasNext()) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java index 72bb35668bd..c605b8d0936 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java @@ -18,10 +18,12 @@ */ package org.elasticsearch.index.mapper; -import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.spatial.prefix.PrefixTreeStrategy; import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy; import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy; @@ -29,6 +31,7 @@ import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree; import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; +import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.geo.GeoUtils; import org.elasticsearch.common.geo.SpatialStrategy; @@ -44,6 +47,8 @@ import org.locationtech.spatial4j.shape.Shape; import org.locationtech.spatial4j.shape.jts.JtsGeometry; import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -125,6 +130,11 @@ public class GeoShapeFieldMapper extends FieldMapper { return builder; } + @Override + protected boolean defaultDocValues(Version indexCreated) { + return false; + } + protected Explicit coerce(BuilderContext context) { if (coerce != null) { return new Explicit<>(coerce, true); @@ -406,6 +416,11 @@ public class GeoShapeFieldMapper extends FieldMapper { throw new IllegalArgumentException("Unknown prefix tree strategy [" + strategyName + "]"); } + @Override + public Query existsQuery(QueryShardContext context) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead"); @@ -440,11 +455,9 @@ public class GeoShapeFieldMapper extends FieldMapper { throw new MapperParsingException("[{" + fieldType().name() + "}] is configured for points only but a " + ((shape instanceof JtsGeometry) ? 
((JtsGeometry)shape).getGeom().getGeometryType() : shape.getClass()) + " was found"); } - Field[] fields = fieldType().defaultStrategy().createIndexableFields(shape); - if (fields == null || fields.length == 0) { - return null; - } - for (Field field : fields) { + List fields = new ArrayList<>(Arrays.asList(fieldType().defaultStrategy().createIndexableFields(shape))); + createFieldNamesField(context, fields); + for (IndexableField field : fields) { context.doc().add(field); } } catch (Exception e) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java index 55898e5f96c..41256d3a5bb 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/IdFieldMapper.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermInSetQuery; @@ -36,10 +37,10 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.fielddata.AtomicFieldData; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; -import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.indices.breaker.CircuitBreakerService; @@ -126,6 +127,11 @@ public class IdFieldMapper extends MetadataFieldMapper { return termsQuery(Arrays.asList(value), context); } + @Override + public Query existsQuery(QueryShardContext context) { + return new MatchAllDocsQuery(); + } + @Override public Query termsQuery(List values, QueryShardContext context) { if (indexOptions() != IndexOptions.NONE) { diff --git a/core/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java index 7f3f934419a..0010211a955 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/IndexFieldMapper.java @@ -21,9 +21,9 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; @@ -111,6 +111,11 @@ public class IndexFieldMapper extends MetadataFieldMapper { return true; } + @Override + public Query existsQuery(QueryShardContext context) { + return new MatchAllDocsQuery(); + } + /** * This termQuery impl looks at the context to determine the index that * is being queried and then returns a MATCH_ALL_QUERY 
or MATCH_NO_QUERY diff --git a/core/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java index faa486dd972..bc811d041e3 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java @@ -25,8 +25,11 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Explicit; @@ -153,6 +156,15 @@ public class IpFieldMapper extends FieldMapper { } } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { failIfNotIndexed(); @@ -369,6 +381,8 @@ public class IpFieldMapper extends FieldMapper { } if (fieldType().hasDocValues()) { fields.add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(InetAddressPoint.encode(address)))); + } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) { + createFieldNamesField(context, fields); } if (fieldType().stored()) { fields.add(new StoredField(fieldType().name(), new BytesRef(InetAddressPoint.encode(address)))); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 32fe4c95c43..cb2c4b6b6fd 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -25,7 +25,10 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; @@ -35,6 +38,7 @@ import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.DocValuesIndexFieldData; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.Iterator; @@ -210,6 +214,15 @@ public final class KeywordFieldMapper extends FieldMapper { this.normalizer = normalizer; } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query nullValueQuery() { if (nullValue() == null) { @@ -328,6 +341,8 @@ public final class KeywordFieldMapper 
extends FieldMapper { } if (fieldType().hasDocValues()) { fields.add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); + } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) { + createFieldNamesField(context, fields); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 2796a5342c1..6eab9087534 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -379,6 +379,8 @@ public abstract class MappedFieldType extends FieldType { return new ConstantScoreQuery(termQuery(nullValue, null)); } + public abstract Query existsQuery(QueryShardContext context); + /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range diff --git a/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 80dc01a965f..a44611d6406 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -29,10 +29,13 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; @@ -867,6 +870,15 @@ public class NumberFieldMapper extends FieldMapper { return type.name; } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query termQuery(Object value, QueryShardContext context) { failIfNotIndexed(); @@ -1001,6 +1013,9 @@ public class NumberFieldMapper extends FieldMapper { boolean docValued = fieldType().hasDocValues(); boolean stored = fieldType().stored(); fields.addAll(fieldType().type.createFields(fieldType().name(), numericValue, indexed, docValued, stored)); + if (docValued == false && (stored || indexed)) { + createFieldNamesField(context, fields); + } } @Override diff --git a/core/src/main/java/org/elasticsearch/index/mapper/ParentFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/ParentFieldMapper.java index df6612cb222..73109a3ecd8 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/ParentFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/ParentFieldMapper.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.DocValuesTermsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; @@ -178,6 +179,11 @@ public class 
ParentFieldMapper extends MetadataFieldMapper { return CONTENT_TYPE; } + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(name()); + } + @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { return termsQuery(Collections.singletonList(value), context); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java index 88679d910b1..a4b009f9f1f 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/RoutingFieldMapper.java @@ -22,9 +22,13 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.Collections; @@ -121,6 +125,11 @@ public class RoutingFieldMapper extends MetadataFieldMapper { public String typeName() { return CONTENT_TYPE; } + + @Override + public Query existsQuery(QueryShardContext context) { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } } private boolean required; @@ -165,6 +174,7 @@ public class RoutingFieldMapper extends MetadataFieldMapper { if (routing != null) { if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { fields.add(new Field(fieldType().name(), routing, fieldType())); + createFieldNamesField(context, fields); } } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java index bcf901388f1..7d74f9e52aa 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/SeqNoFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -162,6 +163,11 @@ public class SeqNoFieldMapper extends MetadataFieldMapper { return Long.parseLong(value.toString()); } + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(name()); + } + @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { long v = parse(value); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java index fc177e45d5c..47d5e64438e 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/SourceFieldMapper.java @@ -24,7 +24,6 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; -import 
org.elasticsearch.Version; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.Tuple; @@ -162,6 +161,11 @@ public class SourceFieldMapper extends MetadataFieldMapper { return CONTENT_TYPE; } + @Override + public Query existsQuery(QueryShardContext context) { + throw new QueryShardException(context, "The _source field is not searchable"); + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "The _source field is not searchable"); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 82223ae2de7..24c3443658a 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -22,13 +22,17 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.Iterator; @@ -274,6 +278,15 @@ public class TextFieldMapper extends FieldMapper { return CONTENT_TYPE; } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query nullValueQuery() { if (nullValue() == null) { @@ -332,6 +345,7 @@ public class TextFieldMapper extends FieldMapper { if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { Field field = new Field(fieldType().name(), value, fieldType()); fields.add(field); + createFieldNamesField(context, fields); } } diff --git a/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java index 09ef33f0795..d0e30e77c9e 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/TypeFieldMapper.java @@ -132,6 +132,11 @@ public class TypeFieldMapper extends MetadataFieldMapper { return true; } + @Override + public Query existsQuery(QueryShardContext context) { + return new MatchAllDocsQuery(); + } + @Override public Query termQuery(Object value, QueryShardContext context) { return termsQuery(Arrays.asList(value), context); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/UidFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/UidFieldMapper.java index f981fc94b2d..95dc40bca63 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/UidFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/UidFieldMapper.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.mapper; import 
org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; @@ -133,6 +134,11 @@ public class UidFieldMapper extends MetadataFieldMapper { } } + @Override + public Query existsQuery(QueryShardContext context) { + return new MatchAllDocsQuery(); + } + @Override public Query termQuery(Object value, @Nullable QueryShardContext context) { return termsQuery(Arrays.asList(value), context); diff --git a/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java index 1d2e997acba..90ea85024c1 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/VersionFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -87,6 +88,11 @@ public class VersionFieldMapper extends MetadataFieldMapper { return CONTENT_TYPE; } + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(name()); + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "The _version field is not searchable"); diff --git a/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java index 799998e2c9f..97378e01236 100644 --- a/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/ExistsQueryBuilder.java @@ -19,10 +19,14 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.Strings; @@ -32,6 +36,7 @@ import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; import java.util.Collection; @@ -126,8 +131,9 @@ public class ExistsQueryBuilder extends AbstractQueryBuilder } public static Query newFilter(QueryShardContext context, String fieldPattern) { - final FieldNamesFieldMapper.FieldNamesFieldType fieldNamesFieldType = - (FieldNamesFieldMapper.FieldNamesFieldType) context.getMapperService().fullName(FieldNamesFieldMapper.NAME); + + final FieldNamesFieldMapper.FieldNamesFieldType fieldNamesFieldType = (FieldNamesFieldMapper.FieldNamesFieldType) context + 
.getMapperService().fullName(FieldNamesFieldMapper.NAME); if (fieldNamesFieldType == null) { // can only happen when no types exist, so no docs exist either return Queries.newMatchNoDocsQuery("Missing types in \"" + NAME + "\" query."); @@ -142,19 +148,62 @@ public class ExistsQueryBuilder extends AbstractQueryBuilder fields = context.simpleMatchToIndexNames(fieldPattern); } + if (context.indexVersionCreated().before(Version.V_6_1_0)) { + return newLegacyExistsQuery(fields); + } + if (fields.size() == 1) { - Query filter = fieldNamesFieldType.termQuery(fields.iterator().next(), context); + String field = fields.iterator().next(); + return newFieldExistsQuery(context, field); + } + + BooleanQuery.Builder boolFilterBuilder = new BooleanQuery.Builder(); + for (String field : fields) { + boolFilterBuilder.add(newFieldExistsQuery(context, field), BooleanClause.Occur.SHOULD); + } + return new ConstantScoreQuery(boolFilterBuilder.build()); + } + + private static Query newLegacyExistsQuery(Collection fields) { + // We create TermsQuery directly here rather than using FieldNamesFieldType.termsQuery() + // so we don't end up with deprecation warnings + if (fields.size() == 1) { + Query filter = new TermQuery(new Term(FieldNamesFieldMapper.NAME, fields.iterator().next())); return new ConstantScoreQuery(filter); } BooleanQuery.Builder boolFilterBuilder = new BooleanQuery.Builder(); for (String field : fields) { - Query filter = fieldNamesFieldType.termQuery(field, context); + Query filter = new TermQuery(new Term(FieldNamesFieldMapper.NAME, field)); boolFilterBuilder.add(filter, BooleanClause.Occur.SHOULD); } return new ConstantScoreQuery(boolFilterBuilder.build()); } + private static Query newFieldExistsQuery(QueryShardContext context, String field) { + MappedFieldType fieldType = context.getMapperService().fullName(field); + if (fieldType == null) { + // The field does not exist as a leaf but could be an object so + // check for an object mapper + if (context.getObjectMapper(field) != null) { + return newObjectFieldExistsQuery(context, field); + } + return Queries.newMatchNoDocsQuery("No field \"" + field + "\" exists in mappings."); + } + Query filter = fieldType.existsQuery(context); + return new ConstantScoreQuery(filter); + } + + private static Query newObjectFieldExistsQuery(QueryShardContext context, String objField) { + BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); + Collection fields = context.simpleMatchToIndexNames(objField + ".*"); + for (String field : fields) { + Query existsQuery = context.getMapperService().fullName(field).existsQuery(context); + booleanQuery.add(existsQuery, Occur.SHOULD); + } + return new ConstantScoreQuery(booleanQuery.build()); + } + @Override protected int doHashCode() { return Objects.hash(fieldName); diff --git a/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java index 398708d75f9..4e79a68c50e 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/DocumentFieldMapperTests.java @@ -23,14 +23,18 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import 
org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.LuceneTestCase; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.io.StringReader; @@ -88,6 +92,15 @@ public class DocumentFieldMapperTests extends LuceneTestCase { return "fake"; } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + } static class FakeFieldMapper extends FieldMapper { diff --git a/core/src/test/java/org/elasticsearch/index/mapper/ExternalMapper.java b/core/src/test/java/org/elasticsearch/index/mapper/ExternalMapper.java index 4a2c36d829f..33e3bc20183 100755 --- a/core/src/test/java/org/elasticsearch/index/mapper/ExternalMapper.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/ExternalMapper.java @@ -20,12 +20,17 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.IndexableField; -import org.locationtech.spatial4j.shape.Point; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.geo.GeoPoint; import org.elasticsearch.common.geo.builders.ShapeBuilders; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.query.QueryShardContext; +import org.locationtech.spatial4j.shape.Point; import java.io.IOException; import java.nio.charset.Charset; @@ -128,6 +133,15 @@ public class ExternalMapper extends FieldMapper { public String typeName() { return "faketype"; } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } } private final String generatedValue; diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FakeStringFieldMapper.java b/core/src/test/java/org/elasticsearch/index/mapper/FakeStringFieldMapper.java index 642282c9d5c..464b0d9f840 100755 --- a/core/src/test/java/org/elasticsearch/index/mapper/FakeStringFieldMapper.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FakeStringFieldMapper.java @@ -23,16 +23,14 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.index.mapper.Mapper; -import 
org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.ParseContext; -import org.elasticsearch.index.mapper.StringFieldType; +import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; import java.util.List; @@ -114,6 +112,15 @@ public class FakeStringFieldMapper extends FieldMapper { } return termQuery(nullValue(), null); } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } } protected FakeStringFieldMapper(String simpleName, FakeStringFieldType fieldType, MappedFieldType defaultFieldType, diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java index 9d25e2b70b8..70022dc6326 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldMapperTests.java @@ -19,29 +19,16 @@ package org.elasticsearch.index.mapper; -import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexableField; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.compress.CompressedXContent; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.IndexService; -import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.indices.IndicesModule; -import org.elasticsearch.indices.mapper.MapperRegistry; import org.elasticsearch.test.ESSingleNodeTestCase; -import java.io.IOException; import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; -import java.util.function.Supplier; public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase { @@ -100,12 +87,13 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase { .bytes(), XContentType.JSON)); - assertFieldNames(set("a", "a.keyword", "b", "b.c", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc); + assertFieldNames(set("a"), doc); } public void testExplicitEnabled() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_field_names").field("enabled", true).endObject() + .startObject("properties").startObject("field").field("type", "keyword").field("doc_values", false).endObject().endObject() .endObject().endObject().string(); DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class); @@ -118,27 +106,7 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase { .bytes(), XContentType.JSON)); - assertFieldNames(set("field", "field.keyword", "_id", "_version", "_seq_no", "_primary_term", "_source"), doc); - } - - public void testDedup() throws Exception { - String mapping = XContentFactory.jsonBuilder().startObject().startObject("type") - .startObject("_field_names").field("enabled", true).endObject() - .endObject().endObject().string(); - DocumentMapper docMapper = 
createIndex("test").mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - FieldNamesFieldMapper fieldNamesMapper = docMapper.metadataMapper(FieldNamesFieldMapper.class); - assertTrue(fieldNamesMapper.fieldType().isEnabled()); - - ParsedDocument doc = docMapper.parse(SourceToParse.source("test", "type", "1", XContentFactory.jsonBuilder() - .startObject() - .field("field", 3) // will create 2 lucene fields under the hood: index and doc values - .endObject() - .bytes(), - XContentType.JSON)); - - Set fields = set("field", "_id", "_version", "_seq_no", "_primary_term", "_source"); - assertFieldNames(fields, doc); - assertEquals(fields.size(), doc.rootDoc().getValues("_field_names").length); + assertFieldNames(set("field"), doc); } public void testDisabled() throws Exception { @@ -175,110 +143,4 @@ public class FieldNamesFieldMapperTests extends ESSingleNodeTestCase { mapperEnabled = mapperService.merge("type", new CompressedXContent(enabledMapping), MapperService.MergeReason.MAPPING_UPDATE, false); assertTrue(mapperEnabled.metadataMapper(FieldNamesFieldMapper.class).fieldType().isEnabled()); } - - private static class DummyMetadataFieldMapper extends MetadataFieldMapper { - - public static class TypeParser implements MetadataFieldMapper.TypeParser { - - @Override - public Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { - return new MetadataFieldMapper.Builder("_dummy", FIELD_TYPE, FIELD_TYPE) { - @Override - public DummyMetadataFieldMapper build(BuilderContext context) { - return new DummyMetadataFieldMapper(context.indexSettings()); - } - }; - } - - @Override - public MetadataFieldMapper getDefault(MappedFieldType fieldType, ParserContext context) { - final Settings indexSettings = context.mapperService().getIndexSettings().getSettings(); - return new DummyMetadataFieldMapper(indexSettings); - } - - } - - private static class DummyFieldType extends TermBasedFieldType { - - DummyFieldType() { - super(); - } - - private DummyFieldType(MappedFieldType other) { - super(other); - } - - @Override - public MappedFieldType clone() { - return new DummyFieldType(this); - } - - @Override - public String typeName() { - return "_dummy"; - } - - } - - private static final MappedFieldType FIELD_TYPE = new DummyFieldType(); - static { - FIELD_TYPE.setTokenized(false); - FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); - FIELD_TYPE.setName("_dummy"); - FIELD_TYPE.freeze(); - } - - protected DummyMetadataFieldMapper(Settings indexSettings) { - super("_dummy", FIELD_TYPE, FIELD_TYPE, indexSettings); - } - - @Override - public void preParse(ParseContext context) throws IOException { - } - - @Override - public void postParse(ParseContext context) throws IOException { - context.doc().add(new Field("_dummy", "dummy", FIELD_TYPE)); - } - - @Override - protected void parseCreateField(ParseContext context, List fields) throws IOException { - } - - @Override - protected String contentType() { - return "_dummy"; - } - - } - - public void testSeesFieldsFromPlugins() throws IOException { - IndexService indexService = createIndex("test"); - IndicesModule indicesModule = newTestIndicesModule( - Collections.emptyMap(), - Collections.singletonMap("_dummy", new DummyMetadataFieldMapper.TypeParser()) - ); - final MapperRegistry mapperRegistry = indicesModule.getMapperRegistry(); - Supplier queryShardContext = () -> { - return indexService.newQueryShardContext(0, null, () -> { throw new UnsupportedOperationException(); }, null); - }; - 
MapperService mapperService = new MapperService(indexService.getIndexSettings(), indexService.getIndexAnalyzers(), - indexService.xContentRegistry(), indexService.similarityService(), mapperRegistry, queryShardContext); - DocumentMapperParser parser = new DocumentMapperParser(indexService.getIndexSettings(), mapperService, - indexService.getIndexAnalyzers(), indexService.xContentRegistry(), indexService.similarityService(), mapperRegistry, - queryShardContext); - String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").endObject().endObject().string(); - DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping)); - ParsedDocument parsedDocument = mapper.parse(SourceToParse.source("index", "type", "id", new BytesArray("{}"), - XContentType.JSON)); - IndexableField[] fields = parsedDocument.rootDoc().getFields(FieldNamesFieldMapper.NAME); - boolean found = false; - for (IndexableField f : fields) { - if ("_dummy".equals(f.stringValue())) { - found = true; - break; - } - } - assertTrue("Could not find the dummy field among " + Arrays.toString(fields), found); - } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldTypeTests.java index b3c9da806fa..945407fc394 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FieldNamesFieldTypeTests.java @@ -21,10 +21,18 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.elasticsearch.index.mapper.FieldNamesFieldMapper; -import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.query.QueryShardContext; import org.junit.Before; +import java.util.Collections; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + public class FieldNamesFieldTypeTests extends FieldTypeTestCase { @Override protected MappedFieldType createDefaultFieldType() { @@ -43,13 +51,28 @@ public class FieldNamesFieldTypeTests extends FieldTypeTestCase { } public void testTermQuery() { - FieldNamesFieldMapper.FieldNamesFieldType type = new FieldNamesFieldMapper.FieldNamesFieldType(); - type.setName(FieldNamesFieldMapper.CONTENT_TYPE); - type.setEnabled(true); - Query termQuery = type.termQuery("field_name", null); + + FieldNamesFieldMapper.FieldNamesFieldType fieldNamesFieldType = new FieldNamesFieldMapper.FieldNamesFieldType(); + fieldNamesFieldType.setName(FieldNamesFieldMapper.CONTENT_TYPE); + KeywordFieldMapper.KeywordFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); + fieldType.setName("field_name"); + + Settings settings = settings(Version.CURRENT).build(); + IndexSettings indexSettings = new IndexSettings( + new IndexMetaData.Builder("foo").settings(settings).numberOfShards(1).numberOfReplicas(0).build(), settings); + MapperService mapperService = mock(MapperService.class); + when(mapperService.fullName("_field_names")).thenReturn(fieldNamesFieldType); + when(mapperService.fullName("field_name")).thenReturn(fieldType); + when(mapperService.simpleMatchToIndexNames("field_name")).thenReturn(Collections.singletonList("field_name")); + + QueryShardContext 
queryShardContext = new QueryShardContext(0, + indexSettings, null, null, mapperService, null, null, null, null, null, null, () -> 0L, null); + fieldNamesFieldType.setEnabled(true); + Query termQuery = fieldNamesFieldType.termQuery("field_name", queryShardContext); assertEquals(new TermQuery(new Term(FieldNamesFieldMapper.CONTENT_TYPE, "field_name")), termQuery); - type.setEnabled(false); - IllegalStateException e = expectThrows(IllegalStateException.class, () -> type.termQuery("field_name", null)); + assertWarnings("terms query on the _field_names field is deprecated and will be removed, use exists query instead"); + fieldNamesFieldType.setEnabled(false); + IllegalStateException e = expectThrows(IllegalStateException.class, () -> fieldNamesFieldType.termQuery("field_name", null)); assertEquals("Cannot run [exists] queries if the [_field_names] field is disabled", e.getMessage()); } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java b/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java index 4ae9b004413..fe885a46b87 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java @@ -19,6 +19,11 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.test.ESTestCase; import java.util.Arrays; @@ -223,5 +228,14 @@ public class FieldTypeLookupTests extends ESTestCase { public String typeName() { return "otherfaketype"; } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } } } diff --git a/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java index cfc2d789420..40d3f90dd4e 100644 --- a/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/ExistsQueryBuilderTests.java @@ -22,15 +22,20 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; +import java.util.List; +import java.util.stream.Collectors; import static org.hamcrest.CoreMatchers.containsString; import static org.hamcrest.CoreMatchers.equalTo; @@ -60,23 +65,66 @@ public class ExistsQueryBuilderTests extends AbstractQueryTestCase fields = context.getQueryShardContext().simpleMatchToIndexNames(fieldPattern); + Collection mappedFields = fields.stream().filter((field) -> context.getQueryShardContext().getObjectMapper(field) != null + || 
context.getQueryShardContext().getMapperService().fullName(field) != null).collect(Collectors.toList()); if (getCurrentTypes().length == 0) { assertThat(query, instanceOf(MatchNoDocsQuery.class)); MatchNoDocsQuery matchNoDocsQuery = (MatchNoDocsQuery) query; assertThat(matchNoDocsQuery.toString(null), containsString("Missing types in \"exists\" query.")); + } else if (context.mapperService().getIndexSettings().getIndexVersionCreated().before(Version.V_6_1_0)) { + if (fields.size() == 1) { + assertThat(query, instanceOf(ConstantScoreQuery.class)); + ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) query; + String field = fields.iterator().next(); + assertThat(constantScoreQuery.getQuery(), instanceOf(TermQuery.class)); + TermQuery termQuery = (TermQuery) constantScoreQuery.getQuery(); + assertEquals(field, termQuery.getTerm().text()); + } else { + assertThat(query, instanceOf(ConstantScoreQuery.class)); + ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) query; + assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) constantScoreQuery.getQuery(); + assertThat(booleanQuery.clauses().size(), equalTo(mappedFields.size())); + for (int i = 0; i < mappedFields.size(); i++) { + BooleanClause booleanClause = booleanQuery.clauses().get(i); + assertThat(booleanClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD)); + } + } + } else if (fields.size() == 1 && mappedFields.size() == 0) { + assertThat(query, instanceOf(MatchNoDocsQuery.class)); + MatchNoDocsQuery matchNoDocsQuery = (MatchNoDocsQuery) query; + assertThat(matchNoDocsQuery.toString(null), + containsString("No field \"" + fields.iterator().next() + "\" exists in mappings.")); } else if (fields.size() == 1) { assertThat(query, instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) query; - assertThat(constantScoreQuery.getQuery(), instanceOf(TermQuery.class)); - TermQuery termQuery = (TermQuery) constantScoreQuery.getQuery(); - assertEquals(fields.iterator().next(), termQuery.getTerm().text()); + String field = fields.iterator().next(); + if (context.getQueryShardContext().getObjectMapper(field) != null) { + assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); + BooleanQuery booleanQuery = (BooleanQuery) constantScoreQuery.getQuery(); + List childFields = new ArrayList<>(); + context.getQueryShardContext().getObjectMapper(field).forEach(mapper -> childFields.add(mapper.name())); + assertThat(booleanQuery.clauses().size(), equalTo(childFields.size())); + for (int i = 0; i < childFields.size(); i++) { + BooleanClause booleanClause = booleanQuery.clauses().get(i); + assertThat(booleanClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD)); + } + } else if (context.getQueryShardContext().getMapperService().fullName(field).hasDocValues()) { + assertThat(constantScoreQuery.getQuery(), instanceOf(DocValuesFieldExistsQuery.class)); + DocValuesFieldExistsQuery dvExistsQuery = (DocValuesFieldExistsQuery) constantScoreQuery.getQuery(); + assertEquals(field, dvExistsQuery.getField()); + } else { + assertThat(constantScoreQuery.getQuery(), instanceOf(TermQuery.class)); + TermQuery termQuery = (TermQuery) constantScoreQuery.getQuery(); + assertEquals(field, termQuery.getTerm().text()); + } } else { assertThat(query, instanceOf(ConstantScoreQuery.class)); ConstantScoreQuery constantScoreQuery = (ConstantScoreQuery) query; assertThat(constantScoreQuery.getQuery(), instanceOf(BooleanQuery.class)); 
BooleanQuery booleanQuery = (BooleanQuery) constantScoreQuery.getQuery(); - assertThat(booleanQuery.clauses().size(), equalTo(fields.size())); - for (int i = 0; i < fields.size(); i++) { + assertThat(booleanQuery.clauses().size(), equalTo(mappedFields.size())); + for (int i = 0; i < mappedFields.size(); i++) { BooleanClause booleanClause = booleanQuery.clauses().get(i); assertThat(booleanClause.getOccur(), equalTo(BooleanClause.Occur.SHOULD)); } diff --git a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index 94b55fba618..cf71bc0872a 100644 --- a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -801,11 +801,11 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase 0) { - expected = new ConstantScoreQuery(new TermQuery(new Term("_field_names", "foo"))); + expected = new ConstantScoreQuery(new TermQuery(new Term("_field_names", STRING_FIELD_NAME))); } else { expected = new MatchNoDocsQuery(); } diff --git a/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java index 4c0e2192cec..ad3946a6755 100644 --- a/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/RangeQueryBuilderTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; @@ -30,6 +31,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; import org.elasticsearch.ElasticsearchParseException; +import org.elasticsearch.Version; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.lucene.BytesRefs; @@ -124,7 +126,12 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase 0) { - expectedQuery = new ConstantScoreQuery(new TermQuery(new Term(FieldNamesFieldMapper.NAME, queryBuilder.fieldName()))); + if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0) + && context.mapperService().fullName(queryBuilder.fieldName()).hasDocValues()) { + expectedQuery = new ConstantScoreQuery(new DocValuesFieldExistsQuery(queryBuilder.fieldName())); + } else { + expectedQuery = new ConstantScoreQuery(new TermQuery(new Term(FieldNamesFieldMapper.NAME, queryBuilder.fieldName()))); + } } else { expectedQuery = new MatchNoDocsQuery("no mappings yet"); } @@ -385,7 +392,7 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase 0) { - expectedQuery = new ConstantScoreQuery(new TermQuery(new Term(FieldNamesFieldMapper.NAME, query.fieldName()))); + if (queryShardContext.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_1_0) + && queryShardContext.fieldMapper(query.fieldName()).hasDocValues()) { + expectedQuery = new ConstantScoreQuery(new DocValuesFieldExistsQuery(query.fieldName())); + } else { + expectedQuery = new 
ConstantScoreQuery(new TermQuery(new Term(FieldNamesFieldMapper.NAME, query.fieldName()))); + } } else { expectedQuery = new MatchNoDocsQuery("no mappings yet"); } @@ -416,7 +428,7 @@ public class RangeQueryBuilderTests extends AbstractQueryTestCase fields = new ArrayList<>(1); + createFieldNamesField(context, fields); + for (IndexableField field : fields) { + context.doc().add(field); + } } static Query parseQuery(QueryShardContext context, boolean mapUnmappedFieldsAsString, XContentParser parser) throws IOException { diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java index ea966c5fc3a..f927f920f90 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/mapper/ICUCollationKeywordFieldMapper.java @@ -23,13 +23,17 @@ import com.ibm.icu.text.Collator; import com.ibm.icu.text.RawCollationKey; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.util.ULocale; + import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.io.stream.StreamOutput; @@ -122,6 +126,15 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper { this.collator = collator.isFrozen() ? 
collator : collator.freeze(); } + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } + @Override public Query nullValueQuery() { if (nullValue() == null) { @@ -750,6 +763,8 @@ public class ICUCollationKeywordFieldMapper extends FieldMapper { if (fieldType().hasDocValues()) { fields.add(getDVField.apply(fieldType().name(), binaryValue)); + } else if (fieldType().indexOptions() != IndexOptions.NONE || fieldType().stored()) { + createFieldNamesField(context, fields); } } } diff --git a/plugins/mapper-murmur3/src/main/java/org/elasticsearch/index/mapper/murmur3/Murmur3FieldMapper.java b/plugins/mapper-murmur3/src/main/java/org/elasticsearch/index/mapper/murmur3/Murmur3FieldMapper.java index 6824c8bf202..a6dc27b1f8a 100644 --- a/plugins/mapper-murmur3/src/main/java/org/elasticsearch/index/mapper/murmur3/Murmur3FieldMapper.java +++ b/plugins/mapper-murmur3/src/main/java/org/elasticsearch/index/mapper/murmur3/Murmur3FieldMapper.java @@ -19,14 +19,11 @@ package org.elasticsearch.index.mapper.murmur3; -import java.io.IOException; -import java.util.List; -import java.util.Map; - import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.DocValuesFieldExistsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; @@ -44,6 +41,10 @@ import org.elasticsearch.index.mapper.TypeParsers; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; +import java.io.IOException; +import java.util.List; +import java.util.Map; + public class Murmur3FieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "murmur3"; @@ -127,6 +128,11 @@ public class Murmur3FieldMapper extends FieldMapper { return new DocValuesIndexFieldData.Builder().numericType(NumericType.LONG); } + @Override + public Query existsQuery(QueryShardContext context) { + return new DocValuesFieldExistsQuery(name()); + } + @Override public Query termQuery(Object value, QueryShardContext context) { throw new QueryShardException(context, "Murmur3 fields are not searchable: [" + name() + "]"); diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml new file mode 100644 index 00000000000..a34b110e910 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml @@ -0,0 +1,1289 @@ +setup: + - skip: + features: ["headers"] + + - do: + indices.create: + index: test + body: + mappings: + test: + dynamic: false + properties: + binary: + type: binary + doc_values: true + boolean: + type: boolean + date: + type: date + geo_point: + type: geo_point + geo_shape: + type: geo_shape + ip: + type: ip + keyword: + type: keyword + byte: + type: byte + double: + type: double + float: + type: float + half_float: + type: half_float + integer: + type: integer + long: + type: long + short: + type: short + object: + type: object + properties: + inner1: + type: keyword + inner2: + type: keyword + text: + type: text + + - do: + headers: + Content-Type: application/json + index: + index: "test" + type: "test" + id: 1 + body: + binary: 
"YWJjZGUxMjM0" + boolean: true + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner1: "foo" + inner2: "bar" + text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + type: "test" + id: 2 + body: + binary: "YWJjZGUxMjM0" + boolean: false + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner1: "foo" + text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + type: "test" + id: 3 + routing: "route_me" + body: + binary: "YWJjZGUxMjM0" + boolean: true + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner2: "bar" + text: "foo bar" + + - do: + index: + index: "test" + type: "test" + id: 4 + body: {} + + - do: + indices.create: + index: test-no-dv + body: + mappings: + test: + dynamic: false + properties: + binary: + type: binary + doc_values: false + store: true + boolean: + type: boolean + doc_values: false + date: + type: date + doc_values: false + geo_point: + type: geo_point + doc_values: false + geo_shape: + type: geo_shape + ip: + type: ip + doc_values: false + keyword: + type: keyword + doc_values: false + byte: + type: byte + doc_values: false + double: + type: double + doc_values: false + float: + type: float + doc_values: false + half_float: + type: half_float + doc_values: false + integer: + type: integer + doc_values: false + long: + type: long + doc_values: false + short: + type: short + doc_values: false + object: + type: object + properties: + inner1: + type: keyword + doc_values: false + inner2: + type: keyword + doc_values: false + text: + type: text + doc_values: false + + - do: + headers: + Content-Type: application/json + index: + index: "test-no-dv" + type: "test" + id: 1 + body: + binary: "YWJjZGUxMjM0" + boolean: true + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner1: "foo" + inner2: "bar" + text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test-no-dv" + type: "test" + id: 2 + body: + binary: "YWJjZGUxMjM0" + boolean: false + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner1: "foo" + text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test-no-dv" + type: "test" + id: 3 + routing: "route_me" + body: + binary: "YWJjZGUxMjM0" + boolean: true + date: "2017-01-01" + geo_point: [0.0, 20.0] + geo_shape: + type: "point" + coordinates: [0.0, 20.0] + ip: "192.168.0.1" + keyword: "foo" + byte: 1 + double: 1.0 + float: 1.0 + half_float: 1.0 + integer: 1 + long: 1 + short: 1 + object: + inner2: "bar" + text: "foo bar" + + - do: + index: + index: "test-no-dv" + type: "test" + id: 4 + 
body: {} + + - do: + indices.create: + index: test-unmapped + body: + mappings: + test: + dynamic: false + properties: + unrelated: + type: keyword + + - do: + index: + index: "test-unmapped" + type: "test" + id: 1 + body: + unrelated: "foo" + + - do: + indices.create: + index: test-empty + body: + mappings: + test: + dynamic: false + properties: + binary: + type: binary + date: + type: date + geo_point: + type: geo_point + geo_shape: + type: geo_shape + ip: + type: ip + keyword: + type: keyword + byte: + type: byte + double: + type: double + float: + type: float + half_float: + type: half_float + integer: + type: integer + long: + type: long + short: + type: short + object: + type: object + properties: + inner1: + type: keyword + inner2: + type: keyword + text: + type: text + + - do: + indices.refresh: + index: [test, test-unmapped, test-empty, test-no-dv] + +--- +"Test exists query on mapped binary field": + - do: + search: + index: test + body: + query: + exists: + field: binary + + - match: {hits.total: 3} + +--- +"Test exists query on mapped boolean field": + - do: + search: + index: test + body: + query: + exists: + field: boolean + + - match: {hits.total: 3} + +--- +"Test exists query on mapped date field": + - do: + search: + index: test + body: + query: + exists: + field: date + + - match: {hits.total: 3} + +--- +"Test exists query on mapped geo_point field": + - do: + search: + index: test + body: + query: + exists: + field: geo_point + + - match: {hits.total: 3} + +--- +"Test exists query on mapped geo_shape field": + - do: + search: + index: test + body: + query: + exists: + field: geo_shape + + - match: {hits.total: 3} + +--- +"Test exists query on mapped ip field": + - do: + search: + index: test + body: + query: + exists: + field: ip + + - match: {hits.total: 3} + +--- +"Test exists query on mapped keyword field": + - do: + search: + index: test + body: + query: + exists: + field: keyword + + - match: {hits.total: 3} + +--- +"Test exists query on mapped byte field": + - do: + search: + index: test + body: + query: + exists: + field: byte + + - match: {hits.total: 3} + +--- +"Test exists query on mapped double field": + - do: + search: + index: test + body: + query: + exists: + field: double + + - match: {hits.total: 3} + +--- +"Test exists query on mapped float field": + - do: + search: + index: test + body: + query: + exists: + field: float + + - match: {hits.total: 3} + +--- +"Test exists query on mapped half_float field": + - do: + search: + index: test + body: + query: + exists: + field: half_float + + - match: {hits.total: 3} + +--- +"Test exists query on mapped integer field": + - do: + search: + index: test + body: + query: + exists: + field: integer + + - match: {hits.total: 3} + +--- +"Test exists query on mapped long field": + - do: + search: + index: test + body: + query: + exists: + field: long + + - match: {hits.total: 3} + +--- +"Test exists query on mapped short field": + - do: + search: + index: test + body: + query: + exists: + field: short + + - match: {hits.total: 3} + +--- +"Test exists query on mapped object field": + - do: + search: + index: test + body: + query: + exists: + field: object + + - match: {hits.total: 3} + +--- +"Test exists query on mapped object inner field": + - do: + search: + index: test + body: + query: + exists: + field: object.inner1 + + - match: {hits.total: 2} + +--- +"Test exists query on mapped text field": + - do: + search: + index: test + body: + query: + exists: + field: text + + - match: {hits.total: 3} + +--- +"Test exists 
query on _id field": + - do: + search: + index: test + body: + query: + exists: + field: _id + + - match: {hits.total: 4} + +--- +"Test exists query on _uid field": + - skip: + version: " - 6.1.0" + reason: exists on _uid not supported prior to 6.1.0 + - do: + search: + index: test + body: + query: + exists: + field: _uid + + - match: {hits.total: 4} + +--- +"Test exists query on _index field": + - skip: + version: " - 6.1.0" + reason: exists on _index not supported prior to 6.1.0 + - do: + search: + index: test + body: + query: + exists: + field: _index + + - match: {hits.total: 4} + +--- +"Test exists query on _type field": + - skip: + version: " - 6.1.0" + reason: exists on _type not supported prior to 6.1.0 + - do: + search: + index: test + body: + query: + exists: + field: _type + + - match: {hits.total: 4} + +--- +"Test exists query on _routing field": + - do: + search: + index: test + body: + query: + exists: + field: _routing + + - match: {hits.total: 1} + +--- +"Test exists query on _seq_no field": + - do: + search: + index: test + body: + query: + exists: + field: _seq_no + + - match: {hits.total: 4} + +--- +"Test exists query on _source field": + - skip: + version: " - 6.1.0" + reason: exists on _source not supported prior to 6.1.0 + - do: + catch: /query_shard_exception/ + search: + index: test + body: + query: + exists: + field: _source + +--- +"Test exists query on _version field": + - do: + search: + index: test + body: + query: + exists: + field: _version + + - match: {hits.total: 4} + +--- +"Test exists query on unmapped binary field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: binary + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped boolean field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: boolean + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped date field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: date + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped geo_point field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: geo_point + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped geo_shape field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: geo_shape + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped ip field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: ip + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped keyword field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: keyword + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped byte field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: byte + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped double field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: double + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped float field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: float + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped half_float field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: half_float + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped integer field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: integer + + - match: {hits.total: 0} + +--- +"Test 
exists query on unmapped long field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: long + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped short field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: short + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped object field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: object + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped object inner field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: object.inner1 + + - match: {hits.total: 0} + +--- +"Test exists query on unmapped text field": + - do: + search: + index: test-unmapped + body: + query: + exists: + field: text + + - match: {hits.total: 0} + +--- +"Test exists query on binary field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: binary + + - match: {hits.total: 0} + +--- +"Test exists query on boolean field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: boolean + + - match: {hits.total: 0} + +--- +"Test exists query on date field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: date + + - match: {hits.total: 0} + +--- +"Test exists query on geo_point field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: geo_point + + - match: {hits.total: 0} + +--- +"Test exists query on geo_shape field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: geo_shape + + - match: {hits.total: 0} + +--- +"Test exists query on ip field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: ip + + - match: {hits.total: 0} + +--- +"Test exists query on keyword field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: keyword + + - match: {hits.total: 0} + +--- +"Test exists query on byte field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: byte + + - match: {hits.total: 0} + +--- +"Test exists query on double field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: double + + - match: {hits.total: 0} + +--- +"Test exists query on float field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: float + + - match: {hits.total: 0} + +--- +"Test exists query on half_float field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: half_float + + - match: {hits.total: 0} + +--- +"Test exists query on integer field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: integer + + - match: {hits.total: 0} + +--- +"Test exists query on long field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: long + + - match: {hits.total: 0} + +--- +"Test exists query on short field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: short + + - match: {hits.total: 0} + +--- +"Test exists query on object field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: object + + - match: {hits.total: 0} + +--- +"Test exists query on object inner field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: object.inner1 + + - match: 
{hits.total: 0} + +--- +"Test exists query on text field in empty index": + - do: + search: + index: test-empty + body: + query: + exists: + field: text + + - match: {hits.total: 0} + +--- +"Test exists query on mapped binary field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: binary + + - match: {hits.total: 3} + +--- +"Test exists query on mapped boolean field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: boolean + + - match: {hits.total: 3} + +--- +"Test exists query on mapped date field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: date + + - match: {hits.total: 3} + +--- +"Test exists query on mapped geo_point field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: geo_point + + - match: {hits.total: 3} + +--- +"Test exists query on mapped geo_shape field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: geo_shape + + - match: {hits.total: 3} + +--- +"Test exists query on mapped ip field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: ip + + - match: {hits.total: 3} + +--- +"Test exists query on mapped keyword field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: keyword + + - match: {hits.total: 3} + +--- +"Test exists query on mapped byte field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: byte + + - match: {hits.total: 3} + +--- +"Test exists query on mapped double field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: double + + - match: {hits.total: 3} + +--- +"Test exists query on mapped float field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: float + + - match: {hits.total: 3} + +--- +"Test exists query on mapped half_float field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: half_float + + - match: {hits.total: 3} + +--- +"Test exists query on mapped integer field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: integer + + - match: {hits.total: 3} + +--- +"Test exists query on mapped long field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: long + + - match: {hits.total: 3} + +--- +"Test exists query on mapped short field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: short + + - match: {hits.total: 3} + +--- +"Test exists query on mapped object field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: object + + - match: {hits.total: 3} + +--- +"Test exists query on mapped object inner field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: object.inner1 + + - match: {hits.total: 2} + +--- +"Test exists query on mapped text field with no doc values": + - do: + search: + index: test-no-dv + body: + query: + exists: + field: text + + - match: {hits.total: 3} diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index ae91a791535..2b6f4c38a90 100644 --- 
a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.search.Query; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.similarity.BM25SimilarityProvider; import org.elasticsearch.test.ESTestCase; @@ -285,6 +287,8 @@ public abstract class FieldTypeTestCase extends ESTestCase { public MappedFieldType clone() {return null;} @Override public String typeName() { return fieldType.typeName();} + @Override + public Query existsQuery(QueryShardContext context) { return null; } }; try { fieldType.checkCompatibility(bogus, conflicts, random().nextBoolean()); @@ -299,6 +303,8 @@ public abstract class FieldTypeTestCase extends ESTestCase { public MappedFieldType clone() {return null;} @Override public String typeName() { return "othertype";} + @Override + public Query existsQuery(QueryShardContext context) { return null; } }; try { fieldType.checkCompatibility(other, conflicts, random().nextBoolean()); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MockFieldMapper.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MockFieldMapper.java index d8c9c8d797b..b374a6b4034 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MockFieldMapper.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MockFieldMapper.java @@ -19,12 +19,17 @@ package org.elasticsearch.index.mapper; -import java.io.IOException; -import java.util.List; - +import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.query.QueryShardContext; + +import java.io.IOException; +import java.util.List; // this sucks how much must be overridden just do get a dummy field mapper... public class MockFieldMapper extends FieldMapper { @@ -66,6 +71,15 @@ public class MockFieldMapper extends FieldMapper { public String typeName() { return "faketype"; } + + @Override + public Query existsQuery(QueryShardContext context) { + if (hasDocValues()) { + return new DocValuesFieldExistsQuery(name()); + } else { + return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name())); + } + } } @Override From d6d830ff0ba4e2338359acbdddc72b33b944a5f7 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Wed, 1 Nov 2017 13:01:45 -0400 Subject: [PATCH 14/17] Fix logic detecting unreleased versions When partitioning version constants into released and unreleased versions, today we have a bug in finding the last unreleased version. Namely, consider the following version constants on the 6.x branch: ..., 5.6.3, 5.6.4, 6.0.0-alpha1, ..., 6.0.0-rc1, 6.0.0-rc2, 6.0.0, 6.1.0. In this case, our convention dictates that: 5.6.4, 6.0.0, and 6.1.0 are unreleased. 
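For concreteness, here is a standalone sketch of that partition, including the backwards skip over alpha/beta/RC constants that finding 5.6.4 requires. Plain strings and an assumed isRelease helper stand in for the real Version constants; this is an illustration of the convention and the fix described next, not code from this patch.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class UnreleasedVersionsSketch {

        // Assumed helper: alpha/beta/RC constants carry a "-" qualifier and do not count
        // as release constants, mirroring the isRelease() check used by the fix.
        static boolean isRelease(String version) {
            return version.contains("-") == false;
        }

        public static void main(String[] args) {
            List<String> versions = new ArrayList<>(Arrays.asList(
                    "5.6.3", "5.6.4", "6.0.0-alpha1", "6.0.0-rc1", "6.0.0-rc2", "6.0.0", "6.1.0"));
            List<String> unreleased = new ArrayList<>();

            // 6.1.0 (the current version) and 6.0.0 are unreleased per the convention
            int unreleasedIndex = versions.size() - 1;
            unreleased.add(versions.remove(unreleasedIndex--));   // 6.1.0
            unreleased.add(versions.remove(unreleasedIndex));     // 6.0.0

            // the convention also treats the previous patch release as unreleased; to find
            // it we must skip backwards over the already-released alpha/beta/RC constants,
            // otherwise (the bug) 6.0.0-rc2 would be removed here instead of 5.6.4
            do {
                unreleasedIndex--;
            } while (isRelease(versions.get(unreleasedIndex)) == false);
            unreleased.add(versions.remove(unreleasedIndex));     // 5.6.4

            System.out.println("unreleased = " + unreleased);     // [6.1.0, 6.0.0, 5.6.4]
            System.out.println("released   = " + versions);       // the remaining constants
        }
    }
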
Today we correctly detect that 6.0.0 and 6.1.0 are unreleased, and then we say the previous patch version is unreleased too. The problem is the logic to remove that previous patch version is broken, it does not skip alphas/betas/RCs which have been released. This commit fixes this by skipping backwards over pre-release versions when finding the previous patch version to remove. Relates #27206 --- .../java/org/elasticsearch/test/VersionUtils.java | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/VersionUtils.java b/test/framework/src/main/java/org/elasticsearch/test/VersionUtils.java index 8b2f51cf8a9..74a9b58a78e 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/VersionUtils.java +++ b/test/framework/src/main/java/org/elasticsearch/test/VersionUtils.java @@ -100,9 +100,17 @@ public class VersionUtils { Version unreleased = versions.remove(unreleasedIndex); if (unreleased.revision == 0) { - /* If the last unreleased version is itself a patch release then gradle enforces - * that there is yet another unreleased version before that. */ - unreleasedIndex--; + /* + * If the last unreleased version is itself a patch release then Gradle enforces that there is yet another unreleased version + * before that. However, we have to skip alpha/betas/RCs too (e.g., consider when the version constants are ..., 5.6.3, 5.6.4, + * 6.0.0-alpha1, ..., 6.0.0-rc1, 6.0.0-rc2, 6.0.0, 6.1.0 on the 6.x branch. In this case, we will have pruned 6.0.0 and 6.1.0 as + * unreleased versions, but we also need to prune 5.6.4. At this point though, unreleasedIndex will be pointing to 6.0.0-rc2, so + * we have to skip backwards until we find a non-alpha/beta/RC again. Then we can prune that version as an unreleased version + * too. + */ + do { + unreleasedIndex--; + } while (versions.get(unreleasedIndex).isRelease() == false); Version earlierUnreleased = versions.remove(unreleasedIndex); return new Tuple<>(unmodifiableList(versions), unmodifiableList(Arrays.asList(earlierUnreleased, unreleased, current))); } From ac9addd454056f4bb8483c0eb8679be6bcb3d13d Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Wed, 1 Nov 2017 15:21:18 -0400 Subject: [PATCH 15/17] Fix stable BWC branch detection logic This commit fixes the logic for finding the stable BWC branch. A .x branch should only be chosen if we are testing BWC with a previous major version. --- distribution/bwc/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/bwc/build.gradle b/distribution/bwc/build.gradle index c84c6a6d5a2..245d719709b 100644 --- a/distribution/bwc/build.gradle +++ b/distribution/bwc/build.gradle @@ -56,7 +56,7 @@ if (enabled) { def (String major, String minor, String bugfix) = bwcVersion.split('\\.') def (String currentMajor, String currentMinor, String currentBugfix) = version.split('\\.') String bwcBranch - if (project.name == 'bwc-stable-snapshot') { + if (project.name == 'bwc-stable-snapshot' && major != currentMajor) { bwcBranch = "${major}.x" } else { bwcBranch = "${major}.${minor}" From 90d63174378d7a2369539ba627a35f6a10a6e6af Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Wed, 1 Nov 2017 21:13:01 -0400 Subject: [PATCH 16/17] Remove checkpoint tracker bit sets setting We added an index-level setting for controlling the size of the bit sets used to back the local checkpoint tracker. 
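As a rough aside (back-of-the-envelope arithmetic with assumed figures, ignoring per-object overhead): each 1024-bit chunk costs only 128 bytes of raw bits, so the chunk size itself hardly needs tuning; what can hurt is eagerly allocating every chunk between the checkpoint and a far-ahead sequence number, which the final commit in this series addresses.

    public class CheckpointBitSetFootprint {
        public static void main(String[] args) {
            final int bitArraysSize = 1024;               // the default (soon constant) chunk size
            final long bytesPerSet = bitArraysSize / 8;   // 128 bytes of raw bits per FixedBitSet
            final long farAheadSeqNo = Integer.MAX_VALUE; // a mark far beyond the checkpoint
            final long eagerChain = farAheadSeqNo / bitArraysSize + 1;
            System.out.println(eagerChain);               // 2097152 bit sets
            System.out.println(eagerChain * bytesPerSet); // 268435456 bytes, about 256 MiB of raw bits
        }
    }
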
This setting is really only needed to control the memory footprint of the bit sets but we do not think this setting is going to be needed. This commit removes this setting before it is released to the wild after which we would have to worry about BWC implications. Relates #27191 --- .../common/settings/IndexScopedSettings.java | 1 - .../index/seqno/LocalCheckpointTracker.java | 28 +++++++------------ .../index/seqno/SequenceNumbersService.java | 2 +- .../recovery/RecoverySourceHandler.java | 2 +- .../seqno/LocalCheckpointTrackerTests.java | 24 ++++------------ 5 files changed, 18 insertions(+), 39 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 9d4d30b066f..ed686bf9236 100644 --- a/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/core/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -120,7 +120,6 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.QUERY_STRING_LENIENT_SETTING, IndexSettings.ALLOW_UNMAPPED, IndexSettings.INDEX_CHECK_ON_STARTUP, - LocalCheckpointTracker.SETTINGS_BIT_ARRAYS_SIZE, IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD, IndexSettings.MAX_SLICES_PER_SCROLL, ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING, diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java index 6a7844057fd..5380a3b2b7f 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java @@ -34,10 +34,9 @@ public class LocalCheckpointTracker { /** * We keep a bit for each sequence number that is still pending. To optimize allocation, we do so in multiple arrays allocating them on - * demand and cleaning up while completed. This setting controls the size of the arrays. + * demand and cleaning up while completed. This constant controls the size of the arrays. */ - public static Setting SETTINGS_BIT_ARRAYS_SIZE = - Setting.intSetting("index.seq_no.checkpoint.bit_arrays_size", 1024, 4, Setting.Property.IndexScope); + static final int BIT_ARRAYS_SIZE = 1024; /** * An ordered list of bit arrays representing pending sequence numbers. The list is "anchored" in {@link #firstProcessedSeqNo} which @@ -45,11 +44,6 @@ public class LocalCheckpointTracker { */ final LinkedList processedSeqNo = new LinkedList<>(); - /** - * The size of each bit set representing processed sequence numbers. - */ - private final int bitArraysSize; - /** * The sequence number that the first bit in the first array corresponds to. */ @@ -70,11 +64,10 @@ public class LocalCheckpointTracker { * {@link SequenceNumbers#NO_OPS_PERFORMED} and {@code localCheckpoint} should be set to the last known local checkpoint, * or {@link SequenceNumbers#NO_OPS_PERFORMED}. 
* - * @param indexSettings the index settings * @param maxSeqNo the last sequence number assigned, or {@link SequenceNumbers#NO_OPS_PERFORMED} * @param localCheckpoint the last known local checkpoint, or {@link SequenceNumbers#NO_OPS_PERFORMED} */ - public LocalCheckpointTracker(final IndexSettings indexSettings, final long maxSeqNo, final long localCheckpoint) { + public LocalCheckpointTracker(final long maxSeqNo, final long localCheckpoint) { if (localCheckpoint < 0 && localCheckpoint != SequenceNumbers.NO_OPS_PERFORMED) { throw new IllegalArgumentException( "local checkpoint must be non-negative or [" + SequenceNumbers.NO_OPS_PERFORMED + "] " @@ -84,7 +77,6 @@ public class LocalCheckpointTracker { throw new IllegalArgumentException( "max seq. no. must be non-negative or [" + SequenceNumbers.NO_OPS_PERFORMED + "] but was [" + maxSeqNo + "]"); } - bitArraysSize = SETTINGS_BIT_ARRAYS_SIZE.get(indexSettings.getSettings()); firstProcessedSeqNo = localCheckpoint == SequenceNumbers.NO_OPS_PERFORMED ? 0 : localCheckpoint + 1; nextSeqNo = maxSeqNo == SequenceNumbers.NO_OPS_PERFORMED ? 0 : maxSeqNo + 1; checkpoint = localCheckpoint; @@ -183,7 +175,7 @@ public class LocalCheckpointTracker { @SuppressForbidden(reason = "Object#notifyAll") private void updateCheckpoint() { assert Thread.holdsLock(this); - assert checkpoint < firstProcessedSeqNo + bitArraysSize - 1 : + assert checkpoint < firstProcessedSeqNo + BIT_ARRAYS_SIZE - 1 : "checkpoint should be below the end of the first bit set (o.w. current bit set is completed and shouldn't be there)"; assert getBitSetForSeqNo(checkpoint + 1) == processedSeqNo.getFirst() : "checkpoint + 1 doesn't point to the first bit set (o.w. current bit set is completed and shouldn't be there)"; @@ -196,10 +188,10 @@ public class LocalCheckpointTracker { checkpoint++; // the checkpoint always falls in the first bit set or just before. If it falls // on the last bit of the current bit set, we can clean it. 
- if (checkpoint == firstProcessedSeqNo + bitArraysSize - 1) { + if (checkpoint == firstProcessedSeqNo + BIT_ARRAYS_SIZE - 1) { processedSeqNo.removeFirst(); - firstProcessedSeqNo += bitArraysSize; - assert checkpoint - firstProcessedSeqNo < bitArraysSize; + firstProcessedSeqNo += BIT_ARRAYS_SIZE; + assert checkpoint - firstProcessedSeqNo < BIT_ARRAYS_SIZE; current = processedSeqNo.peekFirst(); } } while (current != null && current.get(seqNoToBitSetOffset(checkpoint + 1))); @@ -218,13 +210,13 @@ public class LocalCheckpointTracker { private FixedBitSet getBitSetForSeqNo(final long seqNo) { assert Thread.holdsLock(this); assert seqNo >= firstProcessedSeqNo : "seqNo: " + seqNo + " firstProcessedSeqNo: " + firstProcessedSeqNo; - final long bitSetOffset = (seqNo - firstProcessedSeqNo) / bitArraysSize; + final long bitSetOffset = (seqNo - firstProcessedSeqNo) / BIT_ARRAYS_SIZE; if (bitSetOffset > Integer.MAX_VALUE) { throw new IndexOutOfBoundsException( "sequence number too high; got [" + seqNo + "], firstProcessedSeqNo [" + firstProcessedSeqNo + "]"); } while (bitSetOffset >= processedSeqNo.size()) { - processedSeqNo.add(new FixedBitSet(bitArraysSize)); + processedSeqNo.add(new FixedBitSet(BIT_ARRAYS_SIZE)); } return processedSeqNo.get((int) bitSetOffset); } @@ -239,7 +231,7 @@ public class LocalCheckpointTracker { private int seqNoToBitSetOffset(final long seqNo) { assert Thread.holdsLock(this); assert seqNo >= firstProcessedSeqNo; - return ((int) (seqNo - firstProcessedSeqNo)) % bitArraysSize; + return ((int) (seqNo - firstProcessedSeqNo)) % BIT_ARRAYS_SIZE; } } diff --git a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java index 1c0b3205584..1b46eedacc4 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/SequenceNumbersService.java @@ -56,7 +56,7 @@ public class SequenceNumbersService extends AbstractIndexShardComponent { final long localCheckpoint, final long globalCheckpoint) { super(shardId, indexSettings); - localCheckpointTracker = new LocalCheckpointTracker(indexSettings, maxSeqNo, localCheckpoint); + localCheckpointTracker = new LocalCheckpointTracker(maxSeqNo, localCheckpoint); globalCheckpointTracker = new GlobalCheckpointTracker(shardId, allocationId, indexSettings, globalCheckpoint); } diff --git a/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java b/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java index c717e29353b..5f692d8e8f5 100644 --- a/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java +++ b/core/src/main/java/org/elasticsearch/indices/recovery/RecoverySourceHandler.java @@ -243,7 +243,7 @@ public class RecoverySourceHandler { logger.trace("all operations up to [{}] completed, checking translog content", endingSeqNo); - final LocalCheckpointTracker tracker = new LocalCheckpointTracker(shard.indexSettings(), startingSeqNo, startingSeqNo - 1); + final LocalCheckpointTracker tracker = new LocalCheckpointTracker(startingSeqNo, startingSeqNo - 1); try (Translog.Snapshot snapshot = shard.getTranslog().newSnapshotFromMinSeqNo(startingSeqNo)) { Translog.Operation operation; while ((operation = snapshot.next()) != null) { diff --git a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java 
index ab513c787c3..ae167bb59f0 100644 --- a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java +++ b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java @@ -21,10 +21,8 @@ package org.elasticsearch.index.seqno; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Randomness; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.test.IndexSettingsModule; import org.junit.Before; import java.util.ArrayList; @@ -38,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.elasticsearch.index.seqno.LocalCheckpointTracker.BIT_ARRAYS_SIZE; import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.isOneOf; @@ -46,19 +45,8 @@ public class LocalCheckpointTrackerTests extends ESTestCase { private LocalCheckpointTracker tracker; - private static final int SMALL_CHUNK_SIZE = 4; - public static LocalCheckpointTracker createEmptyTracker() { - return new LocalCheckpointTracker( - IndexSettingsModule.newIndexSettings( - "test", - Settings - .builder() - .put(LocalCheckpointTracker.SETTINGS_BIT_ARRAYS_SIZE.getKey(), SMALL_CHUNK_SIZE) - .build()), - SequenceNumbers.NO_OPS_PERFORMED, - SequenceNumbers.NO_OPS_PERFORMED - ); + return new LocalCheckpointTracker(SequenceNumbers.NO_OPS_PERFORMED, SequenceNumbers.NO_OPS_PERFORMED); } @Override @@ -98,7 +86,7 @@ public class LocalCheckpointTrackerTests extends ESTestCase { public void testSimpleOverFlow() { List seqNoList = new ArrayList<>(); final boolean aligned = randomBoolean(); - final int maxOps = SMALL_CHUNK_SIZE * randomIntBetween(1, 5) + (aligned ? 0 : randomIntBetween(1, SMALL_CHUNK_SIZE - 1)); + final int maxOps = BIT_ARRAYS_SIZE * randomIntBetween(1, 5) + (aligned ? 0 : randomIntBetween(1, BIT_ARRAYS_SIZE - 1)); for (int i = 0; i < maxOps; i++) { seqNoList.add(i); @@ -109,7 +97,7 @@ public class LocalCheckpointTrackerTests extends ESTestCase { } assertThat(tracker.checkpoint, equalTo(maxOps - 1L)); assertThat(tracker.processedSeqNo.size(), equalTo(aligned ? 
0 : 1)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / SMALL_CHUNK_SIZE) * SMALL_CHUNK_SIZE)); + assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); } public void testConcurrentPrimary() throws InterruptedException { @@ -150,7 +138,7 @@ public class LocalCheckpointTrackerTests extends ESTestCase { tracker.markSeqNoAsCompleted(unFinishedSeq); assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L)); assertThat(tracker.processedSeqNo.size(), isOneOf(0, 1)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / SMALL_CHUNK_SIZE) * SMALL_CHUNK_SIZE)); + assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); } public void testConcurrentReplica() throws InterruptedException { @@ -198,7 +186,7 @@ public class LocalCheckpointTrackerTests extends ESTestCase { assertThat(tracker.getCheckpoint(), equalTo(unFinishedSeq - 1L)); tracker.markSeqNoAsCompleted(unFinishedSeq); assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / SMALL_CHUNK_SIZE) * SMALL_CHUNK_SIZE)); + assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); } public void testWaitForOpsToComplete() throws BrokenBarrierException, InterruptedException { From 59657ad1cb6c1725923f011fafb0bbb1544973a8 Mon Sep 17 00:00:00 2001 From: Jason Tedor Date: Wed, 1 Nov 2017 21:26:52 -0400 Subject: [PATCH 17/17] Lazy initialize checkpoint tracker bit sets This local checkpoint tracker uses collections of bit sets to track which sequence numbers are complete, eventually removing these bit sets when the local checkpoint advances. However, these bit sets were eagerly allocated so that if a sequence number far ahead of the checkpoint was marked as completed, all bit sets between the "last" bit set and the bit set needed to track the marked sequence number were allocated. If this sequence number was too far ahead, the memory requirements could be excessive. This commit opts for a different strategy for holding on to these bit sets and enables them to be lazily allocated. Relates #27179 --- .../index/seqno/LocalCheckpointTracker.java | 106 +++++++++--------- .../seqno/LocalCheckpointTrackerTests.java | 43 +++++-- 2 files changed, 90 insertions(+), 59 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java index 5380a3b2b7f..54751e8958a 100644 --- a/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java +++ b/core/src/main/java/org/elasticsearch/index/seqno/LocalCheckpointTracker.java @@ -19,12 +19,9 @@ package org.elasticsearch.index.seqno; +import com.carrotsearch.hppc.LongObjectHashMap; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.SuppressForbidden; -import org.elasticsearch.common.settings.Setting; -import org.elasticsearch.index.IndexSettings; - -import java.util.LinkedList; /** * This class generates sequences numbers and keeps track of the so-called "local checkpoint" which is the highest number for which all @@ -33,21 +30,16 @@ import java.util.LinkedList; public class LocalCheckpointTracker { /** - * We keep a bit for each sequence number that is still pending. To optimize allocation, we do so in multiple arrays allocating them on - * demand and cleaning up while completed. This constant controls the size of the arrays. 
+ * We keep a bit for each sequence number that is still pending. To optimize allocation, we do so in multiple sets allocating them on + * demand and cleaning up while completed. This constant controls the size of the sets. */ - static final int BIT_ARRAYS_SIZE = 1024; + static final int BIT_SET_SIZE = 1024; /** - * An ordered list of bit arrays representing pending sequence numbers. The list is "anchored" in {@link #firstProcessedSeqNo} which - * marks the sequence number the fist bit in the first array corresponds to. + * A collection of bit sets representing pending sequence numbers. Each sequence number is mapped to a bit set by dividing by the + * bit set size. */ - final LinkedList processedSeqNo = new LinkedList<>(); - - /** - * The sequence number that the first bit in the first array corresponds to. - */ - long firstProcessedSeqNo; + final LongObjectHashMap processedSeqNo = new LongObjectHashMap<>(); /** * The current local checkpoint, i.e., all sequence numbers no more than this number have been completed. @@ -77,7 +69,6 @@ public class LocalCheckpointTracker { throw new IllegalArgumentException( "max seq. no. must be non-negative or [" + SequenceNumbers.NO_OPS_PERFORMED + "] but was [" + maxSeqNo + "]"); } - firstProcessedSeqNo = localCheckpoint == SequenceNumbers.NO_OPS_PERFORMED ? 0 : localCheckpoint + 1; nextSeqNo = maxSeqNo == SequenceNumbers.NO_OPS_PERFORMED ? 0 : maxSeqNo + 1; checkpoint = localCheckpoint; } @@ -122,7 +113,6 @@ public class LocalCheckpointTracker { assert checkpoint != SequenceNumbers.UNASSIGNED_SEQ_NO; assert checkpoint <= this.checkpoint; processedSeqNo.clear(); - firstProcessedSeqNo = checkpoint + 1; this.checkpoint = checkpoint; } @@ -175,24 +165,28 @@ public class LocalCheckpointTracker { @SuppressForbidden(reason = "Object#notifyAll") private void updateCheckpoint() { assert Thread.holdsLock(this); - assert checkpoint < firstProcessedSeqNo + BIT_ARRAYS_SIZE - 1 : - "checkpoint should be below the end of the first bit set (o.w. current bit set is completed and shouldn't be there)"; - assert getBitSetForSeqNo(checkpoint + 1) == processedSeqNo.getFirst() : - "checkpoint + 1 doesn't point to the first bit set (o.w. current bit set is completed and shouldn't be there)"; assert getBitSetForSeqNo(checkpoint + 1).get(seqNoToBitSetOffset(checkpoint + 1)) : "updateCheckpoint is called but the bit following the checkpoint is not set"; try { // keep it simple for now, get the checkpoint one by one; in the future we can optimize and read words - FixedBitSet current = processedSeqNo.getFirst(); + long bitSetKey = getBitSetKey(checkpoint); + FixedBitSet current = processedSeqNo.get(bitSetKey); + if (current == null) { + // the bit set corresponding to the checkpoint has already been removed, set ourselves up for the next bit set + assert checkpoint % BIT_SET_SIZE == BIT_SET_SIZE - 1; + current = processedSeqNo.get(++bitSetKey); + } do { checkpoint++; - // the checkpoint always falls in the first bit set or just before. If it falls - // on the last bit of the current bit set, we can clean it. - if (checkpoint == firstProcessedSeqNo + BIT_ARRAYS_SIZE - 1) { - processedSeqNo.removeFirst(); - firstProcessedSeqNo += BIT_ARRAYS_SIZE; - assert checkpoint - firstProcessedSeqNo < BIT_ARRAYS_SIZE; - current = processedSeqNo.peekFirst(); + /* + * The checkpoint always falls in the current bit set or we have already cleaned it; if it falls on the last bit of the + * current bit set, we can clean it. 
+ */ + if (checkpoint == lastSeqNoInBitSet(bitSetKey)) { + assert current != null; + final FixedBitSet removed = processedSeqNo.remove(bitSetKey); + assert removed == current; + current = processedSeqNo.get(++bitSetKey); } } while (current != null && current.get(seqNoToBitSetOffset(checkpoint + 1))); } finally { @@ -201,37 +195,45 @@ public class LocalCheckpointTracker { } } - /** - * Return the bit array for the provided sequence number, possibly allocating a new array if needed. - * - * @param seqNo the sequence number to obtain the bit array for - * @return the bit array corresponding to the provided sequence number - */ - private FixedBitSet getBitSetForSeqNo(final long seqNo) { - assert Thread.holdsLock(this); - assert seqNo >= firstProcessedSeqNo : "seqNo: " + seqNo + " firstProcessedSeqNo: " + firstProcessedSeqNo; - final long bitSetOffset = (seqNo - firstProcessedSeqNo) / BIT_ARRAYS_SIZE; - if (bitSetOffset > Integer.MAX_VALUE) { - throw new IndexOutOfBoundsException( - "sequence number too high; got [" + seqNo + "], firstProcessedSeqNo [" + firstProcessedSeqNo + "]"); - } - while (bitSetOffset >= processedSeqNo.size()) { - processedSeqNo.add(new FixedBitSet(BIT_ARRAYS_SIZE)); - } - return processedSeqNo.get((int) bitSetOffset); + private long lastSeqNoInBitSet(final long bitSetKey) { + return (1 + bitSetKey) * BIT_SET_SIZE - 1; } /** - * Obtain the position in the bit array corresponding to the provided sequence number. The bit array corresponding to the sequence - * number can be obtained via {@link #getBitSetForSeqNo(long)}. + * Return the bit set for the provided sequence number, possibly allocating a new set if needed. + * + * @param seqNo the sequence number to obtain the bit set for + * @return the bit set corresponding to the provided sequence number + */ + private long getBitSetKey(final long seqNo) { + assert Thread.holdsLock(this); + return seqNo / BIT_SET_SIZE; + } + + private FixedBitSet getBitSetForSeqNo(final long seqNo) { + assert Thread.holdsLock(this); + final long bitSetKey = getBitSetKey(seqNo); + final int index = processedSeqNo.indexOf(bitSetKey); + final FixedBitSet bitSet; + if (processedSeqNo.indexExists(index)) { + bitSet = processedSeqNo.indexGet(index); + } else { + bitSet = new FixedBitSet(BIT_SET_SIZE); + processedSeqNo.indexInsert(index, bitSetKey, bitSet); + } + return bitSet; + } + + /** + * Obtain the position in the bit set corresponding to the provided sequence number. The bit set corresponding to the sequence number + * can be obtained via {@link #getBitSetForSeqNo(long)}. 
* * @param seqNo the sequence number to obtain the position for - * @return the position in the bit array corresponding to the provided sequence number + * @return the position in the bit set corresponding to the provided sequence number */ private int seqNoToBitSetOffset(final long seqNo) { assert Thread.holdsLock(this); - assert seqNo >= firstProcessedSeqNo; - return ((int) (seqNo - firstProcessedSeqNo)) % BIT_ARRAYS_SIZE; + return Math.toIntExact(seqNo % BIT_SET_SIZE); } } diff --git a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java index ae167bb59f0..eb62391e0b0 100644 --- a/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java +++ b/core/src/test/java/org/elasticsearch/index/seqno/LocalCheckpointTrackerTests.java @@ -19,10 +19,14 @@ package org.elasticsearch.index.seqno; +import com.carrotsearch.hppc.LongObjectHashMap; +import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Randomness; import org.elasticsearch.common.util.concurrent.AbstractRunnable; import org.elasticsearch.test.ESTestCase; +import org.hamcrest.BaseMatcher; +import org.hamcrest.Description; import org.junit.Before; import java.util.ArrayList; @@ -36,8 +40,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import java.util.stream.IntStream; -import static org.elasticsearch.index.seqno.LocalCheckpointTracker.BIT_ARRAYS_SIZE; -import static org.hamcrest.Matchers.empty; +import static org.elasticsearch.index.seqno.LocalCheckpointTracker.BIT_SET_SIZE; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.isOneOf; @@ -83,10 +86,19 @@ public class LocalCheckpointTrackerTests extends ESTestCase { assertThat(tracker.getCheckpoint(), equalTo(2L)); } + public void testLazyInitialization() { + /* + * Previously this would allocate the entire chain of bit sets to the one for the sequence number being marked; for very large + * sequence numbers this could lead to excessive memory usage resulting in out of memory errors. + */ + tracker.markSeqNoAsCompleted(randomNonNegativeLong()); + assertThat(tracker.processedSeqNo.size(), equalTo(1)); + } + public void testSimpleOverFlow() { List seqNoList = new ArrayList<>(); final boolean aligned = randomBoolean(); - final int maxOps = BIT_ARRAYS_SIZE * randomIntBetween(1, 5) + (aligned ? 0 : randomIntBetween(1, BIT_ARRAYS_SIZE - 1)); + final int maxOps = BIT_SET_SIZE * randomIntBetween(1, 5) + (aligned ? 0 : randomIntBetween(1, BIT_SET_SIZE - 1)); for (int i = 0; i < maxOps; i++) { seqNoList.add(i); @@ -97,7 +109,9 @@ public class LocalCheckpointTrackerTests extends ESTestCase { } assertThat(tracker.checkpoint, equalTo(maxOps - 1L)); assertThat(tracker.processedSeqNo.size(), equalTo(aligned ? 
0 : 1)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); + if (aligned == false) { + assertThat(tracker.processedSeqNo.keys().iterator().next().value, equalTo(tracker.checkpoint / BIT_SET_SIZE)); + } } public void testConcurrentPrimary() throws InterruptedException { @@ -138,7 +152,9 @@ public class LocalCheckpointTrackerTests extends ESTestCase { tracker.markSeqNoAsCompleted(unFinishedSeq); assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L)); assertThat(tracker.processedSeqNo.size(), isOneOf(0, 1)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); + if (tracker.processedSeqNo.size() == 1) { + assertThat(tracker.processedSeqNo.keys().iterator().next().value, equalTo(tracker.checkpoint / BIT_SET_SIZE)); + } } public void testConcurrentReplica() throws InterruptedException { @@ -186,7 +202,10 @@ public class LocalCheckpointTrackerTests extends ESTestCase { assertThat(tracker.getCheckpoint(), equalTo(unFinishedSeq - 1L)); tracker.markSeqNoAsCompleted(unFinishedSeq); assertThat(tracker.getCheckpoint(), equalTo(maxOps - 1L)); - assertThat(tracker.firstProcessedSeqNo, equalTo(((long) maxOps / BIT_ARRAYS_SIZE) * BIT_ARRAYS_SIZE)); + assertThat(tracker.processedSeqNo.size(), isOneOf(0, 1)); + if (tracker.processedSeqNo.size() == 1) { + assertThat(tracker.processedSeqNo.keys().iterator().next().value, equalTo(tracker.checkpoint / BIT_SET_SIZE)); + } } public void testWaitForOpsToComplete() throws BrokenBarrierException, InterruptedException { @@ -241,7 +260,17 @@ public class LocalCheckpointTrackerTests extends ESTestCase { tracker.resetCheckpoint(localCheckpoint); assertThat(tracker.getCheckpoint(), equalTo((long) localCheckpoint)); assertThat(tracker.getMaxSeqNo(), equalTo((long) maxSeqNo)); - assertThat(tracker.processedSeqNo, empty()); + assertThat(tracker.processedSeqNo, new BaseMatcher>() { + @Override + public boolean matches(Object item) { + return (item instanceof LongObjectHashMap && ((LongObjectHashMap) item).isEmpty()); + } + + @Override + public void describeTo(Description description) { + description.appendText("empty"); + } + }); assertThat(tracker.generateSeqNo(), equalTo((long) (maxSeqNo + 1))); } }
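
To make the effect of this last commit concrete, below is a minimal, self-contained sketch of the keyed, lazily allocated bit-set scheme. It uses java.util.BitSet and HashMap in place of Lucene's FixedBitSet and HPPC's LongObjectHashMap, and the class and method names are invented for illustration; it is not the actual LocalCheckpointTracker.

    import java.util.BitSet;
    import java.util.HashMap;
    import java.util.Map;

    // Sketch of the lazy scheme: each sequence number maps to the bit set keyed by
    // seqNo / BIT_SET_SIZE, and a bit set is allocated only when a sequence number in
    // its range is actually marked as completed.
    public class LazyCheckpointSketch {

        static final int BIT_SET_SIZE = 1024;

        private final Map<Long, BitSet> processedSeqNo = new HashMap<>();
        private long checkpoint = -1; // all sequence numbers <= checkpoint are completed

        synchronized void markSeqNoAsCompleted(long seqNo) {
            BitSet bitSet = processedSeqNo.computeIfAbsent(seqNo / BIT_SET_SIZE, k -> new BitSet(BIT_SET_SIZE));
            bitSet.set((int) (seqNo % BIT_SET_SIZE));
            if (seqNo == checkpoint + 1) {
                advanceCheckpoint();
            }
        }

        private void advanceCheckpoint() {
            long key = (checkpoint + 1) / BIT_SET_SIZE;
            BitSet current = processedSeqNo.get(key);
            while (current != null && current.get((int) ((checkpoint + 1) % BIT_SET_SIZE))) {
                checkpoint++;
                // once the checkpoint passes the last bit of a set, that set can be freed
                if ((checkpoint + 1) % BIT_SET_SIZE == 0) {
                    processedSeqNo.remove(key);
                    key++;
                    current = processedSeqNo.get(key);
                }
            }
        }

        synchronized long getCheckpoint() {
            return checkpoint;
        }

        public static void main(String[] args) {
            LazyCheckpointSketch tracker = new LazyCheckpointSketch();
            // Marking a far-ahead sequence number allocates a single bit set rather than
            // the whole chain up to it, which is the memory problem this commit fixes.
            tracker.markSeqNoAsCompleted(1_000_000_000L);
            for (long seqNo = 0; seqNo < 3 * BIT_SET_SIZE; seqNo++) {
                tracker.markSeqNoAsCompleted(seqNo);
            }
            System.out.println(tracker.getCheckpoint()); // 3 * BIT_SET_SIZE - 1 = 3071
        }
    }

The point of the keyed map is that marking a sequence number far ahead of the checkpoint allocates exactly one bit set, whereas the previous list-based layout allocated every bit set between firstProcessedSeqNo and that sequence number.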