From 525101b64d9a22ffff9a29c0309444f721ebb763 Mon Sep 17 00:00:00 2001
From: Jim Ferenczi
Date: Thu, 20 Apr 2017 22:12:20 +0200
Subject: [PATCH 1/8] Query string default field (#24214)

Currently any `query_string` query that uses a wildcard field with no matching field is rewritten with the `_all` field. For instance:

````
#creating test doc
PUT testing/t/1
{
  "test": {
    "field_one": "hello",
    "field_two": "world"
  }
}

#searching abc.* (does not exist) -> hit
GET testing/t/_search
{
  "query": {
    "query_string": {
      "fields": [
        "abc.*"
      ],
      "query": "hello"
    }
  }
}
````

This bug first appeared in 5.0 after the query refactoring and impacts only users that use `_all` as the default field. Indices created in 6.x will not have this problem since `_all` is deactivated in this version. This change fixes this bug by returning a MatchNoDocsQuery for any term that expands to an empty list of fields.
---
 .../classic/MapperQueryParser.java            | 15 ++++++++---
 .../index/query/QueryStringQueryBuilder.java  |  6 ++++-
 .../query/QueryStringQueryBuilderTests.java   | 26 ++++++++++++++++---
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
index 79f522e8c1f..015972c5684 100644
--- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
+++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
@@ -57,7 +57,7 @@ import java.util.Collection;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+import java.util.Collections;
 import static java.util.Collections.unmodifiableMap;
 import static org.elasticsearch.common.lucene.search.Queries.fixNegativeQueryIfNeeded;
@@ -91,7 +91,8 @@ public class MapperQueryParser extends QueryParser {
     public void reset(QueryParserSettings settings) {
         this.settings = settings;
-        if (settings.fieldsAndWeights().isEmpty()) {
+        if (settings.fieldsAndWeights() == null) {
+            // this query has no explicit fields to query so we fall back to the default field
             this.field = settings.defaultField();
         } else if (settings.fieldsAndWeights().size() == 1) {
             this.field = settings.fieldsAndWeights().keySet().iterator().next();
@@ -148,6 +149,11 @@ public class MapperQueryParser extends QueryParser {
         if (fields != null) {
             if (fields.size() == 1) {
                 return getFieldQuerySingle(fields.iterator().next(), queryText, quoted);
+            } else if (fields.isEmpty()) {
+                // the requested fields do not match any field in the mapping
+                // happens for wildcard fields only since we cannot expand to a valid field name
+                // if there is no match in the mappings.
+                return new MatchNoDocsQuery("empty fields");
             }
             if (settings.useDisMax()) {
                 List<Query> queries = new ArrayList<>();
@@ -721,7 +727,7 @@ public class MapperQueryParser extends QueryParser {
     }

     private Query applyBoost(String field, Query q) {
-        Float fieldBoost = settings.fieldsAndWeights().get(field);
+        Float fieldBoost = settings.fieldsAndWeights() == null ? null : settings.fieldsAndWeights().get(field);
         if (fieldBoost != null && fieldBoost != 1f) {
             return new BoostQuery(q, fieldBoost);
         }
@@ -780,7 +786,8 @@ public class MapperQueryParser extends QueryParser {
         if (field != null) {
             fields = context.simpleMatchToIndexNames(field);
         } else {
-            fields = settings.fieldsAndWeights().keySet();
+            Map<String, Float> fieldsAndWeights = settings.fieldsAndWeights();
+            fields = fieldsAndWeights == null ?
Collections.emptyList() : settings.fieldsAndWeights().keySet(); } return fields; } diff --git a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java index ca716372571..fd6f33e27ba 100644 --- a/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/QueryStringQueryBuilder.java @@ -981,7 +981,11 @@ public class QueryStringQueryBuilder extends AbstractQueryBuilder 0); + QueryStringQueryBuilder queryStringQueryBuilder = + new QueryStringQueryBuilder("foo bar").field("invalid*"); + Query query = queryStringQueryBuilder.toQuery(createShardContext()); + Query expectedQuery = new BooleanQuery.Builder() + .add(new MatchNoDocsQuery("empty fields"), Occur.SHOULD) + .add(new MatchNoDocsQuery("empty fields"), Occur.SHOULD) + .build(); + assertThat(expectedQuery, equalTo(query)); + + queryStringQueryBuilder = + new QueryStringQueryBuilder("field:foo bar").field("invalid*"); + query = queryStringQueryBuilder.toQuery(createShardContext()); + expectedQuery = new BooleanQuery.Builder() + .add(new TermQuery(new Term("field", "foo")), Occur.SHOULD) + .add(new MatchNoDocsQuery("empty fields"), Occur.SHOULD) + .build(); + assertThat(expectedQuery, equalTo(query)); + + + } + public void testToQuerySplitOnWhitespace() throws IOException { assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); // splitOnWhitespace=false From 4a45579506513f3a88cd51d79d2ff0c342021f93 Mon Sep 17 00:00:00 2001 From: Fabien Baligand Date: Fri, 21 Apr 2017 00:53:28 +0200 Subject: [PATCH 2/8] token_count type : add an option to count tokens (fix #23227) (#24175) Add option "enable_position_increments" with default value true. 
If option is set to false, indexed value is the number of tokens (not position increments count) --- .../index/mapper/TokenCountFieldMapper.java | 53 +++++++++++++++---- .../TokenCountFieldMapperIntegrationIT.java | 35 ++++++++---- .../mapper/TokenCountFieldMapperTests.java | 52 ++++++++++++------ .../mapping/types/token-count.asciidoc | 12 ++--- 4 files changed, 108 insertions(+), 44 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java index a2d40cd08bd..2ed6658e87c 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java @@ -22,10 +22,8 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; -import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -36,6 +34,7 @@ import java.util.List; import java.util.Map; import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeIntegerValue; +import static org.elasticsearch.common.xcontent.support.XContentMapValues.nodeBooleanValue; import static org.elasticsearch.index.mapper.TypeParsers.parseField; /** @@ -47,10 +46,12 @@ public class TokenCountFieldMapper extends FieldMapper { public static class Defaults { public static final MappedFieldType FIELD_TYPE = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.INTEGER); + public static final boolean DEFAULT_POSITION_INCREMENTS = true; } public static class Builder extends FieldMapper.Builder { private NamedAnalyzer analyzer; + private boolean enablePositionIncrements = Defaults.DEFAULT_POSITION_INCREMENTS; public Builder(String name) { super(name, Defaults.FIELD_TYPE, Defaults.FIELD_TYPE); @@ -66,18 +67,26 @@ public class TokenCountFieldMapper extends FieldMapper { return analyzer; } + public Builder enablePositionIncrements(boolean enablePositionIncrements) { + this.enablePositionIncrements = enablePositionIncrements; + return this; + } + + public boolean enablePositionIncrements() { + return enablePositionIncrements; + } + @Override public TokenCountFieldMapper build(BuilderContext context) { setupFieldType(context); return new TokenCountFieldMapper(name, fieldType, defaultFieldType, - context.indexSettings(), analyzer, multiFieldsBuilder.build(this, context), copyTo); + context.indexSettings(), analyzer, enablePositionIncrements, multiFieldsBuilder.build(this, context), copyTo); } } public static class TypeParser implements Mapper.TypeParser { @Override - @SuppressWarnings("unchecked") - public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { TokenCountFieldMapper.Builder builder = new TokenCountFieldMapper.Builder(name); for (Iterator> iterator = node.entrySet().iterator(); iterator.hasNext();) { Map.Entry entry = iterator.next(); @@ -93,6 +102,9 @@ public class TokenCountFieldMapper extends FieldMapper { } 
builder.analyzer(analyzer); iterator.remove(); + } else if (propName.equals("enable_position_increments")) { + builder.enablePositionIncrements(nodeBooleanValue(propNode)); + iterator.remove(); } } parseField(builder, name, node, parserContext); @@ -104,11 +116,13 @@ public class TokenCountFieldMapper extends FieldMapper { } private NamedAnalyzer analyzer; + private boolean enablePositionIncrements; protected TokenCountFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType, - Settings indexSettings, NamedAnalyzer analyzer, MultiFields multiFields, CopyTo copyTo) { + Settings indexSettings, NamedAnalyzer analyzer, boolean enablePositionIncrements, MultiFields multiFields, CopyTo copyTo) { super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo); this.analyzer = analyzer; + this.enablePositionIncrements = enablePositionIncrements; } @Override @@ -124,7 +138,7 @@ public class TokenCountFieldMapper extends FieldMapper { if (value == null) { tokenCount = (Integer) fieldType().nullValue(); } else { - tokenCount = countPositions(analyzer, name(), value); + tokenCount = countPositions(analyzer, name(), value, enablePositionIncrements); } boolean indexed = fieldType().indexOptions() != IndexOptions.NONE; @@ -138,19 +152,26 @@ public class TokenCountFieldMapper extends FieldMapper { * @param analyzer analyzer to create token stream * @param fieldName field name to pass to analyzer * @param fieldValue field value to pass to analyzer + * @param enablePositionIncrements should we count position increments ? * @return number of position increments in a token stream * @throws IOException if tokenStream throws it */ - static int countPositions(Analyzer analyzer, String fieldName, String fieldValue) throws IOException { + static int countPositions(Analyzer analyzer, String fieldName, String fieldValue, boolean enablePositionIncrements) throws IOException { try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) { int count = 0; PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { - count += position.getPositionIncrement(); + if (enablePositionIncrements) { + count += position.getPositionIncrement(); + } else { + count += Math.min(1, position.getPositionIncrement()); + } } tokenStream.end(); - count += position.getPositionIncrement(); + if (enablePositionIncrements) { + count += position.getPositionIncrement(); + } return count; } } @@ -163,6 +184,14 @@ public class TokenCountFieldMapper extends FieldMapper { return analyzer.name(); } + /** + * Indicates if position increments are counted. 
+ * @return true if position increments are counted + */ + public boolean enablePositionIncrements() { + return enablePositionIncrements; + } + @Override protected String contentType() { return CONTENT_TYPE; @@ -172,12 +201,16 @@ public class TokenCountFieldMapper extends FieldMapper { protected void doMerge(Mapper mergeWith, boolean updateAllTypes) { super.doMerge(mergeWith, updateAllTypes); this.analyzer = ((TokenCountFieldMapper) mergeWith).analyzer; + this.enablePositionIncrements = ((TokenCountFieldMapper) mergeWith).enablePositionIncrements; } @Override protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, Params params) throws IOException { super.doXContentBody(builder, includeDefaults, params); builder.field("analyzer", analyzer()); + if (includeDefaults || enablePositionIncrements() != Defaults.DEFAULT_POSITION_INCREMENTS) { + builder.field("enable_position_increments", enablePositionIncrements()); + } } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java b/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java index 48307b6349c..75b588df85a 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperIntegrationIT.java @@ -131,6 +131,12 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase { .field("analyzer", "standard") .field("doc_values", true) .endObject() + .startObject("token_count_without_position_increments") + .field("type", "token_count") + .field("analyzer", "english") + .field("enable_position_increments", false) + .field("store", true) + .endObject() .endObject() .endObject() .endObject() @@ -169,6 +175,7 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase { private SearchRequestBuilder prepareSearch() { SearchRequestBuilder request = client().prepareSearch("test").setTypes("test"); request.addStoredField("foo.token_count"); + request.addStoredField("foo.token_count_without_position_increments"); if (loadCountedFields) { request.addStoredField("foo"); } @@ -186,32 +193,38 @@ public class TokenCountFieldMapperIntegrationIT extends ESIntegTestCase { for (SearchHit hit : result.getHits()) { String id = hit.getId(); if (id.equals("single")) { - assertSearchHit(hit, 4); + assertSearchHit(hit, new int[]{4}, new int[]{4}); } else if (id.equals("bulk1")) { - assertSearchHit(hit, 3); + assertSearchHit(hit, new int[]{3}, new int[]{3}); } else if (id.equals("bulk2")) { - assertSearchHit(hit, 5); + assertSearchHit(hit, new int[]{5}, new int[]{4}); } else if (id.equals("multi")) { - assertSearchHit(hit, 2, 7); + assertSearchHit(hit, new int[]{2, 7}, new int[]{2, 7}); } else if (id.equals("multibulk1")) { - assertSearchHit(hit, 1, 8); + assertSearchHit(hit, new int[]{1, 8}, new int[]{1, 8}); } else if (id.equals("multibulk2")) { - assertSearchHit(hit, 6, 10); + assertSearchHit(hit, new int[]{6, 10}, new int[]{3, 9}); } else { throw new ElasticsearchException("Unexpected response!"); } } } - private void assertSearchHit(SearchHit hit, int... 
termCounts) { + private void assertSearchHit(SearchHit hit, int[] standardTermCounts, int[] englishTermCounts) { assertThat(hit.field("foo.token_count"), not(nullValue())); - assertThat(hit.field("foo.token_count").getValues().size(), equalTo(termCounts.length)); - for (int i = 0; i < termCounts.length; i++) { - assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(termCounts[i])); + assertThat(hit.field("foo.token_count").getValues().size(), equalTo(standardTermCounts.length)); + for (int i = 0; i < standardTermCounts.length; i++) { + assertThat((Integer) hit.field("foo.token_count").getValues().get(i), equalTo(standardTermCounts[i])); + } + + assertThat(hit.field("foo.token_count_without_position_increments"), not(nullValue())); + assertThat(hit.field("foo.token_count_without_position_increments").getValues().size(), equalTo(englishTermCounts.length)); + for (int i = 0; i < englishTermCounts.length; i++) { + assertThat((Integer) hit.field("foo.token_count_without_position_increments").getValues().get(i), equalTo(englishTermCounts[i])); } if (loadCountedFields && storeCountedFields) { - assertThat(hit.field("foo").getValues().size(), equalTo(termCounts.length)); + assertThat(hit.field("foo").getValues().size(), equalTo(standardTermCounts.length)); } } } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperTests.java index 02128a4254a..0f976e12f39 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/TokenCountFieldMapperTests.java @@ -24,23 +24,18 @@ import org.apache.lucene.analysis.CannedTokenStream; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.elasticsearch.Version; -import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.compress.CompressedXContent; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.IndexService; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.InternalSettingsPlugin; -import org.elasticsearch.test.VersionUtils; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; -import static com.carrotsearch.randomizedtesting.RandomizedTest.getRandom; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; @@ -80,26 +75,49 @@ public class TokenCountFieldMapperTests extends ESSingleNodeTestCase { assertThat(((TokenCountFieldMapper) stage2.mappers().smartNameFieldMapper("tc")).analyzer(), equalTo("standard")); } - public void testCountPositions() throws IOException { - // We're looking to make sure that we: - Token t1 = new Token(); // Don't count tokens without an increment + /** + * When position increments are counted, we're looking to make sure that we: + - don't count tokens without an increment + - count normal tokens with one increment + - count funny tokens with more than one increment + - count the final token increments on the rare token streams that have them + */ + public void testCountPositionsWithIncrements() throws IOException { + Analyzer analyzer = createMockAnalyzer(); + assertThat(TokenCountFieldMapper.countPositions(analyzer, "", "", 
true), equalTo(7)); + } + + /** + * When position increments are not counted (only positions are counted), we're looking to make sure that we: + - don't count tokens without an increment + - count normal tokens with one increment + - count funny tokens with more than one increment as only one + - don't count the final token increments on the rare token streams that have them + */ + public void testCountPositionsWithoutIncrements() throws IOException { + Analyzer analyzer = createMockAnalyzer(); + assertThat(TokenCountFieldMapper.countPositions(analyzer, "", "", false), equalTo(2)); + } + + private Analyzer createMockAnalyzer() { + Token t1 = new Token(); // Token without an increment t1.setPositionIncrement(0); Token t2 = new Token(); - t2.setPositionIncrement(1); // Count normal tokens with one increment + t2.setPositionIncrement(1); // Normal token with one increment Token t3 = new Token(); - t2.setPositionIncrement(2); // Count funny tokens with more than one increment - int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them + t2.setPositionIncrement(2); // Funny token with more than one increment + int finalTokenIncrement = 4; // Final token increment Token[] tokens = new Token[] {t1, t2, t3}; Collections.shuffle(Arrays.asList(tokens), random()); final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens); // TODO: we have no CannedAnalyzer? Analyzer analyzer = new Analyzer() { - @Override - public TokenStreamComponents createComponents(String fieldName) { - return new TokenStreamComponents(new MockTokenizer(), tokenStream); - } - }; - assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7)); + @Override + public TokenStreamComponents createComponents(String fieldName) { + return new TokenStreamComponents(new MockTokenizer(), tokenStream); + } + }; + return analyzer; } @Override diff --git a/docs/reference/mapping/types/token-count.asciidoc b/docs/reference/mapping/types/token-count.asciidoc index 704a94de0f7..60a95ec1d40 100644 --- a/docs/reference/mapping/types/token-count.asciidoc +++ b/docs/reference/mapping/types/token-count.asciidoc @@ -48,12 +48,6 @@ GET my_index/_search <2> The `name.length` field is a `token_count` <> which will index the number of tokens in the `name` field. <3> This query matches only the document containing `Rachel Alice Williams`, as it contains three tokens. -[NOTE] -=================================================================== -Technically the `token_count` type sums position increments rather than -counting tokens. This means that even if the analyzer filters out stop -words they are included in the count. -=================================================================== [[token-count-params]] ==== Parameters for `token_count` fields @@ -68,6 +62,12 @@ The following parameters are accepted by `token_count` fields: value. Required. For best performance, use an analyzer without token filters. +`enable_position_increments`:: + +Indicates if position increments should be counted. +Set to `false` if you don't want to count tokens removed by analyzer filters (like <>). +Defaults to `true`. + <>:: Mapping field-level query time boosting. 
Accepts a floating point number, defaults From a4365971a09337bf4d639bbabb2377137c576167 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 21 Apr 2017 02:56:26 +0200 Subject: [PATCH 3/8] [TEST] make sure that the random query_string query generator defines a default_field or a list of fields --- .../apache/lucene/queryparser/classic/MapperQueryParser.java | 2 +- .../index/query/QueryStringQueryBuilderTests.java | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index 015972c5684..947c7cf3ccd 100644 --- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -787,7 +787,7 @@ public class MapperQueryParser extends QueryParser { fields = context.simpleMatchToIndexNames(field); } else { Map fieldsAndWeights = settings.fieldsAndWeights(); - fields = fieldsAndWeights == null ? Collections.emptyList() : settings.fieldsAndWeights().keySet(); + fields = fieldsAndWeights == null ? Collections.emptyList() : fieldsAndWeights.keySet(); } return fields; } diff --git a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index 5b9990c81cd..f478b5437b4 100644 --- a/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -85,8 +85,7 @@ public class QueryStringQueryBuilderTests extends AbstractQueryTestCase Date: Fri, 21 Apr 2017 10:32:33 +0200 Subject: [PATCH 4/8] Speed up parsing of large `terms` queries. (#24210) The addition of the normalization feature on keywords slowed down the parsing of large `terms` queries since all terms now have to go through normalization. However this can be avoided in the default case that the analyzer is a `keyword` analyzer since all that normalization will do is a UTF8 conversion. Using `Analyzer.normalize` for that is a bit overkill and could be skipped. --- .../elasticsearch/index/mapper/KeywordFieldMapper.java | 9 +++++++++ .../index/mapper/KeywordFieldTypeTests.java | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index f988a68d5ef..3e09199abca 100644 --- a/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/core/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -236,6 +236,15 @@ public final class KeywordFieldMapper extends FieldMapper { @Override protected BytesRef indexedValueForSearch(Object value) { + if (searchAnalyzer() == Lucene.KEYWORD_ANALYZER) { + // keyword analyzer with the default attribute source which encodes terms using UTF8 + // in that case we skip normalization, which may be slow if there many terms need to + // parse (eg. 
large terms query) since Analyzer.normalize involves things like creating + // attributes through reflection + // This if statement will be used whenever a normalizer is NOT configured + return super.indexedValueForSearch(value); + } + if (value == null) { return null; } diff --git a/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 5c418b7ce26..809ceb58310 100644 --- a/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/core/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -150,4 +150,13 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase { () -> ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true)); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } + + public void testNormalizeQueries() { + MappedFieldType ft = createDefaultFieldType(); + ft.setName("field"); + ft.setSearchAnalyzer(Lucene.KEYWORD_ANALYZER); + assertEquals(new TermQuery(new Term("field", new BytesRef("FOO"))), ft.termQuery("FOO", null)); + ft.setSearchAnalyzer(Lucene.STANDARD_ANALYZER); + assertEquals(new TermQuery(new Term("field", new BytesRef("foo"))), ft.termQuery("FOO", null)); + } } From 81b64ed587817f9f9398dc38d0db2c3fac7b5af7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 21 Apr 2017 10:33:02 +0200 Subject: [PATCH 5/8] IndicesQueryCache should delegate the scorerSupplier method. (#24209) Otherwise the range improvements that we did on range queries would not work. This is similar to https://issues.apache.org/jira/browse/LUCENE-7749. --- .../indices/IndicesQueryCache.java | 7 ++ .../indices/IndicesQueryCacheTests.java | 79 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java b/core/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java index 3fda3d3f806..479208a5038 100644 --- a/core/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java +++ b/core/src/main/java/org/elasticsearch/indices/IndicesQueryCache.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryCache; import org.apache.lucene.search.QueryCachingPolicy; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.lucene.ShardCoreKeyMap; @@ -145,6 +146,12 @@ public class IndicesQueryCache extends AbstractComponent implements QueryCache, return in.scorer(context); } + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + shardKeyMap.add(context.reader()); + return in.scorerSupplier(context); + } + @Override public BulkScorer bulkScorer(LeafReaderContext context) throws IOException { shardKeyMap.add(context.reader()); diff --git a/core/src/test/java/org/elasticsearch/indices/IndicesQueryCacheTests.java b/core/src/test/java/org/elasticsearch/indices/IndicesQueryCacheTests.java index 10f098787c0..24cbed2d4bc 100644 --- a/core/src/test/java/org/elasticsearch/indices/IndicesQueryCacheTests.java +++ b/core/src/test/java/org/elasticsearch/indices/IndicesQueryCacheTests.java @@ -23,25 +23,28 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import 
org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryCachingPolicy; import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.ScorerSupplier; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.cache.query.QueryCacheStats; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.indices.IndicesQueryCache; import org.elasticsearch.test.ESTestCase; import java.io.IOException; +import java.util.Set; public class IndicesQueryCacheTests extends ESTestCase { @@ -328,4 +331,76 @@ public class IndicesQueryCacheTests extends ESTestCase { cache.close(); // this triggers some assertions } + private static class DummyWeight extends Weight { + + private final Weight weight; + private boolean scorerCalled; + private boolean scorerSupplierCalled; + + DummyWeight(Weight weight) { + super(weight.getQuery()); + this.weight = weight; + } + + @Override + public void extractTerms(Set terms) { + weight.extractTerms(terms); + } + + @Override + public Explanation explain(LeafReaderContext context, int doc) throws IOException { + return weight.explain(context, doc); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + scorerCalled = true; + return weight.scorer(context); + } + + @Override + public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { + scorerSupplierCalled = true; + return weight.scorerSupplier(context); + } + + } + + public void testDelegatesScorerSupplier() throws Exception { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + w.addDocument(new Document()); + DirectoryReader r = DirectoryReader.open(w); + w.close(); + ShardId shard = new ShardId("index", "_na_", 0); + r = ElasticsearchDirectoryReader.wrap(r, shard); + IndexSearcher s = new IndexSearcher(r); + s.setQueryCachingPolicy(new QueryCachingPolicy() { + @Override + public boolean shouldCache(Query query) throws IOException { + return false; // never cache + } + @Override + public void onUse(Query query) {} + }); + + Settings settings = Settings.builder() + .put(IndicesQueryCache.INDICES_CACHE_QUERY_COUNT_SETTING.getKey(), 10) + .put(IndicesQueryCache.INDICES_QUERIES_CACHE_ALL_SEGMENTS_SETTING.getKey(), true) + .build(); + IndicesQueryCache cache = new IndicesQueryCache(settings); + s.setQueryCache(cache); + Query query = new MatchAllDocsQuery(); + final DummyWeight weight = new DummyWeight(s.createNormalizedWeight(query, false)); + final Weight cached = cache.doCache(weight, s.getQueryCachingPolicy()); + assertNotSame(weight, cached); + assertFalse(weight.scorerCalled); + assertFalse(weight.scorerSupplierCalled); + cached.scorerSupplier(s.getIndexReader().leaves().get(0)); + assertFalse(weight.scorerCalled); + assertTrue(weight.scorerSupplierCalled); + IOUtils.close(r, dir); + cache.onClose(shard); + cache.close(); + } } From 
c8ad26edc957ef30a8166df8aef9b241fdfcfb7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Fri, 21 Apr 2017 10:38:09 +0200 Subject: [PATCH 6/8] Tests: Extend InternalStatsTests (#24212) Currently we don't test for count = 0 which will make a difference when adding tests for parsing for the high level rest client. Also min/max/sum should also be tested with negative values and on a larger range. --- .../aggregations/metrics/InternalStatsTests.java | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalStatsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalStatsTests.java index db64bb8c65c..c99d208581c 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalStatsTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalStatsTests.java @@ -32,13 +32,12 @@ public class InternalStatsTests extends InternalAggregationTestCase pipelineAggregators, Map metaData) { - long count = randomIntBetween(1, 50); - double[] minMax = new double[2]; - minMax[0] = randomDouble(); - minMax[0] = randomDouble(); - double sum = randomDoubleBetween(0, 100, true); - return new InternalStats(name, count, sum, minMax[0], minMax[1], DocValueFormat.RAW, - pipelineAggregators, Collections.emptyMap()); + long count = frequently() ? randomIntBetween(1, Integer.MAX_VALUE) : 0; + double min = randomDoubleBetween(-1000000, 1000000, true); + double max = randomDoubleBetween(-1000000, 1000000, true); + double sum = randomDoubleBetween(-1000000, 1000000, true); + DocValueFormat format = randomNumericDocValueFormat(); + return new InternalStats(name, count, sum, min, max, format, pipelineAggregators, Collections.emptyMap()); } @Override @@ -58,7 +57,7 @@ public class InternalStatsTests extends InternalAggregationTestCase Date: Fri, 21 Apr 2017 10:38:36 +0200 Subject: [PATCH 7/8] ESIntegTestCase.indexRandom should not introduce types. (#24202) Since we plan on removing types, `indexRandom` should not introduce new types. This commit refactors `indexRandom` to reuse existing types. --- .../elasticsearch/test/ESIntegTestCase.java | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 6d15a5e164e..7c8d8cd1a55 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -1338,9 +1338,6 @@ public abstract class ESIntegTestCase extends ESTestCase { indexRandom(forceRefresh, dummyDocuments, Arrays.asList(builders)); } - - private static final String RANDOM_BOGUS_TYPE = "RANDOM_BOGUS_TYPE______"; - /** * Indexes the given {@link IndexRequestBuilder} instances randomly. It shuffles the given builders and either * indexes them in a blocking or async fashion. This is very useful to catch problems that relate to internal document @@ -1388,31 +1385,33 @@ public abstract class ESIntegTestCase extends ESTestCase { * @param builders the documents to index. 
*/ public void indexRandom(boolean forceRefresh, boolean dummyDocuments, boolean maybeFlush, List builders) throws InterruptedException, ExecutionException { - Random random = random(); - Set indicesSet = new HashSet<>(); + Map> indicesAndTypes = new HashMap<>(); for (IndexRequestBuilder builder : builders) { - indicesSet.add(builder.request().index()); + final Set types = indicesAndTypes.computeIfAbsent(builder.request().index(), index -> new HashSet<>()); + types.add(builder.request().type()); } - Set> bogusIds = new HashSet<>(); + Set> bogusIds = new HashSet<>(); // (index, type, id) if (random.nextBoolean() && !builders.isEmpty() && dummyDocuments) { builders = new ArrayList<>(builders); - final String[] indices = indicesSet.toArray(new String[indicesSet.size()]); // inject some bogus docs final int numBogusDocs = scaledRandomIntBetween(1, builders.size() * 2); final int unicodeLen = between(1, 10); for (int i = 0; i < numBogusDocs; i++) { - String id = randomRealisticUnicodeOfLength(unicodeLen) + Integer.toString(dummmyDocIdGenerator.incrementAndGet()); - String index = RandomPicks.randomFrom(random, indices); - bogusIds.add(new Tuple<>(index, id)); - builders.add(client().prepareIndex(index, RANDOM_BOGUS_TYPE, id).setSource("{}", XContentType.JSON)); + String id = "bogus_doc_" + randomRealisticUnicodeOfLength(unicodeLen) + Integer.toString(dummmyDocIdGenerator.incrementAndGet()); + Map.Entry> indexAndTypes = RandomPicks.randomFrom(random, indicesAndTypes.entrySet()); + String index = indexAndTypes.getKey(); + String type = RandomPicks.randomFrom(random, indexAndTypes.getValue()); + bogusIds.add(Arrays.asList(index, type, id)); + // We configure a routing key in case the mapping requires it + builders.add(client().prepareIndex(index, type, id).setSource("{}", XContentType.JSON).setRouting(id)); } } - final String[] indices = indicesSet.toArray(new String[indicesSet.size()]); Collections.shuffle(builders, random()); final CopyOnWriteArrayList> errors = new CopyOnWriteArrayList<>(); List inFlightAsyncOperations = new ArrayList<>(); // If you are indexing just a few documents then frequently do it one at a time. If many then frequently in bulk. + final String[] indices = indicesAndTypes.keySet().toArray(new String[0]); if (builders.size() < FREQUENT_BULK_THRESHOLD ? frequently() : builders.size() < ALWAYS_BULK_THRESHOLD ? rarely() : false) { if (frequently()) { logger.info("Index [{}] docs async: [{}] bulk: [{}]", builders.size(), true, false); @@ -1454,10 +1453,10 @@ public abstract class ESIntegTestCase extends ESTestCase { assertThat(actualErrors, emptyIterable()); if (!bogusIds.isEmpty()) { // delete the bogus types again - it might trigger merges or at least holes in the segments and enforces deleted docs! 
- for (Tuple doc : bogusIds) { - assertEquals("failed to delete a dummy doc [" + doc.v1() + "][" + doc.v2() + "]", + for (List doc : bogusIds) { + assertEquals("failed to delete a dummy doc [" + doc.get(0) + "][" + doc.get(2) + "]", DocWriteResponse.Result.DELETED, - client().prepareDelete(doc.v1(), RANDOM_BOGUS_TYPE, doc.v2()).get().getResult()); + client().prepareDelete(doc.get(0), doc.get(1), doc.get(2)).setRouting(doc.get(2)).get().getResult()); } } if (forceRefresh) { From 3c7c4bc824520a42c9afcf29169e23540e02bf24 Mon Sep 17 00:00:00 2001 From: Colin Goodheart-Smithe Date: Fri, 21 Apr 2017 09:50:30 +0100 Subject: [PATCH 8/8] Adds declareNamedObjects methods to ConstructingObjectParser (#24219) * Adds declareNamedObjects methods to ConstructingObjectParser * Addresses review comments --- .../common/xcontent/AbstractObjectParser.java | 90 +++++++++ .../xcontent/ConstructingObjectParser.java | 100 +++++++++- .../common/xcontent/ObjectParser.java | 57 +----- .../ConstructingObjectParserTests.java | 186 ++++++++++++++++++ 4 files changed, 375 insertions(+), 58 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/common/xcontent/AbstractObjectParser.java b/core/src/main/java/org/elasticsearch/common/xcontent/AbstractObjectParser.java index 91acb267056..6c0ce35b804 100644 --- a/core/src/main/java/org/elasticsearch/common/xcontent/AbstractObjectParser.java +++ b/core/src/main/java/org/elasticsearch/common/xcontent/AbstractObjectParser.java @@ -22,6 +22,7 @@ package org.elasticsearch.common.xcontent; import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.ObjectParser.NamedObjectParser; import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import org.elasticsearch.common.xcontent.json.JsonXContent; @@ -30,6 +31,7 @@ import java.util.ArrayList; import java.util.List; import java.util.function.BiConsumer; import java.util.function.BiFunction; +import java.util.function.Consumer; /** * Superclass for {@link ObjectParser} and {@link ConstructingObjectParser}. Defines most of the "declare" methods so they can be shared. @@ -44,6 +46,94 @@ public abstract class AbstractObjectParser public abstract void declareField(BiConsumer consumer, ContextParser parser, ParseField parseField, ValueType type); + /** + * Declares named objects in the style of aggregations. These are named + * inside and object like this: + * + *
+     * <pre>
+     * <code>
+     * {
+     *   "aggregations": {
+     *     "name_1": { "aggregation_type": {} },
+     *     "name_2": { "aggregation_type": {} },
+     *     "name_3": { "aggregation_type": {} }
+     *   }
+     * }
+     * </code>
+     * </pre>
+     *
+     * Unlike the other version of this method, "ordered" mode (arrays of
+     * objects) is not supported.
+     *
+     * See NamedObjectHolder in ObjectParserTests for examples of how to invoke
+     * this.
+     *
+     * @param consumer
+     *            sets the values once they have been parsed
+     * @param namedObjectParser
+     *            parses each named object
+     * @param parseField
+     *            the field to parse
+     */
+    public abstract <T> void declareNamedObjects(BiConsumer<Value, List<T>> consumer, NamedObjectParser<T, Context> namedObjectParser,
+            ParseField parseField);
+
+    /**
+     * Declares named objects in the style of highlighting's field element.
+     * These are usually named inside an object like this:
+     *
+     * <pre>
+     * <code>
+     * {
+     *   "highlight": {
+     *     "fields": {        <------ this one
+     *       "title": {},
+     *       "body": {},
+     *       "category": {}
+     *     }
+     *   }
+     * }
+     * </code>
+     * </pre>
+     *
+     * but, when order is important, some may be written this way:
+     *
+     * <pre>
+     * <code>
+     * {
+     *   "highlight": {
+     *     "fields": [        <------ this one
+     *       {"title": {}},
+     *       {"body": {}},
+     *       {"category": {}}
+     *     ]
+     *   }
+     * }
+     * </code>
+     * </pre>
+ * + * This is because json doesn't enforce ordering. Elasticsearch reads it in + * the order sent but tools that generate json are free to put object + * members in an unordered Map, jumbling them. Thus, if you care about order + * you can send the object in the second way. + * + * See NamedObjectHolder in ObjectParserTests for examples of how to invoke + * this. + * + * @param consumer + * sets the values once they have been parsed + * @param namedObjectParser + * parses each named object + * @param orderedModeCallback + * called when the named object is parsed using the "ordered" + * mode (the array of objects) + * @param parseField + * the field to parse + */ + public abstract void declareNamedObjects(BiConsumer> consumer, NamedObjectParser namedObjectParser, + Consumer orderedModeCallback, ParseField parseField); + public void declareField(BiConsumer consumer, CheckedFunction parser, ParseField parseField, ValueType type) { if (parser == null) { diff --git a/core/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java b/core/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java index 82ee94550c1..95e6d2b97a6 100644 --- a/core/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java +++ b/core/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.common.xcontent; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.xcontent.ObjectParser.NamedObjectParser; import org.elasticsearch.common.xcontent.ObjectParser.ValueType; import java.io.IOException; @@ -77,14 +78,14 @@ public final class ConstructingObjectParser extends AbstractObje /** * Consumer that marks a field as a required constructor argument instead of a real object field. */ - private static final BiConsumer REQUIRED_CONSTRUCTOR_ARG_MARKER = (a, b) -> { + private static final BiConsumer REQUIRED_CONSTRUCTOR_ARG_MARKER = (a, b) -> { throw new UnsupportedOperationException("I am just a marker I should never be called."); }; /** * Consumer that marks a field as an optional constructor argument instead of a real object field. */ - private static final BiConsumer OPTIONAL_CONSTRUCTOR_ARG_MARKER = (a, b) -> { + private static final BiConsumer OPTIONAL_CONSTRUCTOR_ARG_MARKER = (a, b) -> { throw new UnsupportedOperationException("I am just a marker I should never be called."); }; @@ -189,7 +190,7 @@ public final class ConstructingObjectParser extends AbstractObje if (consumer == REQUIRED_CONSTRUCTOR_ARG_MARKER || consumer == OPTIONAL_CONSTRUCTOR_ARG_MARKER) { /* - * Constructor arguments are detected by this "marker" consumer. It keeps the API looking clean even if it is a bit sleezy. We + * Constructor arguments are detected by these "marker" consumers. It keeps the API looking clean even if it is a bit sleezy. We * then build a new consumer directly against the object parser that triggers the "constructor arg just arrived behavior" of the * parser. Conveniently, we can close over the position of the constructor in the argument list so we don't need to do any fancy * or expensive lookups whenever the constructor args come in. 
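The "marker consumer" trick described in the comment above is easier to see in isolation. Below is a minimal sketch of the pattern, not the Elasticsearch implementation: the class name `DeclarationSketch`, its fields, and the `declare` method are invented for illustration; only the reference-equality check on a sentinel consumer and the position captured at declaration time are the point.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;

class DeclarationSketch {
    // Sentinel consumer: never invoked, only compared by reference.
    private static final BiConsumer<Object, Object> CONSTRUCTOR_ARG_MARKER = (a, b) -> {
        throw new UnsupportedOperationException("I am just a marker I should never be called.");
    };

    static BiConsumer<Object, Object> constructorArg() {
        return CONSTRUCTOR_ARG_MARKER;
    }

    private final List<String> constructorArgs = new ArrayList<>();
    private final List<String> fields = new ArrayList<>();

    void declare(String name, BiConsumer<Object, Object> consumer) {
        if (consumer == CONSTRUCTOR_ARG_MARKER) {
            // Capture the argument's position once, at declaration time, so no
            // lookup is needed when the value later arrives from the parser.
            int position = constructorArgs.size();
            constructorArgs.add(position + ":" + name);
        } else {
            fields.add(name);
        }
    }

    public static void main(String[] args) {
        DeclarationSketch sketch = new DeclarationSketch();
        sketch.declare("named_in_constructor", constructorArg()); // routed to the constructor args
        sketch.declare("named", (a, b) -> {});                    // routed to the regular fields
        System.out.println(sketch.constructorArgs + " " + sketch.fields); // [0:named_in_constructor] [named]
    }
}
```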
@@ -204,6 +205,91 @@ public final class ConstructingObjectParser extends AbstractObje } } + @Override + public void declareNamedObjects(BiConsumer> consumer, NamedObjectParser namedObjectParser, + ParseField parseField) { + if (consumer == null) { + throw new IllegalArgumentException("[consumer] is required"); + } + if (namedObjectParser == null) { + throw new IllegalArgumentException("[parser] is required"); + } + if (parseField == null) { + throw new IllegalArgumentException("[parseField] is required"); + } + + if (consumer == REQUIRED_CONSTRUCTOR_ARG_MARKER || consumer == OPTIONAL_CONSTRUCTOR_ARG_MARKER) { + /* + * Constructor arguments are detected by this "marker" consumer. It + * keeps the API looking clean even if it is a bit sleezy. We then + * build a new consumer directly against the object parser that + * triggers the "constructor arg just arrived behavior" of the + * parser. Conveniently, we can close over the position of the + * constructor in the argument list so we don't need to do any fancy + * or expensive lookups whenever the constructor args come in. + */ + int position = constructorArgInfos.size(); + boolean required = consumer == REQUIRED_CONSTRUCTOR_ARG_MARKER; + constructorArgInfos.add(new ConstructorArgInfo(parseField, required)); + objectParser.declareNamedObjects((target, v) -> target.constructorArg(position, parseField, v), namedObjectParser, parseField); + } else { + numberOfFields += 1; + objectParser.declareNamedObjects(queueingConsumer(consumer, parseField), namedObjectParser, parseField); + } + } + + @Override + public void declareNamedObjects(BiConsumer> consumer, NamedObjectParser namedObjectParser, + Consumer orderedModeCallback, ParseField parseField) { + if (consumer == null) { + throw new IllegalArgumentException("[consumer] is required"); + } + if (namedObjectParser == null) { + throw new IllegalArgumentException("[parser] is required"); + } + if (orderedModeCallback == null) { + throw new IllegalArgumentException("[orderedModeCallback] is required"); + } + if (parseField == null) { + throw new IllegalArgumentException("[parseField] is required"); + } + + if (consumer == REQUIRED_CONSTRUCTOR_ARG_MARKER || consumer == OPTIONAL_CONSTRUCTOR_ARG_MARKER) { + /* + * Constructor arguments are detected by this "marker" consumer. It + * keeps the API looking clean even if it is a bit sleezy. We then + * build a new consumer directly against the object parser that + * triggers the "constructor arg just arrived behavior" of the + * parser. Conveniently, we can close over the position of the + * constructor in the argument list so we don't need to do any fancy + * or expensive lookups whenever the constructor args come in. + */ + int position = constructorArgInfos.size(); + boolean required = consumer == REQUIRED_CONSTRUCTOR_ARG_MARKER; + constructorArgInfos.add(new ConstructorArgInfo(parseField, required)); + objectParser.declareNamedObjects((target, v) -> target.constructorArg(position, parseField, v), namedObjectParser, + wrapOrderedModeCallBack(orderedModeCallback), parseField); + } else { + numberOfFields += 1; + objectParser.declareNamedObjects(queueingConsumer(consumer, parseField), namedObjectParser, + wrapOrderedModeCallBack(orderedModeCallback), parseField); + } + } + + private Consumer wrapOrderedModeCallBack(Consumer callback) { + return (target) -> { + if (target.targetObject != null) { + // The target has already been built. Call the callback now. + callback.accept(target.targetObject); + return; + } + /* + * The target hasn't been built. 
Queue the callback. + */ + target.queuedOrderedModeCallback = callback; + }; + } + /** * Creates the consumer that does the "field just arrived" behavior. If the targetObject hasn't been built then it queues the value. * Otherwise it just applies the value just like {@linkplain ObjectParser} does. @@ -258,6 +344,11 @@ public final class ConstructingObjectParser extends AbstractObje * Fields to be saved to the target object when we can build it. This is only allocated if a field has to be queued. */ private Consumer[] queuedFields; + /** + * OrderedModeCallback to be called with the target object when we can + * build it. This is only allocated if the callback has to be queued. + */ + private Consumer queuedOrderedModeCallback; /** * The count of fields already queued. */ @@ -343,6 +434,9 @@ public final class ConstructingObjectParser extends AbstractObje private void buildTarget() { try { targetObject = builder.apply(constructorArgs); + if (queuedOrderedModeCallback != null) { + queuedOrderedModeCallback.accept(targetObject); + } while (queuedFieldsCount > 0) { queuedFieldsCount -= 1; queuedFields[queuedFieldsCount].accept(targetObject); diff --git a/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java b/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java index 0a7e71c6f06..7e3001b75a2 100644 --- a/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java +++ b/core/src/main/java/org/elasticsearch/common/xcontent/ObjectParser.java @@ -227,41 +227,7 @@ public final class ObjectParser extends AbstractObjectParser - * { - * "highlight": { - * "fields": { <------ this one - * "title": {}, - * "body": {}, - * "category": {} - * } - * } - * } - * - * but, when order is important, some may be written this way: - *

-     * {
-     *   "highlight": {
-     *     "fields": [        <------ this one
-     *       {"title": {}},
-     *       {"body": {}},
-     *       {"category": {}}
-     *     ]
-     *   }
-     * }
-     * 
- * This is because json doesn't enforce ordering. Elasticsearch reads it in the order sent but tools that generate json are free to put - * object members in an unordered Map, jumbling them. Thus, if you care about order you can send the object in the second way. - * - * See NamedObjectHolder in ObjectParserTests for examples of how to invoke this. - * - * @param consumer sets the values once they have been parsed - * @param namedObjectParser parses each named object - * @param orderedModeCallback called when the named object is parsed using the "ordered" mode (the array of objects) - * @param field the field to parse - */ + @Override public void declareNamedObjects(BiConsumer> consumer, NamedObjectParser namedObjectParser, Consumer orderedModeCallback, ParseField field) { // This creates and parses the named object @@ -311,26 +277,7 @@ public final class ObjectParser extends AbstractObjectParser - * { - * "aggregations": { - * "name_1": { "aggregation_type": {} }, - * "name_2": { "aggregation_type": {} }, - * "name_3": { "aggregation_type": {} } - * } - * } - * } - * - * Unlike the other version of this method, "ordered" mode (arrays of objects) is not supported. - * - * See NamedObjectHolder in ObjectParserTests for examples of how to invoke this. - * - * @param consumer sets the values once they have been parsed - * @param namedObjectParser parses each named object - * @param field the field to parse - */ + @Override public void declareNamedObjects(BiConsumer> consumer, NamedObjectParser namedObjectParser, ParseField field) { Consumer orderedModeCallback = (v) -> { diff --git a/core/src/test/java/org/elasticsearch/common/xcontent/ConstructingObjectParserTests.java b/core/src/test/java/org/elasticsearch/common/xcontent/ConstructingObjectParserTests.java index 32ffb33b694..7f6187800c2 100644 --- a/core/src/test/java/org/elasticsearch/common/xcontent/ConstructingObjectParserTests.java +++ b/core/src/test/java/org/elasticsearch/common/xcontent/ConstructingObjectParserTests.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.ObjectParserTests.NamedObject; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.test.ESTestCase; import org.hamcrest.Matcher; @@ -37,6 +38,7 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constru import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.nullValue; @@ -397,4 +399,188 @@ public class ConstructingObjectParserTests extends ESTestCase { return parser; } } + + public void testParseNamedObject() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": {\n" + + " \"a\": {}" + + "},\"named_in_constructor\": {\n" + + " \"b\": {}" + + "}}"); + NamedObjectHolder h = NamedObjectHolder.PARSER.apply(parser, null); + assertThat(h.named, hasSize(1)); + assertEquals("a", h.named.get(0).name); + assertThat(h.namedInConstructor, hasSize(1)); + assertEquals("b", h.namedInConstructor.get(0).name); + assertFalse(h.namedSuppliedInOrder); + } + + public void testParseNamedObjectInOrder() throws IOException { + 
XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}}" + + "],\"named_in_constructor\": [\n" + + " {\"b\": {}}" + + "]}"); + NamedObjectHolder h = NamedObjectHolder.PARSER.apply(parser, null); + assertThat(h.named, hasSize(1)); + assertEquals("a", h.named.get(0).name); + assertThat(h.namedInConstructor, hasSize(1)); + assertEquals("b", h.namedInConstructor.get(0).name); + assertTrue(h.namedSuppliedInOrder); + } + + public void testParseNamedObjectTwoFieldsInArray() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}, \"b\": {}}" + + "],\"named_in_constructor\": [\n" + + " {\"c\": {}}" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named]", e.getMessage()); + assertEquals( + "[named] can be a single object with any number of fields or an array where each entry is an object with a single field", + e.getCause().getMessage()); + } + + public void testParseNamedObjectTwoFieldsInArrayConstructorArg() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}}" + + "],\"named_in_constructor\": [\n" + + " {\"c\": {}, \"d\": {}}" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named_in_constructor]", e.getMessage()); + assertEquals( + "[named_in_constructor] can be a single object with any number of fields or an array where each entry is an object with a " + + "single field", e.getCause().getMessage()); + } + + public void testParseNamedObjectNoFieldsInArray() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {}" + + "],\"named_in_constructor\": [\n" + + " {\"a\": {}}" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named]", e.getMessage()); + assertEquals( + "[named] can be a single object with any number of fields or an array where each entry is an object with a single field", + e.getCause().getMessage()); + } + + public void testParseNamedObjectNoFieldsInArrayConstructorArg() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}}" + + "],\"named_in_constructor\": [\n" + + " {}" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named_in_constructor]", e.getMessage()); + assertEquals( + "[named_in_constructor] can be a single object with any number of fields or an array where each entry is an object with a " + + "single field", e.getCause().getMessage()); + } + + public void testParseNamedObjectJunkInArray() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " \"junk\"" + + "],\"named_in_constructor\": [\n" + + " {\"a\": {}}" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named]", e.getMessage()); + assertEquals( + "[named] can be a single object with any 
number of fields or an array where each entry is an object with a single field", + e.getCause().getMessage()); + } + + public void testParseNamedObjectJunkInArrayConstructorArg() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}}" + + "],\"named_in_constructor\": [\n" + + " \"junk\"" + + "]}"); + ParsingException e = expectThrows(ParsingException.class, () -> NamedObjectHolder.PARSER.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named_in_constructor]", e.getMessage()); + assertEquals( + "[named_in_constructor] can be a single object with any number of fields or an array where each entry is an object with a " + + "single field", e.getCause().getMessage()); + } + + public void testParseNamedObjectInOrderNotSupported() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": [\n" + + " {\"a\": {}}" + + "],\"named_in_constructor\": {\"b\": {}}" + + "}"); + + // Create our own parser for this test so we can disable support for the "ordered" mode specified by the array above + @SuppressWarnings("unchecked") + ConstructingObjectParser objectParser = new ConstructingObjectParser<>("named_object_holder", + a -> new NamedObjectHolder(((List) a[0]))); + objectParser.declareNamedObjects(ConstructingObjectParser.constructorArg(), NamedObject.PARSER, + new ParseField("named_in_constructor")); + objectParser.declareNamedObjects(NamedObjectHolder::setNamed, NamedObject.PARSER, new ParseField("named")); + + // Now firing the xml through it fails + ParsingException e = expectThrows(ParsingException.class, () -> objectParser.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named]", e.getMessage()); + assertEquals("[named] doesn't support arrays. Use a single object with multiple fields.", e.getCause().getMessage()); + } + + public void testParseNamedObjectInOrderNotSupportedConstructorArg() throws IOException { + XContentParser parser = createParser(JsonXContent.jsonXContent, + "{\"named\": {\"a\": {}}" + + ",\"named_in_constructor\": [\n" + + " {\"b\": {}}" + + "]}"); + + // Create our own parser for this test so we can disable support for the "ordered" mode specified by the array above + @SuppressWarnings("unchecked") + ConstructingObjectParser objectParser = new ConstructingObjectParser<>("named_object_holder", + a -> new NamedObjectHolder(((List) a[0]))); + objectParser.declareNamedObjects(ConstructingObjectParser.constructorArg(), NamedObject.PARSER, + new ParseField("named_in_constructor")); + objectParser.declareNamedObjects(NamedObjectHolder::setNamed, NamedObject.PARSER, new ParseField("named")); + + // Now firing the xml through it fails + ParsingException e = expectThrows(ParsingException.class, () -> objectParser.apply(parser, null)); + assertEquals("[named_object_holder] failed to parse field [named_in_constructor]", e.getMessage()); + assertEquals("[named_in_constructor] doesn't support arrays. 
Use a single object with multiple fields.", e.getCause().getMessage()); + } + + static class NamedObjectHolder { + @SuppressWarnings("unchecked") + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("named_object_holder", + a -> new NamedObjectHolder(((List) a[0]))); + static { + PARSER.declareNamedObjects(ConstructingObjectParser.constructorArg(), NamedObject.PARSER, NamedObjectHolder::keepNamedInOrder, + new ParseField("named_in_constructor")); + PARSER.declareNamedObjects(NamedObjectHolder::setNamed, NamedObject.PARSER, NamedObjectHolder::keepNamedInOrder, + new ParseField("named")); + } + + private List named; + private List namedInConstructor; + private boolean namedSuppliedInOrder = false; + + NamedObjectHolder(List namedInConstructor) { + this.namedInConstructor = namedInConstructor; + } + + public void setNamed(List named) { + this.named = named; + } + + public void keepNamedInOrder() { + namedSuppliedInOrder = true; + } + } }