From e6d0f7df01c95eb3dfdadc0a797764ba0c90052b Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 10 Jan 2020 16:18:11 -0500 Subject: [PATCH] Fix format problem in composite of unmapped (#50869) (#50875) When a composite aggregation is reduced using the results from an index that has one of the fields unmapped we were throwing away the formatter. This is mildly annoying, except in the case of IP addresses which were coming out as non-utf-8-characters. And tripping assertions. This carefully preserves the formatter from the working bucket. Closes #50600 --- .../test/search.aggregation/230_composite.yml | 47 +++++++++++++++++++ .../elasticsearch/search/DocValueFormat.java | 10 ++++ .../bucket/composite/InternalComposite.java | 34 ++++++++++++-- .../composite/InternalCompositeTests.java | 36 +++++++++++++- 4 files changed, 122 insertions(+), 5 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml index 0fc1e8e3e0b..7aa495fb0c3 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/230_composite.yml @@ -729,6 +729,7 @@ setup: - match: { aggregations.test.buckets.3.key.geo: "12/2048/0" } - match: { aggregations.test.buckets.3.key.kw: "bar" } - match: { aggregations.test.buckets.3.doc_count: 1 } + --- "Simple Composite aggregation with geotile grid add aggregate after": - skip: @@ -770,3 +771,49 @@ setup: - match: { aggregations.test.buckets.2.key.geo: "12/2048/0" } - match: { aggregations.test.buckets.2.key.kw: "bar" } - match: { aggregations.test.buckets.2.doc_count: 1 } + +--- +"Mixed ip and unmapped fields": + - skip: + version: " - 7.99.99" + reason: This will fail against 7.x until the fix is backported there + # It is important that the index *without* the ip field be sorted *before* + # the index *with* the ip field because that has caused bugs in the past. + - do: + indices.create: + index: test_1 + - do: + indices.create: + index: test_2 + body: + mappings: + properties: + f: + type: ip + - do: + index: + index: test_2 + id: 1 + body: { "f": "192.168.0.1" } + refresh: true + + - do: + search: + index: test_* + body: + aggregations: + test: + composite: + sources: [ + "f": { + "terms": { + "field": "f" + } + } + ] + + - match: { hits.total.value: 1 } + - match: { hits.total.relation: eq } + - length: { aggregations.test.buckets: 1 } + - match: { aggregations.test.buckets.0.key.f: "192.168.0.1" } + - match: { aggregations.test.buckets.0.doc_count: 1 } diff --git a/server/src/main/java/org/elasticsearch/search/DocValueFormat.java b/server/src/main/java/org/elasticsearch/search/DocValueFormat.java index 02ce9959bb3..a634b4f7ad2 100644 --- a/server/src/main/java/org/elasticsearch/search/DocValueFormat.java +++ b/server/src/main/java/org/elasticsearch/search/DocValueFormat.java @@ -141,6 +141,11 @@ public interface DocValueFormat extends NamedWriteable { public BytesRef parseBytesRef(String value) { return new BytesRef(value); } + + @Override + public String toString() { + return "raw"; + } }; DocValueFormat BINARY = new DocValueFormat() { @@ -346,6 +351,11 @@ public interface DocValueFormat extends NamedWriteable { public BytesRef parseBytesRef(String value) { return new BytesRef(InetAddressPoint.encode(InetAddresses.forString(value))); } + + @Override + public String toString() { + return "ip"; + } }; final class Decimal implements DocValueFormat { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java index 3ef667f145c..fc63417f060 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/InternalComposite.java @@ -149,6 +149,13 @@ public class InternalComposite return buckets; } + /** + * The formats used when writing the keys. Package private for testing. + */ + List getFormats() { + return formats; + } + @Override public Map afterKey() { if (afterKey != null) { @@ -204,8 +211,17 @@ public class InternalComposite reduceContext.consumeBucketsAndMaybeBreak(1); result.add(reduceBucket); } - final CompositeKey lastKey = result.size() > 0 ? result.get(result.size()-1).getRawKey() : null; - return new InternalComposite(name, size, sourceNames, formats, result, lastKey, reverseMuls, + + List reducedFormats = formats; + CompositeKey lastKey = null; + if (result.size() > 0) { + lastBucket = result.get(result.size() - 1); + /* Attach the formats from the last bucket to the reduced composite + * so that we can properly format the after key. */ + reducedFormats = lastBucket.formats; + lastKey = lastBucket.getRawKey(); + } + return new InternalComposite(name, size, sourceNames, reducedFormats, result, lastKey, reverseMuls, earlyTerminated, pipelineAggregators(), metaData); } @@ -219,7 +235,12 @@ public class InternalComposite aggregations.add(bucket.aggregations); } InternalAggregations aggs = InternalAggregations.reduce(aggregations, context); - return new InternalBucket(sourceNames, formats, buckets.get(0).key, reverseMuls, docCount, aggs); + /* Use the formats from the bucket because they'll be right to format + * the key. The formats on the InternalComposite doing the reducing are + * just whatever formats make sense for *its* index. This can be real + * trouble when the index doing the reducing is unmapped. */ + List reducedFormats = buckets.get(0).formats; + return new InternalBucket(sourceNames, reducedFormats, buckets.get(0).key, reverseMuls, docCount, aggs); } @Override @@ -349,6 +370,13 @@ public class InternalComposite return aggregations; } + /** + * The formats used when writing the keys. Package private for testing. + */ + List getFormats() { + return formats; + } + @Override public int compareKey(InternalBucket other) { for (int i = 0; i < key.size(); i++) { diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java index 756b6439504..624b4e6d77a 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/InternalCompositeTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.aggregations.bucket.composite; import com.google.common.collect.Lists; + import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.time.DateFormatter; @@ -36,6 +37,7 @@ import org.junit.After; import java.io.IOException; import java.time.ZoneOffset; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -47,6 +49,9 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; import static com.carrotsearch.randomizedtesting.RandomizedTest.randomAsciiLettersOfLengthBetween; +import static java.util.Collections.emptyList; +import static java.util.Collections.emptyMap; +import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -238,8 +243,7 @@ public class InternalCompositeTests extends InternalMultiBucketAggregationTestCa for (int i = 0; i < numSame; i++) { toReduce.add(result); } - InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce, - new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true)); + InternalComposite finalReduce = (InternalComposite) result.reduce(toReduce, reduceContext()); assertThat(finalReduce.getBuckets().size(), equalTo(result.getBuckets().size())); Iterator expectedIt = result.getBuckets().iterator(); for (InternalComposite.InternalBucket bucket : finalReduce.getBuckets()) { @@ -249,6 +253,30 @@ public class InternalCompositeTests extends InternalMultiBucketAggregationTestCa } } + /** + * Check that reducing with an unmapped index produces useful formats. + */ + public void testReduceUnmapped() throws IOException { + InternalComposite mapped = createTestInstance(randomAlphaOfLength(10), emptyList(), emptyMap(), InternalAggregations.EMPTY); + List rawFormats = formats.stream().map(f -> DocValueFormat.RAW).collect(toList()); + InternalComposite unmapped = new InternalComposite(mapped.getName(), mapped.getSize(), sourceNames, + rawFormats, emptyList(), null, reverseMuls, true, emptyList(), emptyMap()); + List toReduce = Arrays.asList(unmapped, mapped); + Collections.shuffle(toReduce, random()); + InternalComposite finalReduce = (InternalComposite) unmapped.reduce(toReduce, reduceContext()); + assertThat(finalReduce.getBuckets().size(), equalTo(mapped.getBuckets().size())); + if (false == mapped.getBuckets().isEmpty()) { + assertThat(finalReduce.getFormats(), equalTo(mapped.getFormats())); + } + Iterator expectedIt = mapped.getBuckets().iterator(); + for (InternalComposite.InternalBucket bucket : finalReduce.getBuckets()) { + InternalComposite.InternalBucket expectedBucket = expectedIt.next(); + assertThat(bucket.getRawKey(), equalTo(expectedBucket.getRawKey())); + assertThat(bucket.getDocCount(), equalTo(expectedBucket.getDocCount())); + assertThat(bucket.getFormats(), equalTo(expectedBucket.getFormats())); + } + } + public void testCompareCompositeKeyBiggerFieldName() { InternalComposite.ArrayMap key1 = createMap( Lists.newArrayList("field1", "field2"), @@ -381,4 +409,8 @@ public class InternalCompositeTests extends InternalMultiBucketAggregationTestCa values ); } + + private InternalAggregation.ReduceContext reduceContext() { + return new InternalAggregation.ReduceContext(BigArrays.NON_RECYCLING_INSTANCE, null, true); + } }