From 556d80501c9efd565dd489fbba6b9566447bdaae Mon Sep 17 00:00:00 2001
From: Britta Weber
Date: Mon, 23 Nov 2015 18:28:55 +0100
Subject: [PATCH] aggs: fix significant terms reduce for long terms

Significant terms were not reduced correctly if they were long terms.
Also, clean up the bwc test a little. Upgrades are not needed.

related to #13522
---
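Reviewer note (below the three-dash line, so git-am will not include it in
the commit): in doReduce() the bucket map is filled under
bucket.getKeyAsString() but was probed with bucket.getKey(). Map.get(Object)
accepts any key type, so for long terms, where getKey() is not the string
form of the term, the lookup always missed and each put() replaced the
previously collected bucket list instead of merging into it. A minimal,
self-contained sketch of the mismatch (illustrative names, not the ES
classes):

    import java.util.*;

    class KeyMismatchSketch {
        public static void main(String[] args) {
            Map<String, List<Object>> buckets = new HashMap<>();
            buckets.put("123", new ArrayList<>());       // stored under the string key
            Object numericKey = 123L;                    // a long-terms bucket key is numeric
            System.out.println(buckets.get(numericKey)); // null -- a Long never equals a String
            System.out.println(buckets.get("123"));      // the fix: probe with the same string key
        }
    }
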
"string" : "long"; String settings = "{\"index.number_of_shards\": 5, \"index.number_of_replicas\": 0}"; index01Docs(type, settings); - + ensureGreen(); logClusterState(); - boolean upgraded; - int upgradedNodesCounter = 1; - do { - logger.debug("testBucketStreaming: upgrading {}st node", upgradedNodesCounter++); - upgraded = backwardsCluster().upgradeOneNode(); - ensureGreen(); - logClusterState(); - checkSignificantTermsAggregationCorrect(); - } while (upgraded); + checkSignificantTermsAggregationCorrect(); logger.debug("testBucketStreaming: done testing significant terms while upgrading"); } @@ -101,7 +93,7 @@ public class SignificantTermsBackwardCompatibilityIT extends ESBackcompatTestCas .execute() .actionGet(); assertSearchResponse(response); - StringTerms classes = (StringTerms) response.getAggregations().get("class"); + StringTerms classes = response.getAggregations().get("class"); assertThat(classes.getBuckets().size(), equalTo(2)); for (Terms.Bucket classBucket : classes.getBuckets()) { Map aggs = classBucket.getAggregations().asMap(); diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java index c911da06ae9..b10dfd31b35 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificanceHeuristicTests.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.search.SearchShardTarget; +import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.InternalAggregations; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; @@ -38,6 +39,8 @@ import org.elasticsearch.search.aggregations.bucket.significant.heuristics.Signi import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicBuilder; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParserMapper; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.format.ValueFormatter; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.TestSearchContext; @@ -45,18 +48,11 @@ import org.elasticsearch.test.TestSearchContext; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.nio.charset.StandardCharsets; +import java.util.*; import static org.elasticsearch.test.VersionUtils.randomVersion; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; -import static org.hamcrest.Matchers.greaterThanOrEqualTo; -import static org.hamcrest.Matchers.lessThan; -import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.*; 
 
 /**
  *
@@ -83,24 +79,28 @@ public class SignificanceHeuristicTests extends ESTestCase {
         ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
         OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
         out.setVersion(version);
-
         sigTerms[0].writeTo(out);
 
         // read
         ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
         InputStreamStreamInput in = new InputStreamStreamInput(inBuffer);
         in.setVersion(version);
-
         sigTerms[1].readFrom(in);
 
         assertTrue(sigTerms[1].significanceHeuristic.equals(sigTerms[0].significanceHeuristic));
+        InternalSignificantTerms.Bucket originalBucket = (InternalSignificantTerms.Bucket) sigTerms[0].buckets.get(0);
+        InternalSignificantTerms.Bucket streamedBucket = (InternalSignificantTerms.Bucket) sigTerms[1].buckets.get(0);
+        assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString()));
+        assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf()));
+        assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf()));
+        assertThat(streamedBucket.getSubsetSize(), equalTo(10l));
+        assertThat(streamedBucket.getSupersetSize(), equalTo(20l));
     }
 
     InternalSignificantTerms[] getRandomSignificantTerms(SignificanceHeuristic heuristic) {
         InternalSignificantTerms[] sTerms = new InternalSignificantTerms[2];
         ArrayList<InternalSignificantTerms.Bucket> buckets = new ArrayList<>();
         if (randomBoolean()) {
-            BytesRef term = new BytesRef("123.0");
             buckets.add(new SignificantLongTerms.Bucket(1, 2, 3, 4, 123, InternalAggregations.EMPTY, null));
             sTerms[0] = new SignificantLongTerms(10, 20, "some_name", null, 1, 1, heuristic, buckets,
                     Collections.EMPTY_LIST, null);
@@ -125,6 +125,56 @@
         return heuristics.get(randomInt(3));
     }
 
+    public void testReduce() {
+        List<InternalAggregation> aggs = createInternalAggregations();
+        SignificantTerms reducedAgg = (SignificantTerms) aggs.get(0).doReduce(aggs, null);
+        assertThat(reducedAgg.getBuckets().size(), equalTo(2));
+        assertThat(reducedAgg.getBuckets().get(0).getSubsetDf(), equalTo(8l));
+        assertThat(reducedAgg.getBuckets().get(0).getSubsetSize(), equalTo(16l));
+        assertThat(reducedAgg.getBuckets().get(0).getSupersetDf(), equalTo(10l));
+        assertThat(reducedAgg.getBuckets().get(0).getSupersetSize(), equalTo(30l));
+        assertThat(reducedAgg.getBuckets().get(1).getSubsetDf(), equalTo(8l));
+        assertThat(reducedAgg.getBuckets().get(1).getSubsetSize(), equalTo(16l));
+        assertThat(reducedAgg.getBuckets().get(1).getSupersetDf(), equalTo(10l));
+        assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30l));
+    }
+
+    // Create aggregations as they might come from three different shards and return as list.
+    private List<InternalAggregation> createInternalAggregations() {
+
+        String type = randomBoolean() ? "long" : "string";
"long" : "string"; + SignificanceHeuristic significanceHeuristic = getRandomSignificanceheuristic(); + + List aggs = new ArrayList<>(); + List terms0Buckets = new ArrayList<>(); + terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 0)); + aggs.add(createAggregation(type, significanceHeuristic, terms0Buckets, 4, 10)); + List terms1Buckets = new ArrayList<>(); + terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 1)); + aggs.add(createAggregation(type, significanceHeuristic, terms1Buckets, 4, 10)); + List terms01Buckets = new ArrayList<>(); + terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 0)); + terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 1)); + aggs.add(createAggregation(type, significanceHeuristic, terms01Buckets, 8, 10)); + return aggs; + } + + private InternalSignificantTerms createAggregation(String type, SignificanceHeuristic significanceHeuristic, List buckets, long subsetSize, long supersetSize) { + if (type.equals("string")) { + return new SignificantStringTerms(subsetSize, supersetSize, "sig_terms", 2, -1, significanceHeuristic, buckets, new ArrayList(), new HashMap()); + } else { + return new SignificantLongTerms(subsetSize, supersetSize, "sig_terms", ValueFormatter.RAW, 2, -1, significanceHeuristic, buckets, new ArrayList(), new HashMap()); + } + } + + private InternalSignificantTerms.Bucket createBucket(String type, long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) { + if (type.equals("string")) { + return new SignificantStringTerms.Bucket(new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, subsetSize, supersetDF, supersetSize, InternalAggregations.EMPTY); + } else { + return new SignificantLongTerms.Bucket(subsetDF, subsetSize, supersetDF, supersetSize, label, InternalAggregations.EMPTY, ValueFormatter.RAW); + } + } + // test that // 1. The output of the builders can actually be parsed // 2. The parser does not swallow parameters after a significance heuristic was defined