aggs: fix significant terms reduce for long terms
Significant terms were not reduced correctly if they were long terms. Also, clean up the bwc test a little. Upgrades are not needed. related to #13522
This commit is contained in:
parent
866bed0864
commit
556d80501c
|
@ -173,7 +173,7 @@ public abstract class InternalSignificantTerms<A extends InternalSignificantTerm
|
|||
for (InternalAggregation aggregation : aggregations) {
|
||||
InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
|
||||
for (Bucket bucket : terms.buckets) {
|
||||
List<Bucket> existingBuckets = buckets.get(bucket.getKey());
|
||||
List<Bucket> existingBuckets = buckets.get(bucket.getKeyAsString());
|
||||
if (existingBuckets == null) {
|
||||
existingBuckets = new ArrayList<>(aggregations.size());
|
||||
buckets.put(bucket.getKeyAsString(), existingBuckets);
|
||||
|
|
|
@ -48,24 +48,16 @@ public class SignificantTermsBackwardCompatibilityIT extends ESBackcompatTestCas
|
|||
static final String CLASS_FIELD = "class";
|
||||
|
||||
/**
|
||||
* Simple upgrade test for streaming significant terms buckets
|
||||
* Test for streaming significant terms buckets to old es versions.
|
||||
*/
|
||||
public void testBucketStreaming() throws IOException, ExecutionException, InterruptedException {
|
||||
logger.debug("testBucketStreaming: indexing documents");
|
||||
String type = randomBoolean() ? "string" : "long";
|
||||
String settings = "{\"index.number_of_shards\": 5, \"index.number_of_replicas\": 0}";
|
||||
index01Docs(type, settings);
|
||||
|
||||
logClusterState();
|
||||
boolean upgraded;
|
||||
int upgradedNodesCounter = 1;
|
||||
do {
|
||||
logger.debug("testBucketStreaming: upgrading {}st node", upgradedNodesCounter++);
|
||||
upgraded = backwardsCluster().upgradeOneNode();
|
||||
ensureGreen();
|
||||
logClusterState();
|
||||
checkSignificantTermsAggregationCorrect();
|
||||
} while (upgraded);
|
||||
logger.debug("testBucketStreaming: done testing significant terms while upgrading");
|
||||
}
|
||||
|
||||
|
@ -101,7 +93,7 @@ public class SignificantTermsBackwardCompatibilityIT extends ESBackcompatTestCas
|
|||
.execute()
|
||||
.actionGet();
|
||||
assertSearchResponse(response);
|
||||
StringTerms classes = (StringTerms) response.getAggregations().get("class");
|
||||
StringTerms classes = response.getAggregations().get("class");
|
||||
assertThat(classes.getBuckets().size(), equalTo(2));
|
||||
for (Terms.Bucket classBucket : classes.getBuckets()) {
|
||||
Map<String, Aggregation> aggs = classBucket.getAggregations().asMap();
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.elasticsearch.common.xcontent.XContentFactory;
|
|||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.json.JsonXContent;
|
||||
import org.elasticsearch.search.SearchShardTarget;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregation;
|
||||
import org.elasticsearch.search.aggregations.InternalAggregations;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
|
||||
|
@ -38,6 +39,8 @@ import org.elasticsearch.search.aggregations.bucket.significant.heuristics.Signi
|
|||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicBuilder;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser;
|
||||
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParserMapper;
|
||||
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
|
||||
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
|
||||
import org.elasticsearch.search.internal.SearchContext;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.elasticsearch.test.TestSearchContext;
|
||||
|
@ -45,18 +48,11 @@ import org.elasticsearch.test.TestSearchContext;
|
|||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
import static org.elasticsearch.test.VersionUtils.randomVersion;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.greaterThan;
|
||||
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
|
||||
import static org.hamcrest.Matchers.lessThan;
|
||||
import static org.hamcrest.Matchers.lessThanOrEqualTo;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -83,24 +79,28 @@ public class SignificanceHeuristicTests extends ESTestCase {
|
|||
ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
|
||||
OutputStreamStreamOutput out = new OutputStreamStreamOutput(outBuffer);
|
||||
out.setVersion(version);
|
||||
|
||||
sigTerms[0].writeTo(out);
|
||||
|
||||
// read
|
||||
ByteArrayInputStream inBuffer = new ByteArrayInputStream(outBuffer.toByteArray());
|
||||
InputStreamStreamInput in = new InputStreamStreamInput(inBuffer);
|
||||
in.setVersion(version);
|
||||
|
||||
sigTerms[1].readFrom(in);
|
||||
|
||||
assertTrue(sigTerms[1].significanceHeuristic.equals(sigTerms[0].significanceHeuristic));
|
||||
InternalSignificantTerms.Bucket originalBucket = (InternalSignificantTerms.Bucket) sigTerms[0].buckets.get(0);
|
||||
InternalSignificantTerms.Bucket streamedBucket = (InternalSignificantTerms.Bucket) sigTerms[1].buckets.get(0);
|
||||
assertThat(originalBucket.getKeyAsString(), equalTo(streamedBucket.getKeyAsString()));
|
||||
assertThat(originalBucket.getSupersetDf(), equalTo(streamedBucket.getSupersetDf()));
|
||||
assertThat(originalBucket.getSubsetDf(), equalTo(streamedBucket.getSubsetDf()));
|
||||
assertThat(streamedBucket.getSubsetSize(), equalTo(10l));
|
||||
assertThat(streamedBucket.getSupersetSize(), equalTo(20l));
|
||||
}
|
||||
|
||||
InternalSignificantTerms[] getRandomSignificantTerms(SignificanceHeuristic heuristic) {
|
||||
InternalSignificantTerms[] sTerms = new InternalSignificantTerms[2];
|
||||
ArrayList<InternalSignificantTerms.Bucket> buckets = new ArrayList<>();
|
||||
if (randomBoolean()) {
|
||||
BytesRef term = new BytesRef("123.0");
|
||||
buckets.add(new SignificantLongTerms.Bucket(1, 2, 3, 4, 123, InternalAggregations.EMPTY, null));
|
||||
sTerms[0] = new SignificantLongTerms(10, 20, "some_name", null, 1, 1, heuristic, buckets,
|
||||
Collections.EMPTY_LIST, null);
|
||||
|
@ -125,6 +125,56 @@ public class SignificanceHeuristicTests extends ESTestCase {
|
|||
return heuristics.get(randomInt(3));
|
||||
}
|
||||
|
||||
public void testReduce() {
|
||||
List<InternalAggregation> aggs = createInternalAggregations();
|
||||
SignificantTerms reducedAgg = (SignificantTerms) aggs.get(0).doReduce(aggs, null);
|
||||
assertThat(reducedAgg.getBuckets().size(), equalTo(2));
|
||||
assertThat(reducedAgg.getBuckets().get(0).getSubsetDf(), equalTo(8l));
|
||||
assertThat(reducedAgg.getBuckets().get(0).getSubsetSize(), equalTo(16l));
|
||||
assertThat(reducedAgg.getBuckets().get(0).getSupersetDf(), equalTo(10l));
|
||||
assertThat(reducedAgg.getBuckets().get(0).getSupersetSize(), equalTo(30l));
|
||||
assertThat(reducedAgg.getBuckets().get(1).getSubsetDf(), equalTo(8l));
|
||||
assertThat(reducedAgg.getBuckets().get(1).getSubsetSize(), equalTo(16l));
|
||||
assertThat(reducedAgg.getBuckets().get(1).getSupersetDf(), equalTo(10l));
|
||||
assertThat(reducedAgg.getBuckets().get(1).getSupersetSize(), equalTo(30l));
|
||||
}
|
||||
|
||||
// Create aggregations as they might come from three different shards and return as list.
|
||||
private List<InternalAggregation> createInternalAggregations() {
|
||||
|
||||
String type = randomBoolean() ? "long" : "string";
|
||||
SignificanceHeuristic significanceHeuristic = getRandomSignificanceheuristic();
|
||||
|
||||
List<InternalAggregation> aggs = new ArrayList<>();
|
||||
List<InternalSignificantTerms.Bucket> terms0Buckets = new ArrayList<>();
|
||||
terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 0));
|
||||
aggs.add(createAggregation(type, significanceHeuristic, terms0Buckets, 4, 10));
|
||||
List<InternalSignificantTerms.Bucket> terms1Buckets = new ArrayList<>();
|
||||
terms0Buckets.add(createBucket(type, 4, 4, 5, 10, 1));
|
||||
aggs.add(createAggregation(type, significanceHeuristic, terms1Buckets, 4, 10));
|
||||
List<InternalSignificantTerms.Bucket> terms01Buckets = new ArrayList<>();
|
||||
terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 0));
|
||||
terms0Buckets.add(createBucket(type, 4, 8, 5, 10, 1));
|
||||
aggs.add(createAggregation(type, significanceHeuristic, terms01Buckets, 8, 10));
|
||||
return aggs;
|
||||
}
|
||||
|
||||
private InternalSignificantTerms createAggregation(String type, SignificanceHeuristic significanceHeuristic, List<InternalSignificantTerms.Bucket> buckets, long subsetSize, long supersetSize) {
|
||||
if (type.equals("string")) {
|
||||
return new SignificantStringTerms(subsetSize, supersetSize, "sig_terms", 2, -1, significanceHeuristic, buckets, new ArrayList<PipelineAggregator>(), new HashMap<String, Object>());
|
||||
} else {
|
||||
return new SignificantLongTerms(subsetSize, supersetSize, "sig_terms", ValueFormatter.RAW, 2, -1, significanceHeuristic, buckets, new ArrayList<PipelineAggregator>(), new HashMap<String, Object>());
|
||||
}
|
||||
}
|
||||
|
||||
private InternalSignificantTerms.Bucket createBucket(String type, long subsetDF, long subsetSize, long supersetDF, long supersetSize, long label) {
|
||||
if (type.equals("string")) {
|
||||
return new SignificantStringTerms.Bucket(new BytesRef(Long.toString(label).getBytes(StandardCharsets.UTF_8)), subsetDF, subsetSize, supersetDF, supersetSize, InternalAggregations.EMPTY);
|
||||
} else {
|
||||
return new SignificantLongTerms.Bucket(subsetDF, subsetSize, supersetDF, supersetSize, label, InternalAggregations.EMPTY, ValueFormatter.RAW);
|
||||
}
|
||||
}
|
||||
|
||||
// test that
|
||||
// 1. The output of the builders can actually be parsed
|
||||
// 2. The parser does not swallow parameters after a significance heuristic was defined
|
||||
|
|
Loading…
Reference in New Issue