Fix out-of-bounds exception (AOOBE) when setting min_doc_count to 0 in significant_terms (#60823)

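This commit fixes the computation of the subset size for empty buckets (doc count of 0): when the owning bucket ordinal is not present in the subset sizes array, the size is now reported as 0 instead of reading past the end of the array.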
The aggregator test refactoring in #60683 revealed this bug.
This commit is contained in:
Jim Ferenczi 2020-08-06 18:01:54 +02:00 committed by jimczi
parent fb7c431d8d
commit 14980ff97e
1 changed files with 9 additions and 4 deletions

View File

@ -797,9 +797,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
return new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null, format, 0);
}
/**
 * Looks up the subset size for the given owning bucket ordinal.
 *
 * @param owningBucketOrd ordinal of the owning bucket
 * @return the value stored in {@code subsetSizes} for that ordinal, or 0 when the
 *         ordinal is not below {@code subsetSizes.size()}
 */
private long subsetSize(long owningBucketOrd) {
    // An ordinal that is not in the array means the bucket is empty, so the size has to be 0.
    if (owningBucketOrd >= subsetSizes.size()) {
        return 0;
    }
    return subsetSizes.get(owningBucketOrd);
}
@Override
BucketUpdater<SignificantStringTerms.Bucket> bucketUpdater(long owningBucketOrd) throws IOException {
long subsetSize = subsetSizes.get(owningBucketOrd);
long subsetSize = subsetSize(owningBucketOrd);
return (spare, globalOrd, bucketOrd, docCount) -> {
spare.bucketOrd = bucketOrd;
oversizedCopy(lookupGlobalOrd.apply(globalOrd), spare.termBytes);
@ -839,7 +844,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
bucketCountThresholds.getMinDocCount(),
metadata(),
format,
subsetSizes.get(owningBucketOrd),
subsetSize(owningBucketOrd),
supersetSize,
significanceHeuristic,
Arrays.asList(topBuckets)