Aggregations enhancement - remove pointless term frequency lookups.
If the user has set shard_min_doc_count, skip the background frequency lookup for any term whose foreground document count on a shard falls below that threshold, since such a term is discarded on that shard anyway. Closes #11093
This commit is contained in:
parent
236f6ccad7
commit
89b95dccc8
|
@ -99,6 +99,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
|
|||
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
|
||||
continue;
|
||||
}
|
||||
if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (spare == null) {
|
||||
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
|
||||
}
|
||||
|
@ -113,9 +117,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
|
|||
// Back at the central reducer these properties will be updated with
|
||||
// global stats
|
||||
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
|
||||
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
|
||||
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
|
||||
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
|
||||
|
|
|
@ -24,8 +24,8 @@ import org.elasticsearch.common.Nullable;
|
|||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
|
||||
import org.elasticsearch.search.aggregations.reducers.Reducer;
|
||||
|
@ -82,11 +82,15 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
|
|||
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
|
||||
SignificantLongTerms.Bucket spare = null;
|
||||
for (long i = 0; i < bucketOrds.size(); i++) {
|
||||
final int docCount = bucketDocCount(i);
|
||||
if (docCount < bucketCountThresholds.getShardMinDocCount()) {
|
||||
continue;
|
||||
}
|
||||
if (spare == null) {
|
||||
spare = new SignificantLongTerms.Bucket(0, 0, 0, 0, 0, null, formatter);
|
||||
}
|
||||
spare.term = bucketOrds.get(i);
|
||||
spare.subsetDf = bucketDocCount(i);
|
||||
spare.subsetDf = docCount;
|
||||
spare.subsetSize = subsetSize;
|
||||
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.term);
|
||||
spare.supersetSize = supersetSize;
|
||||
|
@ -95,9 +99,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
|
|||
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
|
||||
|
||||
spare.bucketOrd = i;
|
||||
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
|
||||
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
|
||||
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
|
||||
|
|
|
@ -24,8 +24,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.elasticsearch.common.lease.Releasables;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollector;
|
||||
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
|
||||
import org.elasticsearch.search.aggregations.reducers.Reducer;
|
||||
|
@ -81,12 +81,17 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
|
|||
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
|
||||
SignificantStringTerms.Bucket spare = null;
|
||||
for (int i = 0; i < bucketOrds.size(); i++) {
|
||||
final int docCount = bucketDocCount(i);
|
||||
if (docCount < bucketCountThresholds.getShardMinDocCount()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (spare == null) {
|
||||
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
|
||||
}
|
||||
|
||||
bucketOrds.get(i, spare.termBytes);
|
||||
spare.subsetDf = bucketDocCount(i);
|
||||
spare.subsetDf = docCount;
|
||||
spare.subsetSize = subsetSize;
|
||||
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
|
||||
spare.supersetSize = supersetSize;
|
||||
|
@ -97,9 +102,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
|
|||
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
|
||||
|
||||
spare.bucketOrd = i;
|
||||
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
|
||||
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
|
||||
}
|
||||
|
||||
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];
|
||||
|
|
Loading…
Reference in New Issue