Aggregations enhancement - remove pointless term frequency lookups.

If the user has set shard_min_doc_count, skip the background frequency lookup for any term whose foreground document count on a shard falls below that threshold.

Closes #11093
This commit is contained in:
markharwood 2015-05-11 16:18:59 +01:00
parent 236f6ccad7
commit 89b95dccc8
3 changed files with 20 additions and 13 deletions

View File

@ -99,6 +99,10 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}
if (bucketDocCount < bucketCountThresholds.getShardMinDocCount()) {
continue;
}
if (spare == null) {
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
}
@ -113,9 +117,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
// Back at the central reducer these properties will be updated with
// global stats
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];

View File

@ -24,8 +24,8 @@ import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.terms.LongTermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.reducers.Reducer;
@ -82,11 +82,15 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
SignificantLongTerms.Bucket spare = null;
for (long i = 0; i < bucketOrds.size(); i++) {
final int docCount = bucketDocCount(i);
if (docCount < bucketCountThresholds.getShardMinDocCount()) {
continue;
}
if (spare == null) {
spare = new SignificantLongTerms.Bucket(0, 0, 0, 0, 0, null, formatter);
}
spare.term = bucketOrds.get(i);
spare.subsetDf = bucketDocCount(i);
spare.subsetDf = docCount;
spare.subsetSize = subsetSize;
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.term);
spare.supersetSize = supersetSize;
@ -95,9 +99,7 @@ public class SignificantLongTermsAggregator extends LongTermsAggregator {
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
spare.bucketOrd = i;
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
}
spare = (SignificantLongTerms.Bucket) ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];

View File

@ -24,8 +24,8 @@ import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.LeafBucketCollector;
import org.elasticsearch.search.aggregations.LeafBucketCollectorBase;
import org.elasticsearch.search.aggregations.bucket.terms.StringTermsAggregator;
import org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude;
import org.elasticsearch.search.aggregations.reducers.Reducer;
@ -81,12 +81,17 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
BucketSignificancePriorityQueue ordered = new BucketSignificancePriorityQueue(size);
SignificantStringTerms.Bucket spare = null;
for (int i = 0; i < bucketOrds.size(); i++) {
final int docCount = bucketDocCount(i);
if (docCount < bucketCountThresholds.getShardMinDocCount()) {
continue;
}
if (spare == null) {
spare = new SignificantStringTerms.Bucket(new BytesRef(), 0, 0, 0, 0, null);
}
bucketOrds.get(i, spare.termBytes);
spare.subsetDf = bucketDocCount(i);
spare.subsetDf = docCount;
spare.subsetSize = subsetSize;
spare.supersetDf = termsAggFactory.getBackgroundFrequency(spare.termBytes);
spare.supersetSize = supersetSize;
@ -97,9 +102,7 @@ public class SignificantStringTermsAggregator extends StringTermsAggregator {
spare.updateScore(termsAggFactory.getSignificanceHeuristic());
spare.bucketOrd = i;
if (spare.subsetDf >= bucketCountThresholds.getShardMinDocCount()) {
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
spare = (SignificantStringTerms.Bucket) ordered.insertWithOverflow(spare);
}
final InternalSignificantTerms.Bucket[] list = new InternalSignificantTerms.Bucket[ordered.size()];