Changed the type of the docCounts field from LongArray to IntArray: since a shard can't hold more than Integer.MAX_VALUE documents, a LongArray just takes unnecessary space.
Closes #6529
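The saving is easy to quantify. Below is a minimal, self-contained sketch (not part of this commit; plain arithmetic stands in for the BigArrays-backed LongArray/IntArray, and the bucket count is hypothetical) of the per-shard memory difference for the doc-count slots:

```java
// Hypothetical illustration only: a per-bucket doc count always fits in an int
// because a shard cannot hold more than Integer.MAX_VALUE documents, so each
// slot needs 4 bytes where a LongArray spent 8.
public class DocCountFootprint {
    public static void main(String[] args) {
        final long buckets = 10_000_000L;  // hypothetical number of bucket ordinals
        final long asLongs = buckets * 8;  // LongArray: 8 bytes per doc count
        final long asInts = buckets * 4;   // IntArray: 4 bytes per doc count
        System.out.printf("LongArray: %d MB, IntArray: %d MB, saved: %d MB%n",
                asLongs >> 20, asInts >> 20, (asLongs - asInts) >> 20);
    }
}
```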
This commit is contained in:
parent adb5c19849
commit cb9548f811
BucketsAggregator.java

@@ -19,7 +19,7 @@
 package org.elasticsearch.search.aggregations.bucket;

 import org.elasticsearch.common.lease.Releasable;
-import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.IntArray;
 import org.elasticsearch.search.aggregations.*;
 import org.elasticsearch.search.aggregations.support.AggregationContext;
@@ -32,12 +32,12 @@ import java.util.Arrays;
  */
 public abstract class BucketsAggregator extends Aggregator {

-    private LongArray docCounts;
+    private IntArray docCounts;

     public BucketsAggregator(String name, BucketAggregationMode bucketAggregationMode, AggregatorFactories factories,
                              long estimatedBucketsCount, AggregationContext context, Aggregator parent) {
         super(name, bucketAggregationMode, factories, estimatedBucketsCount, context, parent);
-        docCounts = bigArrays.newLongArray(estimatedBucketsCount, true);
+        docCounts = bigArrays.newIntArray(estimatedBucketsCount, true);
     }

     /**
@@ -63,7 +63,7 @@ public abstract class BucketsAggregator extends Aggregator {
         collectBucketNoCounts(doc, bucketOrd);
     }

-    public LongArray getDocCounts() {
+    public IntArray getDocCounts() {
         return docCounts;
     }
@@ -77,7 +77,7 @@ public abstract class BucketsAggregator extends Aggregator {
     /**
      * Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal)
      */
-    protected final void incrementBucketDocCount(long inc, long bucketOrd) throws IOException {
+    protected final void incrementBucketDocCount(int inc, long bucketOrd) throws IOException {
         docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
         docCounts.increment(bucketOrd, inc);
     }
@@ -85,13 +85,13 @@ public abstract class BucketsAggregator extends Aggregator {
     /**
      * Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal)
      */
-    public final long bucketDocCount(long bucketOrd) {
+    public final int bucketDocCount(long bucketOrd) {
         if (bucketOrd >= docCounts.size()) {
             // This may happen eg. if no document in the highest buckets is accepted by a sub aggregator.
             // For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value
             // matches the filter, then the filter will never collect bucket ord 3. However, the long terms agg will call bucketAggregations(3)
             // on the filter aggregator anyway to build sub-aggregations.
-            return 0L;
+            return 0;
         } else {
             return docCounts.get(bucketOrd);
         }
GlobalOrdinalsSignificantTermsAggregator.java

@@ -82,7 +82,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
                 continue;
             }
             final long bucketOrd = getBucketOrd(globalTermOrd);
-            final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
+            final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
             if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
                 continue;
             }
GlobalOrdinalsStringTermsAggregator.java

@@ -27,7 +27,7 @@ import org.apache.lucene.util.RamUsageEstimator;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.lease.Releasables;
 import org.elasticsearch.common.text.Text;
-import org.elasticsearch.common.util.LongArray;
+import org.elasticsearch.common.util.IntArray;
 import org.elasticsearch.common.util.LongHash;
 import org.elasticsearch.index.fielddata.BytesValues;
 import org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.GlobalOrdinalMapping;
@@ -131,7 +131,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
                 continue;
             }
             final long bucketOrd = getBucketOrd(globalTermOrd);
-            final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
+            final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
             if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
                 continue;
             }
@@ -260,15 +260,15 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
      */
     public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {

-        private final LongArray segmentDocCounts;
+        private final IntArray segmentDocCounts;

         private Ordinals.Docs segmentOrdinals;
-        private LongArray current;
+        private IntArray current;

         public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
                               long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode) {
             super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, null, aggregationContext, parent, collectionMode);
-            this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
+            this.segmentDocCounts = bigArrays.newIntArray(maxOrd, true);
         }

         @Override
@@ -315,7 +315,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
             // This is the cleanest way I can think of so far
             GlobalOrdinalMapping mapping = (GlobalOrdinalMapping) globalOrdinals;
             for (int i = 0; i < segmentDocCounts.size(); i++) {
-                final long inc = segmentDocCounts.set(i, 0);
+                final int inc = segmentDocCounts.set(i, 0);
                 if (inc == 0) {
                     continue;
                 }
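Note (editorial, not part of the diff): this line works because the BigArrays set(index, value) call returns the value previously stored at that index — the code relies on that to read and reset each segment count in one step — so the change here is only the static type of the captured value, long to int.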
InternalOrder.java

@@ -18,7 +18,6 @@
  */
 package org.elasticsearch.search.aggregations.bucket.terms;

-import com.google.common.primitives.Longs;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
@@ -161,9 +160,10 @@ class InternalOrder extends Terms.Order {
         return new Comparator<Terms.Bucket>() {
             @Override
             public int compare(Terms.Bucket o1, Terms.Bucket o2) {
-                long v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd);
-                long v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd);
-                return asc ? Long.compare(v1, v2) : Long.compare(v2, v1);
+                int mul = asc ? 1 : -1;
+                int v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd);
+                int v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd);
+                return mul * (v1 - v2);
             }
         };
     }
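Note (editorial, not part of the diff): swapping Long.compare for mul * (v1 - v2) is safe here only because both values are non-negative doc counts, so the int subtraction cannot overflow; with arbitrary int inputs a subtraction-based comparator would be a bug.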