Changed the type of field docCounts to IntArray instead of LongArray, because a shard can't hold more than Integer.MAX_VALUE documents, so a LongArray just takes unnecessary space.

Closes #6529
This commit is contained in:
Martijn van Groningen 2014-06-17 10:53:43 +02:00
parent adb5c19849
commit cb9548f811
4 changed files with 18 additions and 18 deletions

View File

@@ -19,7 +19,7 @@
package org.elasticsearch.search.aggregations.bucket;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.search.aggregations.*;
import org.elasticsearch.search.aggregations.support.AggregationContext;
@@ -32,12 +32,12 @@ import java.util.Arrays;
*/
public abstract class BucketsAggregator extends Aggregator {
private LongArray docCounts;
private IntArray docCounts;
public BucketsAggregator(String name, BucketAggregationMode bucketAggregationMode, AggregatorFactories factories,
long estimatedBucketsCount, AggregationContext context, Aggregator parent) {
super(name, bucketAggregationMode, factories, estimatedBucketsCount, context, parent);
docCounts = bigArrays.newLongArray(estimatedBucketsCount, true);
docCounts = bigArrays.newIntArray(estimatedBucketsCount, true);
}
/**
@@ -63,7 +63,7 @@ public abstract class BucketsAggregator extends Aggregator {
collectBucketNoCounts(doc, bucketOrd);
}
public LongArray getDocCounts() {
public IntArray getDocCounts() {
return docCounts;
}
@@ -77,7 +77,7 @@ public abstract class BucketsAggregator extends Aggregator {
/**
* Utility method to increment the doc counts of the given bucket (identified by the bucket ordinal)
*/
protected final void incrementBucketDocCount(long inc, long bucketOrd) throws IOException {
protected final void incrementBucketDocCount(int inc, long bucketOrd) throws IOException {
docCounts = bigArrays.grow(docCounts, bucketOrd + 1);
docCounts.increment(bucketOrd, inc);
}
@@ -85,13 +85,13 @@ public abstract class BucketsAggregator extends Aggregator {
/**
* Utility method to return the number of documents that fell in the given bucket (identified by the bucket ordinal)
*/
public final long bucketDocCount(long bucketOrd) {
public final int bucketDocCount(long bucketOrd) {
if (bucketOrd >= docCounts.size()) {
// This may happen eg. if no document in the highest buckets is accepted by a sub aggregator.
// For example, if there is a long terms agg on 3 terms 1,2,3 with a sub filter aggregator and if no document with 3 as a value
// matches the filter, then the filter will never collect bucket ord 3. However, the long terms agg will call bucketAggregations(3)
// on the filter aggregator anyway to build sub-aggregations.
return 0L;
return 0;
} else {
return docCounts.get(bucketOrd);
}

View File

@@ -82,7 +82,7 @@ public class GlobalOrdinalsSignificantTermsAggregator extends GlobalOrdinalsStri
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}

View File

@@ -27,7 +27,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.util.LongArray;
import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.index.fielddata.BytesValues;
import org.elasticsearch.index.fielddata.ordinals.InternalGlobalOrdinalsBuilder.GlobalOrdinalMapping;
@@ -131,7 +131,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
continue;
}
final long bucketOrd = getBucketOrd(globalTermOrd);
final long bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
final int bucketDocCount = bucketOrd < 0 ? 0 : bucketDocCount(bucketOrd);
if (bucketCountThresholds.getMinDocCount() > 0 && bucketDocCount == 0) {
continue;
}
@@ -260,15 +260,15 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
*/
public static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
private final LongArray segmentDocCounts;
private final IntArray segmentDocCounts;
private Ordinals.Docs segmentOrdinals;
private LongArray current;
private IntArray current;
public LowCardinality(String name, AggregatorFactories factories, ValuesSource.Bytes.WithOrdinals.FieldData valuesSource, long estimatedBucketCount,
long maxOrd, InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode) {
super(name, factories, valuesSource, estimatedBucketCount, maxOrd, order, bucketCountThresholds, null, aggregationContext, parent, collectionMode);
this.segmentDocCounts = bigArrays.newLongArray(maxOrd, true);
this.segmentDocCounts = bigArrays.newIntArray(maxOrd, true);
}
@Override
@@ -315,7 +315,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
// This is the cleanest way I can think of so far
GlobalOrdinalMapping mapping = (GlobalOrdinalMapping) globalOrdinals;
for (int i = 0; i < segmentDocCounts.size(); i++) {
final long inc = segmentDocCounts.set(i, 0);
final int inc = segmentDocCounts.set(i, 0);
if (inc == 0) {
continue;
}

View File

@@ -18,7 +18,6 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import com.google.common.primitives.Longs;
import org.elasticsearch.Version;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@@ -161,9 +160,10 @@ class InternalOrder extends Terms.Order {
return new Comparator<Terms.Bucket>() {
@Override
public int compare(Terms.Bucket o1, Terms.Bucket o2) {
long v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd);
long v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd);
return asc ? Long.compare(v1, v2) : Long.compare(v2, v1);
int mul = asc ? 1 : -1;
int v1 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o1).bucketOrd);
int v2 = ((SingleBucketAggregator) aggregator).bucketDocCount(((InternalTerms.Bucket) o2).bucketOrd);
return mul * (v1 - v2);
}
};
}