Aggregations: Merge LongTermsAggregator and DoubleTermsAggregator.

These two aggregators do exactly the same thing; they only differ in how they
interpret the underlying bytes. This refactoring uncovered an (unreleased) bug in
the long terms aggregator, which did not handle duplicate values correctly.

Close #7279
This commit is contained in:
Adrien Grand 2014-08-14 13:46:26 +02:00
parent d2ac95d93c
commit 0f63e0a8da
10 changed files with 409 additions and 169 deletions

View File

@ -20,10 +20,7 @@
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.*;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.lucene.Lucene;
@ -145,6 +142,56 @@ public enum FieldData {
};
}
/**
 * Expose a {@link SortedNumericDoubleValues} as a {@link SortedNumericDocValues} whose
 * values are the sortable long bits of the doubles, as computed by
 * {@link NumericUtils#doubleToSortableLong(double)}.
 * <p>
 * If the input (or its single-valued view) is itself one of the long-bits-to-double
 * wrappers, the long values it wraps are returned directly instead of stacking a
 * second conversion layer on top of it.
 */
public static SortedNumericDocValues toSortableLongBits(SortedNumericDoubleValues values) {
    final NumericDoubleValues single = unwrapSingleton(values);
    if (single == null) {
        // No single-valued view available: work on the multi-valued instance.
        if (values instanceof SortableLongBitsToSortedNumericDoubleValues) {
            return ((SortableLongBitsToSortedNumericDoubleValues) values).getLongValues();
        }
        return new SortableLongBitsSortedNumericDocValues(values);
    }

    // Single-valued view: reuse the wrapped longs when possible, otherwise encode on the fly.
    final NumericDocValues bits = single instanceof SortableLongBitsToNumericDoubleValues
            ? ((SortableLongBitsToNumericDoubleValues) single).getLongValues()
            : new SortableLongBitsNumericDocValues(single);
    return DocValues.singleton(bits, unwrapSingletonBits(values));
}
/**
 * Expose a {@link SortedNumericDocValues} holding sortable long bits as a
 * {@link SortedNumericDoubleValues}, decoding each value with
 * {@link NumericUtils#sortableLongToDouble(long)}.
 * <p>
 * If the input (or its single-valued view) is itself one of the double-to-long-bits
 * wrappers, the double values it wraps are returned directly instead of stacking a
 * second conversion layer on top of it.
 */
public static SortedNumericDoubleValues sortableLongBitsToDoubles(SortedNumericDocValues values) {
    final NumericDocValues single = DocValues.unwrapSingleton(values);
    if (single == null) {
        // No single-valued view available: work on the multi-valued instance.
        if (values instanceof SortableLongBitsSortedNumericDocValues) {
            return ((SortableLongBitsSortedNumericDocValues) values).getDoubleValues();
        }
        return new SortableLongBitsToSortedNumericDoubleValues(values);
    }

    // Single-valued view: reuse the wrapped doubles when possible, otherwise decode on the fly.
    final NumericDoubleValues decoded = single instanceof SortableLongBitsNumericDocValues
            ? ((SortableLongBitsNumericDocValues) single).getDoubleValues()
            : new SortableLongBitsToNumericDoubleValues(single);
    return singleton(decoded, DocValues.unwrapSingletonBits(values));
}
/**
* Wrap the provided {@link SortedNumericDocValues} instance to cast all values to doubles.
*/

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.NumericUtils;
/**
 * View of a {@link NumericDoubleValues} as {@link NumericDocValues}: every double read
 * from the wrapped instance is encoded with
 * {@link NumericUtils#doubleToSortableLong(double)} before being returned.
 */
final class SortableLongBitsNumericDocValues extends NumericDocValues {

    /** Source of the doubles that are encoded on the fly. */
    private final NumericDoubleValues doubles;

    SortableLongBitsNumericDocValues(NumericDoubleValues values) {
        doubles = values;
    }

    /** Return the wrapped values. */
    public NumericDoubleValues getDoubleValues() {
        return doubles;
    }

    @Override
    public long get(int docID) {
        final double raw = doubles.get(docID);
        return NumericUtils.doubleToSortableLong(raw);
    }
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.NumericUtils;
/**
 * View of a {@link SortedNumericDoubleValues} as {@link SortedNumericDocValues}: every
 * double read from the wrapped instance is encoded with
 * {@link NumericUtils#doubleToSortableLong(double)} before being returned. Document
 * positioning and value counts are delegated unchanged.
 */
final class SortableLongBitsSortedNumericDocValues extends SortedNumericDocValues {

    /** Source of the doubles that are encoded on the fly. */
    private final SortedNumericDoubleValues doubles;

    SortableLongBitsSortedNumericDocValues(SortedNumericDoubleValues values) {
        doubles = values;
    }

    /** Return the wrapped values. */
    public SortedNumericDoubleValues getDoubleValues() {
        return doubles;
    }

    @Override
    public void setDocument(int doc) {
        doubles.setDocument(doc);
    }

    @Override
    public int count() {
        return doubles.count();
    }

    @Override
    public long valueAt(int index) {
        final double raw = doubles.valueAt(index);
        return NumericUtils.doubleToSortableLong(raw);
    }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.NumericUtils;
/**
 * {@link NumericDoubleValues} instance that wraps a {@link NumericDocValues}
 * and converts the sortable long bits it holds back to doubles using
 * {@link NumericUtils#sortableLongToDouble(long)}.
 */
final class SortableLongBitsToNumericDoubleValues extends NumericDoubleValues {

    /** Wrapped values, read as sortable long bits. */
    private final NumericDocValues values;

    SortableLongBitsToNumericDoubleValues(NumericDocValues values) {
        this.values = values;
    }

    /** Decode the long stored for {@code docID} into a double. */
    @Override
    public double get(int docID) {
        return NumericUtils.sortableLongToDouble(values.get(docID));
    }

    /** Return the wrapped values. */
    public NumericDocValues getLongValues() {
        return values;
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.NumericUtils;
/**
 * {@link SortedNumericDoubleValues} instance that wraps a {@link SortedNumericDocValues}
 * and converts the sortable long bits it holds back to doubles using
 * {@link NumericUtils#sortableLongToDouble(long)}.
 */
final class SortableLongBitsToSortedNumericDoubleValues extends SortedNumericDoubleValues {

    /** Wrapped values, read as sortable long bits. */
    private final SortedNumericDocValues values;

    SortableLongBitsToSortedNumericDoubleValues(SortedNumericDocValues values) {
        this.values = values;
    }

    /** Position the wrapped values on {@code doc}. */
    @Override
    public void setDocument(int doc) {
        values.setDocument(doc);
    }

    /** Decode the long at {@code index} for the current document into a double. */
    @Override
    public double valueAt(int index) {
        return NumericUtils.sortableLongToDouble(values.valueAt(index));
    }

    /** Number of values reported by the wrapped instance for the current document. */
    @Override
    public int count() {
        return values.count();
    }

    /** Return the wrapped values. */
    public SortedNumericDocValues getLongValues() {
        return values;
    }
}

View File

@ -235,58 +235,10 @@ public class SortedNumericDVIndexFieldData extends DocValuesIndexFieldData imple
public SortedNumericDoubleValues getDoubleValues() {
try {
SortedNumericDocValues raw = DocValues.getSortedNumeric(reader, field);
NumericDocValues single = DocValues.unwrapSingleton(raw);
if (single != null) {
return FieldData.singleton(new SingleDoubleValues(single), DocValues.unwrapSingletonBits(raw));
} else {
return new MultiDoubleValues(raw);
}
return FieldData.sortableLongBitsToDoubles(raw);
} catch (IOException e) {
throw new ElasticsearchIllegalStateException("Cannot load doc values", e);
}
}
}
/**
* Wraps a NumericDocValues and exposes a single 64-bit double per document.
*/
static final class SingleDoubleValues extends NumericDoubleValues {
final NumericDocValues in;
SingleDoubleValues(NumericDocValues in) {
this.in = in;
}
@Override
public double get(int docID) {
return NumericUtils.sortableLongToDouble(in.get(docID));
}
}
/**
* Wraps a SortedNumericDocValues and exposes multiple 64-bit doubles per document.
*/
static final class MultiDoubleValues extends SortedNumericDoubleValues {
final SortedNumericDocValues in;
MultiDoubleValues(SortedNumericDocValues in) {
this.in = in;
}
@Override
public void setDocument(int doc) {
in.setDocument(doc);
}
@Override
public double valueAt(int index) {
return NumericUtils.sortableLongToDouble(in.valueAt(index));
}
@Override
public int count() {
return in.count();
}
}
}

View File

@ -18,139 +18,58 @@
*/
package org.elasticsearch.search.aggregations.bucket.terms;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.FieldData;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories;
import org.elasticsearch.search.aggregations.bucket.terms.support.BucketPriorityQueue;
import org.elasticsearch.search.aggregations.support.AggregationContext;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Numeric;
import org.elasticsearch.search.aggregations.support.format.ValueFormat;
import org.elasticsearch.search.aggregations.support.format.ValueFormatter;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
/**
*
*/
public class DoubleTermsAggregator extends TermsAggregator {
private final ValuesSource.Numeric valuesSource;
private final ValueFormatter formatter;
private final LongHash bucketOrds;
private final boolean showTermDocCountError;
private SortedNumericDoubleValues values;
public class DoubleTermsAggregator extends LongTermsAggregator {
public DoubleTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, @Nullable ValueFormat format, long estimatedBucketCount,
InternalOrder order, BucketCountThresholds bucketCountThresholds, AggregationContext aggregationContext, Aggregator parent, SubAggCollectionMode collectionMode, boolean showTermDocCountError) {
super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent, bucketCountThresholds, order, collectionMode);
this.valuesSource = valuesSource;
this.showTermDocCountError = showTermDocCountError;
this.formatter = format != null ? format.formatter() : null;
bucketOrds = new LongHash(estimatedBucketCount, aggregationContext.bigArrays());
super(name, factories, valuesSource, format, estimatedBucketCount, order, bucketCountThresholds, aggregationContext, parent, collectionMode, showTermDocCountError);
}
@Override
public boolean shouldCollect() {
return true;
}
@Override
public void setNextReader(AtomicReaderContext reader) {
values = valuesSource.doubleValues();
}
@Override
public void collect(int doc, long owningBucketOrdinal) throws IOException {
assert owningBucketOrdinal == 0;
values.setDocument(doc);
final int valuesCount = values.count();
double previous = Double.NaN;
for (int i = 0; i < valuesCount; ++i) {
final double val = values.valueAt(i);
if (val != previous) {
final long bits = Double.doubleToRawLongBits(val);
long bucketOrdinal = bucketOrds.add(bits);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
collectExistingBucket(doc, bucketOrdinal);
} else {
collectBucket(doc, bucketOrdinal);
}
previous = val;
}
}
protected SortedNumericDocValues getValues(Numeric valuesSource) {
// Hand the values source's doubles to the caller as sortable long bits
// (see FieldData.toSortableLongBits).
return FieldData.toSortableLongBits(valuesSource.doubleValues());
}
@Override
public DoubleTerms buildAggregation(long owningBucketOrdinal) {
assert owningBucketOrdinal == 0;
if (bucketCountThresholds.getMinDocCount() == 0 && (order != InternalOrder.COUNT_DESC || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) {
// we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
final SortedNumericDoubleValues values = valuesSource.doubleValues();
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
values.setDocument(docId);
final int valueCount = values.count();
for (int i = 0; i < valueCount; ++i) {
bucketOrds.add(Double.doubleToLongBits(values.valueAt(i)));
}
}
}
}
final int size = (int) Math.min(bucketOrds.size(), bucketCountThresholds.getShardSize());
BucketPriorityQueue ordered = new BucketPriorityQueue(size, order.comparator(this));
DoubleTerms.Bucket spare = null;
for (long i = 0; i < bucketOrds.size(); i++) {
if (spare == null) {
spare = new DoubleTerms.Bucket(0, 0, null, showTermDocCountError, 0);
}
spare.term = Double.longBitsToDouble(bucketOrds.get(i));
spare.docCount = bucketDocCount(i);
spare.bucketOrd = i;
if (bucketCountThresholds.getShardMinDocCount() <= spare.docCount) {
spare = (DoubleTerms.Bucket) ordered.insertWithOverflow(spare);
}
}
// Get the top buckets
final InternalTerms.Bucket[] list = new InternalTerms.Bucket[ordered.size()];
long survivingBucketOrds[] = new long[ordered.size()];
for (int i = ordered.size() - 1; i >= 0; --i) {
final DoubleTerms.Bucket bucket = (DoubleTerms.Bucket) ordered.pop();
survivingBucketOrds[i] = bucket.bucketOrd;
list[i] = bucket;
}
// replay any deferred collections
runDeferredCollections(survivingBucketOrds);
// Now build the aggs
for (int i = 0; i < list.length; i++) {
list[i].aggregations = bucketAggregations(list[i].bucketOrd);
list[i].docCountError = 0;
}
return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Arrays.asList(list), showTermDocCountError, 0);
final LongTerms terms = (LongTerms) super.buildAggregation(owningBucketOrdinal);
return convertToDouble(terms);
}
@Override
public DoubleTerms buildEmptyAggregation() {
return new DoubleTerms(name, order, formatter, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getShardSize(), bucketCountThresholds.getMinDocCount(), Collections.<InternalTerms.Bucket>emptyList(), showTermDocCountError, 0);
final LongTerms terms = (LongTerms) super.buildEmptyAggregation();
return convertToDouble(terms);
}
@Override
public void doClose() {
Releasables.close(bucketOrds);
/**
 * Build a {@link DoubleTerms.Bucket} from a bucket whose numeric key holds sortable long
 * bits: the key is decoded with {@link NumericUtils#sortableLongToDouble(long)} and the
 * doc count, sub-aggregations and doc-count-error fields are copied over as-is.
 */
private static DoubleTerms.Bucket convertToDouble(InternalTerms.Bucket bucket) {
    final double key = NumericUtils.sortableLongToDouble(bucket.getKeyAsNumber().longValue());
    return new DoubleTerms.Bucket(key, bucket.docCount, bucket.aggregations, bucket.showDocCountError, bucket.docCountError);
}
/**
 * Build a {@link DoubleTerms} from a {@link LongTerms}: each bucket is converted with
 * {@link #convertToDouble(InternalTerms.Bucket)}, and the name, order, formatter, sizes,
 * min doc count and doc-count-error settings are taken from {@code terms} unchanged.
 */
private static DoubleTerms convertToDouble(LongTerms terms) {
    final InternalTerms.Bucket[] longBuckets = terms.getBuckets().toArray(new InternalTerms.Bucket[0]);
    final InternalTerms.Bucket[] doubleBuckets = new InternalTerms.Bucket[longBuckets.length];
    for (int pos = 0; pos < longBuckets.length; ++pos) {
        doubleBuckets[pos] = convertToDouble(longBuckets[pos]);
    }
    return new DoubleTerms(terms.getName(), terms.order, terms.formatter, terms.requiredSize, terms.shardSize,
            terms.minDocCount, Arrays.asList(doubleBuckets), terms.showTermDocCountError, terms.docCountError);
}
}

View File

@ -95,7 +95,7 @@ public class LongTerms extends InternalTerms {
}
}
private @Nullable ValueFormatter formatter;
@Nullable ValueFormatter formatter;
LongTerms() {} // for serialization

View File

@ -63,9 +63,13 @@ public class LongTermsAggregator extends TermsAggregator {
return true;
}
/**
 * Return the values this aggregator reads terms from. This implementation returns
 * {@code valuesSource.longValues()}; the method is protected and non-final, so a
 * subclass may supply a different long view of the same source.
 */
protected SortedNumericDocValues getValues(ValuesSource.Numeric valuesSource) {
    final SortedNumericDocValues longs = valuesSource.longValues();
    return longs;
}
@Override
public void setNextReader(AtomicReaderContext reader) {
values = valuesSource.longValues();
values = getValues(valuesSource);
}
@Override
@ -77,7 +81,7 @@ public class LongTermsAggregator extends TermsAggregator {
long previous = Long.MAX_VALUE;
for (int i = 0; i < valuesCount; ++i) {
final long val = values.valueAt(i);
if (previous != val || i != 0) {
if (previous != val || i == 0) {
long bucketOrdinal = bucketOrds.add(val);
if (bucketOrdinal < 0) { // already seen
bucketOrdinal = - 1 - bucketOrdinal;
@ -98,7 +102,7 @@ public class LongTermsAggregator extends TermsAggregator {
// we need to fill-in the blanks
for (AtomicReaderContext ctx : context.searchContext().searcher().getTopReaderContext().leaves()) {
context.setNextReader(ctx);
final SortedNumericDocValues values = valuesSource.longValues();
final SortedNumericDocValues values = getValues(valuesSource);
for (int docId = 0; docId < ctx.reader().maxDoc(); ++docId) {
values.setDocument(docId);
final int valueCount = values.count();

View File

@ -0,0 +1,107 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.fielddata;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.test.ElasticsearchTestCase;
/**
 * Tests for the conversions between sortable long bits and doubles exposed by
 * {@link FieldData}, including the guarantee that converting back and forth
 * returns the original instance rather than a stack of wrappers.
 */
public class FieldDataTests extends ElasticsearchTestCase {

    /** Longs -> doubles, then back to longs, for both single- and multi-valued inputs. */
    public void testSortableLongBitsToDoubles() {
        final double value = randomDouble();
        final long valueBits = NumericUtils.doubleToSortableLong(value);

        NumericDocValues values = new NumericDocValues() {
            @Override
            public long get(int docID) {
                return valueBits;
            }
        };

        SortedNumericDoubleValues asMultiDoubles = FieldData.sortableLongBitsToDoubles(DocValues.singleton(values, null));
        NumericDoubleValues asDoubles = FieldData.unwrapSingleton(asMultiDoubles);
        assertNotNull(asDoubles);
        assertEquals(value, asDoubles.get(0), 0);

        // Converting back must unwrap to the very same instance.
        NumericDocValues backToLongs = DocValues.unwrapSingleton(FieldData.toSortableLongBits(asMultiDoubles));
        assertSame(values, backToLongs);

        SortedNumericDocValues multiValues = new SortedNumericDocValues() {
            @Override
            public long valueAt(int index) {
                return valueBits;
            }

            @Override
            public void setDocument(int doc) {
            }

            @Override
            public int count() {
                return 1;
            }
        };

        asMultiDoubles = FieldData.sortableLongBitsToDoubles(multiValues);
        assertEquals(value, asMultiDoubles.valueAt(0), 0);
        assertSame(multiValues, FieldData.toSortableLongBits(asMultiDoubles));
    }

    /** Doubles -> longs, then back to doubles, for both single- and multi-valued inputs. */
    public void testDoublesToSortableLongBits() {
        final double value = randomDouble();
        final long valueBits = NumericUtils.doubleToSortableLong(value);

        NumericDoubleValues values = new NumericDoubleValues() {
            @Override
            public double get(int docID) {
                return value;
            }
        };

        SortedNumericDocValues asMultiLongs = FieldData.toSortableLongBits(FieldData.singleton(values, null));
        NumericDocValues asLongs = DocValues.unwrapSingleton(asMultiLongs);
        assertNotNull(asLongs);
        assertEquals(valueBits, asLongs.get(0));

        // Mirror of the round-trip check in testSortableLongBitsToDoubles: converting the
        // single-valued view back must unwrap to the very same instance.
        NumericDoubleValues backToDoubles = FieldData.unwrapSingleton(FieldData.sortableLongBitsToDoubles(asMultiLongs));
        assertSame(values, backToDoubles);

        SortedNumericDoubleValues multiValues = new SortedNumericDoubleValues() {
            @Override
            public double valueAt(int index) {
                return value;
            }

            @Override
            public void setDocument(int doc) {
            }

            @Override
            public int count() {
                return 1;
            }
        };

        asMultiLongs = FieldData.toSortableLongBits(multiValues);
        assertEquals(valueBits, asMultiLongs.valueAt(0));
        assertSame(multiValues, FieldData.sortableLongBitsToDoubles(asMultiLongs));
    }
}