diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java
index af9f063743f..5494ae8fb7c 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/histogram/HistogramAggregator.java
@@ -41,8 +41,6 @@ import java.util.List;
 
 public class HistogramAggregator extends BucketsAggregator {
 
-    private final static int INITIAL_CAPACITY = 50; // TODO sizing
-
     private final NumericValuesSource valuesSource;
     private final Rounding rounding;
     private final InternalOrder order;
@@ -59,11 +57,12 @@ public class HistogramAggregator extends BucketsAggregator {
                                boolean keyed,
                                boolean computeEmptyBuckets,
                                @Nullable NumericValuesSource valuesSource,
+                               long initialCapacity,
                                AbstractHistogramBase.Factory histogramFactory,
                                AggregationContext aggregationContext,
                                Aggregator parent) {
 
-        super(name, BucketAggregationMode.PER_BUCKET, factories, 50, aggregationContext, parent);
+        super(name, BucketAggregationMode.PER_BUCKET, factories, initialCapacity, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.rounding = rounding;
         this.order = order;
@@ -71,7 +70,7 @@ public class HistogramAggregator extends BucketsAggregator {
         this.computeEmptyBuckets = computeEmptyBuckets;
         this.histogramFactory = histogramFactory;
 
-        bucketOrds = new LongHash(INITIAL_CAPACITY);
+        bucketOrds = new LongHash(initialCapacity);
     }
 
     @Override
@@ -152,12 +151,13 @@ public class HistogramAggregator extends BucketsAggregator {
 
         @Override
         protected Aggregator createUnmapped(AggregationContext aggregationContext, Aggregator parent) {
-            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, null, histogramFactory, aggregationContext, parent);
+            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, null, 0, histogramFactory, aggregationContext, parent);
         }
 
         @Override
         protected Aggregator create(NumericValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
-            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, valuesSource, histogramFactory, aggregationContext, parent);
+            // TODO: once we keep track of min/max values in IndexFieldData, we could use the max here to come up with a better estimate of the bucket count
+            return new HistogramAggregator(name, factories, rounding, order, keyed, computeEmptyBuckets, valuesSource, 50, histogramFactory, aggregationContext, parent);
         }
     }
 }
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/geodistance/GeoDistanceParser.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/geodistance/GeoDistanceParser.java
index 15f2a61fa5c..b72889e98c4 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/range/geodistance/GeoDistanceParser.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/range/geodistance/GeoDistanceParser.java
@@ -225,8 +225,11 @@ public class GeoDistanceParser implements Aggregator.Parser {
         @Override
         protected Aggregator create(final GeoPointValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
             final DistanceValues distanceValues = new DistanceValues(valuesSource, distanceType, origin, unit);
-            FieldDataSource.Numeric distanceSource = new DistanceSource(distanceValues);
-            distanceSource = new FieldDataSource.Numeric.SortedAndUnique(distanceSource);
+            FieldDataSource.Numeric distanceSource = new DistanceSource(distanceValues, valuesSource.metaData());
+            if (distanceSource.metaData().multiValued()) {
+                // we need to ensure uniqueness
+                distanceSource = new FieldDataSource.Numeric.SortedAndUnique(distanceSource);
+            }
             final NumericValuesSource numericSource = new NumericValuesSource(distanceSource, null, null);
             return new RangeAggregator(name, factories, numericSource, rangeFactory, ranges, keyed, aggregationContext, parent);
         }
@@ -264,9 +267,17 @@ public class GeoDistanceParser implements Aggregator.Parser {
     private static class DistanceSource extends FieldDataSource.Numeric {
 
         private final DoubleValues values;
+        private final MetaData metaData;
 
-        public DistanceSource(DoubleValues values) {
+        public DistanceSource(DoubleValues values, MetaData metaData) {
             this.values = values;
+            // even if the geo points are unique, there's no guarantee the distances are
+            this.metaData = MetaData.builder(metaData).uniqueness(MetaData.Uniqueness.UNKNOWN).build();
+        }
+
+        @Override
+        public MetaData metaData() {
+            return metaData;
         }
 
         @Override
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java
index 54825bcec8c..6945307e4cc 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/DoubleTermsAggregator.java
@@ -38,22 +38,20 @@ import java.util.Collections;
  */
 public class DoubleTermsAggregator extends BucketsAggregator {
 
-    private static final int INITIAL_CAPACITY = 50; // TODO sizing
-
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
    private final NumericValuesSource valuesSource;
     private final LongHash bucketOrds;
 
-    public DoubleTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource,
+    public DoubleTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
                                  InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
-        super(name, BucketAggregationMode.PER_BUCKET, factories, INITIAL_CAPACITY, aggregationContext, parent);
+        super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.order = order;
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
-        bucketOrds = new LongHash(INITIAL_CAPACITY);
+        bucketOrds = new LongHash(estimatedBucketCount);
     }
 
     @Override
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java
index 24327db27da..ba3df4aafe2 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongTermsAggregator.java
@@ -38,22 +38,20 @@ import java.util.Collections;
  */
 public class LongTermsAggregator extends BucketsAggregator {
 
-    private static final int INITIAL_CAPACITY = 50; // TODO sizing
-
     private final InternalOrder order;
     private final int requiredSize;
     private final int shardSize;
     private final NumericValuesSource valuesSource;
     private final LongHash bucketOrds;
 
-    public LongTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource,
+    public LongTermsAggregator(String name, AggregatorFactories factories, NumericValuesSource valuesSource, long estimatedBucketCount,
                                InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
-        super(name, BucketAggregationMode.PER_BUCKET, factories, INITIAL_CAPACITY, aggregationContext, parent);
+        super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.order = order;
         this.requiredSize = requiredSize;
         this.shardSize = shardSize;
-        bucketOrds = new LongHash(INITIAL_CAPACITY);
+        bucketOrds = new LongHash(estimatedBucketCount);
     }
 
     @Override
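The GeoDistanceParser change above now wraps the derived distance source in FieldDataSource.Numeric.SortedAndUnique only when the geo field's metadata reports it as multi-valued. Per document, that wrapper essentially has to sort the values and drop duplicates before they reach the range aggregator. Below is a minimal, self-contained sketch of that per-document pass (plain Java for illustration, not the Elasticsearch classes; names are made up):

```java
import java.util.Arrays;

final class SortedUniqueSketch {

    // Returns one document's values in ascending order with duplicates removed,
    // which is the guarantee a "sorted and unique" numeric view has to provide per document.
    static double[] sortedUnique(double[] docValues) {
        if (docValues.length <= 1) {
            return docValues; // single-valued documents need no extra work
        }
        double[] sorted = Arrays.copyOf(docValues, docValues.length);
        Arrays.sort(sorted);
        int unique = 1;
        for (int i = 1; i < sorted.length; i++) {
            if (sorted[i] != sorted[unique - 1]) {
                sorted[unique++] = sorted[i];
            }
        }
        return Arrays.copyOf(sorted, unique);
    }

    public static void main(String[] args) {
        // Two equal distances should only be counted once per document by a range aggregation.
        System.out.println(Arrays.toString(sortedUnique(new double[]{710.0, 17.5, 710.0})));
        // -> [17.5, 710.0]
    }
}
```

Single-valued fields can skip this pass entirely, which is why loading the multi-valued flag up front pays off.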
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java
index b8ceece6732..cd272197197 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java
@@ -45,8 +45,6 @@ import java.util.Collections;
  */
 public class StringTermsAggregator extends BucketsAggregator {
 
-    private static final int INITIAL_CAPACITY = 50; // TODO sizing
-
     private final ValuesSource valuesSource;
     private final InternalOrder order;
     private final int requiredSize;
@@ -54,11 +52,11 @@ public class StringTermsAggregator extends BucketsAggregator {
     protected final BytesRefHash bucketOrds;
     private final IncludeExclude includeExclude;
 
-    public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource,
+    public StringTermsAggregator(String name, AggregatorFactories factories, ValuesSource valuesSource, long estimatedBucketCount,
                                  InternalOrder order, int requiredSize, int shardSize, IncludeExclude includeExclude, AggregationContext aggregationContext, Aggregator parent) {
 
-        super(name, BucketAggregationMode.PER_BUCKET, factories, INITIAL_CAPACITY, aggregationContext, parent);
+        super(name, BucketAggregationMode.PER_BUCKET, factories, estimatedBucketCount, aggregationContext, parent);
         this.valuesSource = valuesSource;
         this.order = order;
         this.requiredSize = requiredSize;
@@ -145,9 +143,9 @@ public class StringTermsAggregator extends BucketsAggregator {
         private Ordinals.Docs ordinals;
         private LongArray ordinalToBucket;
 
-        public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, InternalOrder order, int requiredSize,
-                            int shardSize, AggregationContext aggregationContext, Aggregator parent) {
-            super(name, factories, valuesSource, order, requiredSize, shardSize, null, aggregationContext, parent);
+        public WithOrdinals(String name, AggregatorFactories factories, BytesValuesSource.WithOrdinals valuesSource, long estimatedBucketCount,
+                            InternalOrder order, int requiredSize, int shardSize, AggregationContext aggregationContext, Aggregator parent) {
+            super(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, null, aggregationContext, parent);
             this.valuesSource = valuesSource;
         }
 
@@ -164,7 +162,7 @@ public class StringTermsAggregator extends BucketsAggregator {
 
         @Override
         public void collect(int doc, long owningBucketOrdinal) throws IOException {
-            assert owningBucketOrdinal == 0;
+            assert owningBucketOrdinal == 0 : "this is a per_bucket aggregator";
             final int valuesCount = ordinals.setDocument(doc);
 
             for (int i = 0; i < valuesCount; ++i) {
diff --git a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
index 246c07d29a5..bd8b02399e0 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java
@@ -71,6 +71,20 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
 
     @Override
     protected Aggregator create(ValuesSource valuesSource, long expectedBucketsCount, AggregationContext aggregationContext, Aggregator parent) {
+        long estimatedBucketCount = valuesSource.metaData().maxAtomicUniqueValuesCount();
+        if (estimatedBucketCount < 0) {
+            // there is no estimate available; 50 should be a good start
+            estimatedBucketCount = 50;
+        }
+
+        // We add an upper bound to the estimate because some atomic field data implementations (e.g. future binary doc values)
+        // will not know their exact cardinality and will only return upper bounds from AtomicFieldData.getNumberUniqueValues()
+        // that may be largely over-estimated. The value chosen here is arbitrary, just to play nice with typical CPU caches.
+        //
+        // Another reason is that it may be faster to resize upon growth than to start directly with the appropriate size,
+        // and not all values are necessarily visited by the matching documents anyway.
+        estimatedBucketCount = Math.min(estimatedBucketCount, 512);
+
         if (valuesSource instanceof BytesValuesSource) {
             if (executionHint != null && !executionHint.equals(EXECUTION_HINT_VALUE_MAP) && !executionHint.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
                 throw new ElasticSearchIllegalArgumentException("execution_hint can only be '" + EXECUTION_HINT_VALUE_MAP + "' or '" + EXECUTION_HINT_VALUE_ORDINALS + "', not " + executionHint);
@@ -93,11 +107,12 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
 
             if (execution.equals(EXECUTION_HINT_VALUE_ORDINALS)) {
                 assert includeExclude == null;
-                final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name, factories, (BytesValuesSource.WithOrdinals) valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
+                final StringTermsAggregator.WithOrdinals aggregator = new StringTermsAggregator.WithOrdinals(name,
+                        factories, (BytesValuesSource.WithOrdinals) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
                 aggregationContext.registerReaderContextAware(aggregator);
                 return aggregator;
             } else {
-                return new StringTermsAggregator(name, factories, valuesSource, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
+                return new StringTermsAggregator(name, factories, valuesSource, estimatedBucketCount, order, requiredSize, shardSize, includeExclude, aggregationContext, parent);
             }
         }
 
@@ -108,9 +123,9 @@ public class TermsAggregatorFactory extends ValueSourceAggregatorFactory {
 
         if (valuesSource instanceof NumericValuesSource) {
             if (((NumericValuesSource) valuesSource).isFloatingPoint()) {
-                return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
+                return new DoubleTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
             }
-            return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, order, requiredSize, shardSize, aggregationContext, parent);
+            return new LongTermsAggregator(name, factories, (NumericValuesSource) valuesSource, estimatedBucketCount, order, requiredSize, shardSize, aggregationContext, parent);
         }
 
         throw new AggregationExecutionException("terms aggregation cannot be applied to field [" + valuesSourceConfig.fieldContext().field() +
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java
index 16b3093cda9..b7aa841d4e3 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/AggregationContext.java
@@ -31,7 +31,6 @@ import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
 import org.elasticsearch.index.fielddata.IndexNumericFieldData;
 import org.elasticsearch.search.aggregations.AggregationExecutionException;
-import org.elasticsearch.search.aggregations.support.FieldDataSource.Uniqueness;
 import org.elasticsearch.search.aggregations.support.bytes.BytesValuesSource;
 import org.elasticsearch.search.aggregations.support.geopoints.GeoPointValuesSource;
 import org.elasticsearch.search.aggregations.support.numeric.NumericValuesSource;
@@ -139,7 +138,8 @@ public class AggregationContext implements ReaderContextAware, ScorerAware {
     private NumericValuesSource numericField(ObjectObjectOpenHashMap fieldDataSources, ValuesSourceConfig config) {
         FieldDataSource.Numeric dataSource = (FieldDataSource.Numeric) fieldDataSources.get(config.fieldContext.field());
         if (dataSource == null) {
-            dataSource = new FieldDataSource.Numeric.FieldData((IndexNumericFieldData) config.fieldContext.indexFieldData());
+            FieldDataSource.MetaData metaData = FieldDataSource.MetaData.load(config.fieldContext.indexFieldData(), searchContext);
+            dataSource = new FieldDataSource.Numeric.FieldData((IndexNumericFieldData) config.fieldContext.indexFieldData(), metaData);
             setReaderIfNeeded((ReaderContextAware) dataSource);
             readerAwares.add((ReaderContextAware) dataSource);
             fieldDataSources.put(config.fieldContext.field(), dataSource);
@@ -166,10 +166,11 @@
         FieldDataSource dataSource = fieldDataSources.get(config.fieldContext.field());
         if (dataSource == null) {
             final IndexFieldData indexFieldData = config.fieldContext.indexFieldData();
-            if (indexFieldData instanceof IndexFieldData.WithOrdinals) {
-                dataSource = new FieldDataSource.Bytes.WithOrdinals.FieldData((IndexFieldData.WithOrdinals) indexFieldData);
+            FieldDataSource.MetaData metaData = FieldDataSource.MetaData.load(config.fieldContext.indexFieldData(), searchContext);
+            if (indexFieldData instanceof IndexFieldData.WithOrdinals) {
+                dataSource = new FieldDataSource.Bytes.WithOrdinals.FieldData((IndexFieldData.WithOrdinals) indexFieldData, metaData);
             } else {
-                dataSource = new FieldDataSource.Bytes.FieldData(indexFieldData);
+                dataSource = new FieldDataSource.Bytes.FieldData(indexFieldData, metaData);
             }
             setReaderIfNeeded((ReaderContextAware) dataSource);
             readerAwares.add((ReaderContextAware) dataSource);
@@ -185,7 +186,7 @@
         // Even in case we wrap field data, we might still need to wrap for sorting, because the wrapped field data might be
         // eg. a numeric field data that doesn't sort according to the byte order. However field data values are unique so no
         // need to wrap for uniqueness
-        if ((config.ensureUnique && dataSource.getUniqueness() != Uniqueness.UNIQUE) || config.ensureSorted) {
+        if ((config.ensureUnique && !dataSource.metaData().uniqueness().unique()) || config.ensureSorted) {
             dataSource = new FieldDataSource.Bytes.SortedAndUnique(dataSource);
             readerAwares.add((ReaderContextAware) dataSource);
         }
@@ -216,7 +217,8 @@
     private GeoPointValuesSource geoPointField(ObjectObjectOpenHashMap fieldDataSources, ValuesSourceConfig config) {
         FieldDataSource.GeoPoint dataSource = (FieldDataSource.GeoPoint) fieldDataSources.get(config.fieldContext.field());
         if (dataSource == null) {
-            dataSource = new FieldDataSource.GeoPoint((IndexGeoPointFieldData) config.fieldContext.indexFieldData());
+            FieldDataSource.MetaData metaData = FieldDataSource.MetaData.load(config.fieldContext.indexFieldData(), searchContext);
+            dataSource = new FieldDataSource.GeoPoint((IndexGeoPointFieldData) config.fieldContext.indexFieldData(), metaData);
             setReaderIfNeeded(dataSource);
             readerAwares.add(dataSource);
             fieldDataSources.put(config.fieldContext.field(), dataSource);
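AggregationContext now computes a FieldDataSource.MetaData once per field by folding per-segment statistics together. The reduction itself is simple; here is a self-contained sketch of the same fold over hypothetical per-segment stats (illustrative types, not the Elasticsearch API; note the patch itself conservatively defaults multiValued to true, while the sketch starts from false to show the intent of the fold):

```java
import java.util.List;

final class FieldMetaDataSketch {

    // Per-segment statistics as exposed by atomic field data (names are illustrative).
    record SegmentStats(long uniqueValueCount, boolean multiValued) {}

    record FieldMetaData(long maxAtomicUniqueValuesCount, boolean multiValued) {}

    // Fold per-segment stats into field-level metadata: the bucket estimate uses the largest
    // per-segment cardinality, and the field is multi-valued if any segment is.
    static FieldMetaData load(List<SegmentStats> segments) {
        long maxUnique = -1; // -1 means "no estimate available"
        boolean multiValued = false;
        for (SegmentStats segment : segments) {
            maxUnique = Math.max(maxUnique, segment.uniqueValueCount());
            multiValued |= segment.multiValued();
        }
        return new FieldMetaData(maxUnique, multiValued);
    }

    public static void main(String[] args) {
        System.out.println(load(List.of(
                new SegmentStats(120, false),
                new SegmentStats(300, true))));
        // -> FieldMetaData[maxAtomicUniqueValuesCount=300, multiValued=true]
    }
}
```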
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java
index ac7c121ea55..5424c985455 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/FieldDataSource.java
@@ -32,30 +32,119 @@ import org.elasticsearch.search.aggregations.support.FieldDataSource.Bytes.Sorte
 import org.elasticsearch.search.aggregations.support.bytes.ScriptBytesValues;
 import org.elasticsearch.search.aggregations.support.numeric.ScriptDoubleValues;
 import org.elasticsearch.search.aggregations.support.numeric.ScriptLongValues;
+import org.elasticsearch.search.internal.SearchContext;
 
-/**
- *
- */
 public abstract class FieldDataSource {
 
-    /** Whether values are unique or not per document. */
-    public enum Uniqueness {
-        UNIQUE,
-        NOT_UNIQUE,
-        UNKNOWN;
+    public static class MetaData {
+
+        public static final MetaData UNKNOWN = new MetaData();
+
+        public enum Uniqueness {
+            UNIQUE,
+            NOT_UNIQUE,
+            UNKNOWN;
+
+            public boolean unique() {
+                return this == UNIQUE;
+            }
+        }
+
+        private long maxAtomicUniqueValuesCount = -1;
+        private boolean multiValued = true;
+        private Uniqueness uniqueness = Uniqueness.UNKNOWN;
+
+        private MetaData() {}
+
+        private MetaData(MetaData other) {
+            this.maxAtomicUniqueValuesCount = other.maxAtomicUniqueValuesCount;
+            this.multiValued = other.multiValued;
+            this.uniqueness = other.uniqueness;
+        }
+
+        private MetaData(long maxAtomicUniqueValuesCount, boolean multiValued, Uniqueness uniqueness) {
+            this.maxAtomicUniqueValuesCount = maxAtomicUniqueValuesCount;
+            this.multiValued = multiValued;
+            this.uniqueness = uniqueness;
+        }
+
+        public long maxAtomicUniqueValuesCount() {
+            return maxAtomicUniqueValuesCount;
+        }
+
+        public boolean multiValued() {
+            return multiValued;
+        }
+
+        public Uniqueness uniqueness() {
+            return uniqueness;
+        }
+
+        public static MetaData load(IndexFieldData indexFieldData, SearchContext context) {
+            MetaData metaData = new MetaData();
+            metaData.uniqueness = Uniqueness.UNIQUE;
+            for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) {
+                AtomicFieldData fieldData = indexFieldData.load(readerContext);
+                metaData.multiValued |= fieldData.isMultiValued();
+                metaData.maxAtomicUniqueValuesCount = Math.max(metaData.maxAtomicUniqueValuesCount, fieldData.getNumberUniqueValues());
+            }
+            return metaData;
+        }
+
+        public static Builder builder() {
+            return new Builder();
+        }
+
+        public static Builder builder(MetaData other) {
+            return new Builder(other);
+        }
+
+        public static class Builder {
+
+            private final MetaData metaData;
+
+            private Builder() {
+                metaData = new MetaData();
+            }
+
+            private Builder(MetaData metaData) {
+                this.metaData = new MetaData(metaData);
+            }
+
+            public Builder maxAtomicUniqueValuesCount(long maxAtomicUniqueValuesCount) {
+                metaData.maxAtomicUniqueValuesCount = maxAtomicUniqueValuesCount;
+                return this;
+            }
+
+            public Builder multiValued(boolean multiValued) {
+                metaData.multiValued = multiValued;
+                return this;
+            }
+
+            public Builder uniqueness(Uniqueness uniqueness) {
+                metaData.uniqueness = uniqueness;
+                return this;
+            }
+
+            public MetaData build() {
+                return metaData;
+            }
+        }
     }
 
-    /** Return whether values are unique. */
-    public Uniqueness getUniqueness() {
-        return Uniqueness.UNKNOWN;
-    }
-
-    /** Get the current {@link BytesValues}. */
+    /**
+     * Get the current {@link BytesValues}.
+     */
     public abstract BytesValues bytesValues();
 
-    /** Ask the underlying data source to provide pre-computed hashes, optional operation. */
+    /**
+     * Ask the underlying data source to provide pre-computed hashes, optional operation.
+     */
     public void setNeedsHashes(boolean needsHashes) {}
 
+    public abstract MetaData metaData();
+
     public static abstract class Bytes extends FieldDataSource {
 
         public static abstract class WithOrdinals extends Bytes {
@@ -66,17 +155,19 @@
 
             protected boolean needsHashes;
             protected final IndexFieldData.WithOrdinals indexFieldData;
+            protected final MetaData metaData;
             protected AtomicFieldData.WithOrdinals atomicFieldData;
             private BytesValues.WithOrdinals bytesValues;
 
-            public FieldData(IndexFieldData.WithOrdinals indexFieldData) {
+            public FieldData(IndexFieldData.WithOrdinals indexFieldData, MetaData metaData) {
                 this.indexFieldData = indexFieldData;
+                this.metaData = metaData;
                 needsHashes = false;
             }
 
             @Override
-            public Uniqueness getUniqueness() {
-                return Uniqueness.UNIQUE;
+            public MetaData metaData() {
+                return metaData;
             }
 
             public final void setNeedsHashes(boolean needsHashes) {
@@ -107,17 +198,19 @@
 
            protected boolean needsHashes;
            protected final IndexFieldData indexFieldData;
+           protected final MetaData metaData;
            protected AtomicFieldData atomicFieldData;
            private BytesValues bytesValues;
 
-           public FieldData(IndexFieldData indexFieldData) {
+           public FieldData(IndexFieldData indexFieldData, MetaData metaData) {
                this.indexFieldData = indexFieldData;
+               this.metaData = metaData;
                needsHashes = false;
            }
 
            @Override
-           public Uniqueness getUniqueness() {
-               return Uniqueness.UNIQUE;
+           public MetaData metaData() {
+               return metaData;
            }
 
            public final void setNeedsHashes(boolean needsHashes) {
@@ -149,25 +242,31 @@
                values = new ScriptBytesValues(script);
            }
 
+           @Override
+           public MetaData metaData() {
+               return MetaData.UNKNOWN;
+           }
+
            @Override
            public org.elasticsearch.index.fielddata.BytesValues bytesValues() {
                return values;
            }
-
        }
 
        public static class SortedAndUnique extends Bytes implements ReaderContextAware {
 
            private final FieldDataSource delegate;
+           private final MetaData metaData;
            private BytesValues bytesValues;
 
            public SortedAndUnique(FieldDataSource delegate) {
                this.delegate = delegate;
+               this.metaData = MetaData.builder(delegate.metaData()).uniqueness(MetaData.Uniqueness.UNIQUE).build();
            }
 
            @Override
-           public Uniqueness getUniqueness() {
-               return Uniqueness.UNIQUE;
+           public MetaData metaData() {
+               return metaData;
            }
 
            @Override
@@ -180,7 +279,7 @@
                if (bytesValues == null) {
                    bytesValues = delegate.bytesValues();
                    if (bytesValues.isMultiValued() &&
-                           (delegate.getUniqueness() != Uniqueness.UNIQUE || bytesValues.getOrder() != Order.BYTES)) {
+                           (!delegate.metaData().uniqueness.unique() || bytesValues.getOrder() != Order.BYTES)) {
                        bytesValues = new SortedUniqueBytesValues(bytesValues);
                    }
                }
@@ -254,13 +353,11 @@
 
        public static class WithScript extends Numeric {
 
-           private final Numeric delegate;
            private final LongValues longValues;
            private final DoubleValues doubleValues;
            private final FieldDataSource.WithScript.BytesValues bytesValues;
 
            public WithScript(Numeric delegate, SearchScript script) {
-               this.delegate = delegate;
                this.longValues = new LongValues(delegate, script);
                this.doubleValues = new DoubleValues(delegate, script);
                this.bytesValues = new FieldDataSource.WithScript.BytesValues(delegate, script);
@@ -286,6 +383,11 @@
                return doubleValues;
            }
 
+           @Override
+           public MetaData metaData() {
+               return MetaData.UNKNOWN;
+           }
+
            static class LongValues extends org.elasticsearch.index.fielddata.LongValues {
 
                private final Numeric source;
@@ -337,19 +439,21 @@
 
            protected boolean needsHashes;
            protected final IndexNumericFieldData indexFieldData;
+           protected final MetaData metaData;
            protected AtomicNumericFieldData atomicFieldData;
            private BytesValues bytesValues;
            private LongValues longValues;
            private DoubleValues doubleValues;
 
-           public FieldData(IndexNumericFieldData indexFieldData) {
+           public FieldData(IndexNumericFieldData indexFieldData, MetaData metaData) {
                this.indexFieldData = indexFieldData;
+               this.metaData = metaData;
                needsHashes = false;
            }
 
            @Override
-           public Uniqueness getUniqueness() {
-               return Uniqueness.UNIQUE;
+           public MetaData metaData() {
+               return metaData;
            }
 
            @Override
@@ -417,6 +521,11 @@
                bytesValues = new ScriptBytesValues(script);
            }
 
+           @Override
+           public MetaData metaData() {
+               return MetaData.UNKNOWN;
+           }
+
            @Override
            public boolean isFloatingPoint() {
                return scriptValueType != null ? scriptValueType.isFloatingPoint() : true;
@@ -442,17 +551,19 @@
        public static class SortedAndUnique extends Numeric implements ReaderContextAware {
 
            private final Numeric delegate;
+           private final MetaData metaData;
            private LongValues longValues;
            private DoubleValues doubleValues;
            private BytesValues bytesValues;
 
            public SortedAndUnique(Numeric delegate) {
                this.delegate = delegate;
+               this.metaData = MetaData.builder(delegate.metaData()).uniqueness(MetaData.Uniqueness.UNIQUE).build();
            }
 
            @Override
-           public Uniqueness getUniqueness() {
-               return Uniqueness.UNIQUE;
+           public MetaData metaData() {
+               return metaData;
            }
 
            @Override
@@ -472,7 +583,7 @@
                if (longValues == null) {
                    longValues = delegate.longValues();
                    if (longValues.isMultiValued() &&
-                           (delegate.getUniqueness() != Uniqueness.UNIQUE || longValues.getOrder() != Order.NUMERIC)) {
+                           (!delegate.metaData().uniqueness.unique() || longValues.getOrder() != Order.NUMERIC)) {
                        longValues = new SortedUniqueLongValues(longValues);
                    }
                }
@@ -484,7 +595,7 @@
                if (doubleValues == null) {
                    doubleValues = delegate.doubleValues();
                    if (doubleValues.isMultiValued() &&
-                           (delegate.getUniqueness() != Uniqueness.UNIQUE || doubleValues.getOrder() != Order.NUMERIC)) {
+                           (!delegate.metaData().uniqueness.unique() || doubleValues.getOrder() != Order.NUMERIC)) {
                        doubleValues = new SortedUniqueDoubleValues(doubleValues);
                    }
                }
@@ -496,7 +607,7 @@
                if (bytesValues == null) {
                    bytesValues = delegate.bytesValues();
                    if (bytesValues.isMultiValued() &&
-                           (delegate.getUniqueness() != Uniqueness.UNIQUE || bytesValues.getOrder() != Order.BYTES)) {
+                           (!delegate.metaData().uniqueness.unique() || bytesValues.getOrder() != Order.BYTES)) {
                        bytesValues = new SortedUniqueBytesValues(bytesValues);
                    }
                }
@@ -632,6 +743,11 @@
            this.bytesValues = new BytesValues(delegate, script);
        }
 
+       @Override
+       public MetaData metaData() {
+           return MetaData.UNKNOWN;
+       }
+
        @Override
        public BytesValues bytesValues() {
            return bytesValues;
        }
@@ -669,18 +785,20 @@
 
        protected boolean needsHashes;
        protected final IndexGeoPointFieldData indexFieldData;
+       private final MetaData metaData;
        protected AtomicGeoPointFieldData atomicFieldData;
        private BytesValues bytesValues;
        private GeoPointValues geoPointValues;
 
-       public GeoPoint(IndexGeoPointFieldData indexFieldData) {
+       public GeoPoint(IndexGeoPointFieldData indexFieldData, MetaData metaData) {
            this.indexFieldData = indexFieldData;
+           this.metaData = metaData;
            needsHashes = false;
        }
 
        @Override
-       public Uniqueness getUniqueness() {
-           return Uniqueness.UNIQUE;
+       public MetaData metaData() {
+           return metaData;
        }
 
        @Override
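The MetaData builder added above is what wrapping sources use to derive their own metadata from a delegate: SortedAndUnique upgrades uniqueness to UNIQUE, while DistanceSource resets it to UNKNOWN. A hypothetical custom wrapper (sketched against the API introduced in this patch, not code from the patch itself) would follow the same pattern:

```java
import org.elasticsearch.search.aggregations.support.FieldDataSource;
import org.elasticsearch.search.aggregations.support.FieldDataSource.MetaData;

// Hypothetical helpers showing how a wrapping source derives metadata from its delegate.
final class MetaDataDerivation {

    // A wrapper that sorts and de-duplicates values can promise uniqueness...
    static MetaData uniqueView(FieldDataSource delegate) {
        return MetaData.builder(delegate.metaData())
                .uniqueness(MetaData.Uniqueness.UNIQUE)
                .build();
    }

    // ...while a wrapper that transforms values (e.g. into distances) can no longer promise it.
    static MetaData transformedView(FieldDataSource delegate) {
        return MetaData.builder(delegate.metaData())
                .uniqueness(MetaData.Uniqueness.UNKNOWN)
                .build();
    }
}
```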
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java
index db188dbe364..c23427e6661 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java
@@ -26,6 +26,8 @@ import org.elasticsearch.index.fielddata.BytesValues;
  */
 public interface ValuesSource {
 
+    FieldDataSource.MetaData metaData();
+
     /**
      * @return A {@link org.apache.lucene.util.BytesRef bytesref} view over the values that are resolved from this value source.
      */
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java
index 7f47c703a5e..3f0cb294db8 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/bytes/BytesValuesSource.java
@@ -34,6 +34,11 @@ public class BytesValuesSource implements ValuesSource {
         this.source = source;
     }
 
+    @Override
+    public FieldDataSource.MetaData metaData() {
+        return source.metaData();
+    }
+
     @Override
     public BytesValues bytesValues() {
         return source.bytesValues();
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/geopoints/GeoPointValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/geopoints/GeoPointValuesSource.java
index 2134897df46..f902777e9f8 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/geopoints/GeoPointValuesSource.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/geopoints/GeoPointValuesSource.java
@@ -35,6 +35,11 @@ public final class GeoPointValuesSource implements ValuesSource {
         this.source = source;
     }
 
+    @Override
+    public FieldDataSource.MetaData metaData() {
+        return source.metaData();
+    }
+
     @Override
     public BytesValues bytesValues() {
         return source.bytesValues();
diff --git a/src/main/java/org/elasticsearch/search/aggregations/support/numeric/NumericValuesSource.java b/src/main/java/org/elasticsearch/search/aggregations/support/numeric/NumericValuesSource.java
index e01bf478c00..cbddadfa162 100644
--- a/src/main/java/org/elasticsearch/search/aggregations/support/numeric/NumericValuesSource.java
+++ b/src/main/java/org/elasticsearch/search/aggregations/support/numeric/NumericValuesSource.java
@@ -41,6 +41,11 @@ public final class NumericValuesSource implements ValuesSource {
         this.parser = parser;
     }
 
+    @Override
+    public FieldDataSource.MetaData metaData() {
+        return source.metaData();
+    }
+
     @Override
     public BytesValues bytesValues() {
         return source.bytesValues();
diff --git a/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java b/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java
index 5d8924e027e..1b946ea3ac7 100644
--- a/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java
+++ b/src/test/java/org/elasticsearch/search/aggregations/bucket/GeoDistanceTests.java
@@ -60,13 +60,15 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
                 .build();
     }
 
-    private IndexRequestBuilder indexCity(String name, String latLon) throws Exception {
+    private IndexRequestBuilder indexCity(String idx, String name, String... latLons) throws Exception {
         XContentBuilder source = jsonBuilder().startObject().field("city", name);
-        if (latLon != null) {
-            source = source.field("location", latLon);
+        source.startArray("location");
+        for (int i = 0; i < latLons.length; i++) {
+            source.value(latLons[i]);
         }
+        source.endArray();
         source = source.endObject();
-        return client().prepareIndex("idx", "type").setSource(source);
+        return client().prepareIndex(idx, "type").setSource(source);
     }
 
     @Before
@@ -75,27 +77,45 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
                 .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed")
                 .execute().actionGet();
 
+        prepareCreate("idx-multi")
+                .addMapping("type", "location", "type=geo_point", "city", "type=string,index=not_analyzed")
+                .execute().actionGet();
+
         createIndex("idx_unmapped");
 
         List cities = new ArrayList();
         cities.addAll(Arrays.asList(
                 // below 500km
-                indexCity("utrecht", "52.0945, 5.116"),
-                indexCity("haarlem", "52.3890, 4.637"),
+                indexCity("idx", "utrecht", "52.0945, 5.116"),
+                indexCity("idx", "haarlem", "52.3890, 4.637"),
                 // above 500km, below 1000km
-                indexCity("berlin", "52.540, 13.409"),
-                indexCity("prague", "50.086, 14.439"),
+                indexCity("idx", "berlin", "52.540, 13.409"),
+                indexCity("idx", "prague", "50.097679, 14.441314"),
                 // above 1000km
-                indexCity("tel-aviv", "32.0741, 34.777")));
+                indexCity("idx", "tel-aviv", "32.0741, 34.777")));
+
+        // random cities with no location
+        for (String cityName : Arrays.asList("london", "singapour", "tokyo", "milan")) {
+            if (randomBoolean()) {
+                cities.add(indexCity("idx", cityName));
+            }
+        }
+        indexRandom(true, cities);
+
+        cities.clear();
+        cities.addAll(Arrays.asList(
+                indexCity("idx-multi", "city1", "52.3890, 4.637", "50.097679,14.441314"), // first point is within ~17.5km, the second is ~710km away
+                indexCity("idx-multi", "city2", "52.540, 13.409", "52.0945, 5.116"),      // first point is ~576km away, the second is within ~35km
+                indexCity("idx-multi", "city3", "32.0741, 34.777")));                     // above 1000km
 
         // random cities with no location
         for (String cityName : Arrays.asList("london", "singapour", "tokyo", "milan")) {
             if (randomBoolean() || true) {
-                cities.add(indexCity(cityName, null));
+                cities.add(indexCity("idx-multi", cityName));
             }
         }
-
         indexRandom(true, cities);
+        ensureSearchable();
     }
 
@@ -370,6 +390,50 @@ public class GeoDistanceTests extends ElasticsearchIntegrationTest {
         assertThat(geoDistance.buckets().get(0).getFrom(), equalTo(0.0));
         assertThat(geoDistance.buckets().get(0).getTo(), equalTo(100.0));
         assertThat(geoDistance.buckets().get(0).getDocCount(), equalTo(0l));
-
     }
+
+    @Test
+    public void multiValues() throws Exception {
+        SearchResponse response = client().prepareSearch("idx-multi")
+                .addAggregation(geoDistance("amsterdam_rings")
+                        .field("location")
+                        .unit(DistanceUnit.KILOMETERS)
+                        .distanceType(org.elasticsearch.common.geo.GeoDistance.ARC)
+                        .point("52.3760, 4.894") // coords of amsterdam
+                        .addUnboundedTo(500)
+                        .addRange(500, 1000)
+                        .addUnboundedFrom(1000))
+                .execute().actionGet();
+
+        assertSearchResponse(response);
+
+        GeoDistance geoDist = response.getAggregations().get("amsterdam_rings");
+        assertThat(geoDist, notNullValue());
+        assertThat(geoDist.getName(), equalTo("amsterdam_rings"));
+        assertThat(geoDist.buckets().size(), equalTo(3));
+
+        GeoDistance.Bucket bucket = geoDist.getByKey("*-500.0");
+        assertThat(bucket, notNullValue());
+        assertThat(bucket.getKey(), equalTo("*-500.0"));
+        assertThat(bucket.getFrom(), equalTo(0.0));
+        assertThat(bucket.getTo(), equalTo(500.0));
+        assertThat(bucket.getDocCount(), equalTo(2l));
+
+        bucket = geoDist.getByKey("500.0-1000.0");
+        assertThat(bucket, notNullValue());
+        assertThat(bucket.getKey(), equalTo("500.0-1000.0"));
+        assertThat(bucket.getFrom(), equalTo(500.0));
+        assertThat(bucket.getTo(), equalTo(1000.0));
+        assertThat(bucket.getDocCount(), equalTo(2l));
+
+        bucket = geoDist.getByKey("1000.0-*");
+        assertThat(bucket, notNullValue());
+        assertThat(bucket.getKey(), equalTo("1000.0-*"));
+        assertThat(bucket.getFrom(), equalTo(1000.0));
+        assertThat(bucket.getTo(), equalTo(Double.POSITIVE_INFINITY));
+        assertThat(bucket.getDocCount(), equalTo(1l));
+    }
+
+
 }
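The doc counts asserted in multiValues() follow from the arc distances to Amsterdam quoted in the test comments (~17.5km and ~710km for city1, ~576km and ~35km for city2, above 1000km for city3): a document lands in every ring that any of its points falls into, so the first two rings count two documents each and the last ring one. A quick standalone check of those distances (haversine approximation, illustrative only):

```java
final class ArcDistanceCheck {

    static final double EARTH_RADIUS_KM = 6371.0;

    // Great-circle (haversine) distance in kilometers between two lat/lon points.
    static double distanceKm(double lat1, double lon1, double lat2, double lon2) {
        double dLat = Math.toRadians(lat2 - lat1);
        double dLon = Math.toRadians(lon2 - lon1);
        double a = Math.sin(dLat / 2) * Math.sin(dLat / 2)
                + Math.cos(Math.toRadians(lat1)) * Math.cos(Math.toRadians(lat2))
                * Math.sin(dLon / 2) * Math.sin(dLon / 2);
        return 2 * EARTH_RADIUS_KM * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
    }

    public static void main(String[] args) {
        double amsLat = 52.3760, amsLon = 4.894;
        // city1: should come out around 17.5km and 710km per the test comments
        System.out.printf("city1: %.1f km, %.1f km%n",
                distanceKm(amsLat, amsLon, 52.3890, 4.637), distanceKm(amsLat, amsLon, 50.097679, 14.441314));
        // city2: should come out around 576km and 35km
        System.out.printf("city2: %.1f km, %.1f km%n",
                distanceKm(amsLat, amsLon, 52.540, 13.409), distanceKm(amsLat, amsLon, 52.0945, 5.116));
        // city3: should come out well above 1000km
        System.out.printf("city3: %.1f km%n", distanceKm(amsLat, amsLon, 32.0741, 34.777));
    }
}
```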