Fix illegal cast of the "low cardinality" optimization of the `terms` aggregation. (#27543)
The GlobalOrdinalsStringTermsAggregator.LowCardinality aggregator casts global values to `GlobalOrdinalMapping`, even though the implementation of global values is different when a `missing` value is configured. This commit adds a new API that gives access to the ordinal remapping in order to fix this problem.
This commit is contained in:
parent
996990ad1f
commit
d01fcee645
|
@ -29,7 +29,7 @@ import java.io.IOException;
|
||||||
/**
|
/**
|
||||||
* A {@link SortedSetDocValues} implementation that returns ordinals that are global.
|
* A {@link SortedSetDocValues} implementation that returns ordinals that are global.
|
||||||
*/
|
*/
|
||||||
public class GlobalOrdinalMapping extends SortedSetDocValues {
|
final class GlobalOrdinalMapping extends SortedSetDocValues {
|
||||||
|
|
||||||
private final SortedSetDocValues values;
|
private final SortedSetDocValues values;
|
||||||
private final OrdinalMap ordinalMap;
|
private final OrdinalMap ordinalMap;
|
||||||
|
@ -49,7 +49,7 @@ public class GlobalOrdinalMapping extends SortedSetDocValues {
|
||||||
return ordinalMap.getValueCount();
|
return ordinalMap.getValueCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
public final long getGlobalOrd(long segmentOrd) {
|
public long getGlobalOrd(long segmentOrd) {
|
||||||
return mapping.get(segmentOrd);
|
return mapping.get(segmentOrd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.search.LeafCollector;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
|
|
||||||
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
|
|
||||||
|
@ -179,16 +178,10 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
|
||||||
if (lookup == null) {
|
if (lookup == null) {
|
||||||
lookup = dvs;
|
lookup = dvs;
|
||||||
if (topValue != null && topValueLong == null) {
|
if (topValue != null && topValueLong == null) {
|
||||||
if (lookup instanceof GlobalOrdinalMapping) {
|
topValueLong = lookup.lookupTerm(topValue);
|
||||||
// Find the global ordinal (or the insertion point) for the provided top value.
|
if (topValueLong < 0) {
|
||||||
topValueLong = lookupGlobalOrdinals((GlobalOrdinalMapping) lookup, topValue);
|
// convert negative insert position
|
||||||
} else {
|
topValueLong = -topValueLong - 2;
|
||||||
// Global ordinals are not needed, switch back to ordinals (single segment case).
|
|
||||||
topValueLong = lookup.lookupTerm(topValue);
|
|
||||||
if (topValueLong < 0) {
|
|
||||||
// convert negative insert position
|
|
||||||
topValueLong = -topValueLong - 2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -202,25 +195,6 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
private static long lookupGlobalOrdinals(GlobalOrdinalMapping mapping, BytesRef key) throws IOException {
|
|
||||||
long low = 0;
|
|
||||||
long high = mapping.getValueCount();
|
|
||||||
|
|
||||||
while (low <= high) {
|
|
||||||
long mid = (low + high) >>> 1;
|
|
||||||
BytesRef midVal = mapping.lookupOrd(mid);
|
|
||||||
int cmp = midVal.compareTo(key);
|
|
||||||
if (cmp < 0) {
|
|
||||||
low = mid + 1;
|
|
||||||
} else if (cmp > 0) {
|
|
||||||
high = mid - 1;
|
|
||||||
} else {
|
|
||||||
return mid;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return low-1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -33,7 +33,6 @@ import org.elasticsearch.common.util.IntArray;
|
||||||
import org.elasticsearch.common.util.LongHash;
|
import org.elasticsearch.common.util.LongHash;
|
||||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||||
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
|
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
|
||||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
|
|
||||||
import org.elasticsearch.search.DocValueFormat;
|
import org.elasticsearch.search.DocValueFormat;
|
||||||
import org.elasticsearch.search.aggregations.Aggregator;
|
import org.elasticsearch.search.aggregations.Aggregator;
|
||||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||||
|
@ -50,6 +49,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.function.LongUnaryOperator;
|
||||||
|
|
||||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||||
|
|
||||||
|
@ -295,9 +295,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
*/
|
*/
|
||||||
static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
|
static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
|
||||||
|
|
||||||
|
private LongUnaryOperator mapping;
|
||||||
private IntArray segmentDocCounts;
|
private IntArray segmentDocCounts;
|
||||||
private SortedSetDocValues globalOrds;
|
|
||||||
private SortedSetDocValues segmentOrds;
|
|
||||||
|
|
||||||
LowCardinality(String name,
|
LowCardinality(String name,
|
||||||
AggregatorFactories factories,
|
AggregatorFactories factories,
|
||||||
|
@ -321,14 +320,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
@Override
|
@Override
|
||||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
|
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
|
||||||
final LeafBucketCollector sub) throws IOException {
|
final LeafBucketCollector sub) throws IOException {
|
||||||
if (segmentOrds != null) {
|
if (mapping != null) {
|
||||||
mapSegmentCountsToGlobalCounts();
|
mapSegmentCountsToGlobalCounts(mapping);
|
||||||
}
|
}
|
||||||
globalOrds = valuesSource.globalOrdinalsValues(ctx);
|
final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx);
|
||||||
segmentOrds = valuesSource.ordinalsValues(ctx);
|
|
||||||
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
|
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
|
||||||
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
|
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
|
||||||
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
|
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
|
||||||
|
mapping = valuesSource.globalOrdinalsMapping(ctx);
|
||||||
if (singleValues != null) {
|
if (singleValues != null) {
|
||||||
return new LeafBucketCollectorBase(sub, segmentOrds) {
|
return new LeafBucketCollectorBase(sub, segmentOrds) {
|
||||||
@Override
|
@Override
|
||||||
|
@ -356,9 +355,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void doPostCollection() {
|
protected void doPostCollection() throws IOException {
|
||||||
if (segmentOrds != null) {
|
if (mapping != null) {
|
||||||
mapSegmentCountsToGlobalCounts();
|
mapSegmentCountsToGlobalCounts(mapping);
|
||||||
|
mapping = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -367,16 +367,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
Releasables.close(segmentDocCounts);
|
Releasables.close(segmentDocCounts);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void mapSegmentCountsToGlobalCounts() {
|
private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException {
|
||||||
// There is no public method in Ordinals.Docs that allows for this mapping...
|
|
||||||
// This is the cleanest way I can think of so far
|
|
||||||
|
|
||||||
GlobalOrdinalMapping mapping;
|
|
||||||
if (globalOrds.getValueCount() == segmentOrds.getValueCount()) {
|
|
||||||
mapping = null;
|
|
||||||
} else {
|
|
||||||
mapping = (GlobalOrdinalMapping) globalOrds;
|
|
||||||
}
|
|
||||||
for (long i = 1; i < segmentDocCounts.size(); i++) {
|
for (long i = 1; i < segmentDocCounts.size(); i++) {
|
||||||
// We use set(...) here, because we need to reset the slow to 0.
|
// We use set(...) here, because we need to reset the slow to 0.
|
||||||
// segmentDocCounts get reused over the segments and otherwise counts would be too high.
|
// segmentDocCounts get reused over the segments and otherwise counts would be too high.
|
||||||
|
@ -385,7 +376,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
final long ord = i - 1; // remember we do +1 when counting
|
final long ord = i - 1; // remember we do +1 when counting
|
||||||
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
|
final long globalOrd = mapping.applyAsLong(ord);
|
||||||
long bucketOrd = getBucketOrd(globalOrd);
|
long bucketOrd = getBucketOrd(globalOrd);
|
||||||
incrementBucketDocCount(bucketOrd, inc);
|
incrementBucketDocCount(bucketOrd, inc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,8 @@ import java.util.Map;
|
||||||
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
|
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
|
||||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
|
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
|
||||||
|
|
||||||
|
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
|
||||||
|
|
||||||
private final BucketOrder order;
|
private final BucketOrder order;
|
||||||
private final IncludeExclude includeExclude;
|
private final IncludeExclude includeExclude;
|
||||||
private final String executionHint;
|
private final String executionHint;
|
||||||
|
@ -257,11 +259,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
||||||
|
|
||||||
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
||||||
assert maxOrd != -1;
|
assert maxOrd != -1;
|
||||||
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
||||||
|
|
||||||
if (factories == AggregatorFactories.EMPTY &&
|
if (factories == AggregatorFactories.EMPTY &&
|
||||||
includeExclude == null &&
|
includeExclude == null &&
|
||||||
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
||||||
ratio <= 0.5 && maxOrd <= 2048) {
|
// we use the static COLLECT_SEGMENT_ORDS to allow tests to force specific optimizations
|
||||||
|
(COLLECT_SEGMENT_ORDS!= null ? COLLECT_SEGMENT_ORDS.booleanValue() : ratio <= 0.5 && maxOrd <= 2048)) {
|
||||||
/**
|
/**
|
||||||
* We can use the low cardinality execution mode iff this aggregator:
|
* We can use the low cardinality execution mode iff this aggregator:
|
||||||
* - has no sub-aggregator AND
|
* - has no sub-aggregator AND
|
||||||
|
@ -276,18 +280,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
||||||
|
|
||||||
}
|
}
|
||||||
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
|
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
|
||||||
boolean remapGlobalOrds = true;
|
boolean remapGlobalOrds;
|
||||||
if (includeExclude == null &&
|
if (REMAP_GLOBAL_ORDS != null) {
|
||||||
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
// We use REMAP_GLOBAL_ORDS to allow tests to force specific optimizations
|
||||||
(factories == AggregatorFactories.EMPTY ||
|
remapGlobalOrds = REMAP_GLOBAL_ORDS.booleanValue();
|
||||||
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
|
} else {
|
||||||
/**
|
remapGlobalOrds = true;
|
||||||
* We don't need to remap global ords iff this aggregator:
|
if (includeExclude == null &&
|
||||||
* - has no include/exclude rules AND
|
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
||||||
* - is not a child of a bucket aggregator AND
|
(factories == AggregatorFactories.EMPTY ||
|
||||||
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
|
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
|
||||||
**/
|
/**
|
||||||
remapGlobalOrds = false;
|
* We don't need to remap global ords iff this aggregator:
|
||||||
|
* - has no include/exclude rules AND
|
||||||
|
* - is not a child of a bucket aggregator AND
|
||||||
|
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
|
||||||
|
**/
|
||||||
|
remapGlobalOrds = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
||||||
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,
|
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.function.LongUnaryOperator;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Utility class that allows to return views of {@link ValuesSource}s that
|
* Utility class that allows to return views of {@link ValuesSource}s that
|
||||||
|
@ -201,6 +202,13 @@ public enum MissingValues {
|
||||||
SortedSetDocValues values = valuesSource.globalOrdinalsValues(context);
|
SortedSetDocValues values = valuesSource.globalOrdinalsValues(context);
|
||||||
return replaceMissing(values, missing);
|
return replaceMissing(values, missing);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||||
|
return getGlobalMapping(valuesSource.ordinalsValues(context),
|
||||||
|
valuesSource.globalOrdinalsValues(context),
|
||||||
|
valuesSource.globalOrdinalsMapping(context), missing);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -311,6 +319,43 @@ public enum MissingValues {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static LongUnaryOperator getGlobalMapping(SortedSetDocValues values, SortedSetDocValues globalValues,
|
||||||
|
LongUnaryOperator segmentToGlobalOrd, BytesRef missing) throws IOException {
|
||||||
|
final long missingGlobalOrd = globalValues.lookupTerm(missing);
|
||||||
|
final long missingSegmentOrd = values.lookupTerm(missing);
|
||||||
|
|
||||||
|
if (missingSegmentOrd >= 0) {
|
||||||
|
// the missing value exists in the segment, nothing to do
|
||||||
|
return segmentToGlobalOrd;
|
||||||
|
} else if (missingGlobalOrd >= 0) {
|
||||||
|
// the missing value exists in another segment, but not the current one
|
||||||
|
final long insertedSegmentOrd = -1L - missingSegmentOrd;
|
||||||
|
final long insertedGlobalOrd = missingGlobalOrd;
|
||||||
|
return segmentOrd -> {
|
||||||
|
if (insertedSegmentOrd == segmentOrd) {
|
||||||
|
return insertedGlobalOrd;
|
||||||
|
} else if (insertedSegmentOrd > segmentOrd) {
|
||||||
|
return segmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||||
|
} else {
|
||||||
|
return segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else {
|
||||||
|
// the missing value exists neither in this segment nor in another segment
|
||||||
|
final long insertedSegmentOrd = -1L - missingSegmentOrd;
|
||||||
|
final long insertedGlobalOrd = -1L - missingGlobalOrd;
|
||||||
|
return segmentOrd -> {
|
||||||
|
if (insertedSegmentOrd == segmentOrd) {
|
||||||
|
return insertedGlobalOrd;
|
||||||
|
} else if (insertedSegmentOrd > segmentOrd) {
|
||||||
|
return segmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||||
|
} else {
|
||||||
|
return 1 + segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) {
|
public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) {
|
||||||
return new ValuesSource.GeoPoint() {
|
return new ValuesSource.GeoPoint() {
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.DocValues;
|
import org.apache.lucene.index.DocValues;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReaderContext;
|
import org.apache.lucene.index.LeafReaderContext;
|
||||||
|
import org.apache.lucene.index.OrdinalMap;
|
||||||
import org.apache.lucene.index.SortedNumericDocValues;
|
import org.apache.lucene.index.SortedNumericDocValues;
|
||||||
import org.apache.lucene.index.SortedSetDocValues;
|
import org.apache.lucene.index.SortedSetDocValues;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
@ -47,6 +48,7 @@ import org.elasticsearch.search.aggregations.support.values.ScriptDoubleValues;
|
||||||
import org.elasticsearch.search.aggregations.support.values.ScriptLongValues;
|
import org.elasticsearch.search.aggregations.support.values.ScriptLongValues;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.function.LongUnaryOperator;
|
||||||
|
|
||||||
public abstract class ValuesSource {
|
public abstract class ValuesSource {
|
||||||
|
|
||||||
|
@ -90,6 +92,11 @@ public abstract class ValuesSource {
|
||||||
return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary();
|
return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||||
|
return LongUnaryOperator.identity();
|
||||||
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -105,6 +112,10 @@ public abstract class ValuesSource {
|
||||||
public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
|
public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
|
||||||
throws IOException;
|
throws IOException;
|
||||||
|
|
||||||
|
/** Returns a mapping from segment ordinals to global ordinals. */
|
||||||
|
public abstract LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException {
|
public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException {
|
||||||
IndexReader indexReader = indexSearcher.getIndexReader();
|
IndexReader indexReader = indexSearcher.getIndexReader();
|
||||||
if (indexReader.leaves().isEmpty()) {
|
if (indexReader.leaves().isEmpty()) {
|
||||||
|
@ -142,6 +153,18 @@ public abstract class ValuesSource {
|
||||||
final AtomicOrdinalsFieldData atomicFieldData = global.load(context);
|
final AtomicOrdinalsFieldData atomicFieldData = global.load(context);
|
||||||
return atomicFieldData.getOrdinalsValues();
|
return atomicFieldData.getOrdinalsValues();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||||
|
final IndexOrdinalsFieldData global = indexFieldData.loadGlobal((DirectoryReader)context.parent.reader());
|
||||||
|
final OrdinalMap map = global.getOrdinalMap();
|
||||||
|
if (map == null) {
|
||||||
|
// segments and global ordinals are the same
|
||||||
|
return LongUnaryOperator.identity();
|
||||||
|
}
|
||||||
|
final org.apache.lucene.util.LongValues segmentToGlobalOrd = map.getGlobalOrds(context.ord);
|
||||||
|
return segmentToGlobalOrd::get;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
* specific language governing permissions and limitations
|
* specific language governing permissions and limitations
|
||||||
* under the License.
|
* under the License.
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.search.aggregations.bucket;
|
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||||
|
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
|
@ -34,9 +34,11 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
||||||
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
|
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
|
||||||
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
|
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
|
||||||
import org.elasticsearch.search.aggregations.BucketOrder;
|
import org.elasticsearch.search.aggregations.BucketOrder;
|
||||||
|
import org.elasticsearch.search.aggregations.bucket.AbstractTermsTestCase;
|
||||||
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
|
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
|
||||||
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
|
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
|
||||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
|
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
|
||||||
|
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
|
||||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
|
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
|
||||||
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
|
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
|
||||||
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
|
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
|
||||||
|
@ -44,6 +46,8 @@ import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStat
|
||||||
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
|
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
|
||||||
import org.elasticsearch.test.ESIntegTestCase;
|
import org.elasticsearch.test.ESIntegTestCase;
|
||||||
import org.hamcrest.Matchers;
|
import org.hamcrest.Matchers;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -84,6 +88,18 @@ public class StringTermsIT extends AbstractTermsTestCase {
|
||||||
return Collections.singleton(CustomScriptPlugin.class);
|
return Collections.singleton(CustomScriptPlugin.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void randomizeOptimizations() {
|
||||||
|
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = false;randomBoolean();
|
||||||
|
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void resetOptimizations() {
|
||||||
|
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
|
||||||
|
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
|
||||||
|
}
|
||||||
|
|
||||||
public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {
|
public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {
|
||||||
|
|
||||||
@Override
|
@Override
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
|
||||||
import org.elasticsearch.common.network.InetAddresses;
|
import org.elasticsearch.common.network.InetAddresses;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.util.MockBigArrays;
|
import org.elasticsearch.common.util.MockBigArrays;
|
||||||
|
import org.elasticsearch.index.IndexSettings;
|
||||||
import org.elasticsearch.index.mapper.IpFieldMapper;
|
import org.elasticsearch.index.mapper.IpFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||||
|
@ -68,7 +69,27 @@ import java.util.function.Function;
|
||||||
import static org.hamcrest.Matchers.instanceOf;
|
import static org.hamcrest.Matchers.instanceOf;
|
||||||
|
|
||||||
public class TermsAggregatorTests extends AggregatorTestCase {
|
public class TermsAggregatorTests extends AggregatorTestCase {
|
||||||
|
|
||||||
|
private boolean randomizeAggregatorImpl = true;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected <A extends Aggregator> A createAggregator(AggregationBuilder aggregationBuilder,
|
||||||
|
IndexSearcher indexSearcher, IndexSettings indexSettings, MappedFieldType... fieldTypes) throws IOException {
|
||||||
|
try {
|
||||||
|
if (randomizeAggregatorImpl) {
|
||||||
|
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = randomBoolean();
|
||||||
|
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
|
||||||
|
}
|
||||||
|
return super.createAggregator(aggregationBuilder, indexSearcher, indexSettings, fieldTypes);
|
||||||
|
} finally {
|
||||||
|
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
|
||||||
|
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testGlobalOrdinalsExecutionHint() throws Exception {
|
public void testGlobalOrdinalsExecutionHint() throws Exception {
|
||||||
|
randomizeAggregatorImpl = false;
|
||||||
|
|
||||||
Directory directory = newDirectory();
|
Directory directory = newDirectory();
|
||||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||||
indexWriter.close();
|
indexWriter.close();
|
||||||
|
|
|
@ -38,6 +38,7 @@ import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
import java.util.function.LongUnaryOperator;
|
||||||
|
|
||||||
public class MissingValuesTests extends ESTestCase {
|
public class MissingValuesTests extends ESTestCase {
|
||||||
|
|
||||||
|
@ -111,7 +112,7 @@ public class MissingValuesTests extends ESTestCase {
|
||||||
ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1);
|
ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
SortedSetDocValues asRandomAccessOrds = new AbstractSortedSetDocValues() {
|
SortedSetDocValues asSortedSet = new AbstractSortedSetDocValues() {
|
||||||
|
|
||||||
int doc = -1;
|
int doc = -1;
|
||||||
int i;
|
int i;
|
||||||
|
@ -147,7 +148,7 @@ public class MissingValuesTests extends ESTestCase {
|
||||||
final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5));
|
final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5));
|
||||||
|
|
||||||
for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) {
|
for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) {
|
||||||
SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asRandomAccessOrds, missing);
|
SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asSortedSet, missing);
|
||||||
if (valueSet.contains(missing)) {
|
if (valueSet.contains(missing)) {
|
||||||
assertEquals(values.length, withMissingReplaced.getValueCount());
|
assertEquals(values.length, withMissingReplaced.getValueCount());
|
||||||
} else {
|
} else {
|
||||||
|
@ -169,6 +170,84 @@ public class MissingValuesTests extends ESTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testGlobalMapping() throws IOException {
|
||||||
|
final int numOrds = TestUtil.nextInt(random(), 1, 10);
|
||||||
|
final int numGlobalOrds = TestUtil.nextInt(random(), numOrds, numOrds + 3);
|
||||||
|
|
||||||
|
final Set<BytesRef> valueSet = new HashSet<>();
|
||||||
|
while (valueSet.size() < numOrds) {
|
||||||
|
valueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
|
||||||
|
}
|
||||||
|
final BytesRef[] values = valueSet.toArray(new BytesRef[0]);
|
||||||
|
Arrays.sort(values);
|
||||||
|
|
||||||
|
final Set<BytesRef> globalValueSet = new HashSet<>(valueSet);
|
||||||
|
while (globalValueSet.size() < numGlobalOrds) {
|
||||||
|
globalValueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
|
||||||
|
}
|
||||||
|
final BytesRef[] globalValues = globalValueSet.toArray(new BytesRef[0]);
|
||||||
|
Arrays.sort(globalValues);
|
||||||
|
|
||||||
|
// exists in the current segment
|
||||||
|
BytesRef missing = RandomPicks.randomFrom(random(), values);
|
||||||
|
doTestGlobalMapping(values, globalValues, missing);
|
||||||
|
|
||||||
|
// missing in all segments
|
||||||
|
do {
|
||||||
|
missing = new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5));
|
||||||
|
} while (globalValueSet.contains(missing));
|
||||||
|
doTestGlobalMapping(values, globalValues, missing);
|
||||||
|
|
||||||
|
if (globalValueSet.size() > valueSet.size()) {
|
||||||
|
// exists in other segments only
|
||||||
|
Set<BytesRef> other = new HashSet<>(globalValueSet);
|
||||||
|
other.removeAll(valueSet);
|
||||||
|
missing = RandomPicks.randomFrom(random(), other.toArray(new BytesRef[0]));
|
||||||
|
doTestGlobalMapping(values, globalValues, missing);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestGlobalMapping(BytesRef[] values, BytesRef[] globalValues, BytesRef missing) throws IOException {
|
||||||
|
LongUnaryOperator segmentToGlobalOrd = segmentOrd -> Arrays.binarySearch(globalValues, values[Math.toIntExact(segmentOrd)]);
|
||||||
|
SortedSetDocValues sortedValues = asOrds(values);
|
||||||
|
SortedSetDocValues sortedGlobalValues = asOrds(globalValues);
|
||||||
|
|
||||||
|
LongUnaryOperator withMissingSegmentToGlobalOrd = MissingValues.getGlobalMapping(
|
||||||
|
sortedValues, sortedGlobalValues, segmentToGlobalOrd, missing);
|
||||||
|
SortedSetDocValues withMissingValues = MissingValues.replaceMissing(sortedValues, missing);
|
||||||
|
SortedSetDocValues withMissingGlobalValues = MissingValues.replaceMissing(sortedGlobalValues, missing);
|
||||||
|
|
||||||
|
for (long segmentOrd = 0; segmentOrd < withMissingValues.getValueCount(); ++segmentOrd) {
|
||||||
|
long expectedGlobalOrd = withMissingSegmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||||
|
assertEquals(withMissingValues.lookupOrd(segmentOrd), withMissingGlobalValues.lookupOrd(expectedGlobalOrd));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static SortedSetDocValues asOrds(BytesRef[] values) {
|
||||||
|
return new AbstractSortedSetDocValues() {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean advanceExact(int target) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long nextOrd() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public BytesRef lookupOrd(long ord) throws IOException {
|
||||||
|
return values[Math.toIntExact(ord)];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getValueCount() {
|
||||||
|
return values.length;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
public void testMissingLongs() throws IOException {
|
public void testMissingLongs() throws IOException {
|
||||||
final int numDocs = TestUtil.nextInt(random(), 1, 100);
|
final int numDocs = TestUtil.nextInt(random(), 1, 100);
|
||||||
final int[][] values = new int[numDocs][];
|
final int[][] values = new int[numDocs][];
|
||||||
|
|
Loading…
Reference in New Issue