Fix illegal cast of the "low cardinality" optimization of the `terms` aggregation. (#27543)

The GlobalOrdinalsStringTermsAggregator.LowCardinality aggregator casts global
values to `GlobalOrdinalMapping`, even though the implementation of global
values is different when a `missing` value is configured.

This commit adds a new API that gives access to the ordinal remapping in order
to fix this problem.
This commit is contained in:
Adrien Grand 2017-11-28 14:55:09 +01:00 committed by GitHub
parent 996990ad1f
commit d01fcee645
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 229 additions and 70 deletions

View File

@ -29,7 +29,7 @@ import java.io.IOException;
/** /**
* A {@link SortedSetDocValues} implementation that returns ordinals that are global. * A {@link SortedSetDocValues} implementation that returns ordinals that are global.
*/ */
public class GlobalOrdinalMapping extends SortedSetDocValues { final class GlobalOrdinalMapping extends SortedSetDocValues {
private final SortedSetDocValues values; private final SortedSetDocValues values;
private final OrdinalMap ordinalMap; private final OrdinalMap ordinalMap;
@ -49,7 +49,7 @@ public class GlobalOrdinalMapping extends SortedSetDocValues {
return ordinalMap.getValueCount(); return ordinalMap.getValueCount();
} }
public final long getGlobalOrd(long segmentOrd) { public long getGlobalOrd(long segmentOrd) {
return mapping.get(segmentOrd); return mapping.get(segmentOrd);
} }

View File

@ -25,7 +25,6 @@ import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.search.sort.SortOrder;
@ -179,16 +178,10 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
if (lookup == null) { if (lookup == null) {
lookup = dvs; lookup = dvs;
if (topValue != null && topValueLong == null) { if (topValue != null && topValueLong == null) {
if (lookup instanceof GlobalOrdinalMapping) { topValueLong = lookup.lookupTerm(topValue);
// Find the global ordinal (or the insertion point) for the provided top value. if (topValueLong < 0) {
topValueLong = lookupGlobalOrdinals((GlobalOrdinalMapping) lookup, topValue); // convert negative insert position
} else { topValueLong = -topValueLong - 2;
// Global ordinals are not needed, switch back to ordinals (single segment case).
topValueLong = lookup.lookupTerm(topValue);
if (topValueLong < 0) {
// convert negative insert position
topValueLong = -topValueLong - 2;
}
} }
} }
} }
@ -202,25 +195,6 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
} }
}; };
} }
private static long lookupGlobalOrdinals(GlobalOrdinalMapping mapping, BytesRef key) throws IOException {
long low = 0;
long high = mapping.getValueCount();
while (low <= high) {
long mid = (low + high) >>> 1;
BytesRef midVal = mapping.lookupOrd(mid);
int cmp = midVal.compareTo(key);
if (cmp < 0) {
low = mid + 1;
} else if (cmp > 0) {
high = mid - 1;
} else {
return mid;
}
}
return low-1;
}
} }
/** /**

View File

@ -33,7 +33,6 @@ import org.elasticsearch.common.util.IntArray;
import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.util.LongHash;
import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues; import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.search.aggregations.AggregatorFactories; import org.elasticsearch.search.aggregations.AggregatorFactories;
@ -50,6 +49,7 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.function.LongUnaryOperator;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
@ -295,9 +295,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
*/ */
static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
private LongUnaryOperator mapping;
private IntArray segmentDocCounts; private IntArray segmentDocCounts;
private SortedSetDocValues globalOrds;
private SortedSetDocValues segmentOrds;
LowCardinality(String name, LowCardinality(String name,
AggregatorFactories factories, AggregatorFactories factories,
@ -321,14 +320,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
@Override @Override
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
final LeafBucketCollector sub) throws IOException { final LeafBucketCollector sub) throws IOException {
if (segmentOrds != null) { if (mapping != null) {
mapSegmentCountsToGlobalCounts(); mapSegmentCountsToGlobalCounts(mapping);
} }
globalOrds = valuesSource.globalOrdinalsValues(ctx); final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx);
segmentOrds = valuesSource.ordinalsValues(ctx);
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
assert sub == LeafBucketCollector.NO_OP_COLLECTOR; assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
mapping = valuesSource.globalOrdinalsMapping(ctx);
if (singleValues != null) { if (singleValues != null) {
return new LeafBucketCollectorBase(sub, segmentOrds) { return new LeafBucketCollectorBase(sub, segmentOrds) {
@Override @Override
@ -356,9 +355,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
} }
@Override @Override
protected void doPostCollection() { protected void doPostCollection() throws IOException {
if (segmentOrds != null) { if (mapping != null) {
mapSegmentCountsToGlobalCounts(); mapSegmentCountsToGlobalCounts(mapping);
mapping = null;
} }
} }
@ -367,16 +367,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
Releasables.close(segmentDocCounts); Releasables.close(segmentDocCounts);
} }
private void mapSegmentCountsToGlobalCounts() { private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException {
// There is no public method in Ordinals.Docs that allows for this mapping...
// This is the cleanest way I can think of so far
GlobalOrdinalMapping mapping;
if (globalOrds.getValueCount() == segmentOrds.getValueCount()) {
mapping = null;
} else {
mapping = (GlobalOrdinalMapping) globalOrds;
}
for (long i = 1; i < segmentDocCounts.size(); i++) { for (long i = 1; i < segmentDocCounts.size(); i++) {
// We use set(...) here, because we need to reset the slow to 0. // We use set(...) here, because we need to reset the slow to 0.
// segmentDocCounts get reused over the segments and otherwise counts would be too high. // segmentDocCounts get reused over the segments and otherwise counts would be too high.
@ -385,7 +376,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
continue; continue;
} }
final long ord = i - 1; // remember we do +1 when counting final long ord = i - 1; // remember we do +1 when counting
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord); final long globalOrd = mapping.applyAsLong(ord);
long bucketOrd = getBucketOrd(globalOrd); long bucketOrd = getBucketOrd(globalOrd);
incrementBucketDocCount(bucketOrd, inc); incrementBucketDocCount(bucketOrd, inc);
} }

View File

@ -49,6 +49,8 @@ import java.util.Map;
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> { public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class)); private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
private final BucketOrder order; private final BucketOrder order;
private final IncludeExclude includeExclude; private final IncludeExclude includeExclude;
private final String executionHint; private final String executionHint;
@ -257,11 +259,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
final long maxOrd = getMaxOrd(valuesSource, context.searcher()); final long maxOrd = getMaxOrd(valuesSource, context.searcher());
assert maxOrd != -1; assert maxOrd != -1;
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs()); final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
if (factories == AggregatorFactories.EMPTY && if (factories == AggregatorFactories.EMPTY &&
includeExclude == null && includeExclude == null &&
Aggregator.descendsFromBucketAggregator(parent) == false && Aggregator.descendsFromBucketAggregator(parent) == false &&
ratio <= 0.5 && maxOrd <= 2048) { // we use the static COLLECT_SEGMENT_ORDS to allow tests to force specific optimizations
(COLLECT_SEGMENT_ORDS!= null ? COLLECT_SEGMENT_ORDS.booleanValue() : ratio <= 0.5 && maxOrd <= 2048)) {
/** /**
* We can use the low cardinality execution mode iff this aggregator: * We can use the low cardinality execution mode iff this aggregator:
* - has no sub-aggregator AND * - has no sub-aggregator AND
@ -276,18 +280,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
} }
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format); final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
boolean remapGlobalOrds = true; boolean remapGlobalOrds;
if (includeExclude == null && if (REMAP_GLOBAL_ORDS != null) {
Aggregator.descendsFromBucketAggregator(parent) == false && // We use REMAP_GLOBAL_ORDS to allow tests to force specific optimizations
(factories == AggregatorFactories.EMPTY || remapGlobalOrds = REMAP_GLOBAL_ORDS.booleanValue();
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) { } else {
/** remapGlobalOrds = true;
* We don't need to remap global ords iff this aggregator: if (includeExclude == null &&
* - has no include/exclude rules AND Aggregator.descendsFromBucketAggregator(parent) == false &&
* - is not a child of a bucket aggregator AND (factories == AggregatorFactories.EMPTY ||
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}). (isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
**/ /**
remapGlobalOrds = false; * We don't need to remap global ords iff this aggregator:
* - has no include/exclude rules AND
* - is not a child of a bucket aggregator AND
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
**/
remapGlobalOrds = false;
}
} }
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order, return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError, format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,

View File

@ -31,6 +31,7 @@ import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongUnaryOperator;
/** /**
* Utility class that allows to return views of {@link ValuesSource}s that * Utility class that allows to return views of {@link ValuesSource}s that
@ -201,6 +202,13 @@ public enum MissingValues {
SortedSetDocValues values = valuesSource.globalOrdinalsValues(context); SortedSetDocValues values = valuesSource.globalOrdinalsValues(context);
return replaceMissing(values, missing); return replaceMissing(values, missing);
} }
@Override
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
return getGlobalMapping(valuesSource.ordinalsValues(context),
valuesSource.globalOrdinalsValues(context),
valuesSource.globalOrdinalsMapping(context), missing);
}
}; };
} }
@ -311,6 +319,43 @@ public enum MissingValues {
}; };
} }
static LongUnaryOperator getGlobalMapping(SortedSetDocValues values, SortedSetDocValues globalValues,
LongUnaryOperator segmentToGlobalOrd, BytesRef missing) throws IOException {
final long missingGlobalOrd = globalValues.lookupTerm(missing);
final long missingSegmentOrd = values.lookupTerm(missing);
if (missingSegmentOrd >= 0) {
// the missing value exists in the segment, nothing to do
return segmentToGlobalOrd;
} else if (missingGlobalOrd >= 0) {
// the missing value exists in another segment, but not the current one
final long insertedSegmentOrd = -1L - missingSegmentOrd;
final long insertedGlobalOrd = missingGlobalOrd;
return segmentOrd -> {
if (insertedSegmentOrd == segmentOrd) {
return insertedGlobalOrd;
} else if (insertedSegmentOrd > segmentOrd) {
return segmentToGlobalOrd.applyAsLong(segmentOrd);
} else {
return segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
}
};
} else {
// the missing value exists neither in this segment nor in another segment
final long insertedSegmentOrd = -1L - missingSegmentOrd;
final long insertedGlobalOrd = -1L - missingGlobalOrd;
return segmentOrd -> {
if (insertedSegmentOrd == segmentOrd) {
return insertedGlobalOrd;
} else if (insertedSegmentOrd > segmentOrd) {
return segmentToGlobalOrd.applyAsLong(segmentOrd);
} else {
return 1 + segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
}
};
}
}
public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) { public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) {
return new ValuesSource.GeoPoint() { return new ValuesSource.GeoPoint() {

View File

@ -22,6 +22,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
@ -47,6 +48,7 @@ import org.elasticsearch.search.aggregations.support.values.ScriptDoubleValues;
import org.elasticsearch.search.aggregations.support.values.ScriptLongValues; import org.elasticsearch.search.aggregations.support.values.ScriptLongValues;
import java.io.IOException; import java.io.IOException;
import java.util.function.LongUnaryOperator;
public abstract class ValuesSource { public abstract class ValuesSource {
@ -90,6 +92,11 @@ public abstract class ValuesSource {
return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary(); return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary();
} }
@Override
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
return LongUnaryOperator.identity();
}
}; };
@Override @Override
@ -105,6 +112,10 @@ public abstract class ValuesSource {
public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context) public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
throws IOException; throws IOException;
/** Returns a mapping from segment ordinals to global ordinals. */
public abstract LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context)
throws IOException;
public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException { public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException {
IndexReader indexReader = indexSearcher.getIndexReader(); IndexReader indexReader = indexSearcher.getIndexReader();
if (indexReader.leaves().isEmpty()) { if (indexReader.leaves().isEmpty()) {
@ -142,6 +153,18 @@ public abstract class ValuesSource {
final AtomicOrdinalsFieldData atomicFieldData = global.load(context); final AtomicOrdinalsFieldData atomicFieldData = global.load(context);
return atomicFieldData.getOrdinalsValues(); return atomicFieldData.getOrdinalsValues();
} }
@Override
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
final IndexOrdinalsFieldData global = indexFieldData.loadGlobal((DirectoryReader)context.parent.reader());
final OrdinalMap map = global.getOrdinalMap();
if (map == null) {
// segments and global ordinals are the same
return LongUnaryOperator.identity();
}
final org.apache.lucene.util.LongValues segmentToGlobalOrd = map.getGlobalOrds(context.ord);
return segmentToGlobalOrd::get;
}
} }
} }

View File

@ -16,7 +16,7 @@
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ */
package org.elasticsearch.search.aggregations.bucket; package org.elasticsearch.search.aggregations.bucket.terms;
import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder;
@ -34,9 +34,11 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException;
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin; import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
import org.elasticsearch.search.aggregations.BucketOrder; import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.AbstractTermsTestCase;
import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.filter.Filter;
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
import org.elasticsearch.search.aggregations.metrics.avg.Avg; import org.elasticsearch.search.aggregations.metrics.avg.Avg;
import org.elasticsearch.search.aggregations.metrics.stats.Stats; import org.elasticsearch.search.aggregations.metrics.stats.Stats;
@ -44,6 +46,8 @@ import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStat
import org.elasticsearch.search.aggregations.metrics.sum.Sum; import org.elasticsearch.search.aggregations.metrics.sum.Sum;
import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.ESIntegTestCase;
import org.hamcrest.Matchers; import org.hamcrest.Matchers;
import org.junit.After;
import org.junit.Before;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
@ -84,6 +88,18 @@ public class StringTermsIT extends AbstractTermsTestCase {
return Collections.singleton(CustomScriptPlugin.class); return Collections.singleton(CustomScriptPlugin.class);
} }
@Before
public void randomizeOptimizations() {
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = false;randomBoolean();
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
}
@After
public void resetOptimizations() {
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
}
public static class CustomScriptPlugin extends AggregationTestScriptsPlugin { public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {
@Override @Override

View File

@ -38,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.network.InetAddresses;
import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.MockBigArrays; import org.elasticsearch.common.util.MockBigArrays;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.IpFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType;
@ -68,7 +69,27 @@ import java.util.function.Function;
import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.instanceOf;
public class TermsAggregatorTests extends AggregatorTestCase { public class TermsAggregatorTests extends AggregatorTestCase {
private boolean randomizeAggregatorImpl = true;
@Override
protected <A extends Aggregator> A createAggregator(AggregationBuilder aggregationBuilder,
IndexSearcher indexSearcher, IndexSettings indexSettings, MappedFieldType... fieldTypes) throws IOException {
try {
if (randomizeAggregatorImpl) {
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = randomBoolean();
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
}
return super.createAggregator(aggregationBuilder, indexSearcher, indexSettings, fieldTypes);
} finally {
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
}
}
public void testGlobalOrdinalsExecutionHint() throws Exception { public void testGlobalOrdinalsExecutionHint() throws Exception {
randomizeAggregatorImpl = false;
Directory directory = newDirectory(); Directory directory = newDirectory();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
indexWriter.close(); indexWriter.close();

View File

@ -38,6 +38,7 @@ import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import java.util.function.LongUnaryOperator;
public class MissingValuesTests extends ESTestCase { public class MissingValuesTests extends ESTestCase {
@ -111,7 +112,7 @@ public class MissingValuesTests extends ESTestCase {
ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1); ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1);
} }
} }
SortedSetDocValues asRandomAccessOrds = new AbstractSortedSetDocValues() { SortedSetDocValues asSortedSet = new AbstractSortedSetDocValues() {
int doc = -1; int doc = -1;
int i; int i;
@ -147,7 +148,7 @@ public class MissingValuesTests extends ESTestCase {
final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5)); final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5));
for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) { for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) {
SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asRandomAccessOrds, missing); SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asSortedSet, missing);
if (valueSet.contains(missing)) { if (valueSet.contains(missing)) {
assertEquals(values.length, withMissingReplaced.getValueCount()); assertEquals(values.length, withMissingReplaced.getValueCount());
} else { } else {
@ -169,6 +170,84 @@ public class MissingValuesTests extends ESTestCase {
} }
} }
public void testGlobalMapping() throws IOException {
final int numOrds = TestUtil.nextInt(random(), 1, 10);
final int numGlobalOrds = TestUtil.nextInt(random(), numOrds, numOrds + 3);
final Set<BytesRef> valueSet = new HashSet<>();
while (valueSet.size() < numOrds) {
valueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
}
final BytesRef[] values = valueSet.toArray(new BytesRef[0]);
Arrays.sort(values);
final Set<BytesRef> globalValueSet = new HashSet<>(valueSet);
while (globalValueSet.size() < numGlobalOrds) {
globalValueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
}
final BytesRef[] globalValues = globalValueSet.toArray(new BytesRef[0]);
Arrays.sort(globalValues);
// exists in the current segment
BytesRef missing = RandomPicks.randomFrom(random(), values);
doTestGlobalMapping(values, globalValues, missing);
// missing in all segments
do {
missing = new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5));
} while (globalValueSet.contains(missing));
doTestGlobalMapping(values, globalValues, missing);
if (globalValueSet.size() > valueSet.size()) {
// exists in other segments only
Set<BytesRef> other = new HashSet<>(globalValueSet);
other.removeAll(valueSet);
missing = RandomPicks.randomFrom(random(), other.toArray(new BytesRef[0]));
doTestGlobalMapping(values, globalValues, missing);
}
}
private void doTestGlobalMapping(BytesRef[] values, BytesRef[] globalValues, BytesRef missing) throws IOException {
LongUnaryOperator segmentToGlobalOrd = segmentOrd -> Arrays.binarySearch(globalValues, values[Math.toIntExact(segmentOrd)]);
SortedSetDocValues sortedValues = asOrds(values);
SortedSetDocValues sortedGlobalValues = asOrds(globalValues);
LongUnaryOperator withMissingSegmentToGlobalOrd = MissingValues.getGlobalMapping(
sortedValues, sortedGlobalValues, segmentToGlobalOrd, missing);
SortedSetDocValues withMissingValues = MissingValues.replaceMissing(sortedValues, missing);
SortedSetDocValues withMissingGlobalValues = MissingValues.replaceMissing(sortedGlobalValues, missing);
for (long segmentOrd = 0; segmentOrd < withMissingValues.getValueCount(); ++segmentOrd) {
long expectedGlobalOrd = withMissingSegmentToGlobalOrd.applyAsLong(segmentOrd);
assertEquals(withMissingValues.lookupOrd(segmentOrd), withMissingGlobalValues.lookupOrd(expectedGlobalOrd));
}
}
private static SortedSetDocValues asOrds(BytesRef[] values) {
return new AbstractSortedSetDocValues() {
@Override
public boolean advanceExact(int target) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public long nextOrd() throws IOException {
throw new UnsupportedOperationException();
}
@Override
public BytesRef lookupOrd(long ord) throws IOException {
return values[Math.toIntExact(ord)];
}
@Override
public long getValueCount() {
return values.length;
}
};
}
public void testMissingLongs() throws IOException { public void testMissingLongs() throws IOException {
final int numDocs = TestUtil.nextInt(random(), 1, 100); final int numDocs = TestUtil.nextInt(random(), 1, 100);
final int[][] values = new int[numDocs][]; final int[][] values = new int[numDocs][];