Fix illegal cast of the "low cardinality" optimization of the `terms` aggregation. (#27543)
The GlobalOrdinalsStringTermsAggregator.LowCardinality aggregator casts global values to `GlobalOrdinalMapping`, even though the implementation of global values is different when a `missing` value is configured. This commit adds a new API that gives access to the ordinal remapping in order to fix this problem.
This commit is contained in:
parent
996990ad1f
commit
d01fcee645
|
@ -29,7 +29,7 @@ import java.io.IOException;
|
|||
/**
|
||||
* A {@link SortedSetDocValues} implementation that returns ordinals that are global.
|
||||
*/
|
||||
public class GlobalOrdinalMapping extends SortedSetDocValues {
|
||||
final class GlobalOrdinalMapping extends SortedSetDocValues {
|
||||
|
||||
private final SortedSetDocValues values;
|
||||
private final OrdinalMap ordinalMap;
|
||||
|
@ -49,7 +49,7 @@ public class GlobalOrdinalMapping extends SortedSetDocValues {
|
|||
return ordinalMap.getValueCount();
|
||||
}
|
||||
|
||||
public final long getGlobalOrd(long segmentOrd) {
|
||||
public long getGlobalOrd(long segmentOrd) {
|
||||
return mapping.get(segmentOrd);
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,6 @@ import org.apache.lucene.search.LeafCollector;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
||||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
|
||||
import org.elasticsearch.search.aggregations.support.ValuesSource;
|
||||
import org.elasticsearch.search.sort.SortOrder;
|
||||
|
||||
|
@ -179,16 +178,10 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
|
|||
if (lookup == null) {
|
||||
lookup = dvs;
|
||||
if (topValue != null && topValueLong == null) {
|
||||
if (lookup instanceof GlobalOrdinalMapping) {
|
||||
// Find the global ordinal (or the insertion point) for the provided top value.
|
||||
topValueLong = lookupGlobalOrdinals((GlobalOrdinalMapping) lookup, topValue);
|
||||
} else {
|
||||
// Global ordinals are not needed, switch back to ordinals (single segment case).
|
||||
topValueLong = lookup.lookupTerm(topValue);
|
||||
if (topValueLong < 0) {
|
||||
// convert negative insert position
|
||||
topValueLong = -topValueLong - 2;
|
||||
}
|
||||
topValueLong = lookup.lookupTerm(topValue);
|
||||
if (topValueLong < 0) {
|
||||
// convert negative insert position
|
||||
topValueLong = -topValueLong - 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -202,25 +195,6 @@ abstract class CompositeValuesSource<VS extends ValuesSource, T extends Comparab
|
|||
}
|
||||
};
|
||||
}
|
||||
|
||||
private static long lookupGlobalOrdinals(GlobalOrdinalMapping mapping, BytesRef key) throws IOException {
|
||||
long low = 0;
|
||||
long high = mapping.getValueCount();
|
||||
|
||||
while (low <= high) {
|
||||
long mid = (low + high) >>> 1;
|
||||
BytesRef midVal = mapping.lookupOrd(mid);
|
||||
int cmp = midVal.compareTo(key);
|
||||
if (cmp < 0) {
|
||||
low = mid + 1;
|
||||
} else if (cmp > 0) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return mid;
|
||||
}
|
||||
}
|
||||
return low-1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -33,7 +33,6 @@ import org.elasticsearch.common.util.IntArray;
|
|||
import org.elasticsearch.common.util.LongHash;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping;
|
||||
import org.elasticsearch.search.DocValueFormat;
|
||||
import org.elasticsearch.search.aggregations.Aggregator;
|
||||
import org.elasticsearch.search.aggregations.AggregatorFactories;
|
||||
|
@ -50,6 +49,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
|
||||
|
||||
|
@ -295,9 +295,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
*/
|
||||
static class LowCardinality extends GlobalOrdinalsStringTermsAggregator {
|
||||
|
||||
private LongUnaryOperator mapping;
|
||||
private IntArray segmentDocCounts;
|
||||
private SortedSetDocValues globalOrds;
|
||||
private SortedSetDocValues segmentOrds;
|
||||
|
||||
LowCardinality(String name,
|
||||
AggregatorFactories factories,
|
||||
|
@ -321,14 +320,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
@Override
|
||||
public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
|
||||
final LeafBucketCollector sub) throws IOException {
|
||||
if (segmentOrds != null) {
|
||||
mapSegmentCountsToGlobalCounts();
|
||||
if (mapping != null) {
|
||||
mapSegmentCountsToGlobalCounts(mapping);
|
||||
}
|
||||
globalOrds = valuesSource.globalOrdinalsValues(ctx);
|
||||
segmentOrds = valuesSource.ordinalsValues(ctx);
|
||||
final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx);
|
||||
segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount());
|
||||
assert sub == LeafBucketCollector.NO_OP_COLLECTOR;
|
||||
final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds);
|
||||
mapping = valuesSource.globalOrdinalsMapping(ctx);
|
||||
if (singleValues != null) {
|
||||
return new LeafBucketCollectorBase(sub, segmentOrds) {
|
||||
@Override
|
||||
|
@ -356,9 +355,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void doPostCollection() {
|
||||
if (segmentOrds != null) {
|
||||
mapSegmentCountsToGlobalCounts();
|
||||
protected void doPostCollection() throws IOException {
|
||||
if (mapping != null) {
|
||||
mapSegmentCountsToGlobalCounts(mapping);
|
||||
mapping = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -367,16 +367,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
Releasables.close(segmentDocCounts);
|
||||
}
|
||||
|
||||
private void mapSegmentCountsToGlobalCounts() {
|
||||
// There is no public method in Ordinals.Docs that allows for this mapping...
|
||||
// This is the cleanest way I can think of so far
|
||||
|
||||
GlobalOrdinalMapping mapping;
|
||||
if (globalOrds.getValueCount() == segmentOrds.getValueCount()) {
|
||||
mapping = null;
|
||||
} else {
|
||||
mapping = (GlobalOrdinalMapping) globalOrds;
|
||||
}
|
||||
private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException {
|
||||
for (long i = 1; i < segmentDocCounts.size(); i++) {
|
||||
// We use set(...) here, because we need to reset the slow to 0.
|
||||
// segmentDocCounts get reused over the segments and otherwise counts would be too high.
|
||||
|
@ -385,7 +376,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr
|
|||
continue;
|
||||
}
|
||||
final long ord = i - 1; // remember we do +1 when counting
|
||||
final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord);
|
||||
final long globalOrd = mapping.applyAsLong(ord);
|
||||
long bucketOrd = getBucketOrd(globalOrd);
|
||||
incrementBucketDocCount(bucketOrd, inc);
|
||||
}
|
||||
|
|
|
@ -49,6 +49,8 @@ import java.util.Map;
|
|||
public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource, TermsAggregatorFactory> {
|
||||
private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class));
|
||||
|
||||
static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS;
|
||||
|
||||
private final BucketOrder order;
|
||||
private final IncludeExclude includeExclude;
|
||||
private final String executionHint;
|
||||
|
@ -257,11 +259,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
|
||||
final long maxOrd = getMaxOrd(valuesSource, context.searcher());
|
||||
assert maxOrd != -1;
|
||||
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
||||
final double ratio = maxOrd / ((double) context.searcher().getIndexReader().numDocs());
|
||||
|
||||
if (factories == AggregatorFactories.EMPTY &&
|
||||
includeExclude == null &&
|
||||
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
||||
ratio <= 0.5 && maxOrd <= 2048) {
|
||||
// we use the static COLLECT_SEGMENT_ORDS to allow tests to force specific optimizations
|
||||
(COLLECT_SEGMENT_ORDS!= null ? COLLECT_SEGMENT_ORDS.booleanValue() : ratio <= 0.5 && maxOrd <= 2048)) {
|
||||
/**
|
||||
* We can use the low cardinality execution mode iff this aggregator:
|
||||
* - has no sub-aggregator AND
|
||||
|
@ -276,18 +280,24 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory<Values
|
|||
|
||||
}
|
||||
final IncludeExclude.OrdinalsFilter filter = includeExclude == null ? null : includeExclude.convertToOrdinalsFilter(format);
|
||||
boolean remapGlobalOrds = true;
|
||||
if (includeExclude == null &&
|
||||
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
||||
(factories == AggregatorFactories.EMPTY ||
|
||||
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
|
||||
/**
|
||||
* We don't need to remap global ords iff this aggregator:
|
||||
* - has no include/exclude rules AND
|
||||
* - is not a child of a bucket aggregator AND
|
||||
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
|
||||
**/
|
||||
remapGlobalOrds = false;
|
||||
boolean remapGlobalOrds;
|
||||
if (REMAP_GLOBAL_ORDS != null) {
|
||||
// We use REMAP_GLOBAL_ORDS to allow tests to force specific optimizations
|
||||
remapGlobalOrds = REMAP_GLOBAL_ORDS.booleanValue();
|
||||
} else {
|
||||
remapGlobalOrds = true;
|
||||
if (includeExclude == null &&
|
||||
Aggregator.descendsFromBucketAggregator(parent) == false &&
|
||||
(factories == AggregatorFactories.EMPTY ||
|
||||
(isAggregationSort(order) == false && subAggCollectMode == SubAggCollectionMode.BREADTH_FIRST))) {
|
||||
/**
|
||||
* We don't need to remap global ords iff this aggregator:
|
||||
* - has no include/exclude rules AND
|
||||
* - is not a child of a bucket aggregator AND
|
||||
* - has no sub-aggregator or only sub-aggregator that can be deferred ({@link SubAggCollectionMode#BREADTH_FIRST}).
|
||||
**/
|
||||
remapGlobalOrds = false;
|
||||
}
|
||||
}
|
||||
return new GlobalOrdinalsStringTermsAggregator(name, factories, (ValuesSource.Bytes.WithOrdinals) valuesSource, order,
|
||||
format, bucketCountThresholds, filter, context, parent, remapGlobalOrds, subAggCollectMode, showTermDocCountError,
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
|
|||
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
/**
|
||||
* Utility class that allows to return views of {@link ValuesSource}s that
|
||||
|
@ -201,6 +202,13 @@ public enum MissingValues {
|
|||
SortedSetDocValues values = valuesSource.globalOrdinalsValues(context);
|
||||
return replaceMissing(values, missing);
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||
return getGlobalMapping(valuesSource.ordinalsValues(context),
|
||||
valuesSource.globalOrdinalsValues(context),
|
||||
valuesSource.globalOrdinalsMapping(context), missing);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -311,6 +319,43 @@ public enum MissingValues {
|
|||
};
|
||||
}
|
||||
|
||||
static LongUnaryOperator getGlobalMapping(SortedSetDocValues values, SortedSetDocValues globalValues,
|
||||
LongUnaryOperator segmentToGlobalOrd, BytesRef missing) throws IOException {
|
||||
final long missingGlobalOrd = globalValues.lookupTerm(missing);
|
||||
final long missingSegmentOrd = values.lookupTerm(missing);
|
||||
|
||||
if (missingSegmentOrd >= 0) {
|
||||
// the missing value exists in the segment, nothing to do
|
||||
return segmentToGlobalOrd;
|
||||
} else if (missingGlobalOrd >= 0) {
|
||||
// the missing value exists in another segment, but not the current one
|
||||
final long insertedSegmentOrd = -1L - missingSegmentOrd;
|
||||
final long insertedGlobalOrd = missingGlobalOrd;
|
||||
return segmentOrd -> {
|
||||
if (insertedSegmentOrd == segmentOrd) {
|
||||
return insertedGlobalOrd;
|
||||
} else if (insertedSegmentOrd > segmentOrd) {
|
||||
return segmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||
} else {
|
||||
return segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
|
||||
}
|
||||
};
|
||||
} else {
|
||||
// the missing value exists neither in this segment nor in another segment
|
||||
final long insertedSegmentOrd = -1L - missingSegmentOrd;
|
||||
final long insertedGlobalOrd = -1L - missingGlobalOrd;
|
||||
return segmentOrd -> {
|
||||
if (insertedSegmentOrd == segmentOrd) {
|
||||
return insertedGlobalOrd;
|
||||
} else if (insertedSegmentOrd > segmentOrd) {
|
||||
return segmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||
} else {
|
||||
return 1 + segmentToGlobalOrd.applyAsLong(segmentOrd - 1);
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) {
|
||||
return new ValuesSource.GeoPoint() {
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.OrdinalMap;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
|
@ -47,6 +48,7 @@ import org.elasticsearch.search.aggregations.support.values.ScriptDoubleValues;
|
|||
import org.elasticsearch.search.aggregations.support.values.ScriptLongValues;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
public abstract class ValuesSource {
|
||||
|
||||
|
@ -90,6 +92,11 @@ public abstract class ValuesSource {
|
|||
return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||
return LongUnaryOperator.identity();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
@Override
|
||||
|
@ -105,6 +112,10 @@ public abstract class ValuesSource {
|
|||
public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context)
|
||||
throws IOException;
|
||||
|
||||
/** Returns a mapping from segment ordinals to global ordinals. */
|
||||
public abstract LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context)
|
||||
throws IOException;
|
||||
|
||||
public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException {
|
||||
IndexReader indexReader = indexSearcher.getIndexReader();
|
||||
if (indexReader.leaves().isEmpty()) {
|
||||
|
@ -142,6 +153,18 @@ public abstract class ValuesSource {
|
|||
final AtomicOrdinalsFieldData atomicFieldData = global.load(context);
|
||||
return atomicFieldData.getOrdinalsValues();
|
||||
}
|
||||
|
||||
@Override
|
||||
public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException {
|
||||
final IndexOrdinalsFieldData global = indexFieldData.loadGlobal((DirectoryReader)context.parent.reader());
|
||||
final OrdinalMap map = global.getOrdinalMap();
|
||||
if (map == null) {
|
||||
// segments and global ordinals are the same
|
||||
return LongUnaryOperator.identity();
|
||||
}
|
||||
final org.apache.lucene.util.LongValues segmentToGlobalOrd = map.getGlobalOrds(context.ord);
|
||||
return segmentToGlobalOrd::get;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket;
|
||||
package org.elasticsearch.search.aggregations.bucket.terms;
|
||||
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||
|
@ -34,9 +34,11 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException;
|
|||
import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin;
|
||||
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
|
||||
import org.elasticsearch.search.aggregations.BucketOrder;
|
||||
import org.elasticsearch.search.aggregations.bucket.AbstractTermsTestCase;
|
||||
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory;
|
||||
import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket;
|
||||
import org.elasticsearch.search.aggregations.metrics.avg.Avg;
|
||||
import org.elasticsearch.search.aggregations.metrics.stats.Stats;
|
||||
|
@ -44,6 +46,8 @@ import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStat
|
|||
import org.elasticsearch.search.aggregations.metrics.sum.Sum;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.hamcrest.Matchers;
|
||||
import org.junit.After;
|
||||
import org.junit.Before;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -84,6 +88,18 @@ public class StringTermsIT extends AbstractTermsTestCase {
|
|||
return Collections.singleton(CustomScriptPlugin.class);
|
||||
}
|
||||
|
||||
@Before
|
||||
public void randomizeOptimizations() {
|
||||
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = false;randomBoolean();
|
||||
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
|
||||
}
|
||||
|
||||
@After
|
||||
public void resetOptimizations() {
|
||||
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
|
||||
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
|
||||
}
|
||||
|
||||
public static class CustomScriptPlugin extends AggregationTestScriptsPlugin {
|
||||
|
||||
@Override
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.util.NumericUtils;
|
|||
import org.elasticsearch.common.network.InetAddresses;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.util.MockBigArrays;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.mapper.IpFieldMapper;
|
||||
import org.elasticsearch.index.mapper.KeywordFieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
|
@ -68,7 +69,27 @@ import java.util.function.Function;
|
|||
import static org.hamcrest.Matchers.instanceOf;
|
||||
|
||||
public class TermsAggregatorTests extends AggregatorTestCase {
|
||||
|
||||
private boolean randomizeAggregatorImpl = true;
|
||||
|
||||
@Override
|
||||
protected <A extends Aggregator> A createAggregator(AggregationBuilder aggregationBuilder,
|
||||
IndexSearcher indexSearcher, IndexSettings indexSettings, MappedFieldType... fieldTypes) throws IOException {
|
||||
try {
|
||||
if (randomizeAggregatorImpl) {
|
||||
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = randomBoolean();
|
||||
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean();
|
||||
}
|
||||
return super.createAggregator(aggregationBuilder, indexSearcher, indexSettings, fieldTypes);
|
||||
} finally {
|
||||
TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null;
|
||||
TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null;
|
||||
}
|
||||
}
|
||||
|
||||
public void testGlobalOrdinalsExecutionHint() throws Exception {
|
||||
randomizeAggregatorImpl = false;
|
||||
|
||||
Directory directory = newDirectory();
|
||||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
indexWriter.close();
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.io.IOException;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
import java.util.function.LongUnaryOperator;
|
||||
|
||||
public class MissingValuesTests extends ESTestCase {
|
||||
|
||||
|
@ -111,7 +112,7 @@ public class MissingValuesTests extends ESTestCase {
|
|||
ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1);
|
||||
}
|
||||
}
|
||||
SortedSetDocValues asRandomAccessOrds = new AbstractSortedSetDocValues() {
|
||||
SortedSetDocValues asSortedSet = new AbstractSortedSetDocValues() {
|
||||
|
||||
int doc = -1;
|
||||
int i;
|
||||
|
@ -147,7 +148,7 @@ public class MissingValuesTests extends ESTestCase {
|
|||
final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5));
|
||||
|
||||
for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) {
|
||||
SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asRandomAccessOrds, missing);
|
||||
SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asSortedSet, missing);
|
||||
if (valueSet.contains(missing)) {
|
||||
assertEquals(values.length, withMissingReplaced.getValueCount());
|
||||
} else {
|
||||
|
@ -169,6 +170,84 @@ public class MissingValuesTests extends ESTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testGlobalMapping() throws IOException {
|
||||
final int numOrds = TestUtil.nextInt(random(), 1, 10);
|
||||
final int numGlobalOrds = TestUtil.nextInt(random(), numOrds, numOrds + 3);
|
||||
|
||||
final Set<BytesRef> valueSet = new HashSet<>();
|
||||
while (valueSet.size() < numOrds) {
|
||||
valueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
|
||||
}
|
||||
final BytesRef[] values = valueSet.toArray(new BytesRef[0]);
|
||||
Arrays.sort(values);
|
||||
|
||||
final Set<BytesRef> globalValueSet = new HashSet<>(valueSet);
|
||||
while (globalValueSet.size() < numGlobalOrds) {
|
||||
globalValueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)));
|
||||
}
|
||||
final BytesRef[] globalValues = globalValueSet.toArray(new BytesRef[0]);
|
||||
Arrays.sort(globalValues);
|
||||
|
||||
// exists in the current segment
|
||||
BytesRef missing = RandomPicks.randomFrom(random(), values);
|
||||
doTestGlobalMapping(values, globalValues, missing);
|
||||
|
||||
// missing in all segments
|
||||
do {
|
||||
missing = new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5));
|
||||
} while (globalValueSet.contains(missing));
|
||||
doTestGlobalMapping(values, globalValues, missing);
|
||||
|
||||
if (globalValueSet.size() > valueSet.size()) {
|
||||
// exists in other segments only
|
||||
Set<BytesRef> other = new HashSet<>(globalValueSet);
|
||||
other.removeAll(valueSet);
|
||||
missing = RandomPicks.randomFrom(random(), other.toArray(new BytesRef[0]));
|
||||
doTestGlobalMapping(values, globalValues, missing);
|
||||
}
|
||||
}
|
||||
|
||||
private void doTestGlobalMapping(BytesRef[] values, BytesRef[] globalValues, BytesRef missing) throws IOException {
|
||||
LongUnaryOperator segmentToGlobalOrd = segmentOrd -> Arrays.binarySearch(globalValues, values[Math.toIntExact(segmentOrd)]);
|
||||
SortedSetDocValues sortedValues = asOrds(values);
|
||||
SortedSetDocValues sortedGlobalValues = asOrds(globalValues);
|
||||
|
||||
LongUnaryOperator withMissingSegmentToGlobalOrd = MissingValues.getGlobalMapping(
|
||||
sortedValues, sortedGlobalValues, segmentToGlobalOrd, missing);
|
||||
SortedSetDocValues withMissingValues = MissingValues.replaceMissing(sortedValues, missing);
|
||||
SortedSetDocValues withMissingGlobalValues = MissingValues.replaceMissing(sortedGlobalValues, missing);
|
||||
|
||||
for (long segmentOrd = 0; segmentOrd < withMissingValues.getValueCount(); ++segmentOrd) {
|
||||
long expectedGlobalOrd = withMissingSegmentToGlobalOrd.applyAsLong(segmentOrd);
|
||||
assertEquals(withMissingValues.lookupOrd(segmentOrd), withMissingGlobalValues.lookupOrd(expectedGlobalOrd));
|
||||
}
|
||||
}
|
||||
|
||||
private static SortedSetDocValues asOrds(BytesRef[] values) {
|
||||
return new AbstractSortedSetDocValues() {
|
||||
|
||||
@Override
|
||||
public boolean advanceExact(int target) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextOrd() throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef lookupOrd(long ord) throws IOException {
|
||||
return values[Math.toIntExact(ord)];
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getValueCount() {
|
||||
return values.length;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public void testMissingLongs() throws IOException {
|
||||
final int numDocs = TestUtil.nextInt(random(), 1, 100);
|
||||
final int[][] values = new int[numDocs][];
|
||||
|
|
Loading…
Reference in New Issue