diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java b/core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java index 3b6b206c212..2bf2abac957 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java @@ -29,7 +29,7 @@ import java.io.IOException; /** * A {@link SortedSetDocValues} implementation that returns ordinals that are global. */ -public class GlobalOrdinalMapping extends SortedSetDocValues { +final class GlobalOrdinalMapping extends SortedSetDocValues { private final SortedSetDocValues values; private final OrdinalMap ordinalMap; @@ -49,7 +49,7 @@ public class GlobalOrdinalMapping extends SortedSetDocValues { return ordinalMap.getValueCount(); } - public final long getGlobalOrd(long segmentOrd) { + public long getGlobalOrd(long segmentOrd) { return mapping.get(segmentOrd); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSource.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSource.java index 045d8fa5ed3..db583d14ffd 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSource.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesSource.java @@ -25,7 +25,6 @@ import org.apache.lucene.search.LeafCollector; import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.fielddata.SortedBinaryDocValues; import org.elasticsearch.index.fielddata.SortedNumericDoubleValues; -import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping; import org.elasticsearch.search.aggregations.support.ValuesSource; import org.elasticsearch.search.sort.SortOrder; @@ -179,16 +178,10 @@ abstract class CompositeValuesSource>> 1; - BytesRef midVal = mapping.lookupOrd(mid); - int cmp = midVal.compareTo(key); - if (cmp < 0) { - low = mid + 1; - } else if (cmp > 0) { - high = mid - 1; - } else { - return mid; - } - } - return low-1; - } } /** diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java index a9d8841dd4d..55023eb263f 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java @@ -33,7 +33,6 @@ import org.elasticsearch.common.util.IntArray; import org.elasticsearch.common.util.LongHash; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.fielddata.AbstractSortedSetDocValues; -import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalMapping; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.search.aggregations.AggregatorFactories; @@ -50,6 +49,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.function.LongUnaryOperator; import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; @@ -295,9 +295,8 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr */ static class LowCardinality extends GlobalOrdinalsStringTermsAggregator { + private LongUnaryOperator mapping; private IntArray segmentDocCounts; - private SortedSetDocValues globalOrds; - private SortedSetDocValues segmentOrds; LowCardinality(String name, AggregatorFactories factories, @@ -321,14 +320,14 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - if (segmentOrds != null) { - mapSegmentCountsToGlobalCounts(); + if (mapping != null) { + mapSegmentCountsToGlobalCounts(mapping); } - globalOrds = valuesSource.globalOrdinalsValues(ctx); - segmentOrds = valuesSource.ordinalsValues(ctx); + final SortedSetDocValues segmentOrds = valuesSource.ordinalsValues(ctx); segmentDocCounts = context.bigArrays().grow(segmentDocCounts, 1 + segmentOrds.getValueCount()); assert sub == LeafBucketCollector.NO_OP_COLLECTOR; final SortedDocValues singleValues = DocValues.unwrapSingleton(segmentOrds); + mapping = valuesSource.globalOrdinalsMapping(ctx); if (singleValues != null) { return new LeafBucketCollectorBase(sub, segmentOrds) { @Override @@ -356,9 +355,10 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr } @Override - protected void doPostCollection() { - if (segmentOrds != null) { - mapSegmentCountsToGlobalCounts(); + protected void doPostCollection() throws IOException { + if (mapping != null) { + mapSegmentCountsToGlobalCounts(mapping); + mapping = null; } } @@ -367,16 +367,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr Releasables.close(segmentDocCounts); } - private void mapSegmentCountsToGlobalCounts() { - // There is no public method in Ordinals.Docs that allows for this mapping... - // This is the cleanest way I can think of so far - - GlobalOrdinalMapping mapping; - if (globalOrds.getValueCount() == segmentOrds.getValueCount()) { - mapping = null; - } else { - mapping = (GlobalOrdinalMapping) globalOrds; - } + private void mapSegmentCountsToGlobalCounts(LongUnaryOperator mapping) throws IOException { for (long i = 1; i < segmentDocCounts.size(); i++) { // We use set(...) here, because we need to reset the slow to 0. // segmentDocCounts get reused over the segments and otherwise counts would be too high. @@ -385,7 +376,7 @@ public class GlobalOrdinalsStringTermsAggregator extends AbstractStringTermsAggr continue; } final long ord = i - 1; // remember we do +1 when counting - final long globalOrd = mapping == null ? ord : mapping.getGlobalOrd(ord); + final long globalOrd = mapping.applyAsLong(ord); long bucketOrd = getBucketOrd(globalOrd); incrementBucketDocCount(bucketOrd, inc); } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java index 386a7da3e64..1027785c577 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorFactory.java @@ -49,6 +49,8 @@ import java.util.Map; public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory { private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermsAggregatorFactory.class)); + static Boolean REMAP_GLOBAL_ORDS, COLLECT_SEGMENT_ORDS; + private final BucketOrder order; private final IncludeExclude includeExclude; private final String executionHint; @@ -257,11 +259,13 @@ public class TermsAggregatorFactory extends ValuesSourceAggregatorFactory= 0) { + // the missing value exists in the segment, nothing to do + return segmentToGlobalOrd; + } else if (missingGlobalOrd >= 0) { + // the missing value exists in another segment, but not the current one + final long insertedSegmentOrd = -1L - missingSegmentOrd; + final long insertedGlobalOrd = missingGlobalOrd; + return segmentOrd -> { + if (insertedSegmentOrd == segmentOrd) { + return insertedGlobalOrd; + } else if (insertedSegmentOrd > segmentOrd) { + return segmentToGlobalOrd.applyAsLong(segmentOrd); + } else { + return segmentToGlobalOrd.applyAsLong(segmentOrd - 1); + } + }; + } else { + // the missing value exists neither in this segment nor in another segment + final long insertedSegmentOrd = -1L - missingSegmentOrd; + final long insertedGlobalOrd = -1L - missingGlobalOrd; + return segmentOrd -> { + if (insertedSegmentOrd == segmentOrd) { + return insertedGlobalOrd; + } else if (insertedSegmentOrd > segmentOrd) { + return segmentToGlobalOrd.applyAsLong(segmentOrd); + } else { + return 1 + segmentToGlobalOrd.applyAsLong(segmentOrd - 1); + } + }; + } + } + public static ValuesSource.GeoPoint replaceMissing(final ValuesSource.GeoPoint valuesSource, final GeoPoint missing) { return new ValuesSource.GeoPoint() { diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java index 5a69be8108a..b5a109e89cb 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/support/ValuesSource.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.IndexSearcher; @@ -47,6 +48,7 @@ import org.elasticsearch.search.aggregations.support.values.ScriptDoubleValues; import org.elasticsearch.search.aggregations.support.values.ScriptLongValues; import java.io.IOException; +import java.util.function.LongUnaryOperator; public abstract class ValuesSource { @@ -90,6 +92,11 @@ public abstract class ValuesSource { return org.elasticsearch.index.fielddata.FieldData.emptySortedBinary(); } + @Override + public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException { + return LongUnaryOperator.identity(); + } + }; @Override @@ -105,6 +112,10 @@ public abstract class ValuesSource { public abstract SortedSetDocValues globalOrdinalsValues(LeafReaderContext context) throws IOException; + /** Returns a mapping from segment ordinals to global ordinals. */ + public abstract LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) + throws IOException; + public long globalMaxOrd(IndexSearcher indexSearcher) throws IOException { IndexReader indexReader = indexSearcher.getIndexReader(); if (indexReader.leaves().isEmpty()) { @@ -142,6 +153,18 @@ public abstract class ValuesSource { final AtomicOrdinalsFieldData atomicFieldData = global.load(context); return atomicFieldData.getOrdinalsValues(); } + + @Override + public LongUnaryOperator globalOrdinalsMapping(LeafReaderContext context) throws IOException { + final IndexOrdinalsFieldData global = indexFieldData.loadGlobal((DirectoryReader)context.parent.reader()); + final OrdinalMap map = global.getOrdinalMap(); + if (map == null) { + // segments and global ordinals are the same + return LongUnaryOperator.identity(); + } + final org.apache.lucene.util.LongValues segmentToGlobalOrd = map.getGlobalOrds(context.ord); + return segmentToGlobalOrd::get; + } } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsIT.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java similarity index 98% rename from core/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsIT.java rename to core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java index b27a509b8ec..3b7e686ef4d 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/StringTermsIT.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsIT.java @@ -16,7 +16,7 @@ * specific language governing permissions and limitations * under the License. */ -package org.elasticsearch.search.aggregations.bucket; +package org.elasticsearch.search.aggregations.bucket.terms; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -34,9 +34,11 @@ import org.elasticsearch.search.aggregations.AggregationExecutionException; import org.elasticsearch.search.aggregations.AggregationTestScriptsPlugin; import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode; import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.bucket.AbstractTermsTestCase; import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; import org.elasticsearch.search.aggregations.bucket.terms.Terms; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory; import org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; import org.elasticsearch.search.aggregations.metrics.avg.Avg; import org.elasticsearch.search.aggregations.metrics.stats.Stats; @@ -44,6 +46,8 @@ import org.elasticsearch.search.aggregations.metrics.stats.extended.ExtendedStat import org.elasticsearch.search.aggregations.metrics.sum.Sum; import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; import java.io.IOException; import java.util.ArrayList; @@ -84,6 +88,18 @@ public class StringTermsIT extends AbstractTermsTestCase { return Collections.singleton(CustomScriptPlugin.class); } + @Before + public void randomizeOptimizations() { + TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = false;randomBoolean(); + TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean(); + } + + @After + public void resetOptimizations() { + TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null; + TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null; + } + public static class CustomScriptPlugin extends AggregationTestScriptsPlugin { @Override diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java index 266b1a6e50f..47fccbc83c4 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/TermsAggregatorTests.java @@ -38,6 +38,7 @@ import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.network.InetAddresses; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.MockBigArrays; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.IpFieldMapper; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; @@ -68,7 +69,27 @@ import java.util.function.Function; import static org.hamcrest.Matchers.instanceOf; public class TermsAggregatorTests extends AggregatorTestCase { + + private boolean randomizeAggregatorImpl = true; + + @Override + protected A createAggregator(AggregationBuilder aggregationBuilder, + IndexSearcher indexSearcher, IndexSettings indexSettings, MappedFieldType... fieldTypes) throws IOException { + try { + if (randomizeAggregatorImpl) { + TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = randomBoolean(); + TermsAggregatorFactory.REMAP_GLOBAL_ORDS = randomBoolean(); + } + return super.createAggregator(aggregationBuilder, indexSearcher, indexSettings, fieldTypes); + } finally { + TermsAggregatorFactory.COLLECT_SEGMENT_ORDS = null; + TermsAggregatorFactory.REMAP_GLOBAL_ORDS = null; + } + } + public void testGlobalOrdinalsExecutionHint() throws Exception { + randomizeAggregatorImpl = false; + Directory directory = newDirectory(); RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); indexWriter.close(); diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java index 568b8e7996f..fb18cd99032 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/support/MissingValuesTests.java @@ -38,6 +38,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import java.util.function.LongUnaryOperator; public class MissingValuesTests extends ESTestCase { @@ -111,7 +112,7 @@ public class MissingValuesTests extends ESTestCase { ords[i][j] = TestUtil.nextInt(random(), ords[i][j], maxOrd - 1); } } - SortedSetDocValues asRandomAccessOrds = new AbstractSortedSetDocValues() { + SortedSetDocValues asSortedSet = new AbstractSortedSetDocValues() { int doc = -1; int i; @@ -147,7 +148,7 @@ public class MissingValuesTests extends ESTestCase { final BytesRef missingMissing = new BytesRef(RandomStrings.randomAsciiOfLength(random(), 5)); for (BytesRef missing : Arrays.asList(existingMissing, missingMissing)) { - SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asRandomAccessOrds, missing); + SortedSetDocValues withMissingReplaced = MissingValues.replaceMissing(asSortedSet, missing); if (valueSet.contains(missing)) { assertEquals(values.length, withMissingReplaced.getValueCount()); } else { @@ -169,6 +170,84 @@ public class MissingValuesTests extends ESTestCase { } } + public void testGlobalMapping() throws IOException { + final int numOrds = TestUtil.nextInt(random(), 1, 10); + final int numGlobalOrds = TestUtil.nextInt(random(), numOrds, numOrds + 3); + + final Set valueSet = new HashSet<>(); + while (valueSet.size() < numOrds) { + valueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5))); + } + final BytesRef[] values = valueSet.toArray(new BytesRef[0]); + Arrays.sort(values); + + final Set globalValueSet = new HashSet<>(valueSet); + while (globalValueSet.size() < numGlobalOrds) { + globalValueSet.add(new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5))); + } + final BytesRef[] globalValues = globalValueSet.toArray(new BytesRef[0]); + Arrays.sort(globalValues); + + // exists in the current segment + BytesRef missing = RandomPicks.randomFrom(random(), values); + doTestGlobalMapping(values, globalValues, missing); + + // missing in all segments + do { + missing = new BytesRef(RandomStrings.randomAsciiLettersOfLength(random(), 5)); + } while (globalValueSet.contains(missing)); + doTestGlobalMapping(values, globalValues, missing); + + if (globalValueSet.size() > valueSet.size()) { + // exists in other segments only + Set other = new HashSet<>(globalValueSet); + other.removeAll(valueSet); + missing = RandomPicks.randomFrom(random(), other.toArray(new BytesRef[0])); + doTestGlobalMapping(values, globalValues, missing); + } + } + + private void doTestGlobalMapping(BytesRef[] values, BytesRef[] globalValues, BytesRef missing) throws IOException { + LongUnaryOperator segmentToGlobalOrd = segmentOrd -> Arrays.binarySearch(globalValues, values[Math.toIntExact(segmentOrd)]); + SortedSetDocValues sortedValues = asOrds(values); + SortedSetDocValues sortedGlobalValues = asOrds(globalValues); + + LongUnaryOperator withMissingSegmentToGlobalOrd = MissingValues.getGlobalMapping( + sortedValues, sortedGlobalValues, segmentToGlobalOrd, missing); + SortedSetDocValues withMissingValues = MissingValues.replaceMissing(sortedValues, missing); + SortedSetDocValues withMissingGlobalValues = MissingValues.replaceMissing(sortedGlobalValues, missing); + + for (long segmentOrd = 0; segmentOrd < withMissingValues.getValueCount(); ++segmentOrd) { + long expectedGlobalOrd = withMissingSegmentToGlobalOrd.applyAsLong(segmentOrd); + assertEquals(withMissingValues.lookupOrd(segmentOrd), withMissingGlobalValues.lookupOrd(expectedGlobalOrd)); + } + } + + private static SortedSetDocValues asOrds(BytesRef[] values) { + return new AbstractSortedSetDocValues() { + + @Override + public boolean advanceExact(int target) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public long nextOrd() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public BytesRef lookupOrd(long ord) throws IOException { + return values[Math.toIntExact(ord)]; + } + + @Override + public long getValueCount() { + return values.length; + } + }; + } + public void testMissingLongs() throws IOException { final int numDocs = TestUtil.nextInt(random(), 1, 100); final int[][] values = new int[numDocs][];