diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java b/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java index 2bf2abac957..33073dff251 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalMapping.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.ordinals; import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongValues; @@ -34,12 +35,12 @@ final class GlobalOrdinalMapping extends SortedSetDocValues { private final SortedSetDocValues values; private final OrdinalMap ordinalMap; private final LongValues mapping; - private final SortedSetDocValues[] bytesValues; + private final TermsEnum[] lookups; - GlobalOrdinalMapping(OrdinalMap ordinalMap, SortedSetDocValues[] bytesValues, int segmentIndex) { + GlobalOrdinalMapping(OrdinalMap ordinalMap, SortedSetDocValues values, TermsEnum[] lookups, int segmentIndex) { super(); - this.values = bytesValues[segmentIndex]; - this.bytesValues = bytesValues; + this.values = values; + this.lookups = lookups; this.ordinalMap = ordinalMap; this.mapping = ordinalMap.getGlobalOrds(segmentIndex); } @@ -72,7 +73,8 @@ final class GlobalOrdinalMapping extends SortedSetDocValues { public BytesRef lookupOrd(long globalOrd) throws IOException { final long segmentOrd = ordinalMap.getFirstSegmentOrd(globalOrd); int readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd); - return bytesValues[readerIndex].lookupOrd(segmentOrd); + lookups[readerIndex].seekExact(segmentOrd); + return lookups[readerIndex].term(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java index a869e5a40d4..b3c2b4c9259 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java @@ -22,53 +22,64 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.SortField; import org.apache.lucene.util.Accountable; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData; -import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData; import org.elasticsearch.search.MultiValueMode; +import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Collection; import java.util.Collections; import java.util.function.Function; /** - * {@link IndexFieldData} base class for concrete global ordinals implementations. + * Concrete implementation of {@link IndexOrdinalsFieldData} for global ordinals. + * A single instance of this class should be used to cache global ordinals per {@link DirectoryReader}. + * However {@link #loadGlobal(DirectoryReader)} always creates a new instance of {@link Consumer} from the cached + * value in order to reuse the segment's {@link TermsEnum} that are needed to retrieve terms from global ordinals. + * Each instance of {@link Consumer} uses a new set of {@link TermsEnum} that can be reused during the collection, + * this is done to avoid creating all segment's {@link TermsEnum} each time we want to access the values of a single + * segment. */ -public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable { +public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable { private final String fieldName; private final long memorySizeInBytes; private final OrdinalMap ordinalMap; - private final Atomic[] atomicReaders; + private final AtomicOrdinalsFieldData[] segmentAfd; private final Function> scriptFunction; - - protected GlobalOrdinalsIndexFieldData(IndexSettings indexSettings, String fieldName, AtomicOrdinalsFieldData[] segmentAfd, - OrdinalMap ordinalMap, long memorySizeInBytes, Function> scriptFunction) { + protected GlobalOrdinalsIndexFieldData(IndexSettings indexSettings, + String fieldName, + AtomicOrdinalsFieldData[] segmentAfd, + OrdinalMap ordinalMap, + long memorySizeInBytes, + Function> scriptFunction) { super(indexSettings); this.fieldName = fieldName; this.memorySizeInBytes = memorySizeInBytes; this.ordinalMap = ordinalMap; - this.atomicReaders = new Atomic[segmentAfd.length]; - for (int i = 0; i < segmentAfd.length; i++) { - atomicReaders[i] = new Atomic(segmentAfd[i], ordinalMap, i); - } + this.segmentAfd = segmentAfd; this.scriptFunction = scriptFunction; } + public IndexOrdinalsFieldData newConsumer(DirectoryReader source) { + return new Consumer(source, indexSettings); + } + @Override public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception { - return load(context); + throw new IllegalStateException("loadDirect(LeafReaderContext) should not be called in this context"); } @Override @@ -92,9 +103,7 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme } @Override - public void clear() { - // no need to clear, because this is cached and cleared in AbstractBytesIndexFieldData - } + public void clear() {} @Override public long ramBytesUsed() { @@ -109,7 +118,7 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme @Override public AtomicOrdinalsFieldData load(LeafReaderContext context) { - return atomicReaders[context.ord]; + throw new IllegalStateException("load(LeafReaderContext) should not be called in this context"); } @Override @@ -117,46 +126,108 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme return ordinalMap; } - private final class Atomic extends AbstractAtomicOrdinalsFieldData { + /** + * A non-thread safe {@link IndexOrdinalsFieldData} for global ordinals that creates the {@link TermsEnum} of each + * segment once and use them to provide a single lookup per segment. + */ + public class Consumer extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable { + private final DirectoryReader source; + private TermsEnum[] lookups; - private final AtomicOrdinalsFieldData afd; - private final OrdinalMap ordinalMap; - private final int segmentIndex; + Consumer(DirectoryReader source, IndexSettings settings) { + super(settings); + this.source = source; + } - private Atomic(AtomicOrdinalsFieldData afd, OrdinalMap ordinalMap, int segmentIndex) { - super(scriptFunction); - this.afd = afd; - this.ordinalMap = ordinalMap; - this.segmentIndex = segmentIndex; + /** + * Lazy creation of the {@link TermsEnum} for each segment present in this reader + */ + private TermsEnum[] getOrLoadTermsEnums() { + if (lookups == null) { + lookups = new TermsEnum[segmentAfd.length]; + for (int i = 0; i < lookups.length; i++) { + try { + lookups[i] = segmentAfd[i].getOrdinalsValues().termsEnum(); + } catch (IOException e) { + throw new UncheckedIOException("Failed to load terms enum", e); + } + } + } + return lookups; } @Override - public SortedSetDocValues getOrdinalsValues() { - final SortedSetDocValues values = afd.getOrdinalsValues(); - if (values.getValueCount() == ordinalMap.getValueCount()) { - // segment ordinals match global ordinals - return values; - } - final SortedSetDocValues[] bytesValues = new SortedSetDocValues[atomicReaders.length]; - for (int i = 0; i < bytesValues.length; i++) { - bytesValues[i] = atomicReaders[i].afd.getOrdinalsValues(); - } - return new GlobalOrdinalMapping(ordinalMap, bytesValues, segmentIndex); + public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception { + return load(context); } + @Override + public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) { + return this; + } + + @Override + public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception { + return this; + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) { + throw new UnsupportedOperationException("no global ordinals sorting yet"); + } + + @Override + public void clear() {} + @Override public long ramBytesUsed() { - return afd.ramBytesUsed(); + return memorySizeInBytes; } @Override public Collection getChildResources() { - return afd.getChildResources(); + return Collections.emptyList(); } @Override - public void close() { + public AtomicOrdinalsFieldData load(LeafReaderContext context) { + assert source.getReaderCacheHelper().getKey() == context.parent.reader().getReaderCacheHelper().getKey(); + return new AbstractAtomicOrdinalsFieldData(scriptFunction) { + @Override + public SortedSetDocValues getOrdinalsValues() { + final SortedSetDocValues values = segmentAfd[context.ord].getOrdinalsValues(); + if (values.getValueCount() == ordinalMap.getValueCount()) { + // segment ordinals match global ordinals + return values; + } + final TermsEnum[] atomicLookups = getOrLoadTermsEnums(); + return new GlobalOrdinalMapping(ordinalMap, values, atomicLookups, context.ord); + } + + @Override + public long ramBytesUsed() { + return segmentAfd[context.ord].ramBytesUsed(); + } + + + @Override + public Collection getChildResources() { + return segmentAfd[context.ord].getChildResources(); + } + + @Override + public void close() {} + }; } + @Override + public OrdinalMap getOrdinalMap() { + return ordinalMap; + } } } diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java index 0dc0de838a3..a7d63828138 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java @@ -32,6 +32,7 @@ import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData; import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder; +import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData; import org.elasticsearch.indices.breaker.CircuitBreakerService; import java.io.IOException; @@ -60,6 +61,17 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD @Override public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) { + IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader); + if (fieldData instanceof GlobalOrdinalsIndexFieldData) { + // we create a new instance of the cached value for each consumer in order + // to avoid creating new TermsEnums for each segment in the cached instance + return ((GlobalOrdinalsIndexFieldData) fieldData).newConsumer(indexReader); + } else { + return fieldData; + } + } + + private IndexOrdinalsFieldData loadGlobalInternal(DirectoryReader indexReader) { if (indexReader.leaves().size() <= 1) { // ordinals are already global return this; diff --git a/server/src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVOrdinalsIndexFieldData.java b/server/src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVOrdinalsIndexFieldData.java index b71dcc75934..836c332a340 100644 --- a/server/src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVOrdinalsIndexFieldData.java +++ b/server/src/main/java/org/elasticsearch/index/fielddata/plain/SortedSetDVOrdinalsIndexFieldData.java @@ -37,6 +37,7 @@ import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData; import org.elasticsearch.index.fielddata.ScriptDocValues; import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource; +import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData; import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.search.MultiValueMode; @@ -92,6 +93,17 @@ public class SortedSetDVOrdinalsIndexFieldData extends DocValuesIndexFieldData i @Override public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) { + IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader); + if (fieldData instanceof GlobalOrdinalsIndexFieldData) { + // we create a new instance of the cached value for each consumer in order + // to avoid creating new TermsEnums for each segment in the cached instance + return ((GlobalOrdinalsIndexFieldData) fieldData).newConsumer(indexReader); + } else { + return fieldData; + } + } + + private IndexOrdinalsFieldData loadGlobalInternal(DirectoryReader indexReader) { if (indexReader.leaves().size() <= 1) { // ordinals are already global return this; diff --git a/server/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTestCase.java b/server/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTestCase.java index 21d84203f6d..1539a800f2a 100644 --- a/server/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTestCase.java +++ b/server/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTestCase.java @@ -465,7 +465,7 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI assertThat(topLevelReader.leaves().size(), equalTo(3)); // First segment - assertThat(globalOrdinals, instanceOf(GlobalOrdinalsIndexFieldData.class)); + assertThat(globalOrdinals, instanceOf(GlobalOrdinalsIndexFieldData.Consumer.class)); LeafReaderContext leaf = topLevelReader.leaves().get(0); AtomicOrdinalsFieldData afd = globalOrdinals.load(leaf); SortedSetDocValues values = afd.getOrdinalsValues(); @@ -590,7 +590,7 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI IndexOrdinalsFieldData ifd = getForField("string", "value", hasDocValues()); IndexOrdinalsFieldData globalOrdinals = ifd.loadGlobal(topLevelReader); assertNotNull(globalOrdinals.getOrdinalMap()); - assertThat(ifd.loadGlobal(topLevelReader), sameInstance(globalOrdinals)); + assertThat(ifd.loadGlobal(topLevelReader).getOrdinalMap(), sameInstance(globalOrdinals.getOrdinalMap())); // 3 b/c 1 segment level caches and 1 top level cache // in case of doc values, we don't cache atomic FD, so only the top-level cache is there assertThat(indicesFieldDataCache.getCache().weight(), equalTo(hasDocValues() ? 1L : 4L)); @@ -602,7 +602,8 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI break; } } - assertThat(cachedInstance, sameInstance(globalOrdinals)); + assertNotSame(cachedInstance, globalOrdinals); + assertThat(cachedInstance.getOrdinalMap(), sameInstance(globalOrdinals.getOrdinalMap())); topLevelReader.close(); // Now only 3 segment level entries, only the toplevel reader has been closed, but the segment readers are still used by IW assertThat(indicesFieldDataCache.getCache().weight(), equalTo(hasDocValues() ? 0L : 3L));