Reduce the number of docvalues iterator created in the global ordinals fielddata (#43091)
Today the fielddata for global ordinals re-creates docvalues readers of each segment when building the iterator of a single segment. This is required because the lookup of global ordinals needs to access the docvalues's TermsEnum of each segment to retrieve the original terms. This also means that we need to create NxN (where N is the number of segment in the index) docvalues iterators each time we want to collect global ordinal values. This wasn't an issue in previous versions since docvalues readers are stateless before 6.0 so they are reused on each segment but now that docvalues are iterators we need to create a new instance each time we want to access the values. In order to avoid creating too many iterators this change splits the global ordinals fielddata in two classes, one that is used to cache a single instance per directory reader and one that is created from the cached instance that can be used by a single consumer. The latter creates the TermsEnum of each segment once and reuse them to create the segment's iterator. This prevents the creation of all TermsEnums each time we want to access the value of a single segment, hence reducing the number of docvalues iterator to create to Nx2 (one iterator and one lookup per segment).
This commit is contained in:
parent
1f1a035def
commit
c33d62adbc
|
@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.ordinals;
|
|||
|
||||
import org.apache.lucene.index.OrdinalMap;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LongValues;
|
||||
|
||||
|
@ -34,12 +35,12 @@ final class GlobalOrdinalMapping extends SortedSetDocValues {
|
|||
private final SortedSetDocValues values;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final LongValues mapping;
|
||||
private final SortedSetDocValues[] bytesValues;
|
||||
private final TermsEnum[] lookups;
|
||||
|
||||
GlobalOrdinalMapping(OrdinalMap ordinalMap, SortedSetDocValues[] bytesValues, int segmentIndex) {
|
||||
GlobalOrdinalMapping(OrdinalMap ordinalMap, SortedSetDocValues values, TermsEnum[] lookups, int segmentIndex) {
|
||||
super();
|
||||
this.values = bytesValues[segmentIndex];
|
||||
this.bytesValues = bytesValues;
|
||||
this.values = values;
|
||||
this.lookups = lookups;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.mapping = ordinalMap.getGlobalOrds(segmentIndex);
|
||||
}
|
||||
|
@ -72,7 +73,8 @@ final class GlobalOrdinalMapping extends SortedSetDocValues {
|
|||
public BytesRef lookupOrd(long globalOrd) throws IOException {
|
||||
final long segmentOrd = ordinalMap.getFirstSegmentOrd(globalOrd);
|
||||
int readerIndex = ordinalMap.getFirstSegmentNumber(globalOrd);
|
||||
return bytesValues[readerIndex].lookupOrd(segmentOrd);
|
||||
lookups[readerIndex].seekExact(segmentOrd);
|
||||
return lookups[readerIndex].term();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,53 +22,64 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.OrdinalMap;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.util.Accountable;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
|
||||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.UncheckedIOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* {@link IndexFieldData} base class for concrete global ordinals implementations.
|
||||
* Concrete implementation of {@link IndexOrdinalsFieldData} for global ordinals.
|
||||
* A single instance of this class should be used to cache global ordinals per {@link DirectoryReader}.
|
||||
* However {@link #loadGlobal(DirectoryReader)} always creates a new instance of {@link Consumer} from the cached
|
||||
* value in order to reuse the segment's {@link TermsEnum} that are needed to retrieve terms from global ordinals.
|
||||
* Each instance of {@link Consumer} uses a new set of {@link TermsEnum} that can be reused during the collection,
|
||||
* this is done to avoid creating all segment's {@link TermsEnum} each time we want to access the values of a single
|
||||
* segment.
|
||||
*/
|
||||
public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable {
|
||||
public final class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable {
|
||||
|
||||
private final String fieldName;
|
||||
private final long memorySizeInBytes;
|
||||
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final Atomic[] atomicReaders;
|
||||
private final AtomicOrdinalsFieldData[] segmentAfd;
|
||||
private final Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction;
|
||||
|
||||
|
||||
protected GlobalOrdinalsIndexFieldData(IndexSettings indexSettings, String fieldName, AtomicOrdinalsFieldData[] segmentAfd,
|
||||
OrdinalMap ordinalMap, long memorySizeInBytes, Function<SortedSetDocValues,
|
||||
ScriptDocValues<?>> scriptFunction) {
|
||||
protected GlobalOrdinalsIndexFieldData(IndexSettings indexSettings,
|
||||
String fieldName,
|
||||
AtomicOrdinalsFieldData[] segmentAfd,
|
||||
OrdinalMap ordinalMap,
|
||||
long memorySizeInBytes,
|
||||
Function<SortedSetDocValues, ScriptDocValues<?>> scriptFunction) {
|
||||
super(indexSettings);
|
||||
this.fieldName = fieldName;
|
||||
this.memorySizeInBytes = memorySizeInBytes;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.atomicReaders = new Atomic[segmentAfd.length];
|
||||
for (int i = 0; i < segmentAfd.length; i++) {
|
||||
atomicReaders[i] = new Atomic(segmentAfd[i], ordinalMap, i);
|
||||
}
|
||||
this.segmentAfd = segmentAfd;
|
||||
this.scriptFunction = scriptFunction;
|
||||
}
|
||||
|
||||
public IndexOrdinalsFieldData newConsumer(DirectoryReader source) {
|
||||
return new Consumer(source, indexSettings);
|
||||
}
|
||||
|
||||
@Override
|
||||
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
|
||||
return load(context);
|
||||
throw new IllegalStateException("loadDirect(LeafReaderContext) should not be called in this context");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -92,9 +103,7 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme
|
|||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
// no need to clear, because this is cached and cleared in AbstractBytesIndexFieldData
|
||||
}
|
||||
public void clear() {}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
|
@ -109,7 +118,7 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme
|
|||
|
||||
@Override
|
||||
public AtomicOrdinalsFieldData load(LeafReaderContext context) {
|
||||
return atomicReaders[context.ord];
|
||||
throw new IllegalStateException("load(LeafReaderContext) should not be called in this context");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -117,46 +126,108 @@ public class GlobalOrdinalsIndexFieldData extends AbstractIndexComponent impleme
|
|||
return ordinalMap;
|
||||
}
|
||||
|
||||
private final class Atomic extends AbstractAtomicOrdinalsFieldData {
|
||||
/**
|
||||
* A non-thread safe {@link IndexOrdinalsFieldData} for global ordinals that creates the {@link TermsEnum} of each
|
||||
* segment once and use them to provide a single lookup per segment.
|
||||
*/
|
||||
public class Consumer extends AbstractIndexComponent implements IndexOrdinalsFieldData, Accountable {
|
||||
private final DirectoryReader source;
|
||||
private TermsEnum[] lookups;
|
||||
|
||||
private final AtomicOrdinalsFieldData afd;
|
||||
private final OrdinalMap ordinalMap;
|
||||
private final int segmentIndex;
|
||||
Consumer(DirectoryReader source, IndexSettings settings) {
|
||||
super(settings);
|
||||
this.source = source;
|
||||
}
|
||||
|
||||
private Atomic(AtomicOrdinalsFieldData afd, OrdinalMap ordinalMap, int segmentIndex) {
|
||||
super(scriptFunction);
|
||||
this.afd = afd;
|
||||
this.ordinalMap = ordinalMap;
|
||||
this.segmentIndex = segmentIndex;
|
||||
/**
|
||||
* Lazy creation of the {@link TermsEnum} for each segment present in this reader
|
||||
*/
|
||||
private TermsEnum[] getOrLoadTermsEnums() {
|
||||
if (lookups == null) {
|
||||
lookups = new TermsEnum[segmentAfd.length];
|
||||
for (int i = 0; i < lookups.length; i++) {
|
||||
try {
|
||||
lookups[i] = segmentAfd[i].getOrdinalsValues().termsEnum();
|
||||
} catch (IOException e) {
|
||||
throw new UncheckedIOException("Failed to load terms enum", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
return lookups;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedSetDocValues getOrdinalsValues() {
|
||||
final SortedSetDocValues values = afd.getOrdinalsValues();
|
||||
if (values.getValueCount() == ordinalMap.getValueCount()) {
|
||||
// segment ordinals match global ordinals
|
||||
return values;
|
||||
}
|
||||
final SortedSetDocValues[] bytesValues = new SortedSetDocValues[atomicReaders.length];
|
||||
for (int i = 0; i < bytesValues.length; i++) {
|
||||
bytesValues[i] = atomicReaders[i].afd.getOrdinalsValues();
|
||||
}
|
||||
return new GlobalOrdinalMapping(ordinalMap, bytesValues, segmentIndex);
|
||||
public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
|
||||
return load(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFieldName() {
|
||||
return fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
|
||||
throw new UnsupportedOperationException("no global ordinals sorting yet");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return afd.ramBytesUsed();
|
||||
return memorySizeInBytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
return afd.getChildResources();
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
public AtomicOrdinalsFieldData load(LeafReaderContext context) {
|
||||
assert source.getReaderCacheHelper().getKey() == context.parent.reader().getReaderCacheHelper().getKey();
|
||||
return new AbstractAtomicOrdinalsFieldData(scriptFunction) {
|
||||
@Override
|
||||
public SortedSetDocValues getOrdinalsValues() {
|
||||
final SortedSetDocValues values = segmentAfd[context.ord].getOrdinalsValues();
|
||||
if (values.getValueCount() == ordinalMap.getValueCount()) {
|
||||
// segment ordinals match global ordinals
|
||||
return values;
|
||||
}
|
||||
final TermsEnum[] atomicLookups = getOrLoadTermsEnums();
|
||||
return new GlobalOrdinalMapping(ordinalMap, values, atomicLookups, context.ord);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long ramBytesUsed() {
|
||||
return segmentAfd[context.ord].ramBytesUsed();
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public Collection<Accountable> getChildResources() {
|
||||
return segmentAfd[context.ord].getChildResources();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdinalMap getOrdinalMap() {
|
||||
return ordinalMap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
|
|||
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
||||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -60,6 +61,17 @@ public abstract class AbstractIndexOrdinalsFieldData extends AbstractIndexFieldD
|
|||
|
||||
@Override
|
||||
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
||||
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
||||
if (fieldData instanceof GlobalOrdinalsIndexFieldData) {
|
||||
// we create a new instance of the cached value for each consumer in order
|
||||
// to avoid creating new TermsEnums for each segment in the cached instance
|
||||
return ((GlobalOrdinalsIndexFieldData) fieldData).newConsumer(indexReader);
|
||||
} else {
|
||||
return fieldData;
|
||||
}
|
||||
}
|
||||
|
||||
private IndexOrdinalsFieldData loadGlobalInternal(DirectoryReader indexReader) {
|
||||
if (indexReader.leaves().size() <= 1) {
|
||||
// ordinals are already global
|
||||
return this;
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.elasticsearch.index.fielddata.IndexFieldDataCache;
|
|||
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
|
||||
import org.elasticsearch.index.fielddata.ScriptDocValues;
|
||||
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsIndexFieldData;
|
||||
import org.elasticsearch.index.fielddata.ordinals.GlobalOrdinalsBuilder;
|
||||
import org.elasticsearch.indices.breaker.CircuitBreakerService;
|
||||
import org.elasticsearch.search.MultiValueMode;
|
||||
|
@ -92,6 +93,17 @@ public class SortedSetDVOrdinalsIndexFieldData extends DocValuesIndexFieldData i
|
|||
|
||||
@Override
|
||||
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
|
||||
IndexOrdinalsFieldData fieldData = loadGlobalInternal(indexReader);
|
||||
if (fieldData instanceof GlobalOrdinalsIndexFieldData) {
|
||||
// we create a new instance of the cached value for each consumer in order
|
||||
// to avoid creating new TermsEnums for each segment in the cached instance
|
||||
return ((GlobalOrdinalsIndexFieldData) fieldData).newConsumer(indexReader);
|
||||
} else {
|
||||
return fieldData;
|
||||
}
|
||||
}
|
||||
|
||||
private IndexOrdinalsFieldData loadGlobalInternal(DirectoryReader indexReader) {
|
||||
if (indexReader.leaves().size() <= 1) {
|
||||
// ordinals are already global
|
||||
return this;
|
||||
|
|
|
@ -465,7 +465,7 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI
|
|||
assertThat(topLevelReader.leaves().size(), equalTo(3));
|
||||
|
||||
// First segment
|
||||
assertThat(globalOrdinals, instanceOf(GlobalOrdinalsIndexFieldData.class));
|
||||
assertThat(globalOrdinals, instanceOf(GlobalOrdinalsIndexFieldData.Consumer.class));
|
||||
LeafReaderContext leaf = topLevelReader.leaves().get(0);
|
||||
AtomicOrdinalsFieldData afd = globalOrdinals.load(leaf);
|
||||
SortedSetDocValues values = afd.getOrdinalsValues();
|
||||
|
@ -590,7 +590,7 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI
|
|||
IndexOrdinalsFieldData ifd = getForField("string", "value", hasDocValues());
|
||||
IndexOrdinalsFieldData globalOrdinals = ifd.loadGlobal(topLevelReader);
|
||||
assertNotNull(globalOrdinals.getOrdinalMap());
|
||||
assertThat(ifd.loadGlobal(topLevelReader), sameInstance(globalOrdinals));
|
||||
assertThat(ifd.loadGlobal(topLevelReader).getOrdinalMap(), sameInstance(globalOrdinals.getOrdinalMap()));
|
||||
// 3 b/c 1 segment level caches and 1 top level cache
|
||||
// in case of doc values, we don't cache atomic FD, so only the top-level cache is there
|
||||
assertThat(indicesFieldDataCache.getCache().weight(), equalTo(hasDocValues() ? 1L : 4L));
|
||||
|
@ -602,7 +602,8 @@ public abstract class AbstractStringFieldDataTestCase extends AbstractFieldDataI
|
|||
break;
|
||||
}
|
||||
}
|
||||
assertThat(cachedInstance, sameInstance(globalOrdinals));
|
||||
assertNotSame(cachedInstance, globalOrdinals);
|
||||
assertThat(cachedInstance.getOrdinalMap(), sameInstance(globalOrdinals.getOrdinalMap()));
|
||||
topLevelReader.close();
|
||||
// Now only 3 segment level entries, only the toplevel reader has been closed, but the segment readers are still used by IW
|
||||
assertThat(indicesFieldDataCache.getCache().weight(), equalTo(hasDocValues() ? 0L : 3L));
|
||||
|
|
Loading…
Reference in New Issue