LUCENE-8557: LeafReader.getFieldInfos should always return the same instance

MemoryIndex: compute/cache up-front
Solr Collapse/Expand with top_fc: compute/cache up-front
Json Facets numerics / hash DV: use the cached fieldInfos on SolrIndexSearcher
SolrIndexSearcher: move the cached FieldInfos to SlowCompositeReaderWrapper
This commit is contained in:
David Smiley 2018-11-06 14:45:32 -05:00
parent b230543b47
commit d0cd4245bd
15 changed files with 95 additions and 135 deletions
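In practical terms, the contract this commit establishes is plain reference equality: calling `LeafReader.getFieldInfos()` twice must hand back the same cached object. Below is a minimal, hypothetical sketch of that expectation against `MemoryIndex` (one of the readers touched by this commit); the class name, field name, and sample text are illustrative only, and the check mirrors the one added to `TestUtil.checkReader` further down in this diff.

```java
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.memory.MemoryIndex;

public class FieldInfosCachingCheck {
  public static void main(String[] args) {
    // Build a tiny single-field in-memory index (illustrative only).
    MemoryIndex mi = new MemoryIndex();
    mi.addField("title", "hello world", new StandardAnalyzer());

    // MemoryIndex's reader is a LeafReader; with this commit it pre-creates
    // its FieldInfos in the constructor instead of rebuilding them per call.
    LeafReader reader = (LeafReader) mi.createSearcher().getIndexReader();

    // The documented expectation: repeated calls return the same cached instance.
    if (reader.getFieldInfos() != reader.getFieldInfos()) {
      throw new AssertionError("getFieldInfos() should return the same instance");
    }
  }
}
```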

View File

@@ -252,6 +252,10 @@ Improvements:
first 4 are index dimensions defining the bounding box of the Triangle and the
remaining 3 data dimensions define the vertices of the triangle. (Nick Knize)
* LUCENE-8557: LeafReader.getFieldInfos is now documented and tested that it ought to return
the same cached instance. MemoryIndex's impl now pre-creates the FieldInfos instead of
re-calculating a new instance each time. (Tim Underwood, David Smiley)
Other:
* LUCENE-8523: Correct typo in JapaneseNumberFilterFactory javadocs (Ankush Jhalani)

View File

@@ -38,6 +38,10 @@ import org.apache.lucene.util.ArrayUtil;
* @lucene.experimental
*/
public class FieldInfos implements Iterable<FieldInfo> {
/** An instance without any fields. */
public final static FieldInfos EMPTY = new FieldInfos(new FieldInfo[0]);
private final boolean hasFreq;
private final boolean hasProx;
private final boolean hasPayloads;

View File

@@ -206,6 +206,10 @@ public abstract class LeafReader extends IndexReader {
/**
* Get the {@link FieldInfos} describing all fields in
* this reader.
*
* Note: Implementations should cache the FieldInfos
* instance returned by this method such that subsequent
* calls to this method return the same instance.
* @lucene.experimental
*/
public abstract FieldInfos getFieldInfos();
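
For illustration, here is a minimal sketch (not part of this patch) of how an implementation can satisfy the newly documented contract: compute the FieldInfos once and return that same instance thereafter. The wrapper class name is hypothetical; it simply mirrors the pattern MemoryIndex and SlowCompositeReaderWrapper adopt elsewhere in this commit.

```java
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;

/** Hypothetical wrapper showing the documented caching contract. */
class CachingFieldInfosReader extends FilterLeafReader {
  private final FieldInfos fieldInfos; // computed once in the constructor

  CachingFieldInfosReader(LeafReader in) {
    super(in);
    this.fieldInfos = in.getFieldInfos();
  }

  @Override
  public FieldInfos getFieldInfos() {
    return fieldInfos; // same instance on every call
  }

  @Override
  public CacheHelper getCoreCacheHelper() {
    return in.getCoreCacheHelper();
  }

  @Override
  public CacheHelper getReaderCacheHelper() {
    return in.getReaderCacheHelper();
  }
}
```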

View File

@@ -132,7 +132,7 @@ public class TestPendingDeletes extends LuceneTestCase {
SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1);
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
FieldInfos fieldInfos = FieldInfos.EMPTY;
si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
PendingDeletes deletes = newPendingDeletes(commitInfo);
for (int i = 0; i < 3; i++) {

View File

@@ -47,7 +47,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
private int[][] positions = new int[testTerms.length][];
private Directory dir;
private SegmentCommitInfo seg;
private FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
private FieldInfos fieldInfos = FieldInfos.EMPTY;
private static int TERM_FREQ = 3;
private static class TestToken implements Comparable<TestToken> {

View File

@@ -1142,12 +1142,20 @@ public class MemoryIndex {
private final class MemoryIndexReader extends LeafReader {
private final MemoryFields memoryFields = new MemoryFields(fields);
private final FieldInfos fieldInfos;
private MemoryIndexReader() {
super(); // avoid as much superclass baggage as possible
FieldInfo[] fieldInfosArr = new FieldInfo[fields.size()];
int i = 0;
for (Info info : fields.values()) {
info.prepareDocValuesAndPointValues();
fieldInfosArr[i++] = info.fieldInfo;
}
fieldInfos = new FieldInfos(fieldInfosArr);
}
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
@@ -1171,12 +1179,7 @@ public class MemoryIndex {
@Override
public FieldInfos getFieldInfos() {
FieldInfo[] fieldInfos = new FieldInfo[fields.size()];
int i = 0;
for (Info info : fields.values()) {
fieldInfos[i++] = info.fieldInfo;
}
return new FieldInfos(fieldInfos);
return fieldInfos;
}
@Override

View File

@@ -22,7 +22,6 @@ import java.util.Random;
import junit.framework.Assert;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@@ -207,7 +206,7 @@ public class QueryUtils {
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
return FieldInfos.EMPTY;
}
final Bits liveDocs = new Bits.MatchNoBits(maxDoc);

View File

@@ -335,6 +335,11 @@ public final class TestUtil {
}
assert Accountables.toString(sr) != null;
}
// FieldInfos should be cached at the reader and always return the same instance
if (reader.getFieldInfos() != reader.getFieldInfos()) {
throw new RuntimeException("getFieldInfos() returned different instances for class: "+reader.getClass());
}
}
// used by TestUtil.checkReader to check some things really unrelated to the index,

View File

@@ -30,6 +30,7 @@ Jetty 9.4.11.v20180605
Upgrade Notes
----------------------
* SOLR-12767: The min_rf parameter is no longer needed, Solr will always return the achieved replication factor (rf)
in the response header.
@@ -187,6 +188,9 @@ Improvements
* SOLR-12699: Make contrib/ltr LTRScoringModel immutable and cache its hashCode.
(Stanislav Livotov, Edward Ribeiro, Christine Poerschke)
* LUCENE-8557: Some internal LeafReader.getFieldInfos implementations were being re-computed on-demand instead of
once up front leading to some slowdowns in places like JSON Facets and field collapsing. (Tim Underwood, David Smiley)
================== 7.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@@ -19,7 +19,6 @@ package org.apache.solr.handler.component;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -33,10 +32,6 @@ import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
@@ -84,7 +79,6 @@ import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpecParsing;
import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
@@ -214,9 +208,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(fieldType instanceof StrField) {
//Get The Top Level SortedDocValues
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
@@ -384,9 +377,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate:
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
values = uninvertingReader.getSortedDocValues(field);
} else {
values = DocValues.getSorted(reader, field);
@@ -764,65 +756,4 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
return Category.QUERY;
}
// this reader alters the content of the given reader so it should not
// delegate the caching stuff
private static class ReaderWrapper extends FilterLeafReader {
private String field;
public ReaderWrapper(LeafReader leafReader, String field) {
super(leafReader);
this.field = field;
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
public FieldInfos getFieldInfos() {
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
List<FieldInfo> newInfos = new ArrayList<>();
while(it.hasNext()) {
FieldInfo fieldInfo = it.next();
if(fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
fieldInfo.getPointDataDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
return infos;
}
// NOTE: delegating the caches is wrong here as we are altering the content
// of the reader, this should ONLY be used under an uninvertingreader which
// will restore doc values back using uninversion, otherwise all sorts of
// crazy things could happen.
@Override
public CacheHelper getCoreCacheHelper() {
return in.getCoreCacheHelper();
}
@Override
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
}
}

View File

@@ -48,6 +48,11 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
private final CompositeReader in;
private final LeafMetaData metaData;
// Cached copy of FieldInfos to prevent it from being re-created on each
// getFieldInfos call. Most (if not all) other LeafReader implementations
// also have a cached FieldInfos instance so this is consistent. SOLR-12878
private final FieldInfos fieldInfos;
final Map<String,Terms> cachedTerms = new ConcurrentHashMap<>();
// TODO: consider ConcurrentHashMap ?
@@ -86,6 +91,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
}
metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null);
}
fieldInfos = FieldInfos.getMergedFieldInfos(in);
}
@Override
@@ -273,8 +279,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
@Override
public FieldInfos getFieldInfos() {
ensureOpen();
return FieldInfos.getMergedFieldInfos(in); // TODO cache?
return fieldInfos;
}
@Override

View File

@@ -387,13 +387,55 @@ public class CollapsingQParserPlugin extends QParserPlugin {
}
/**
* This forces the use of the top level field cache for String fields.
* This is VERY fast at query time but slower to warm and causes insanity.
*/
public static LeafReader getTopFieldCacheReader(SolrIndexSearcher searcher, String collapseField) {
return UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
}
private static class ReaderWrapper extends FilterLeafReader {
private String field;
private final FieldInfos fieldInfos;
public ReaderWrapper(LeafReader leafReader, String field) {
ReaderWrapper(LeafReader leafReader, String field) {
super(leafReader);
this.field = field;
// TODO can we just do "field" and not bother with the other fields?
List<FieldInfo> newInfos = new ArrayList<>(in.getFieldInfos().size());
for (FieldInfo fieldInfo : in.getFieldInfos()) {
if (fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
fieldInfo.getPointDataDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
this.fieldInfos = infos;
}
public FieldInfos getFieldInfos() {
return fieldInfos;
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
// NOTE: delegating the caches is wrong here as we are altering the content
@@ -410,37 +452,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
public CacheHelper getReaderCacheHelper() {
return in.getReaderCacheHelper();
}
public SortedDocValues getSortedDocValues(String field) {
return null;
}
public FieldInfos getFieldInfos() {
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
List<FieldInfo> newInfos = new ArrayList();
while(it.hasNext()) {
FieldInfo fieldInfo = it.next();
if(fieldInfo.name.equals(field)) {
FieldInfo f = new FieldInfo(fieldInfo.name,
fieldInfo.number,
fieldInfo.hasVectors(),
fieldInfo.hasNorms(),
fieldInfo.hasPayloads(),
fieldInfo.getIndexOptions(),
DocValuesType.NONE,
fieldInfo.getDocValuesGen(),
fieldInfo.attributes(),
0, 0, 0, fieldInfo.isSoftDeletesField());
newInfos.add(f);
} else {
newInfos.add(fieldInfo);
}
}
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
return infos;
}
}
@@ -1255,14 +1266,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
if(collapseFieldType instanceof StrField) {
if(HINT_TOP_FC.equals(hint)) {
/*
* This hint forces the use of the top level field cache for String fields.
* This is VERY fast at query time but slower to warm and causes insanity.
*/
@SuppressWarnings("resource") final LeafReader uninvertingReader = UninvertingReader.wrap(
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource")
final LeafReader uninvertingReader = getTopFieldCacheReader(searcher, collapseField);
docValuesProducer = new EmptyDocValuesProducer() {
@Override

View File

@@ -126,8 +126,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
// list of all caches associated with this searcher.
private final SolrCache[] cacheList;
private final FieldInfos fieldInfos;
private DirectoryFactory directoryFactory;
private final LeafReader leafReader;
@@ -263,7 +261,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
this.queryResultMaxDocsCached = solrConfig.queryResultMaxDocsCached;
this.useFilterForSortedQuery = solrConfig.useFilterForSortedQuery;
this.fieldInfos = leafReader.getFieldInfos();
this.docFetcher = new SolrDocumentFetcher(this, solrConfig, enableCache);
this.cachingEnabled = enableCache;
@@ -319,7 +316,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
public FieldInfos getFieldInfos() {
return fieldInfos;
return leafReader.getFieldInfos();
}
/*
@@ -494,7 +491,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
* Returns a collection of all field names the index reader knows about.
*/
public Iterable<String> getFieldNames() {
return Iterables.transform(fieldInfos, fieldInfo -> fieldInfo.name);
return Iterables.transform(getFieldInfos(), fieldInfo -> fieldInfo.name);
}
public SolrCache<Query,DocSet> getFilterCache() {

View File

@@ -199,7 +199,7 @@ class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
getClass()+" doesn't support prefix"); // yet, but it could
}
FieldInfo fieldInfo = fcontext.searcher.getSlowAtomicReader().getFieldInfos().fieldInfo(sf.getName());
FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
if (fieldInfo != null &&
fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
fieldInfo.getDocValuesType() != DocValuesType.SORTED &&

View File

@@ -22,7 +22,6 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@@ -391,7 +390,7 @@ public class TestDocSet extends LuceneTestCase {
@Override
public FieldInfos getFieldInfos() {
return new FieldInfos(new FieldInfo[0]);
return FieldInfos.EMPTY;
}
@Override