mirror of https://github.com/apache/lucene.git
LUCENE-8557: LeafReader.getFieldInfos should always return the same instance
MemoryIndex: compute/cache up-front Solr Collapse/Expand with top_fc: compute/cache up-front Json Facets numerics / hash DV: use the cached fieldInfos on SolrIndexSearcher SolrIndexSearcher: move the cached FieldInfos to SlowCompositeReaderWrapper
This commit is contained in:
parent
b230543b47
commit
d0cd4245bd
|
@ -252,6 +252,10 @@ Improvements:
|
|||
first 4 are index dimensions defining the bounding box of the Triangle and the
|
||||
remaining 3 data dimensions define the vertices of the triangle. (Nick Knize)
|
||||
|
||||
* LUCENE-8557: LeafReader.getFieldInfos is now documented and tested that it ought to return
|
||||
the same cached instance. MemoryIndex's impl now pre-creates the FieldInfos instead of
|
||||
re-calculating a new instance each time. (Tim Underwood, David Smiley)
|
||||
|
||||
Other:
|
||||
|
||||
* LUCENE-8523: Correct typo in JapaneseNumberFilterFactory javadocs (Ankush Jhalani
|
||||
|
|
|
@ -38,6 +38,10 @@ import org.apache.lucene.util.ArrayUtil;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class FieldInfos implements Iterable<FieldInfo> {
|
||||
|
||||
/** An instance without any fields. */
|
||||
public final static FieldInfos EMPTY = new FieldInfos(new FieldInfo[0]);
|
||||
|
||||
private final boolean hasFreq;
|
||||
private final boolean hasProx;
|
||||
private final boolean hasPayloads;
|
||||
|
|
|
@ -206,6 +206,10 @@ public abstract class LeafReader extends IndexReader {
|
|||
/**
|
||||
* Get the {@link FieldInfos} describing all fields in
|
||||
* this reader.
|
||||
*
|
||||
* Note: Implementations should cache the FieldInfos
|
||||
* instance returned by this method such that subsequent
|
||||
* calls to this method return the same instance.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public abstract FieldInfos getFieldInfos();
|
||||
|
|
|
@ -132,7 +132,7 @@ public class TestPendingDeletes extends LuceneTestCase {
|
|||
SegmentInfo si = new SegmentInfo(dir, Version.LATEST, Version.LATEST, "test", 3, false, Codec.getDefault(),
|
||||
Collections.emptyMap(), StringHelper.randomId(), new HashMap<>(), null);
|
||||
SegmentCommitInfo commitInfo = new SegmentCommitInfo(si, 0, 0, -1, -1, -1);
|
||||
FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
|
||||
FieldInfos fieldInfos = FieldInfos.EMPTY;
|
||||
si.getCodec().fieldInfosFormat().write(dir, si, "", fieldInfos, IOContext.DEFAULT);
|
||||
PendingDeletes deletes = newPendingDeletes(commitInfo);
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
|
|
@ -47,7 +47,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
|
|||
private int[][] positions = new int[testTerms.length][];
|
||||
private Directory dir;
|
||||
private SegmentCommitInfo seg;
|
||||
private FieldInfos fieldInfos = new FieldInfos(new FieldInfo[0]);
|
||||
private FieldInfos fieldInfos = FieldInfos.EMPTY;
|
||||
private static int TERM_FREQ = 3;
|
||||
|
||||
private static class TestToken implements Comparable<TestToken> {
|
||||
|
|
|
@ -1142,12 +1142,20 @@ public class MemoryIndex {
|
|||
private final class MemoryIndexReader extends LeafReader {
|
||||
|
||||
private final MemoryFields memoryFields = new MemoryFields(fields);
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
private MemoryIndexReader() {
|
||||
super(); // avoid as much superclass baggage as possible
|
||||
|
||||
FieldInfo[] fieldInfosArr = new FieldInfo[fields.size()];
|
||||
|
||||
int i = 0;
|
||||
for (Info info : fields.values()) {
|
||||
info.prepareDocValuesAndPointValues();
|
||||
fieldInfosArr[i++] = info.fieldInfo;
|
||||
}
|
||||
|
||||
fieldInfos = new FieldInfos(fieldInfosArr);
|
||||
}
|
||||
|
||||
private Info getInfoForExpectedDocValuesType(String fieldName, DocValuesType expectedType) {
|
||||
|
@ -1171,12 +1179,7 @@ public class MemoryIndex {
|
|||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
FieldInfo[] fieldInfos = new FieldInfo[fields.size()];
|
||||
int i = 0;
|
||||
for (Info info : fields.values()) {
|
||||
fieldInfos[i++] = info.fieldInfo;
|
||||
}
|
||||
return new FieldInfos(fieldInfos);
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Random;
|
|||
|
||||
import junit.framework.Assert;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -207,7 +206,7 @@ public class QueryUtils {
|
|||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
return new FieldInfos(new FieldInfo[0]);
|
||||
return FieldInfos.EMPTY;
|
||||
}
|
||||
|
||||
final Bits liveDocs = new Bits.MatchNoBits(maxDoc);
|
||||
|
|
|
@ -335,6 +335,11 @@ public final class TestUtil {
|
|||
}
|
||||
assert Accountables.toString(sr) != null;
|
||||
}
|
||||
|
||||
// FieldInfos should be cached at the reader and always return the same instance
|
||||
if (reader.getFieldInfos() != reader.getFieldInfos()) {
|
||||
throw new RuntimeException("getFieldInfos() returned different instances for class: "+reader.getClass());
|
||||
}
|
||||
}
|
||||
|
||||
// used by TestUtil.checkReader to check some things really unrelated to the index,
|
||||
|
|
|
@ -30,6 +30,7 @@ Jetty 9.4.11.v20180605
|
|||
|
||||
Upgrade Notes
|
||||
----------------------
|
||||
|
||||
* SOLR-12767: The min_rf parameter is no longer needed, Solr will always return the achieved replication factor (rf)
|
||||
in the response header.
|
||||
|
||||
|
@ -187,6 +188,9 @@ Improvements
|
|||
* SOLR-12699: Make contrib/ltr LTRScoringModel immutable and cache its hashCode.
|
||||
(Stanislav Livotov, Edward Ribeiro, Christine Poerschke)
|
||||
|
||||
* LUCENE-8557: Some internal LeafReader.getFieldInfos implementations were being re-computed on-demand instead of
|
||||
once up front leading to some slowdowns in places like JSON Facets and field collapsing. (Tim Underwood, David Smiley)
|
||||
|
||||
================== 7.5.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.solr.handler.component;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -33,10 +32,6 @@ import com.carrotsearch.hppc.cursors.LongCursor;
|
|||
import com.carrotsearch.hppc.cursors.LongObjectCursor;
|
||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
|
@ -84,7 +79,6 @@ import org.apache.solr.search.DocSlice;
|
|||
import org.apache.solr.search.QParser;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SortSpecParsing;
|
||||
import org.apache.solr.uninverting.UninvertingReader;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
|
||||
|
@ -214,9 +208,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
if(fieldType instanceof StrField) {
|
||||
//Get The Top Level SortedDocValues
|
||||
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
|
||||
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
|
||||
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
|
||||
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
|
||||
@SuppressWarnings("resource")
|
||||
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
|
||||
values = uninvertingReader.getSortedDocValues(field);
|
||||
} else {
|
||||
values = DocValues.getSorted(reader, field);
|
||||
|
@ -384,9 +377,8 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
if(values != null) {
|
||||
//Get The Top Level SortedDocValues again so we can re-iterate:
|
||||
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
|
||||
@SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
|
||||
new ReaderWrapper(searcher.getSlowAtomicReader(), field),
|
||||
Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
|
||||
@SuppressWarnings("resource")
|
||||
LeafReader uninvertingReader = CollapsingQParserPlugin.getTopFieldCacheReader(searcher, field);
|
||||
values = uninvertingReader.getSortedDocValues(field);
|
||||
} else {
|
||||
values = DocValues.getSorted(reader, field);
|
||||
|
@ -764,65 +756,4 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
|
|||
return Category.QUERY;
|
||||
}
|
||||
|
||||
// this reader alters the content of the given reader so it should not
|
||||
// delegate the caching stuff
|
||||
private static class ReaderWrapper extends FilterLeafReader {
|
||||
|
||||
private String field;
|
||||
|
||||
public ReaderWrapper(LeafReader leafReader, String field) {
|
||||
super(leafReader);
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
public SortedDocValues getSortedDocValues(String field) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public FieldInfos getFieldInfos() {
|
||||
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
|
||||
List<FieldInfo> newInfos = new ArrayList<>();
|
||||
while(it.hasNext()) {
|
||||
FieldInfo fieldInfo = it.next();
|
||||
|
||||
if(fieldInfo.name.equals(field)) {
|
||||
FieldInfo f = new FieldInfo(fieldInfo.name,
|
||||
fieldInfo.number,
|
||||
fieldInfo.hasVectors(),
|
||||
fieldInfo.hasNorms(),
|
||||
fieldInfo.hasPayloads(),
|
||||
fieldInfo.getIndexOptions(),
|
||||
DocValuesType.NONE,
|
||||
fieldInfo.getDocValuesGen(),
|
||||
fieldInfo.attributes(),
|
||||
fieldInfo.getPointDataDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
fieldInfo.isSoftDeletesField());
|
||||
newInfos.add(f);
|
||||
|
||||
} else {
|
||||
newInfos.add(fieldInfo);
|
||||
}
|
||||
}
|
||||
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
|
||||
return infos;
|
||||
}
|
||||
|
||||
// NOTE: delegating the caches is wrong here as we are altering the content
|
||||
// of the reader, this should ONLY be used under an uninvertingreader which
|
||||
// will restore doc values back using uninversion, otherwise all sorts of
|
||||
// crazy things could happen.
|
||||
|
||||
@Override
|
||||
public CacheHelper getCoreCacheHelper() {
|
||||
return in.getCoreCacheHelper();
|
||||
}
|
||||
|
||||
@Override
|
||||
public CacheHelper getReaderCacheHelper() {
|
||||
return in.getReaderCacheHelper();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -48,6 +48,11 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
|
|||
private final CompositeReader in;
|
||||
private final LeafMetaData metaData;
|
||||
|
||||
// Cached copy of FieldInfos to prevent it from being re-created on each
|
||||
// getFieldInfos call. Most (if not all) other LeafReader implementations
|
||||
// also have a cached FieldInfos instance so this is consistent. SOLR-12878
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
final Map<String,Terms> cachedTerms = new ConcurrentHashMap<>();
|
||||
|
||||
// TODO: consider ConcurrentHashMap ?
|
||||
|
@ -86,6 +91,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
|
|||
}
|
||||
metaData = new LeafMetaData(reader.leaves().get(0).reader().getMetaData().getCreatedVersionMajor(), minVersion, null);
|
||||
}
|
||||
fieldInfos = FieldInfos.getMergedFieldInfos(in);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -273,8 +279,7 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
|
|||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
ensureOpen();
|
||||
return FieldInfos.getMergedFieldInfos(in); // TODO cache?
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -387,13 +387,55 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* This forces the use of the top level field cache for String fields.
|
||||
* This is VERY fast at query time but slower to warm and causes insanity.
|
||||
*/
|
||||
public static LeafReader getTopFieldCacheReader(SolrIndexSearcher searcher, String collapseField) {
|
||||
return UninvertingReader.wrap(
|
||||
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
|
||||
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
|
||||
}
|
||||
|
||||
private static class ReaderWrapper extends FilterLeafReader {
|
||||
|
||||
private String field;
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
public ReaderWrapper(LeafReader leafReader, String field) {
|
||||
ReaderWrapper(LeafReader leafReader, String field) {
|
||||
super(leafReader);
|
||||
this.field = field;
|
||||
|
||||
// TODO can we just do "field" and not bother with the other fields?
|
||||
List<FieldInfo> newInfos = new ArrayList<>(in.getFieldInfos().size());
|
||||
for (FieldInfo fieldInfo : in.getFieldInfos()) {
|
||||
if (fieldInfo.name.equals(field)) {
|
||||
FieldInfo f = new FieldInfo(fieldInfo.name,
|
||||
fieldInfo.number,
|
||||
fieldInfo.hasVectors(),
|
||||
fieldInfo.hasNorms(),
|
||||
fieldInfo.hasPayloads(),
|
||||
fieldInfo.getIndexOptions(),
|
||||
DocValuesType.NONE,
|
||||
fieldInfo.getDocValuesGen(),
|
||||
fieldInfo.attributes(),
|
||||
fieldInfo.getPointDataDimensionCount(),
|
||||
fieldInfo.getPointIndexDimensionCount(),
|
||||
fieldInfo.getPointNumBytes(),
|
||||
fieldInfo.isSoftDeletesField());
|
||||
newInfos.add(f);
|
||||
} else {
|
||||
newInfos.add(fieldInfo);
|
||||
}
|
||||
}
|
||||
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
|
||||
this.fieldInfos = infos;
|
||||
}
|
||||
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
public SortedDocValues getSortedDocValues(String field) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// NOTE: delegating the caches is wrong here as we are altering the content
|
||||
|
@ -410,37 +452,6 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
public CacheHelper getReaderCacheHelper() {
|
||||
return in.getReaderCacheHelper();
|
||||
}
|
||||
|
||||
public SortedDocValues getSortedDocValues(String field) {
|
||||
return null;
|
||||
}
|
||||
|
||||
public FieldInfos getFieldInfos() {
|
||||
Iterator<FieldInfo> it = in.getFieldInfos().iterator();
|
||||
List<FieldInfo> newInfos = new ArrayList();
|
||||
while(it.hasNext()) {
|
||||
FieldInfo fieldInfo = it.next();
|
||||
|
||||
if(fieldInfo.name.equals(field)) {
|
||||
FieldInfo f = new FieldInfo(fieldInfo.name,
|
||||
fieldInfo.number,
|
||||
fieldInfo.hasVectors(),
|
||||
fieldInfo.hasNorms(),
|
||||
fieldInfo.hasPayloads(),
|
||||
fieldInfo.getIndexOptions(),
|
||||
DocValuesType.NONE,
|
||||
fieldInfo.getDocValuesGen(),
|
||||
fieldInfo.attributes(),
|
||||
0, 0, 0, fieldInfo.isSoftDeletesField());
|
||||
newInfos.add(f);
|
||||
|
||||
} else {
|
||||
newInfos.add(fieldInfo);
|
||||
}
|
||||
}
|
||||
FieldInfos infos = new FieldInfos(newInfos.toArray(new FieldInfo[newInfos.size()]));
|
||||
return infos;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1255,14 +1266,8 @@ public class CollapsingQParserPlugin extends QParserPlugin {
|
|||
|
||||
if(collapseFieldType instanceof StrField) {
|
||||
if(HINT_TOP_FC.equals(hint)) {
|
||||
|
||||
/*
|
||||
* This hint forces the use of the top level field cache for String fields.
|
||||
* This is VERY fast at query time but slower to warm and causes insanity.
|
||||
*/
|
||||
@SuppressWarnings("resource") final LeafReader uninvertingReader = UninvertingReader.wrap(
|
||||
new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
|
||||
Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
|
||||
@SuppressWarnings("resource")
|
||||
final LeafReader uninvertingReader = getTopFieldCacheReader(searcher, collapseField);
|
||||
|
||||
docValuesProducer = new EmptyDocValuesProducer() {
|
||||
@Override
|
||||
|
|
|
@ -126,8 +126,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
// list of all caches associated with this searcher.
|
||||
private final SolrCache[] cacheList;
|
||||
|
||||
private final FieldInfos fieldInfos;
|
||||
|
||||
private DirectoryFactory directoryFactory;
|
||||
|
||||
private final LeafReader leafReader;
|
||||
|
@ -263,7 +261,6 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
this.queryResultMaxDocsCached = solrConfig.queryResultMaxDocsCached;
|
||||
this.useFilterForSortedQuery = solrConfig.useFilterForSortedQuery;
|
||||
|
||||
this.fieldInfos = leafReader.getFieldInfos();
|
||||
this.docFetcher = new SolrDocumentFetcher(this, solrConfig, enableCache);
|
||||
|
||||
this.cachingEnabled = enableCache;
|
||||
|
@ -319,7 +316,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
}
|
||||
|
||||
public FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
return leafReader.getFieldInfos();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -494,7 +491,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
* Returns a collection of all field names the index reader knows about.
|
||||
*/
|
||||
public Iterable<String> getFieldNames() {
|
||||
return Iterables.transform(fieldInfos, fieldInfo -> fieldInfo.name);
|
||||
return Iterables.transform(getFieldInfos(), fieldInfo -> fieldInfo.name);
|
||||
}
|
||||
|
||||
public SolrCache<Query,DocSet> getFilterCache() {
|
||||
|
|
|
@ -199,7 +199,7 @@ class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
|
|||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
|
||||
getClass()+" doesn't support prefix"); // yet, but it could
|
||||
}
|
||||
FieldInfo fieldInfo = fcontext.searcher.getSlowAtomicReader().getFieldInfos().fieldInfo(sf.getName());
|
||||
FieldInfo fieldInfo = fcontext.searcher.getFieldInfos().fieldInfo(sf.getName());
|
||||
if (fieldInfo != null &&
|
||||
fieldInfo.getDocValuesType() != DocValuesType.NUMERIC &&
|
||||
fieldInfo.getDocValuesType() != DocValuesType.SORTED &&
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.List;
|
|||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -391,7 +390,7 @@ public class TestDocSet extends LuceneTestCase {
|
|||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
return new FieldInfos(new FieldInfo[0]);
|
||||
return FieldInfos.EMPTY;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue