SOLR-12782: UninvertingReader avoids FieldInfo creation.

Wrapping is now a bit more lightweight: Does not create FieldInfo for fields that
can't be uninverted (saves mem) and can avoid wrapping the reader altogether if there's nothing to uninvert.
IndexSchema.getUninversionMap refactored to getUninversionMapper and no longer merges FieldInfos.
This commit is contained in:
David Smiley 2018-10-05 20:40:39 -07:00
parent eb47099ee2
commit e2b8beccb0
10 changed files with 120 additions and 123 deletions

View File

@ -169,9 +169,14 @@ Bug Fixes
Improvements Improvements
---------------------- ----------------------
* SOLR-12767: Solr now always includes in the response of update requests the achieved replication factor * SOLR-12767: Solr now always includes in the response of update requests the achieved replication factor
(Tomás Fernández Löbbe) (Tomás Fernández Löbbe)
* SOLR-12782: UninvertingReader wrapping is now a bit more lightweight: Does not create FieldInfo for fields that
can't be uninverted (saves mem) and can avoid wrapping the reader altogether if there's nothing to uninvert.
IndexSchema.getUninversionMap refactored to getUninversionMapper and no longer merges FieldInfos. (David Smiley)
================== 7.5.0 ================== ================== 7.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release. Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -19,10 +19,9 @@ package org.apache.solr.handler.component;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.Collections;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.IntHashSet; import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap; import com.carrotsearch.hppc.IntObjectHashMap;
@ -215,10 +214,9 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(fieldType instanceof StrField) { if(fieldType instanceof StrField) {
//Get The Top Level SortedDocValues //Get The Top Level SortedDocValues
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) { if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap(); @SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
mapping.put(field, UninvertingReader.Type.SORTED); new ReaderWrapper(searcher.getSlowAtomicReader(), field),
@SuppressWarnings("resource") Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field); values = uninvertingReader.getSortedDocValues(field);
} else { } else {
values = DocValues.getSorted(reader, field); values = DocValues.getSorted(reader, field);
@ -386,10 +384,9 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
if(values != null) { if(values != null) {
//Get The Top Level SortedDocValues again so we can re-iterate: //Get The Top Level SortedDocValues again so we can re-iterate:
if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) { if(CollapsingQParserPlugin.HINT_TOP_FC.equals(hint)) {
Map<String, UninvertingReader.Type> mapping = new HashMap(); @SuppressWarnings("resource") LeafReader uninvertingReader = UninvertingReader.wrap(
mapping.put(field, UninvertingReader.Type.SORTED); new ReaderWrapper(searcher.getSlowAtomicReader(), field),
@SuppressWarnings("resource") Collections.singletonMap(field, UninvertingReader.Type.SORTED)::get);
UninvertingReader uninvertingReader = new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), field), mapping);
values = uninvertingReader.getSortedDocValues(field); values = uninvertingReader.getSortedDocValues(field);
} else { } else {
values = DocValues.getSorted(reader, field); values = DocValues.getSorted(reader, field);

View File

@ -29,6 +29,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterCodecReader; import org.apache.lucene.index.FilterCodecReader;
import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OneMergeWrappingMergePolicy; import org.apache.lucene.index.OneMergeWrappingMergePolicy;
@ -134,13 +135,13 @@ public class UninvertDocValuesMergePolicyFactory extends WrapperMergePolicyFacto
*/ */
private class UninvertingFilterCodecReader extends FilterCodecReader { private class UninvertingFilterCodecReader extends FilterCodecReader {
private final UninvertingReader uninvertingReader; private final LeafReader uninvertingReader;
private final DocValuesProducer docValuesProducer; private final DocValuesProducer docValuesProducer;
public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) { public UninvertingFilterCodecReader(CodecReader in, Map<String,UninvertingReader.Type> uninversionMap) {
super(in); super(in);
this.uninvertingReader = new UninvertingReader(in, uninversionMap); this.uninvertingReader = UninvertingReader.wrap(in, uninversionMap::get);
this.docValuesProducer = new DocValuesProducer() { this.docValuesProducer = new DocValuesProducer() {
@Override @Override

View File

@ -44,11 +44,7 @@ import java.util.stream.Stream;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; import org.apache.lucene.analysis.DelegatingAnalyzerWrapper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.queries.payloads.PayloadDecoder; import org.apache.lucene.queries.payloads.PayloadDecoder;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
@ -355,22 +351,15 @@ public class IndexSchema {
queryAnalyzer = new SolrQueryAnalyzer(); queryAnalyzer = new SolrQueryAnalyzer();
} }
public Map<String,UninvertingReader.Type> getUninversionMap(IndexReader reader) { /** @see UninvertingReader */
final Map<String,UninvertingReader.Type> map = new HashMap<>(); public Function<String, UninvertingReader.Type> getUninversionMapper() {
for (FieldInfo f : MultiFields.getMergedFieldInfos(reader)) { return name -> {
if (f.getDocValuesType() == DocValuesType.NONE) { SchemaField sf = getFieldOrNull(name);
// we have a field (of some kind) in the reader w/o DocValues if (sf == null) {
// if we have an equivalent indexed=true field in the schema, trust it's uninversion type (if any) return null;
final SchemaField sf = getFieldOrNull(f.name);
if (sf != null && sf.indexed()) {
final UninvertingReader.Type type = sf.getType().getUninversionType(sf);
if (type != null) {
map.put(f.name, type);
} }
} return sf.getType().getUninversionType(sf);
} };
}
return map;
} }
/** /**

View File

@ -19,8 +19,8 @@ package org.apache.solr.search;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -1260,11 +1260,9 @@ public class CollapsingQParserPlugin extends QParserPlugin {
* This hint forces the use of the top level field cache for String fields. * This hint forces the use of the top level field cache for String fields.
* This is VERY fast at query time but slower to warm and causes insanity. * This is VERY fast at query time but slower to warm and causes insanity.
*/ */
@SuppressWarnings("resource") final LeafReader uninvertingReader = UninvertingReader.wrap(
Map<String, UninvertingReader.Type> mapping = new HashMap(); new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField),
mapping.put(collapseField, UninvertingReader.Type.SORTED); Collections.singletonMap(collapseField, UninvertingReader.Type.SORTED)::get);
@SuppressWarnings("resource") final UninvertingReader uninvertingReader =
new UninvertingReader(new ReaderWrapper(searcher.getSlowAtomicReader(), collapseField), mapping);
docValuesProducer = new EmptyDocValuesProducer() { docValuesProducer = new EmptyDocValuesProducer() {
@Override @Override

View File

@ -49,8 +49,9 @@ public class Insanity {
* instead of a numeric. * instead of a numeric.
*/ */
public static LeafReader wrapInsanity(LeafReader sane, String insaneField) { public static LeafReader wrapInsanity(LeafReader sane, String insaneField) {
return new UninvertingReader(new InsaneReader(sane, insaneField), return UninvertingReader.wrap(
Collections.singletonMap(insaneField, UninvertingReader.Type.SORTED)); new InsaneReader(sane, insaneField),
Collections.singletonMap(insaneField, UninvertingReader.Type.SORTED)::get);
} }
/** Hides the proper numeric dv type for the field */ /** Hides the proper numeric dv type for the field */

View File

@ -157,25 +157,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException { private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
assert reader != null; assert reader != null;
return ExitableDirectoryReader.wrap( return ExitableDirectoryReader.wrap(
wrapUninvertingReaderPerSegment(core, reader), UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMapper()),
SolrQueryTimeoutImpl.getInstance()); SolrQueryTimeoutImpl.getInstance());
} }
/**
* If docvalues are enabled or disabled after data has already been indexed for a field, such that
* only some segments have docvalues, uninverting on the top level reader will cause
* IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
* the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field
* into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
*
* Therefore, this class provides a uninverting map per segment such that for any field,
* DocValues are used from segments if they exist and uninversion of the field is performed on the rest
* of the segments.
*/
private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
}
/** /**
* Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
* into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers * into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers

View File

@ -21,11 +21,11 @@ import java.util.ArrayList;
import java.util.Map; import java.util.Map;
import java.util.function.Function; import java.util.function.Function;
import org.apache.lucene.document.BinaryDocValuesField; // javadocs import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.NumericDocValuesField; // javadocs import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField; // javadocs import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField; // javadocs import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField; // javadocs import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
@ -174,32 +174,32 @@ public class UninvertingReader extends FilterLeafReader {
} }
/** @see #wrap(DirectoryReader, Function) */
public static DirectoryReader wrap(DirectoryReader reader, Map<String, Type> mapping) throws IOException {
return wrap(reader, mapping::get);
}
/** /**
* * Wraps a provided {@link DirectoryReader}. Note that for convenience, the returned reader
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)}) * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
* and so on. * and so on.
* *
* @param in input directory reader * @param in input directory reader
* @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type * @param mapper function to map a field name to an uninversion type. A Null result means to not uninvert.
* @return a wrapped directory reader * @return a wrapped directory reader
*/ */
public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException { public static DirectoryReader wrap(DirectoryReader in, Function<String, Type> mapper) throws IOException {
return new UninvertingDirectoryReader(in, perSegmentMapper); return new UninvertingDirectoryReader(in, mapper);
}
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
return UninvertingReader.wrap(in, (r) -> mapping);
} }
static class UninvertingDirectoryReader extends FilterDirectoryReader { static class UninvertingDirectoryReader extends FilterDirectoryReader {
final Function<LeafReader, Map<String,Type>> mapper; final Function<String, Type> mapper;
public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException { public UninvertingDirectoryReader(DirectoryReader in, final Function<String, Type> mapper) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() { super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override @Override
public LeafReader wrap(LeafReader reader) { public LeafReader wrap(LeafReader reader) {
return new UninvertingReader(reader, mapper.apply(reader)); return UninvertingReader.wrap(reader, mapper);
} }
}); });
this.mapper = mapper; this.mapper = mapper;
@ -220,25 +220,25 @@ public class UninvertingReader extends FilterLeafReader {
} }
} }
final Map<String,Type> mapping;
final FieldInfos fieldInfos;
/** /**
* Create a new UninvertingReader with the specified mapping * Create a new UninvertingReader with the specified mapping, wrapped around the input. It may be deemed that there
* is no mapping to do, in which case the input is returned.
* <p> * <p>
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)} * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)} instead.
* instead.
* *
* @lucene.internal * @lucene.internal
*/ */
public UninvertingReader(LeafReader in, Map<String,Type> mapping) { public static LeafReader wrap(LeafReader in, Function<String, Type> mapping) {
super(in); boolean wrap = false;
this.mapping = mapping;
ArrayList<FieldInfo> filteredInfos = new ArrayList<>(); // Calculate a new FieldInfos that has DocValuesType where we didn't before
ArrayList<FieldInfo> newFieldInfos = new ArrayList<>(in.getFieldInfos().size());
for (FieldInfo fi : in.getFieldInfos()) { for (FieldInfo fi : in.getFieldInfos()) {
DocValuesType type = fi.getDocValuesType(); DocValuesType type = fi.getDocValuesType();
if (type == DocValuesType.NONE) { // fields which currently don't have docValues, but are uninvertable (indexed or points data present)
Type t = mapping.get(fi.name); if (type == DocValuesType.NONE &&
(fi.getIndexOptions() != IndexOptions.NONE || (fi.getPointNumBytes() > 0 && fi.getPointDimensionCount() == 1))) {
Type t = mapping.apply(fi.name); // could definitely return null, thus still can't uninvert it
if (t != null) { if (t != null) {
if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) { if (t == Type.INTEGER_POINT || t == Type.LONG_POINT || t == Type.FLOAT_POINT || t == Type.DOUBLE_POINT) {
// type uses points // type uses points
@ -280,11 +280,30 @@ public class UninvertingReader extends FilterLeafReader {
} }
} }
} }
filteredInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(), if (type != fi.getDocValuesType()) { // we changed it
wrap = true;
newFieldInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(), fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
fi.getPointDimensionCount(), fi.getPointNumBytes(), fi.isSoftDeletesField())); fi.getPointDimensionCount(), fi.getPointNumBytes(), fi.isSoftDeletesField()));
} else {
newFieldInfos.add(fi);
} }
fieldInfos = new FieldInfos(filteredInfos.toArray(new FieldInfo[filteredInfos.size()])); }
if (!wrap) {
return in;
} else {
FieldInfos fieldInfos = new FieldInfos(newFieldInfos.toArray(new FieldInfo[newFieldInfos.size()]));
return new UninvertingReader(in, mapping, fieldInfos);
}
}
final Function<String, Type> mapping;
final FieldInfos fieldInfos;
private UninvertingReader(LeafReader in, Function<String, Type> mapping, FieldInfos fieldInfos) {
super(in);
this.mapping = mapping;
this.fieldInfos = fieldInfos;
} }
@Override @Override
@ -388,11 +407,7 @@ public class UninvertingReader extends FilterLeafReader {
* if the field doesn't exist or doesn't have a mapping. * if the field doesn't exist or doesn't have a mapping.
*/ */
private Type getType(String field) { private Type getType(String field) {
FieldInfo info = fieldInfos.fieldInfo(field); return mapping.apply(field);
if (info == null || info.getDocValuesType() == DocValuesType.NONE) {
return null;
}
return mapping.get(field);
} }
// NOTE: delegating the cache helpers is wrong since this wrapper alters the // NOTE: delegating the cache helpers is wrong since this wrapper alters the

View File

@ -50,7 +50,7 @@ final class DeleteByQueryWrapper extends Query {
} }
LeafReader wrap(LeafReader reader) { LeafReader wrap(LeafReader reader) {
return new UninvertingReader(reader, schema.getUninversionMap(reader)); return UninvertingReader.wrap(reader, schema.getUninversionMapper());
} }
// we try to be well-behaved, but we are not (and IW's applyQueryDeletes isn't much better...) // we try to be well-behaved, but we are not (and IW's applyQueryDeletes isn't much better...)

View File

@ -36,6 +36,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
@ -367,24 +368,29 @@ public class TestUninvertingReader extends LuceneTestCase {
uninvertingMap.put("dv", Type.LEGACY_INTEGER); uninvertingMap.put("dv", Type.LEGACY_INTEGER);
uninvertingMap.put("dint", Type.INTEGER_POINT); uninvertingMap.put("dint", Type.INTEGER_POINT);
DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), DirectoryReader ir = UninvertingReader.wrap(DirectoryReader.open(dir), uninvertingMap);
uninvertingMap);
LeafReader leafReader = ir.leaves().get(0).reader(); LeafReader leafReader = ir.leaves().get(0).reader();
FieldInfos fieldInfos = leafReader.getFieldInfos();
LeafReader originalLeafReader = ((UninvertingReader)leafReader).getDelegate();
FieldInfo intFInfo = leafReader.getFieldInfos().fieldInfo("int"); assertNotSame(originalLeafReader.getFieldInfos(), fieldInfos);
assertSame("do not rebuild FieldInfo for unaffected fields",
originalLeafReader.getFieldInfos().fieldInfo("id"), fieldInfos.fieldInfo("id"));
FieldInfo intFInfo = fieldInfos.fieldInfo("int");
assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType()); assertEquals(DocValuesType.NUMERIC, intFInfo.getDocValuesType());
assertEquals(0, intFInfo.getPointDimensionCount()); assertEquals(0, intFInfo.getPointDimensionCount());
assertEquals(0, intFInfo.getPointNumBytes()); assertEquals(0, intFInfo.getPointNumBytes());
FieldInfo dintFInfo = leafReader.getFieldInfos().fieldInfo("dint"); FieldInfo dintFInfo = fieldInfos.fieldInfo("dint");
assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType()); assertEquals(DocValuesType.NUMERIC, dintFInfo.getDocValuesType());
assertEquals(1, dintFInfo.getPointDimensionCount()); assertEquals(1, dintFInfo.getPointDimensionCount());
assertEquals(4, dintFInfo.getPointNumBytes()); assertEquals(4, dintFInfo.getPointNumBytes());
FieldInfo dvFInfo = leafReader.getFieldInfos().fieldInfo("dv"); FieldInfo dvFInfo = fieldInfos.fieldInfo("dv");
assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType()); assertEquals(DocValuesType.NUMERIC, dvFInfo.getDocValuesType());
FieldInfo storedFInfo = leafReader.getFieldInfos().fieldInfo("stored"); FieldInfo storedFInfo = fieldInfos.fieldInfo("stored");
assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType()); assertEquals(DocValuesType.NONE, storedFInfo.getDocValuesType());
TestUtil.checkReader(ir); TestUtil.checkReader(ir);