SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions

Squashed commit of the following:

commit c38f4cabc2828ee83b53b931dd829e29a3e1701c
Author: Keith Laban <kelaban17@gmail.com>
Date:   Tue Apr 11 17:17:05 2017 -0400

    reverted tests to using old wrap interface

commit 806f33e092491cc6a2ee292d2934c76171e40dc7
Author: Keith Laban <kelaban17@gmail.com>
Date:   Tue Apr 11 17:13:34 2017 -0400

    updated UninvertingReader.wrap / tests

commit b10bcab338b362b909491fea1cf13de66f5f17c0
Author: Keith Laban <klaban1@bloomberg.net>
Date:   Wed Apr 5 14:57:28 2017 -0400

    SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper

commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce
Author: Keith Laban <klaban1@bloomberg.net>
Date:   Wed Jan 18 16:39:51 2017 -0500

    SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment
This commit is contained in:
Shalin Shekhar Mangar 2017-04-17 13:59:26 +05:30
parent 4df4c52c0c
commit 4da901a072
4 changed files with 108 additions and 8 deletions

View File

@ -202,6 +202,9 @@ Bug Fixes
* SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
(Christine Poerschke)
* SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
to avoid such exceptions. (Keith Laban via shalin)
Other Changes
----------------------

View File

@ -155,10 +155,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
assert reader != null;
return ExitableDirectoryReader.wrap(
UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
wrapUninvertingReaderPerSegment(core, reader),
SolrQueryTimeoutImpl.getInstance());
}
/**
* If docvalues are enabled or disabled after data has already been indexed for a field, such that
* only some segments have docvalues, uninverting on the top level reader will cause
* IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
* the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field
* into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
*
* Therefore, this class provides a uninverting map per segment such that for any field,
* DocValues are used from segments if they exist and uninversion of the field is performed on the rest
* of the segments.
*/
private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
}
/**
* Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
* into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers

View File

@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.function.Function;
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
import org.apache.lucene.document.NumericDocValuesField; // javadocs
@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
}
/**
*
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
* and so on.
*
* @param in input directory reader
* @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
* @return a wrapped directory reader
*/
public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
return new UninvertingDirectoryReader(in, perSegmentMapper);
}
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
return UninvertingReader.wrap(in, (r) -> mapping);
}
static class UninvertingDirectoryReader extends FilterDirectoryReader {
final Map<String,Type> mapping;
final Function<LeafReader, Map<String,Type>> mapper;
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
super(in, new FilterDirectoryReader.SubReaderWrapper() {
@Override
public LeafReader wrap(LeafReader reader) {
return new UninvertingReader(reader, mapping);
return new UninvertingReader(reader, mapper.apply(reader));
}
});
this.mapping = mapping;
this.mapper = mapper;
}
@Override
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
return new UninvertingDirectoryReader(in, mapping);
return new UninvertingDirectoryReader(in, mapper);
}
// NOTE: delegating the cache helpers is wrong since this wrapper alters the
@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
/**
* Create a new UninvertingReader with the specified mapping
* <p>
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
* instead.
*
* @lucene.internal

View File

@ -25,10 +25,14 @@ import java.util.List;
import java.util.function.Function;
import java.util.function.Supplier;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
}
}
public void testHalfAndHalfDocValues() throws Exception {
// Insert two docs without docvalues
String fieldname = "string_add_dv_later";
assertU(adoc("id", "3", fieldname, "c"));
assertU(commit());
assertU(adoc("id", "1", fieldname, "a"));
assertU(commit());
try (SolrCore core = h.getCoreInc()) {
assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
// Add docvalues to the field type
IndexSchema schema = core.getLatestSchema();
SchemaField oldField = schema.getField(fieldname);
int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
SchemaField sf = new SchemaField( fieldname, oldField.getType(), newProperties, null);
schema.getFields().put( fieldname, sf );
// Insert a new doc with docvalues
assertU(adoc("id", "2", fieldname, "b"));
assertU(commit());
// Check there are a mix of segments with and without docvalues
final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
final SolrIndexSearcher searcher = searcherRef.get();
try {
final DirectoryReader topReader = searcher.getRawReader();
//Assert no merges
assertEquals(3, topReader.numDocs());
assertEquals(3, topReader.leaves().size());
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
//The global field type should have docValues because a document with dvs was added
assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
for(LeafReaderContext ctx: topReader.leaves()) {
LeafReader r = ctx.reader();
//Make sure there were no merges
assertEquals(1, r.numDocs());
Document doc = r.document(0);
String id = doc.getField("id").stringValue();
if(id.equals("1") || id.equals("3")) {
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
} else {
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
}
}
} finally {
searcherRef.decref();
}
}
// Assert sort order is correct
assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
"//*[@numFound='3']",
"//result/doc[1]/int[@name='id'][.=1]",
"//result/doc[2]/int[@name='id'][.=2]",
"//result/doc[3]/int[@name='id'][.=3]"
);
}
private void tstToObj(SchemaField sf, Object o) {
List<IndexableField> fields = sf.createFields(o);
for (IndexableField field : fields) {