mirror of https://github.com/apache/lucene.git
SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions
Squashed commit of the following: commit c38f4cabc2828ee83b53b931dd829e29a3e1701c Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:17:05 2017 -0400 reverted tests to using old wrap interface commit 806f33e092491cc6a2ee292d2934c76171e40dc7 Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:13:34 2017 -0400 updated UninvertingReader.wrap / tests commit b10bcab338b362b909491fea1cf13de66f5f17c0 Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Apr 5 14:57:28 2017 -0400 SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Jan 18 16:39:51 2017 -0500 SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment
This commit is contained in:
parent
4df4c52c0c
commit
4da901a072
|
@ -202,6 +202,9 @@ Bug Fixes
|
|||
* SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
|
||||
(Christine Poerschke)
|
||||
|
||||
* SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
|
||||
to avoid such exceptions. (Keith Laban via shalin)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -155,10 +155,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
|
|||
private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
|
||||
assert reader != null;
|
||||
return ExitableDirectoryReader.wrap(
|
||||
UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
|
||||
wrapUninvertingReaderPerSegment(core, reader),
|
||||
SolrQueryTimeoutImpl.getInstance());
|
||||
}
|
||||
|
||||
/**
|
||||
* If docvalues are enabled or disabled after data has already been indexed for a field, such that
|
||||
* only some segments have docvalues, uninverting on the top level reader will cause
|
||||
* IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
|
||||
* the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field
|
||||
* into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
|
||||
*
|
||||
* Therefore, this class provides a uninverting map per segment such that for any field,
|
||||
* DocValues are used from segments if they exist and uninversion of the field is performed on the rest
|
||||
* of the segments.
|
||||
*/
|
||||
private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
|
||||
return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
|
||||
* into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.lucene.document.BinaryDocValuesField; // javadocs
|
||||
import org.apache.lucene.document.NumericDocValuesField; // javadocs
|
||||
|
@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Wraps a provided DirectoryReader. Note that for convenience, the returned reader
|
||||
* can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
|
||||
* and so on.
|
||||
*
|
||||
* @param in input directory reader
|
||||
* @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
|
||||
* @return a wrapped directory reader
|
||||
*/
|
||||
public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, perSegmentMapper);
|
||||
}
|
||||
|
||||
public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
return UninvertingReader.wrap(in, (r) -> mapping);
|
||||
}
|
||||
|
||||
static class UninvertingDirectoryReader extends FilterDirectoryReader {
|
||||
final Map<String,Type> mapping;
|
||||
final Function<LeafReader, Map<String,Type>> mapper;
|
||||
|
||||
public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
|
||||
public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
|
||||
super(in, new FilterDirectoryReader.SubReaderWrapper() {
|
||||
@Override
|
||||
public LeafReader wrap(LeafReader reader) {
|
||||
return new UninvertingReader(reader, mapping);
|
||||
return new UninvertingReader(reader, mapper.apply(reader));
|
||||
}
|
||||
});
|
||||
this.mapping = mapping;
|
||||
this.mapper = mapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
|
||||
return new UninvertingDirectoryReader(in, mapping);
|
||||
return new UninvertingDirectoryReader(in, mapper);
|
||||
}
|
||||
|
||||
// NOTE: delegating the cache helpers is wrong since this wrapper alters the
|
||||
|
@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
|
|||
/**
|
||||
* Create a new UninvertingReader with the specified mapping
|
||||
* <p>
|
||||
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
|
||||
* Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
|
||||
* instead.
|
||||
*
|
||||
* @lucene.internal
|
||||
|
|
|
@ -25,10 +25,14 @@ import java.util.List;
|
|||
import java.util.function.Function;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
|
@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
public void testHalfAndHalfDocValues() throws Exception {
|
||||
// Insert two docs without docvalues
|
||||
String fieldname = "string_add_dv_later";
|
||||
assertU(adoc("id", "3", fieldname, "c"));
|
||||
assertU(commit());
|
||||
assertU(adoc("id", "1", fieldname, "a"));
|
||||
assertU(commit());
|
||||
|
||||
|
||||
try (SolrCore core = h.getCoreInc()) {
|
||||
assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
|
||||
// Add docvalues to the field type
|
||||
IndexSchema schema = core.getLatestSchema();
|
||||
SchemaField oldField = schema.getField(fieldname);
|
||||
int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
|
||||
|
||||
SchemaField sf = new SchemaField( fieldname, oldField.getType(), newProperties, null);
|
||||
schema.getFields().put( fieldname, sf );
|
||||
|
||||
// Insert a new doc with docvalues
|
||||
assertU(adoc("id", "2", fieldname, "b"));
|
||||
assertU(commit());
|
||||
|
||||
|
||||
// Check there are a mix of segments with and without docvalues
|
||||
final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
|
||||
final SolrIndexSearcher searcher = searcherRef.get();
|
||||
try {
|
||||
final DirectoryReader topReader = searcher.getRawReader();
|
||||
|
||||
//Assert no merges
|
||||
|
||||
assertEquals(3, topReader.numDocs());
|
||||
assertEquals(3, topReader.leaves().size());
|
||||
|
||||
final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
|
||||
//The global field type should have docValues because a document with dvs was added
|
||||
assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
|
||||
|
||||
for(LeafReaderContext ctx: topReader.leaves()) {
|
||||
LeafReader r = ctx.reader();
|
||||
//Make sure there were no merges
|
||||
assertEquals(1, r.numDocs());
|
||||
Document doc = r.document(0);
|
||||
String id = doc.getField("id").stringValue();
|
||||
|
||||
if(id.equals("1") || id.equals("3")) {
|
||||
assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
|
||||
} else {
|
||||
assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
|
||||
}
|
||||
|
||||
}
|
||||
} finally {
|
||||
searcherRef.decref();
|
||||
}
|
||||
}
|
||||
|
||||
// Assert sort order is correct
|
||||
assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
|
||||
"//*[@numFound='3']",
|
||||
"//result/doc[1]/int[@name='id'][.=1]",
|
||||
"//result/doc[2]/int[@name='id'][.=2]",
|
||||
"//result/doc[3]/int[@name='id'][.=3]"
|
||||
);
|
||||
}
|
||||
|
||||
private void tstToObj(SchemaField sf, Object o) {
|
||||
List<IndexableField> fields = sf.createFields(o);
|
||||
for (IndexableField field : fields) {
|
||||
|
|
Loading…
Reference in New Issue