SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions

Squashed commit of the following: commit c38f4cabc2828ee83b53b931dd829e29a3e1701c Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:17:05 2017 -0400 reverted tests to using old wrap interface commit 806f33e092491cc6a2ee292d2934c76171e40dc7 Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:13:34 2017 -0400 updated UninvertingReader.wrap / tests commit b10bcab338b362b909491fea1cf13de66f5f17c0 Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Apr 5 14:57:28 2017 -0400 SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Jan 18 16:39:51 2017 -0500 SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment
2017-04-17 13:59:26 +05:30 · 2017-04-17 13:59:26 +05:30 · 4da901a072
parent 4df4c52c0c
commit 4da901a072
4 changed files with 108 additions and 8 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -202,6 +202,9 @@ Bug Fixes
 * SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
  (Christine Poerschke)
 * SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
  to avoid such exceptions. (Keith Laban via shalin)
 Other Changes
 ----------------------
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -155,10 +155,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  /**
   * Wraps the given reader in an uninverting reader and then in an
   * {@code ExitableDirectoryReader}, passing the instance returned by
   * {@code SolrQueryTimeoutImpl.getInstance()} to the latter.
   *
   * @param core   core whose latest schema determines which fields are uninverted
   * @param reader reader to wrap; asserted to be non-null
   * @return the wrapped reader
   */
  private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
    assert reader != null;
    return ExitableDirectoryReader.wrap(
-        UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
+        wrapUninvertingReaderPerSegment(core, reader),
        SolrQueryTimeoutImpl.getInstance());
  }
  /**
   * Wraps {@code reader} so that the uninversion map is computed separately for every segment.
   * <p>
   * If docvalues are enabled or disabled for a field after data has already been indexed, only some
   * segments carry docvalues for that field. Uninverting on the top level reader then causes an
   * IllegalStateException to be thrown when such a field is used, because
   * {@link IndexSchema#getUninversionMap(IndexReader)} puts a field into the uninverting map only
   * if <em>no</em> segment in the index contains docvalues for that field.
   * <p>
   * With a per-segment uninverting map, docvalues are used from the segments that have them and
   * uninversion of the field is performed on the rest of the segments.
   *
   * @param core   core whose latest schema is consulted each time a segment is mapped
   * @param reader directory reader to wrap
   * @return the wrapped directory reader
   */
  private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader)
      throws IOException {
    return UninvertingReader.wrap(
        reader,
        segmentReader -> core.getLatestSchema().getUninversionMap(segmentReader));
  }
  /**
   * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
   * into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
 import java.util.function.Function;
 import org.apache.lucene.document.BinaryDocValuesField; // javadocs
 import org.apache.lucene.document.NumericDocValuesField; // javadocs
@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
  }
  /**
   * Wraps a provided DirectoryReader, obtaining the uninversion mapping for each segment from
   * {@code perSegmentMapper}.
   * <p>
   * Note that for convenience, the returned reader can be used normally (e.g. passed to
   * {@link DirectoryReader#openIfChanged(DirectoryReader)}) and so on.
   *
   * @param in input directory reader
   * @param perSegmentMapper function to map a segment reader to a mapping of fields to their
   *                         uninversion type; it is applied to every segment reader that gets wrapped
   * @return a wrapped directory reader
   */
  public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
    final DirectoryReader uninverting = new UninvertingDirectoryReader(in, perSegmentMapper);
    return uninverting;
  }
  /**
   * Wraps a provided DirectoryReader, using the same field-to-uninversion-type mapping for
   * every segment.
   *
   * @param in input directory reader
   * @param mapping mapping of fields to their uninversion type
   * @return a wrapped directory reader
   */
  public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
-    return new UninvertingDirectoryReader(in, mapping);
+    return UninvertingReader.wrap(in, (r) -> mapping);
  }
  static class UninvertingDirectoryReader extends FilterDirectoryReader {
-    final Map<String,Type> mapping;
+    final Function<LeafReader, Map<String,Type>> mapper;
-    public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
+    public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
      super(in, new FilterDirectoryReader.SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
-          return new UninvertingReader(reader, mapping);
+          return new UninvertingReader(reader, mapper.apply(reader));
        }
      });
-      this.mapping = mapping;
+      this.mapper = mapper;
    }
    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
-      return new UninvertingDirectoryReader(in, mapping);
+      return new UninvertingDirectoryReader(in, mapper);
    }
    // NOTE: delegating the cache helpers is wrong since this wrapper alters the
@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
  /** 
   * Create a new UninvertingReader with the specified mapping 
   * <p>
-   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
+   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
   * instead.
   *  
   * @lucene.internal
--- a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
@ -25,10 +25,14 @@ import java.util.List;
 import java.util.function.Function;
 import java.util.function.Supplier;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
    }
  }
  /**
   * Indexes documents before and after docvalues are switched on for a field, checks that the
   * index then holds a mix of segments with and without docvalues for it, and finally verifies
   * that sorting on the field returns the documents in the expected order.
   */
  public void testHalfAndHalfDocValues() throws Exception {
    final String fieldname = "string_add_dv_later";

    // Two documents are indexed (and committed one by one) while the field has no docvalues
    assertU(adoc("id", "3", fieldname, "c"));
    assertU(commit());
    assertU(adoc("id", "1", fieldname, "a"));
    assertU(commit());

    try (SolrCore core = h.getCoreInc()) {
      assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());

      // Replace the field definition in the schema with a copy that has docvalues enabled
      final IndexSchema liveSchema = core.getLatestSchema();
      final SchemaField withoutDocValues = liveSchema.getField(fieldname);
      final int propertiesWithDocValues = withoutDocValues.getProperties() | SchemaField.DOC_VALUES;
      final SchemaField withDocValues =
          new SchemaField(fieldname, withoutDocValues.getType(), propertiesWithDocValues, null);
      liveSchema.getFields().put(fieldname, withDocValues);

      // This document is indexed after the change and is expected to carry docvalues
      assertU(adoc("id", "2", fieldname, "b"));
      assertU(commit());

      // Inspect the raw reader: segments with and without docvalues must coexist
      final RefCounted<SolrIndexSearcher> searcherHolder = core.openNewSearcher(true, true);
      final SolrIndexSearcher newSearcher = searcherHolder.get();
      try {
        final DirectoryReader rawReader = newSearcher.getRawReader();

        // Three docs in three leaves means nothing was merged
        assertEquals(3, rawReader.numDocs());
        assertEquals(3, rawReader.leaves().size());

        // Merged across all segments the field reports docvalues, since one doc was added with them
        final FieldInfos mergedInfos = MultiFields.getMergedFieldInfos(rawReader);
        assertEquals(DocValuesType.SORTED, mergedInfos.fieldInfo(fieldname).getDocValuesType());

        for (LeafReaderContext leaf : rawReader.leaves()) {
          final LeafReader segment = leaf.reader();

          // One doc per segment, again confirming that no merge happened
          assertEquals(1, segment.numDocs());

          final Document storedDoc = segment.document(0);
          final String docId = storedDoc.getField("id").stringValue();

          // Docs 1 and 3 were indexed before docvalues were enabled
          final DocValuesType expectedType =
              (docId.equals("1") || docId.equals("3")) ? DocValuesType.NONE : DocValuesType.SORTED;
          assertEquals(expectedType, segment.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
        }
      } finally {
        searcherHolder.decref();
      }
    }

    // Sorting on the mixed field must still produce the correct order
    assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
        "//*[@numFound='3']",
        "//result/doc[1]/int[@name='id'][.=1]",
        "//result/doc[2]/int[@name='id'][.=2]",
        "//result/doc[3]/int[@name='id'][.=3]"
    );
  }
  private void tstToObj(SchemaField sf, Object o) {
    List<IndexableField> fields = sf.createFields(o);
    for (IndexableField field : fields) {