SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment to avoid such exceptions

Squashed commit of the following: commit c38f4cabc2828ee83b53b931dd829e29a3e1701c Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:17:05 2017 -0400 reverted tests to using old wrap interface commit 806f33e092491cc6a2ee292d2934c76171e40dc7 Author: Keith Laban <kelaban17@gmail.com> Date: Tue Apr 11 17:13:34 2017 -0400 updated UninvertingReader.wrap / tests commit b10bcab338b362b909491fea1cf13de66f5f17c0 Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Apr 5 14:57:28 2017 -0400 SOLR-10047 - Updated javadoc/renamed class/added getReaderCacheHelper commit 90ecf5a4ae4feaf3efc42a1ed8643ad21e1c73ce Author: Keith Laban <klaban1@bloomberg.net> Date: Wed Jan 18 16:39:51 2017 -0500 SOLR-10047 - SolrIndexSearcher, UninvertingReader, uninvert docvalues per segment
2017-04-17 13:59:26 +05:30 · 2017-04-17 13:59:26 +05:30 · 4da901a072
parent 4df4c52c0c
commit 4da901a072
4 changed files with 108 additions and 8 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -202,6 +202,9 @@ Bug Fixes
 * SOLR-10473: Correct LBHttpSolrClient's confusing SolrServerException message when timeAllowed is exceeded.
  (Christine Poerschke)

+* SOLR-10047: Mismatched Docvalues segments cause exception in Sorting/Faceting. Solr now uninverts per segment
+  to avoid such exceptions. (Keith Laban via shalin)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@ -155,10 +155,25 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
  private static DirectoryReader wrapReader(SolrCore core, DirectoryReader reader) throws IOException {
    assert reader != null;
    return ExitableDirectoryReader.wrap(
-        UninvertingReader.wrap(reader, core.getLatestSchema().getUninversionMap(reader)),
+        wrapUninvertingReaderPerSegment(core, reader),
        SolrQueryTimeoutImpl.getInstance());
  }

+  /**
+   * If docvalues are enabled or disabled after data has already been indexed for a field, such that
+   * only some segments have docvalues, uninverting on the top level reader will cause 
+   * IllegalStateException to be thrown when trying to use a field with such mixed data. This is because
+   * the {@link IndexSchema#getUninversionMap(IndexReader)} method decides to put a field 
+   * into the uninverteding map only if *NO* segment in the index contains docvalues for that field.
+   * 
+   * Therefore, this class provides a uninverting map per segment such that for any field, 
+   * DocValues are used from segments if they exist and uninversion of the field is performed on the rest
+   * of the segments.
+   */
+   private static DirectoryReader wrapUninvertingReaderPerSegment(SolrCore core, DirectoryReader reader) throws IOException {
+     return UninvertingReader.wrap(reader, r -> core.getLatestSchema().getUninversionMap(r));
+   }
+
  /**
   * Builds the necessary collector chain (via delegate wrapping) and executes the query against it. This method takes
   * into consideration both the explicitly provided collector and postFilter as well as any needed collector wrappers
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@ -19,6 +19,7 @@ package org.apache.solr.uninverting;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
+import java.util.function.Function;

 import org.apache.lucene.document.BinaryDocValuesField; // javadocs
 import org.apache.lucene.document.NumericDocValuesField; // javadocs
@ -202,30 +203,39 @@ public class UninvertingReader extends FilterLeafReader {
  }
  
  /**
+   * 
   * Wraps a provided DirectoryReader. Note that for convenience, the returned reader
   * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
   * and so on. 
+   * 
+   * @param in input directory reader
+   * @param perSegmentMapper function to map a segment reader to a mapping of fields to their uninversion type
+   * @return a wrapped directory reader
   */
+  public static DirectoryReader wrap(DirectoryReader in, final Function<LeafReader, Map<String,Type>> perSegmentMapper) throws IOException {
+    return new UninvertingDirectoryReader(in, perSegmentMapper);
+  }
+  
  public static DirectoryReader wrap(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
-    return new UninvertingDirectoryReader(in, mapping);
+    return UninvertingReader.wrap(in, (r) -> mapping);
  }
  
  static class UninvertingDirectoryReader extends FilterDirectoryReader {
-    final Map<String,Type> mapping;
+    final Function<LeafReader, Map<String,Type>> mapper;
    
-    public UninvertingDirectoryReader(DirectoryReader in, final Map<String,Type> mapping) throws IOException {
+    public UninvertingDirectoryReader(DirectoryReader in, final Function<LeafReader, Map<String,Type>> mapper) throws IOException {
      super(in, new FilterDirectoryReader.SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
-          return new UninvertingReader(reader, mapping);
+          return new UninvertingReader(reader, mapper.apply(reader));
        }
      });
-      this.mapping = mapping;
+      this.mapper = mapper;
    }

    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
-      return new UninvertingDirectoryReader(in, mapping);
+      return new UninvertingDirectoryReader(in, mapper);
    }

    // NOTE: delegating the cache helpers is wrong since this wrapper alters the
@ -244,7 +254,7 @@ public class UninvertingReader extends FilterLeafReader {
  /** 
   * Create a new UninvertingReader with the specified mapping 
   * <p>
-   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Map)}
+   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)}
   * instead.
   *  
   * @lucene.internal
--- a/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
+++ b/solr/core/src/test/org/apache/solr/schema/DocValuesTest.java
@ -25,10 +25,14 @@ import java.util.List;
 import java.util.function.Function;
 import java.util.function.Supplier;

+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.queries.function.FunctionValues;
@ -151,6 +155,74 @@ public class DocValuesTest extends SolrTestCaseJ4 {
    }
  }

+
+  public void testHalfAndHalfDocValues() throws Exception {
+    // Insert two docs without docvalues
+    String fieldname = "string_add_dv_later";
+    assertU(adoc("id", "3", fieldname, "c"));
+    assertU(commit());
+    assertU(adoc("id", "1", fieldname, "a"));
+    assertU(commit());
+    
+   
+    try (SolrCore core = h.getCoreInc()) {
+        assertFalse(core.getLatestSchema().getField(fieldname).hasDocValues());
+      // Add docvalues to the field type
+      IndexSchema schema = core.getLatestSchema();
+      SchemaField oldField = schema.getField(fieldname);
+      int newProperties = oldField.getProperties() | SchemaField.DOC_VALUES;
+      
+      SchemaField sf = new SchemaField( fieldname, oldField.getType(), newProperties, null);
+      schema.getFields().put( fieldname, sf );
+      
+      // Insert a new doc with docvalues
+      assertU(adoc("id", "2", fieldname, "b"));
+      assertU(commit());
+    
+    
+      // Check there are a mix of segments with and without docvalues
+      final RefCounted<SolrIndexSearcher> searcherRef = core.openNewSearcher(true, true);
+      final SolrIndexSearcher searcher = searcherRef.get();
+      try {
+        final DirectoryReader topReader = searcher.getRawReader();
+
+        //Assert no merges
+        
+        assertEquals(3, topReader.numDocs());
+        assertEquals(3, topReader.leaves().size());
+        
+        final FieldInfos infos = MultiFields.getMergedFieldInfos(topReader);
+        //The global field type should have docValues because a document with dvs was added
+        assertEquals(DocValuesType.SORTED, infos.fieldInfo(fieldname).getDocValuesType());
+        
+        for(LeafReaderContext ctx: topReader.leaves()) {
+          LeafReader r = ctx.reader();
+          //Make sure there were no merges
+          assertEquals(1, r.numDocs());
+          Document doc = r.document(0);
+          String id = doc.getField("id").stringValue();
+          
+          if(id.equals("1") || id.equals("3")) {
+            assertEquals(DocValuesType.NONE, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+          } else {
+            assertEquals(DocValuesType.SORTED, r.getFieldInfos().fieldInfo(fieldname).getDocValuesType());
+          }
+          
+        }
+      } finally {
+        searcherRef.decref();
+      }
+    }
+    
+    // Assert sort order is correct
+    assertQ(req("q", "string_add_dv_later:*", "sort", "string_add_dv_later asc"),
+        "//*[@numFound='3']",
+        "//result/doc[1]/int[@name='id'][.=1]",
+        "//result/doc[2]/int[@name='id'][.=2]",
+        "//result/doc[3]/int[@name='id'][.=3]"
+    );
+  }
+
  private void tstToObj(SchemaField sf, Object o) {
    List<IndexableField> fields = sf.createFields(o);
    for (IndexableField field : fields) {