SOLR-14194: Highlighters now supports docValues for the uniqueKey

and the original highlighter can highlight docValues.
2020-02-11 02:18:08 -05:00 · 2020-02-11 02:18:08 -05:00 · 9a4f7661e9
parent 71b869381e
commit 9a4f7661e9
10 changed files with 143 additions and 47 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -91,6 +91,9 @@ Improvements

 * SOLR-14245: Validate Replica / ReplicaInfo on creation. (ab)

+* SOLR-14194: Highlighting now works when the uniqueKey field is not stored but has docValues.  And the original
+  highlighter can now highlight text fields from docValues. (Andrzej Wislowski, David Smiley)
+
 Optimizations
 ---------------------

--- a/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermVectorComponent.java
@ -27,7 +27,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;

-import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.PostingsEnum;
@ -272,16 +271,7 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar
      if (keyField != null) {
        // guaranteed to be one and only one since this is uniqueKey!
        SolrDocument solrDoc = docFetcher.solrDoc(docId, srf);
-
-        String uKey = null;
-        Object val = solrDoc.getFieldValue(uniqFieldName);
-        if (val != null) {
-          if (val instanceof StoredField) {
-            uKey = ((StoredField) val).stringValue();
-          } else {
-            uKey = val.toString();
-          }
-        }
+        String uKey = schema.printableUniqueKey(solrDoc);
        assert null != uKey;
        docNL.add("uniqueKey", uKey);
        termVectors.add(uKey, docNL);
--- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
@ -34,7 +34,6 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
@ -63,6 +62,7 @@ import org.apache.lucene.search.vectorhighlight.FieldQuery;
 import org.apache.lucene.search.vectorhighlight.FragListBuilder;
 import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
 import org.apache.lucene.util.AttributeSource.State;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.MapSolrParams;
@ -73,11 +73,13 @@ import org.apache.solr.core.PluginInfo;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.component.HighlightComponent;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrReturnFields;
 import org.apache.solr.util.plugin.PluginInfoInitialized;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@ -445,10 +447,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
    String[] fieldNames = getHighlightFields(query, req, defaultFields);

    Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
+    SolrReturnFields returnFields;
    if (preFetchFieldNames != null) {
      preFetchFieldNames.add(keyField.getName());
+      returnFields = new SolrReturnFields(preFetchFieldNames.toArray(new String[0]), req);
+    } else {
+      returnFields = new SolrReturnFields(new String[0], req);
    }
-
    FvhContainer fvhContainer = new FvhContainer(null, null); // Lazy container for fvh and fieldQuery

    IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getSlowAtomicReader()); // SOLR-5855
@ -458,7 +463,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
      int docId = iterator.nextDoc();
-      Document doc = searcher.doc(docId, preFetchFieldNames);
+      SolrDocument doc = searcher.getDocFetcher().solrDoc(docId, returnFields);

      @SuppressWarnings("rawtypes")
      NamedList docHighlights = new SimpleOrderedMap();
@ -482,9 +487,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
    return fragments;
  }

-  protected Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
-                                       FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
-                                       SolrParams params) throws IOException {
+  protected Object doHighlightingOfField(SolrDocument doc, int docId, SchemaField schemaField,
+                                         FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
+                                         SolrParams params) throws IOException {
    Object fieldHighlights;
    if (schemaField == null) {
      fieldHighlights = null;
@ -527,12 +532,20 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
    return fieldHighlights;
  }

-  /** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
+  /**
+   * Returns the field names to be passed to {@link org.apache.solr.search.SolrDocumentFetcher#solrDoc(int, SolrReturnFields)}.
   * Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
-   * the possibility of extra trips to disk.  The uniqueKey will be added after if the result isn't null. */
+   * the possibility of extra trips to disk.  The uniqueKey will be added after if the result isn't null.
+   */
  protected Set<String> getDocPrefetchFieldNames(String[] hlFieldNames, SolrQueryRequest req) {
    Set<String> preFetchFieldNames = new HashSet<>(hlFieldNames.length + 1);//+1 for uniqueyKey added after
    Collections.addAll(preFetchFieldNames, hlFieldNames);
+    for (String hlFieldName : hlFieldNames) {
+      String alternateField = req.getParams().getFieldParam(hlFieldName, HighlightParams.ALTERNATE_FIELD);
+      if (alternateField != null) {
+        preFetchFieldNames.add(alternateField);
+      }
+    }
    return preFetchFieldNames;
  }

@ -555,7 +568,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf

  /** Highlights and returns the highlight object for this field -- a String[] by default.  Null if none. */
  @SuppressWarnings("unchecked")
-  protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
+  protected Object doHighlightingByFastVectorHighlighter(SolrDocument doc, int docId,
                                                         SchemaField schemaField, FvhContainer fvhContainer,
                                                         IndexReader reader, SolrQueryRequest req) throws IOException {
    SolrParams params = req.getParams();
@ -577,7 +590,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf

  /** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
  @SuppressWarnings("unchecked")
-  protected Object doHighlightingByHighlighter(Document doc, int docId, SchemaField schemaField, Query query,
+  protected Object doHighlightingByHighlighter(SolrDocument doc, int docId, SchemaField schemaField, Query query,
                                               IndexReader reader, SolrQueryRequest req) throws IOException {
    final SolrParams params = req.getParams();
    final String fieldName = schemaField.getName();
@ -709,18 +722,25 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
  /** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased
   * field name, then a subclass could override to implement that.
   */
-  protected List<String> getFieldValues(Document doc, String fieldName, int maxValues, int maxCharsToAnalyze,
+  protected List<String> getFieldValues(SolrDocument doc, String fieldName, int maxValues, int maxCharsToAnalyze,
                                        SolrQueryRequest req) {
    // Collect the Fields we will examine (could be more than one if multi-valued)
+    Collection<Object> fieldValues = doc.getFieldValues(fieldName);
+    if (fieldValues == null) {
+      return Collections.emptyList();
+    }
+    FieldType fieldType = req.getSchema().getFieldType(fieldName);
    List<String> result = new ArrayList<>();
-    for (IndexableField thisField : doc.getFields()) {
-      if (! thisField.name().equals(fieldName)) {
-        continue;
+    for (Object value : fieldValues) {
+      String strValue;
+      if (value instanceof IndexableField) {
+        strValue = fieldType.toExternal((IndexableField)value);
+      } else {
+        strValue = value.toString(); // TODO FieldType needs an API for this, e.g. toExternalFromDv()
      }
-      String value = thisField.stringValue();
-      result.add(value);
+      result.add(strValue);

-      maxCharsToAnalyze -= value.length();//we exit early if we'll never get to analyze the value
+      maxCharsToAnalyze -= strValue.length();//we exit early if we'll never get to analyze the value
      maxValues--;
      if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
        break;
@ -743,7 +763,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf

  /** Returns the alternate highlight object for this field -- a String[] by default.  Null if none. */
  @SuppressWarnings("unchecked")
-  protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
+  protected Object alternateField(SolrDocument doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
                                  IndexReader reader, SolrQueryRequest req) throws IOException {
    IndexSchema schema = req.getSearcher().getSchema();
    SolrParams params = req.getParams();
@ -775,20 +795,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf


    // Fallback to static non-highlighted
-    IndexableField[] docFields = doc.getFields(alternateField);
-    if (docFields.length == 0) {
+    List<String> listFields = getFieldValues(doc, alternateField, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
+    if (listFields.isEmpty()) {
      // The alternate field did not exist, treat the original field as fallback instead
-      docFields = doc.getFields(fieldName);
-    }
-    List<String> listFields = new ArrayList<>();
-    for (IndexableField field : docFields) {
-      if (field.binaryValue() == null)
-        listFields.add(field.stringValue());
+      listFields = getFieldValues(doc, fieldName, Integer.MAX_VALUE, Integer.MAX_VALUE, req);
+      if (listFields.isEmpty()) {
+        return null;
+      }
    }

-    if (listFields.isEmpty()) {
-      return null;
-    }
    String[] altTexts = listFields.toArray(new String[listFields.size()]);

    Encoder encoder = getEncoder(fieldName, params);
--- a/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
+++ b/solr/core/src/java/org/apache/solr/highlight/UnifiedSolrHighlighter.java
@ -18,7 +18,6 @@ package org.apache.solr.highlight;

 import java.io.IOException;
 import java.text.BreakIterator;
-import java.util.Collections;
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Locale;
@ -26,7 +25,6 @@ import java.util.Map;
 import java.util.Set;
 import java.util.function.Predicate;

-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Query;
@ -37,6 +35,7 @@ import org.apache.lucene.search.uhighlight.PassageFormatter;
 import org.apache.lucene.search.uhighlight.PassageScorer;
 import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
 import org.apache.lucene.search.uhighlight.WholeBreakIterator;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.SolrParams;
@ -49,6 +48,7 @@ import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.search.SolrReturnFields;
 import org.apache.solr.util.RTimerTree;
 import org.apache.solr.util.plugin.PluginInfoInitialized;

@ -210,13 +210,12 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
    IndexSchema schema = searcher.getSchema();
    SchemaField keyField = schema.getUniqueKeyField();
    if (keyField != null) {
-      Set<String> selector = Collections.singleton(keyField.getName());
+      SolrReturnFields returnFields = new SolrReturnFields(keyField.getName(), null);
      String[] uniqueKeys = new String[docIDs.length];
      for (int i = 0; i < docIDs.length; i++) {
        int docid = docIDs[i];
-        Document doc = searcher.doc(docid, selector);
-        String id = schema.printableUniqueKey(doc);
-        uniqueKeys[i] = id;
+        SolrDocument solrDoc = searcher.getDocFetcher().solrDoc(docid, returnFields);
+        uniqueKeys[i] = schema.printableUniqueKey(solrDoc);
      }
      return uniqueKeys;
    } else {
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@ -50,6 +50,7 @@ import org.apache.lucene.queries.payloads.PayloadDecoder;
 import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.util.Version;
 import org.apache.solr.common.MapSerializable;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.CommonParams;
@ -339,6 +340,18 @@ public class IndexSchema {
    return f==null ? null : uniqueKeyFieldType.toExternal(f);
  }

+  /** Like {@link #printableUniqueKey(org.apache.lucene.document.Document)} */
+  public String printableUniqueKey(SolrDocument solrDoc) {
+    Object val = solrDoc.getFieldValue(uniqueKeyFieldName);
+    if (val == null) {
+      return null;
+    } else if (val instanceof IndexableField) {
+      return uniqueKeyFieldType.toExternal((IndexableField) val);
+    } else {
+      return val.toString();
+    }
+  }
+
  private SchemaField getIndexedField(String fname) {
    SchemaField f = getFields().get(fname);
    if (f==null) {
--- a/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-unifiedhighlight.xml
@ -36,7 +36,7 @@
    </analyzer>
  </fieldType>

-  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+  <field name="id" type="string" indexed="true" stored="${solr.tests.id.stored:true}" multiValued="false" docValues="${solr.tests.id.docValues:false}" required="false"/>
  <field name="text" type="text_offsets" indexed="true" stored="true"/>
  <field name="text2" type="text" indexed="true" stored="true"/>
  <field name="text3" type="text_offsets" indexed="true" stored="true"         large="true"/>
--- a/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java
+++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterWithoutStoredIdTest.java
@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+public class HighlighterWithoutStoredIdTest extends HighlighterTest {
+
+  @BeforeClass
+  public static void beforeClassProps() {
+    System.setProperty("solr.tests.id.stored", "false");
+    System.setProperty("solr.tests.id.docValues", "true");
+  }
+
+  @AfterClass
+  public static void afterClassProps() {
+    System.clearProperty("solr.tests.id.stored");
+    System.clearProperty("solr.tests.id.docValues");
+  }
+
+}
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighter.java
@ -45,6 +45,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
    System.clearProperty("filterCache.enabled");
    System.clearProperty("queryResultCache.enabled");
    System.clearProperty("documentCache.enabled");
+    System.clearProperty("solr.tests.id.stored");
+    System.clearProperty("solr.tests.id.docValues");
  }
  
  @Override
--- a/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java
+++ b/solr/core/src/test/org/apache/solr/highlight/TestUnifiedSolrHighlighterWithoutStoredId.java
@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.highlight;
+
+
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+/** Tests for the UnifiedHighlighter Solr plugin **/
+public class TestUnifiedSolrHighlighterWithoutStoredId extends TestUnifiedSolrHighlighter {
+
+  @BeforeClass
+  public static void beforeClassProps() {
+    System.setProperty("solr.tests.id.stored", "false");
+    System.setProperty("solr.tests.id.docValues", "true");
+  }
+
+  @AfterClass
+  public static void afterClassProps() {
+    System.clearProperty("solr.tests.id.stored");
+    System.clearProperty("solr.tests.id.docValues");
+  }
+}
--- a/solr/solr-ref-guide/src/highlighting.adoc
+++ b/solr/solr-ref-guide/src/highlighting.adoc
@ -173,6 +173,7 @@ The Original Highlighter, sometimes called the "Standard Highlighter" or "Defaul
 Its query accuracy is good enough for most needs, although it's not quite as good/perfect as the Unified Highlighter.
 +
 The Original Highlighter will normally analyze stored text on the fly in order to highlight. It will use full term vectors if available.
+If the text isn't "stored" but is in doc values (`docValues="true"`), this highlighter can work with it.
 +
 Where this highlighter falls short is performance; it's often twice as slow as the Unified Highlighter. And despite being the most customizable, it doesn't have a BreakIterator based fragmenter (all the others do), which could pose a challenge for some languages.