diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9cb7fef23ea..59d4aafb8dd 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -145,6 +145,9 @@ New Features * SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode. (Keith Laban, Dennis Gove) +* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter. + Turn off with hl.highlightAlternate=false (janhoy, David Smiley) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 821af5c9a05..08ae03769fe 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.lang.invoke.MethodHandles; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; @@ -61,6 +60,7 @@ import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; import org.apache.lucene.util.AttributeSource.State; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.HighlightParams; +import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; @@ -389,8 +389,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf preFetchFieldNames.add(keyField.getName()); } - FastVectorHighlighter fvh = null; // lazy - FieldQuery fvhFieldQuery = null; // lazy + FvhContainer fvhContainer = new FvhContainer(); // Lazy container for fvh and fieldQuery IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getLeafReader()); // SOLR-5855 @@ -408,30 +407,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf SchemaField schemaField = schema.getFieldOrNull(fieldName); Object fieldHighlights; // object type allows flexibility for subclassers - if (schemaField == null) { - fieldHighlights = null; - } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { - // TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)! - fieldHighlights = null; - } else if (useFastVectorHighlighter(params, schemaField)) { - if (fvhFieldQuery == null) { - fvh = new FastVectorHighlighter( - // FVH cannot process hl.usePhraseHighlighter parameter per-field basis - params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), - // FVH cannot process hl.requireFieldMatch parameter per-field basis - params.getBool(HighlightParams.FIELD_MATCH, false)); - fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); - fvhFieldQuery = fvh.getFieldQuery(query, reader); - } - fieldHighlights = - doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvh, fvhFieldQuery, reader, req); - } else { // standard/default highlighter - fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); - } + fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params); if (fieldHighlights == null) { - // no summaries made; copy text from alternate field - fieldHighlights = alternateField(doc, fieldName, req); + fieldHighlights = alternateField(doc, docId, fieldName, fvhContainer, query, reader, req); } if (fieldHighlights != null) { @@ -443,6 +422,34 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf return fragments; } + private Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField, + FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req, + SolrParams params) throws IOException { + Object fieldHighlights; + if (schemaField == null) { + fieldHighlights = null; + } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) { + // TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)! + fieldHighlights = null; + } else if (useFastVectorHighlighter(params, schemaField)) { + if (fvhContainer.fieldQuery == null) { + FastVectorHighlighter fvh = new FastVectorHighlighter( + // FVH cannot process hl.usePhraseHighlighter parameter per-field basis + params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true), + // FVH cannot process hl.requireFieldMatch parameter per-field basis + params.getBool(HighlightParams.FIELD_MATCH, false)); + fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT)); + fvhContainer.fvh = fvh; + fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader); + } + fieldHighlights = + doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req); + } else { // standard/default highlighter + fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req); + } + return fieldHighlights; + } + /** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}. * Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid * the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */ @@ -469,14 +476,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId, - SchemaField schemaField, FastVectorHighlighter highlighter, - FieldQuery fieldQuery, + SchemaField schemaField, FvhContainer fvhContainer, IndexReader reader, SolrQueryRequest req) throws IOException { SolrParams params = req.getParams(); String fieldName = schemaField.getName(); SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params); - String[] snippets = highlighter.getBestFragments( fieldQuery, reader, docId, fieldName, + String[] snippets = fvhContainer.fvh.getBestFragments( fvhContainer.fieldQuery, reader, docId, fieldName, params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ), params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ), getFragListBuilder( fieldName, params ), @@ -497,12 +503,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf final String fieldName = schemaField.getName(); final int mvToExamine = - req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, + params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE, (schemaField.multiValued()) ? Integer.MAX_VALUE : 1); // Technically this is the max *fragments* (snippets), not max values: int mvToMatch = - req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE); + params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE); if (mvToExamine <= 0 || mvToMatch <= 0) { return null; } @@ -557,7 +563,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf } Highlighter highlighter; - if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) { + if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) { // We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream // needs to implement reset() efficiently. @@ -662,12 +668,38 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf /** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */ @SuppressWarnings("unchecked") - protected Object alternateField(Document doc, String fieldName, SolrQueryRequest req) { + protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query, + IndexReader reader, SolrQueryRequest req) throws IOException { + IndexSchema schema = req.getSearcher().getSchema(); SolrParams params = req.getParams(); String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); + int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0); if (alternateField == null || alternateField.length() == 0) { return null; } + + if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true)) { + // Try to highlight alternate field + Object fieldHighlights = null; + SchemaField schemaField = schema.getFieldOrNull(alternateField); + if (schemaField != null) { + HashMap invariants = new HashMap<>(); + invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1"); + // Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations + invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE, + alternateFieldLen > 0 ? String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE)); + SolrParams origParams = req.getParams(); + req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams)); + fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params); + req.setParams(origParams); + if (fieldHighlights != null) { + return fieldHighlights; + } + } + } + + + // Fallback to static non-highlighted IndexableField[] docFields = doc.getFields(alternateField); if (docFields.length == 0) { // The alternate field did not exist, treat the original field as fallback instead @@ -685,7 +717,6 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf String[] altTexts = listFields.toArray(new String[listFields.size()]); Encoder encoder = getEncoder(fieldName, params); - int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0); List altList = new ArrayList<>(); int len = 0; for( String altText: altTexts ){ @@ -707,6 +738,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf final TokenStream tStream = schemaField.getType().getIndexAnalyzer().tokenStream(schemaField.getName(), docText); return new TokenOrderingFilter(tStream, 10); } + + // Wraps FVH to allow pass-by-reference + private class FvhContainer { + private FastVectorHighlighter fvh; + private FieldQuery fieldQuery; + } } /** Orders Tokens in a window first by their startOffset ascending. diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index 1a432db20f5..2cc74abe6fc 100644 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -703,7 +703,87 @@ public class HighlighterTest extends SolrTestCaseJ4 { "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']" ); } - + + @Test + public void testAlternateSummaryWithHighlighting() { + //long document + assertU(adoc("tv_text", "keyword is only here, tv_text alternate field", + "t_text", "a piece of text to be substituted", + "other_t", "keyword", + "id", "1", + "foo_t","hi")); + assertU(commit()); + assertU(optimize()); + + // Prove that hl.highlightAlternate is default true and respects maxAlternateFieldLength + HashMap args = new HashMap<>(); + args.put("hl", "true"); + args.put("hl.fragsize","0"); + args.put("hl.fl", "t_text"); + args.put("hl.simple.pre", ""); + args.put("hl.simple.post", ""); + args.put("hl.alternateField", "tv_text"); + args.put("hl.maxAlternateFieldLength", "39"); + TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + assertQ("Alternate summarization with highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text']" + ); + + // Query on other field than hl or alternate. Still we get the hightlighted snippet from alternate + assertQ("Alternate summarization with highlighting, query other field", + sumLRF.makeRequest("other_t:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text']" + ); + + // With hl.requireFieldMatch, will not highlight but fall back to plain-text alternate + args.put("hl.requireFieldMatch", "true"); + sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + assertQ("Alternate summarization with highlighting, requireFieldMatch", + sumLRF.makeRequest("other_t:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate']" + ); + args.put("hl.requireFieldMatch", "false"); + + + // Works with field specific params, overriding maxAlternateFieldLength to return everything + args.remove("hl.alternateField"); + args.put("f.t_text.hl.alternateField", "tv_text"); + args.put("f.t_text.hl.maxAlternateFieldLength", "0"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization with highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate field']" + ); + + // Prove fallback highlighting works also with FVH + args.put("hl.useFastVectorHighlighter", "true"); + args.put("hl.tag.pre", ""); + args.put("hl.tag.post", ""); + args.put("f.t_text.hl.maxAlternateFieldLength", "18"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization with highlighting using FVH", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here']" + ); + + // Prove it is possible to turn off highlighting of alternate field + args.put("hl.highlightAlternate", "false"); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("Alternate summarization without highlighting", + sumLRF.makeRequest("tv_text:keyword"), + "//lst[@name='highlighting']/lst[@name='1' and count(*)=1]", + "//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only he']" + ); + } + @Test public void testPhraseHighlighter() { HashMap args = new HashMap<>(); diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java index 21528a94caa..c0d40aaf83e 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java @@ -46,6 +46,7 @@ public interface HighlightParams { public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary"; public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; + public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine"; public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";