SOLR-8583: Apply highlighting to hl.alternateField by default.

This commit is contained in:
Jan Høydahl 2016-05-29 00:34:32 +02:00
parent 6e2aed3081
commit e37e49ed46
4 changed files with 155 additions and 34 deletions

View File

@ -145,6 +145,9 @@ New Features
* SOLR-8988: Adds query option facet.distrib.mco which when set to true allows the use of facet.mincount=1 in cloud mode.
(Keith Laban, Dennis Gove)
* SOLR-8583: Apply highlighting to hl.alternateField by default for Default and FastVectorHighlighter.
Turn off with hl.highlightAlternate=false (janhoy, David Smiley)
Bug Fixes
----------------------

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
@ -61,6 +60,7 @@ import org.apache.lucene.search.vectorhighlight.FragmentsBuilder;
import org.apache.lucene.util.AttributeSource.State;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
@ -389,8 +389,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
preFetchFieldNames.add(keyField.getName());
}
FastVectorHighlighter fvh = null; // lazy
FieldQuery fvhFieldQuery = null; // lazy
FvhContainer fvhContainer = new FvhContainer(); // Lazy container for fvh and fieldQuery
IndexReader reader = new TermVectorReusingLeafReader(req.getSearcher().getLeafReader()); // SOLR-5855
@ -408,30 +407,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
SchemaField schemaField = schema.getFieldOrNull(fieldName);
Object fieldHighlights; // object type allows flexibility for subclassers
if (schemaField == null) {
fieldHighlights = null;
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
fieldHighlights = null;
} else if (useFastVectorHighlighter(params, schemaField)) {
if (fvhFieldQuery == null) {
fvh = new FastVectorHighlighter(
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
// FVH cannot process hl.requireFieldMatch parameter per-field basis
params.getBool(HighlightParams.FIELD_MATCH, false));
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
fvhFieldQuery = fvh.getFieldQuery(query, reader);
}
fieldHighlights =
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvh, fvhFieldQuery, reader, req);
} else { // standard/default highlighter
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
}
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
if (fieldHighlights == null) {
// no summaries made; copy text from alternate field
fieldHighlights = alternateField(doc, fieldName, req);
fieldHighlights = alternateField(doc, docId, fieldName, fvhContainer, query, reader, req);
}
if (fieldHighlights != null) {
@ -443,6 +422,34 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
return fragments;
}
private Object doHighlightingOfField(Document doc, int docId, SchemaField schemaField,
FvhContainer fvhContainer, Query query, IndexReader reader, SolrQueryRequest req,
SolrParams params) throws IOException {
Object fieldHighlights;
if (schemaField == null) {
fieldHighlights = null;
} else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
fieldHighlights = null;
} else if (useFastVectorHighlighter(params, schemaField)) {
if (fvhContainer.fieldQuery == null) {
FastVectorHighlighter fvh = new FastVectorHighlighter(
// FVH cannot process hl.usePhraseHighlighter parameter per-field basis
params.getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
// FVH cannot process hl.requireFieldMatch parameter per-field basis
params.getBool(HighlightParams.FIELD_MATCH, false));
fvh.setPhraseLimit(params.getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
fvhContainer.fvh = fvh;
fvhContainer.fieldQuery = fvh.getFieldQuery(query, reader);
}
fieldHighlights =
doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req);
} else { // standard/default highlighter
fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
}
return fieldHighlights;
}
/** Returns the field names to be passed to {@link SolrIndexSearcher#doc(int, Set)}.
* Subclasses might over-ride to include fields in search-results and other stored field values needed so as to avoid
* the possibility of extra trips to disk. The uniqueKey will be added after if the result isn't null. */
@ -469,14 +476,13 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Highlights and returns the highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object doHighlightingByFastVectorHighlighter(Document doc, int docId,
SchemaField schemaField, FastVectorHighlighter highlighter,
FieldQuery fieldQuery,
SchemaField schemaField, FvhContainer fvhContainer,
IndexReader reader, SolrQueryRequest req) throws IOException {
SolrParams params = req.getParams();
String fieldName = schemaField.getName();
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
String[] snippets = highlighter.getBestFragments( fieldQuery, reader, docId, fieldName,
String[] snippets = fvhContainer.fvh.getBestFragments( fvhContainer.fieldQuery, reader, docId, fieldName,
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
getFragListBuilder( fieldName, params ),
@ -497,12 +503,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
final String fieldName = schemaField.getName();
final int mvToExamine =
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
(schemaField.multiValued()) ? Integer.MAX_VALUE : 1);
// Technically this is the max *fragments* (snippets), not max values:
int mvToMatch =
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
params.getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
if (mvToExamine <= 0 || mvToMatch <= 0) {
return null;
}
@ -557,7 +563,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
}
Highlighter highlighter;
if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
if (params.getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
// We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream
// needs to implement reset() efficiently.
@ -662,12 +668,38 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
/** Returns the alternate highlight object for this field -- a String[] by default. Null if none. */
@SuppressWarnings("unchecked")
protected Object alternateField(Document doc, String fieldName, SolrQueryRequest req) {
protected Object alternateField(Document doc, int docId, String fieldName, FvhContainer fvhContainer, Query query,
IndexReader reader, SolrQueryRequest req) throws IOException {
IndexSchema schema = req.getSearcher().getSchema();
SolrParams params = req.getParams();
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
if (alternateField == null || alternateField.length() == 0) {
return null;
}
if (params.getFieldBool(fieldName, HighlightParams.HIGHLIGHT_ALTERNATE, true)) {
// Try to highlight alternate field
Object fieldHighlights = null;
SchemaField schemaField = schema.getFieldOrNull(alternateField);
if (schemaField != null) {
HashMap<String, String> invariants = new HashMap<>();
invariants.put("f." + alternateField + "." + HighlightParams.SNIPPETS, "1");
// Enforce maxAlternateFieldLength by FRAGSIZE. Minimum 18 due to FVH limitations
invariants.put("f." + alternateField + "." + HighlightParams.FRAGSIZE,
alternateFieldLen > 0 ? String.valueOf(Math.max(18, alternateFieldLen)) : String.valueOf(Integer.MAX_VALUE));
SolrParams origParams = req.getParams();
req.setParams(SolrParams.wrapDefaults(new MapSolrParams(invariants), origParams));
fieldHighlights = doHighlightingOfField(doc, docId, schemaField, fvhContainer, query, reader, req, params);
req.setParams(origParams);
if (fieldHighlights != null) {
return fieldHighlights;
}
}
}
// Fallback to static non-highlighted
IndexableField[] docFields = doc.getFields(alternateField);
if (docFields.length == 0) {
// The alternate field did not exist, treat the original field as fallback instead
@ -685,7 +717,6 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
String[] altTexts = listFields.toArray(new String[listFields.size()]);
Encoder encoder = getEncoder(fieldName, params);
int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH, 0);
List<String> altList = new ArrayList<>();
int len = 0;
for( String altText: altTexts ){
@ -707,6 +738,12 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
final TokenStream tStream = schemaField.getType().getIndexAnalyzer().tokenStream(schemaField.getName(), docText);
return new TokenOrderingFilter(tStream, 10);
}
// Wraps FVH to allow pass-by-reference
private class FvhContainer {
private FastVectorHighlighter fvh;
private FieldQuery fieldQuery;
}
}
/** Orders Tokens in a window first by their startOffset ascending.

View File

@ -703,7 +703,87 @@ public class HighlighterTest extends SolrTestCaseJ4 {
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='a piece of text']"
);
}
@Test
public void testAlternateSummaryWithHighlighting() {
//long document
assertU(adoc("tv_text", "keyword is only here, tv_text alternate field",
"t_text", "a piece of text to be substituted",
"other_t", "keyword",
"id", "1",
"foo_t","hi"));
assertU(commit());
assertU(optimize());
// Prove that hl.highlightAlternate is default true and respects maxAlternateFieldLength
HashMap<String,String> args = new HashMap<>();
args.put("hl", "true");
args.put("hl.fragsize","0");
args.put("hl.fl", "t_text");
args.put("hl.simple.pre", "<simplepre>");
args.put("hl.simple.post", "</simplepost>");
args.put("hl.alternateField", "tv_text");
args.put("hl.maxAlternateFieldLength", "39");
TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Alternate summarization with highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
);
// Query on other field than hl or alternate. Still we get the hightlighted snippet from alternate
assertQ("Alternate summarization with highlighting, query other field",
sumLRF.makeRequest("other_t:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text']"
);
// With hl.requireFieldMatch, will not highlight but fall back to plain-text alternate
args.put("hl.requireFieldMatch", "true");
sumLRF = h.getRequestFactory(
"standard", 0, 200, args);
assertQ("Alternate summarization with highlighting, requireFieldMatch",
sumLRF.makeRequest("other_t:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only here, tv_text alternate']"
);
args.put("hl.requireFieldMatch", "false");
// Works with field specific params, overriding maxAlternateFieldLength to return everything
args.remove("hl.alternateField");
args.put("f.t_text.hl.alternateField", "tv_text");
args.put("f.t_text.hl.maxAlternateFieldLength", "0");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization with highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<simplepre>keyword</simplepost> is only here, tv_text alternate field']"
);
// Prove fallback highlighting works also with FVH
args.put("hl.useFastVectorHighlighter", "true");
args.put("hl.tag.pre", "<fvhpre>");
args.put("hl.tag.post", "</fvhpost>");
args.put("f.t_text.hl.maxAlternateFieldLength", "18");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization with highlighting using FVH",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='<fvhpre>keyword</fvhpost> is only here']"
);
// Prove it is possible to turn off highlighting of alternate field
args.put("hl.highlightAlternate", "false");
sumLRF = h.getRequestFactory("standard", 0, 200, args);
assertQ("Alternate summarization without highlighting",
sumLRF.makeRequest("tv_text:keyword"),
"//lst[@name='highlighting']/lst[@name='1' and count(*)=1]",
"//lst[@name='highlighting']/lst[@name='1']/arr[@name='t_text']/str[.='keyword is only he']"
);
}
@Test
public void testPhraseHighlighter() {
HashMap<String,String> args = new HashMap<>();

View File

@ -46,6 +46,7 @@ public interface HighlightParams {
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate";
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
public static final String MAX_MULTIVALUED_TO_MATCH = HIGHLIGHT + ".maxMultiValuedToMatch";