mirror of https://github.com/apache/lucene.git
SOLR-6692: Made standard highlighter more extensible
Private methods are now protected, removed bad constructor, and refactored out getFieldValues() and getResponseForFragments() for both clarity and sub-class extension utility. Some refactoring of doHighlightingByHighlighter to clarify logic. These changes should have ZERO effect on highlights. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1673183 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4a4d96600a
commit
0a0dcefb95
|
@ -128,6 +128,8 @@ Other Changes
|
||||||
|
|
||||||
* SOLR-7384: Fix spurious failures in FullSolrCloudDistribCmdsTest. (shalin)
|
* SOLR-7384: Fix spurious failures in FullSolrCloudDistribCmdsTest. (shalin)
|
||||||
|
|
||||||
|
* SOLR-6692: The default highlighter is much more extensible. (David Smiley)
|
||||||
|
|
||||||
================== 5.1.0 ==================
|
================== 5.1.0 ==================
|
||||||
|
|
||||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release
|
||||||
|
|
|
@ -16,6 +16,18 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.highlight;
|
package org.apache.solr.highlight;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ListIterator;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.CachingTokenFilter;
|
import org.apache.lucene.analysis.CachingTokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
@ -58,18 +70,6 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.LinkedList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.ListIterator;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
* @since solr 1.3
|
* @since solr 1.3
|
||||||
|
@ -79,11 +79,9 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
|
|
||||||
public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
|
public static Logger log = LoggerFactory.getLogger(DefaultSolrHighlighter.class);
|
||||||
|
|
||||||
private SolrCore solrCore;
|
protected final SolrCore solrCore;
|
||||||
|
|
||||||
public DefaultSolrHighlighter() {
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/** Will be invoked via reflection */
|
||||||
public DefaultSolrHighlighter(SolrCore solrCore) {
|
public DefaultSolrHighlighter(SolrCore solrCore) {
|
||||||
this.solrCore = solrCore;
|
this.solrCore = solrCore;
|
||||||
}
|
}
|
||||||
|
@ -207,7 +205,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
* @param fieldName The name of the field
|
* @param fieldName The name of the field
|
||||||
* @param request The SolrQueryRequest
|
* @param request The SolrQueryRequest
|
||||||
*/
|
*/
|
||||||
private QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) {
|
protected QueryScorer getSpanQueryScorer(Query query, String fieldName, TokenStream tokenStream, SolrQueryRequest request) {
|
||||||
QueryScorer scorer = new QueryScorer(query,
|
QueryScorer scorer = new QueryScorer(query,
|
||||||
request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false) ? fieldName : null);
|
request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false) ? fieldName : null);
|
||||||
scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
|
scorer.setExpandMultiTermQuery(request.getParams().getBool(HighlightParams.HIGHLIGHT_MULTI_TERM, true));
|
||||||
|
@ -222,7 +220,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
* @param fieldName The name of the field
|
* @param fieldName The name of the field
|
||||||
* @param request The SolrQueryRequest
|
* @param request The SolrQueryRequest
|
||||||
*/
|
*/
|
||||||
private Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
|
protected Scorer getQueryScorer(Query query, String fieldName, SolrQueryRequest request) {
|
||||||
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
|
boolean reqFieldMatch = request.getParams().getFieldBool(fieldName, HighlightParams.FIELD_MATCH, false);
|
||||||
if (reqFieldMatch) {
|
if (reqFieldMatch) {
|
||||||
return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
|
return new QueryTermScorer(query, request.getSearcher().getIndexReader(), fieldName);
|
||||||
|
@ -264,11 +262,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
protected Formatter getFormatter(String fieldName, SolrParams params )
|
protected Formatter getFormatter(String fieldName, SolrParams params )
|
||||||
{
|
{
|
||||||
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
|
String str = params.getFieldParam( fieldName, HighlightParams.FORMATTER );
|
||||||
SolrFormatter formatter = formatters.get( str );
|
SolrFormatter formatter = formatters.get(str);
|
||||||
if( formatter == null ) {
|
if( formatter == null ) {
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown formatter: "+str );
|
||||||
}
|
}
|
||||||
return formatter.getFormatter( fieldName, params );
|
return formatter.getFormatter(fieldName, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -286,7 +284,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
if( encoder == null ) {
|
if( encoder == null ) {
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown encoder: "+str );
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown encoder: "+str );
|
||||||
}
|
}
|
||||||
return encoder.getEncoder( fieldName, params );
|
return encoder.getEncoder(fieldName, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -305,33 +303,33 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
if( frag == null ) {
|
if( frag == null ) {
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmenter: "+fmt );
|
||||||
}
|
}
|
||||||
return frag.getFragmenter( fieldName, params );
|
return frag.getFragmenter(fieldName, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
|
protected FragListBuilder getFragListBuilder( String fieldName, SolrParams params ){
|
||||||
String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
|
String flb = params.getFieldParam( fieldName, HighlightParams.FRAG_LIST_BUILDER );
|
||||||
SolrFragListBuilder solrFlb = fragListBuilders.get( flb );
|
SolrFragListBuilder solrFlb = fragListBuilders.get(flb);
|
||||||
if( solrFlb == null ){
|
if( solrFlb == null ){
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + flb );
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragListBuilder: " + flb );
|
||||||
}
|
}
|
||||||
return solrFlb.getFragListBuilder( params );
|
return solrFlb.getFragListBuilder(params);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
|
protected FragmentsBuilder getFragmentsBuilder( String fieldName, SolrParams params ){
|
||||||
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
|
BoundaryScanner bs = getBoundaryScanner(fieldName, params);
|
||||||
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder( params, bs );
|
return getSolrFragmentsBuilder( fieldName, params ).getFragmentsBuilder(params, bs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
|
protected SolrFragmentsBuilder getSolrFragmentsBuilder( String fieldName, SolrParams params ){
|
||||||
String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
|
String fb = params.getFieldParam( fieldName, HighlightParams.FRAGMENTS_BUILDER );
|
||||||
SolrFragmentsBuilder solrFb = fragmentsBuilders.get( fb );
|
SolrFragmentsBuilder solrFb = fragmentsBuilders.get(fb);
|
||||||
if( solrFb == null ){
|
if( solrFb == null ){
|
||||||
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
|
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown fragmentsBuilder: " + fb );
|
||||||
}
|
}
|
||||||
return solrFb;
|
return solrFb;
|
||||||
}
|
}
|
||||||
|
|
||||||
private BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
|
protected BoundaryScanner getBoundaryScanner(String fieldName, SolrParams params){
|
||||||
String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
|
String bs = params.getFieldParam(fieldName, HighlightParams.BOUNDARY_SCANNER);
|
||||||
SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
|
SolrBoundaryScanner solrBs = boundaryScanners.get(bs);
|
||||||
if(solrBs == null){
|
if(solrBs == null){
|
||||||
|
@ -406,10 +404,10 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
* If fieldName is undefined, this method returns false, then
|
* If fieldName is undefined, this method returns false, then
|
||||||
* doHighlightingByHighlighter() will do nothing for the field.
|
* doHighlightingByHighlighter() will do nothing for the field.
|
||||||
*/
|
*/
|
||||||
private boolean useFastVectorHighlighter( SolrParams params, IndexSchema schema, String fieldName ){
|
protected boolean useFastVectorHighlighter( SolrParams params, IndexSchema schema, String fieldName ){
|
||||||
SchemaField schemaField = schema.getFieldOrNull( fieldName );
|
SchemaField schemaField = schema.getFieldOrNull(fieldName);
|
||||||
if( schemaField == null ) return false;
|
if( schemaField == null ) return false;
|
||||||
boolean useFvhParam = params.getFieldBool( fieldName, HighlightParams.USE_FVH, false );
|
boolean useFvhParam = params.getFieldBool(fieldName, HighlightParams.USE_FVH, false);
|
||||||
if( !useFvhParam ) return false;
|
if( !useFvhParam ) return false;
|
||||||
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
|
boolean termPosOff = schemaField.storeTermPositions() && schemaField.storeTermOffsets();
|
||||||
if( !termPosOff ) {
|
if( !termPosOff ) {
|
||||||
|
@ -419,54 +417,84 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
|
protected void doHighlightingByFastVectorHighlighter( FastVectorHighlighter highlighter, FieldQuery fieldQuery,
|
||||||
|
SolrQueryRequest req, NamedList docSummaries, int docId, StoredDocument doc,
|
||||||
|
String fieldName ) throws IOException {
|
||||||
|
SolrParams params = req.getParams();
|
||||||
|
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder(fieldName, params);
|
||||||
|
String[] snippets = highlighter.getBestFragments( fieldQuery, req.getSearcher().getIndexReader(), docId, fieldName,
|
||||||
|
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
|
||||||
|
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
|
||||||
|
getFragListBuilder( fieldName, params ),
|
||||||
|
getFragmentsBuilder( fieldName, params ),
|
||||||
|
solrFb.getPreTags( params, fieldName ),
|
||||||
|
solrFb.getPostTags( params, fieldName ),
|
||||||
|
getEncoder( fieldName, params ) );
|
||||||
|
if( snippets != null && snippets.length > 0 )
|
||||||
|
docSummaries.add( fieldName, snippets );
|
||||||
|
else
|
||||||
|
alternateField( docSummaries, params, doc, fieldName );
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
protected void doHighlightingByHighlighter( Query query, SolrQueryRequest req, NamedList docSummaries,
|
||||||
int docId, StoredDocument doc, String fieldName ) throws IOException {
|
int docId, StoredDocument doc, String fieldName ) throws IOException {
|
||||||
final SolrIndexSearcher searcher = req.getSearcher();
|
final SolrIndexSearcher searcher = req.getSearcher();
|
||||||
final IndexSchema schema = searcher.getSchema();
|
final IndexSchema schema = searcher.getSchema();
|
||||||
|
final SolrParams params = req.getParams();
|
||||||
|
|
||||||
// TODO: Currently in trunk highlighting numeric fields is broken (Lucene) -
|
// TODO: highlighting numeric fields is broken (Lucene) - so we disable them until fixed (see LUCENE-3080)!
|
||||||
// so we disable them until fixed (see LUCENE-3080)!
|
|
||||||
// BEGIN: Hack
|
// BEGIN: Hack
|
||||||
final SchemaField schemaField = schema.getFieldOrNull(fieldName);
|
final SchemaField schemaField = schema.getFieldOrNull(fieldName);
|
||||||
if (schemaField != null && schemaField.getType() instanceof org.apache.solr.schema.TrieField) return;
|
if (schemaField != null && schemaField.getType() instanceof org.apache.solr.schema.TrieField) return;
|
||||||
// END: Hack
|
// END: Hack
|
||||||
|
|
||||||
SolrParams params = req.getParams();
|
final int mvToExamine =
|
||||||
|
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
|
||||||
|
schemaField.multiValued() ? Integer.MAX_VALUE : 1);
|
||||||
|
// Technically this is the max *fragments* (snippets), not max values:
|
||||||
|
int mvToMatch =
|
||||||
|
req.getParams().getFieldInt(fieldName, HighlightParams.MAX_MULTIVALUED_TO_MATCH, Integer.MAX_VALUE);
|
||||||
|
if (mvToExamine <= 0 || mvToMatch <= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int maxCharsToAnalyze = params.getFieldInt(fieldName,
|
||||||
|
HighlightParams.MAX_CHARS,
|
||||||
|
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
|
||||||
|
if (maxCharsToAnalyze < 0) {//e.g. -1
|
||||||
|
maxCharsToAnalyze = Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<String> fieldValues = getFieldValues(req, doc, fieldName, mvToExamine, maxCharsToAnalyze);
|
||||||
|
if (fieldValues.isEmpty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// preserve order of values in a multiValued list
|
// preserve order of values in a multiValued list
|
||||||
boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);
|
boolean preserveMulti = params.getFieldBool(fieldName, HighlightParams.PRESERVE_MULTI, false);
|
||||||
|
|
||||||
List<StorableField> allFields = doc.getFields();
|
|
||||||
if (allFields == null || allFields.isEmpty()) return; // No explicit contract that getFields returns != null,
|
|
||||||
// although currently it can't.
|
|
||||||
|
|
||||||
int numFragments = getMaxSnippets(fieldName, params);
|
int numFragments = getMaxSnippets(fieldName, params);
|
||||||
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
|
boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params);
|
||||||
|
|
||||||
String[] summaries = null;
|
|
||||||
List<TextFragment> frags = new ArrayList<>();
|
List<TextFragment> frags = new ArrayList<>();
|
||||||
|
|
||||||
//Try term vectors, which is faster
|
//Try term vectors, which is faster
|
||||||
TokenStream tvStream = TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName);
|
final TokenStream tvStream =
|
||||||
|
TokenSources.getTokenStreamWithOffsets(searcher.getIndexReader(), docId, fieldName);
|
||||||
|
// We need to wrap in OffsetWindowTokenFilter if multi-valued
|
||||||
final OffsetWindowTokenFilter tvWindowStream;
|
final OffsetWindowTokenFilter tvWindowStream;
|
||||||
if (tvStream != null && schemaField.multiValued() && isActuallyMultiValued(allFields, fieldName)) {
|
if (tvStream != null && fieldValues.size() > 1) {
|
||||||
tvWindowStream = new OffsetWindowTokenFilter(tvStream);
|
tvWindowStream = new OffsetWindowTokenFilter(tvStream);
|
||||||
} else {
|
} else {
|
||||||
tvWindowStream = null;
|
tvWindowStream = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mvToExamine = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_EXAMINE,
|
for (String thisText : fieldValues) {
|
||||||
Integer.toString(Integer.MAX_VALUE)));
|
if (mvToMatch <= 0 || maxCharsToAnalyze <= 0) {
|
||||||
int mvToMatch = Integer.parseInt(req.getParams().get(HighlightParams.MAX_MULTIVALUED_TO_MATCH,
|
break;
|
||||||
Integer.toString(Integer.MAX_VALUE)));
|
}
|
||||||
|
|
||||||
for (StorableField thisField : allFields) {
|
|
||||||
if (mvToExamine <= 0 || mvToMatch <= 0) break;
|
|
||||||
|
|
||||||
if (! thisField.name().equals(fieldName)) continue; // Is there a better way to do this?
|
|
||||||
|
|
||||||
--mvToExamine;
|
|
||||||
String thisText = thisField.stringValue();
|
|
||||||
TokenStream tstream;
|
TokenStream tstream;
|
||||||
if (tvWindowStream != null) {
|
if (tvWindowStream != null) {
|
||||||
// if we have a multi-valued field with term vectors, then get the next offset window
|
// if we have a multi-valued field with term vectors, then get the next offset window
|
||||||
|
@ -478,12 +506,8 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
tstream = createAnalyzerTStream(schema, fieldName, thisText);
|
tstream = createAnalyzerTStream(schema, fieldName, thisText);
|
||||||
}
|
}
|
||||||
|
|
||||||
int maxCharsToAnalyze = params.getFieldInt(fieldName,
|
|
||||||
HighlightParams.MAX_CHARS,
|
|
||||||
Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE);
|
|
||||||
|
|
||||||
Highlighter highlighter;
|
Highlighter highlighter;
|
||||||
if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) {
|
if (req.getParams().getFieldBool(fieldName, HighlightParams.USE_PHRASE_HIGHLIGHTER, true)) {
|
||||||
// We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream
|
// We're going to call getPhraseHighlighter and it might consume the tokenStream. If it does, the tokenStream
|
||||||
// needs to implement reset() efficiently.
|
// needs to implement reset() efficiently.
|
||||||
|
|
||||||
|
@ -491,7 +515,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
// It should be okay if OffsetLimit won't get applied in this case.
|
// It should be okay if OffsetLimit won't get applied in this case.
|
||||||
final TokenStream tempTokenStream;
|
final TokenStream tempTokenStream;
|
||||||
if (tstream != tvStream) {
|
if (tstream != tvStream) {
|
||||||
if (maxCharsToAnalyze < 0) {
|
if (maxCharsToAnalyze >= thisText.length()) {
|
||||||
tempTokenStream = new CachingTokenFilter(tstream);
|
tempTokenStream = new CachingTokenFilter(tstream);
|
||||||
} else {
|
} else {
|
||||||
tempTokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
|
tempTokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tstream, maxCharsToAnalyze));
|
||||||
|
@ -508,31 +532,24 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
tstream = tempTokenStream;
|
tstream = tempTokenStream;
|
||||||
}
|
}
|
||||||
//tstream.reset(); not needed; getBestTextFragments will reset it.
|
//tstream.reset(); not needed; getBestTextFragments will reset it.
|
||||||
}
|
} else {
|
||||||
else {
|
|
||||||
// use "the old way"
|
// use "the old way"
|
||||||
highlighter = getHighlighter(query, fieldName, req);
|
highlighter = getHighlighter(query, fieldName, req);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxCharsToAnalyze < 0) {
|
|
||||||
highlighter.setMaxDocCharsToAnalyze(thisText.length());
|
|
||||||
} else {
|
|
||||||
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
|
highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze);
|
||||||
}
|
|
||||||
|
|
||||||
|
// Highlight!
|
||||||
try {
|
try {
|
||||||
TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments);
|
TextFragment[] bestTextFragments =
|
||||||
|
highlighter.getBestTextFragments(tstream, thisText, mergeContiguousFragments, numFragments);
|
||||||
for (TextFragment bestTextFragment : bestTextFragments) {
|
for (TextFragment bestTextFragment : bestTextFragments) {
|
||||||
if (preserveMulti) {
|
if (bestTextFragment == null)//can happen via mergeContiguousFragments
|
||||||
if (bestTextFragment != null) {
|
continue;
|
||||||
|
// normally we want a score (must be highlighted), but if preserveMulti then we return a snippet regardless.
|
||||||
|
if (bestTextFragment.getScore() > 0 || preserveMulti) {
|
||||||
frags.add(bestTextFragment);
|
frags.add(bestTextFragment);
|
||||||
--mvToMatch;
|
--mvToMatch; // note: limits fragments (for multi-valued fields), not quite the number of values
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if ((bestTextFragment != null) && (bestTextFragment.getScore() > 0)) {
|
|
||||||
frags.add(bestTextFragment);
|
|
||||||
--mvToMatch;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (InvalidTokenOffsetsException e) {
|
} catch (InvalidTokenOffsetsException e) {
|
||||||
|
@ -540,9 +557,11 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
}
|
}
|
||||||
}//end field value loop
|
}//end field value loop
|
||||||
|
|
||||||
|
// Put the fragments onto the Solr response (docSummaries)
|
||||||
|
if (frags.size() > 0) {
|
||||||
// sort such that the fragments with the highest score come first
|
// sort such that the fragments with the highest score come first
|
||||||
if (!preserveMulti) {
|
if (!preserveMulti) {
|
||||||
Collections.sort(frags, new Comparator<TextFragment>() {
|
Collections.sort(frags, new Comparator<TextFragment>() {//TODO make TextFragment Comparable
|
||||||
@Override
|
@Override
|
||||||
public int compare(TextFragment arg0, TextFragment arg1) {
|
public int compare(TextFragment arg0, TextFragment arg1) {
|
||||||
return Math.round(arg1.getScore() - arg0.getScore());
|
return Math.round(arg1.getScore() - arg0.getScore());
|
||||||
|
@ -550,70 +569,53 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert fragments back into text
|
// Truncate list to hl.snippets, but not when hl.preserveMulti
|
||||||
// TODO: we can include score and position information in output as snippet attributes
|
if (frags.size() > numFragments && !preserveMulti) {
|
||||||
if (frags.size() > 0) {
|
frags = frags.subList(0, numFragments);
|
||||||
ArrayList<String> fragTexts = new ArrayList<>();
|
|
||||||
for (TextFragment fragment: frags) {
|
|
||||||
if (preserveMulti) {
|
|
||||||
if (fragment != null) {
|
|
||||||
fragTexts.add(fragment.toString());
|
|
||||||
}
|
}
|
||||||
|
docSummaries.add(fieldName, getResponseForFragments(frags, req));
|
||||||
} else {
|
} else {
|
||||||
if ((fragment != null) && (fragment.getScore() > 0)) {
|
|
||||||
fragTexts.add(fragment.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fragTexts.size() >= numFragments && !preserveMulti) break;
|
|
||||||
}
|
|
||||||
summaries = fragTexts.toArray(new String[0]);
|
|
||||||
if (summaries.length > 0)
|
|
||||||
docSummaries.add(fieldName, summaries);
|
|
||||||
}
|
|
||||||
// no summaries made, copy text from alternate field
|
// no summaries made, copy text from alternate field
|
||||||
if (summaries == null || summaries.length == 0) {
|
alternateField(docSummaries, params, doc, fieldName);
|
||||||
alternateField( docSummaries, params, doc, fieldName );
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Is this field *actually* multi-valued for this document's fields? */
|
/** Fetches field values to highlight. If the field value should come from an atypical place (or another aliased
|
||||||
private boolean isActuallyMultiValued(List<StorableField> allFields, String fieldName) {
|
* field name, then a subclass could override to implement that.
|
||||||
boolean foundFirst = false;
|
*/
|
||||||
for (StorableField field : allFields) {
|
protected List<String> getFieldValues(SolrQueryRequest req, StoredDocument doc, String fieldName,
|
||||||
if (field.name().equals(fieldName)) {
|
int maxValues, int maxCharsToAnalyze) {
|
||||||
if (foundFirst) {
|
// Collect the Fields we will examine (could be more than one if multi-valued)
|
||||||
return true;//we found another
|
List<String> result = new ArrayList<>();
|
||||||
} else {
|
for (StorableField thisField : doc.getFields()) {
|
||||||
foundFirst = true;
|
if (! thisField.name().equals(fieldName)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
String value = thisField.stringValue();
|
||||||
|
result.add(value);
|
||||||
|
|
||||||
|
maxValues--;
|
||||||
|
if (maxValues <= 0 || maxCharsToAnalyze <= 0) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
return false;//0 or 1 value
|
|
||||||
|
/** Given the fragments, return the result to be put in the field {@link NamedList}. This is an extension
|
||||||
|
* point to allow adding other metadata like the absolute offsets or scores.
|
||||||
|
*/
|
||||||
|
protected Object getResponseForFragments(List<TextFragment> frags, SolrQueryRequest req) {
|
||||||
|
// TODO: we can include score and position information in output as snippet attributes
|
||||||
|
ArrayList<String> fragTexts = new ArrayList<>();
|
||||||
|
for (TextFragment fragment : frags) {
|
||||||
|
fragTexts.add(fragment.toString());
|
||||||
|
}
|
||||||
|
return fragTexts.toArray(new String[fragTexts.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
private void doHighlightingByFastVectorHighlighter( FastVectorHighlighter highlighter, FieldQuery fieldQuery,
|
protected void alternateField( NamedList docSummaries, SolrParams params, StoredDocument doc, String fieldName ){
|
||||||
SolrQueryRequest req, NamedList docSummaries, int docId, StoredDocument doc,
|
|
||||||
String fieldName ) throws IOException {
|
|
||||||
SolrParams params = req.getParams();
|
|
||||||
SolrFragmentsBuilder solrFb = getSolrFragmentsBuilder( fieldName, params );
|
|
||||||
String[] snippets = highlighter.getBestFragments( fieldQuery, req.getSearcher().getIndexReader(), docId, fieldName,
|
|
||||||
params.getFieldInt( fieldName, HighlightParams.FRAGSIZE, 100 ),
|
|
||||||
params.getFieldInt( fieldName, HighlightParams.SNIPPETS, 1 ),
|
|
||||||
getFragListBuilder( fieldName, params ),
|
|
||||||
getFragmentsBuilder( fieldName, params ),
|
|
||||||
solrFb.getPreTags( params, fieldName ),
|
|
||||||
solrFb.getPostTags( params, fieldName ),
|
|
||||||
getEncoder( fieldName, params ) );
|
|
||||||
if( snippets != null && snippets.length > 0 )
|
|
||||||
docSummaries.add( fieldName, snippets );
|
|
||||||
else
|
|
||||||
alternateField( docSummaries, params, doc, fieldName );
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
private void alternateField( NamedList docSummaries, SolrParams params, StoredDocument doc, String fieldName ){
|
|
||||||
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
|
String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD);
|
||||||
if (alternateField != null && alternateField.length() > 0) {
|
if (alternateField != null && alternateField.length() > 0) {
|
||||||
StorableField[] docFields = doc.getFields(alternateField);
|
StorableField[] docFields = doc.getFields(alternateField);
|
||||||
|
@ -652,7 +654,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
|
protected TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
|
||||||
return new TokenOrderingFilter(schema.getIndexAnalyzer().tokenStream(fieldName, docText), 10);
|
return new TokenOrderingFilter(schema.getIndexAnalyzer().tokenStream(fieldName, docText), 10);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue