mirror of https://github.com/apache/lucene.git
SOLR-4675: Improve PostingsHighlighter integration
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1465228 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ef8fc3481f
commit
57bf443834
|
@ -125,6 +125,10 @@ New Features
|
|||
prevent the "best" match from being found if it appears later in the MV list than the
|
||||
cutoff specified by either of these params. (Erick Erickson)
|
||||
|
||||
* SOLR-4675: Improve PostingsSolrHighlighter to support per-field/query-time overrides
|
||||
and add additional configuration parameters. See the javadocs for more details and
|
||||
examples. (Robert Muir)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -69,6 +69,30 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
this.solrCore = solrCore;
|
||||
}
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrFormatter> formatters =
|
||||
new HashMap<String, SolrFormatter>();
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrEncoder> encoders =
|
||||
new HashMap<String, SolrEncoder>();
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrFragmenter> fragmenters =
|
||||
new HashMap<String, SolrFragmenter>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrFragListBuilder> fragListBuilders =
|
||||
new HashMap<String, SolrFragListBuilder>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
|
||||
new HashMap<String, SolrFragmentsBuilder>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrBoundaryScanner> boundaryScanners =
|
||||
new HashMap<String, SolrBoundaryScanner>() ;
|
||||
|
||||
@Override
|
||||
public void init(PluginInfo info) {
|
||||
formatters.clear();
|
||||
|
|
|
@ -19,9 +19,7 @@ package org.apache.solr.highlight;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.text.BreakIterator;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -50,108 +48,57 @@ import org.apache.solr.util.plugin.PluginInfoInitialized;
|
|||
* <p>
|
||||
* Example configuration:
|
||||
* <pre class="prettyprint">
|
||||
* <requestHandler name="standard" class="solr.StandardRequestHandler">
|
||||
* <lst name="defaults">
|
||||
* <int name="hl.snippets">1</int>
|
||||
* <str name="hl.tag.pre">&lt;em&gt;</str>
|
||||
* <str name="hl.tag.post">&lt;/em&gt;</str>
|
||||
* <str name="hl.tag.ellipsis">... </str>
|
||||
* <bool name="hl.defaultSummary">true</bool>
|
||||
* <float name="hl.score.k1">1.2</float>
|
||||
* <float name="hl.score.b">0.75</float>
|
||||
* <float name="hl.score.pivot">87</float>
|
||||
* <int name="hl.maxAnalyzedChars">10000</int>
|
||||
* </lst>
|
||||
* </requestHandler>
|
||||
* </pre>
|
||||
* ...
|
||||
* <pre class="prettyprint">
|
||||
* <searchComponent class="solr.HighlightComponent" name="highlight">
|
||||
* <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"
|
||||
* preTag="&lt;em&gt;"
|
||||
* postTag="&lt;/em&gt;"
|
||||
* ellipsis="... "
|
||||
* k1="1.2"
|
||||
* b="0.75"
|
||||
* pivot="87"
|
||||
* maxLength=10000
|
||||
* summarizeEmpty=true/>
|
||||
* <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
|
||||
* </searchComponent>
|
||||
* </pre>
|
||||
* <p>
|
||||
* Notes:
|
||||
* <ul>
|
||||
* <li>fields to highlight must be configured with storeOffsetsWithPositions="true"
|
||||
* <li>hl.fl specifies the field list.
|
||||
* <li>hl.snippets specifies how many underlying sentence fragments form the resulting snippet.
|
||||
* <li>hl.q (string) can specify the query
|
||||
* <li>hl.fl (string) specifies the field list.
|
||||
* <li>hl.snippets (int) specifies how many underlying sentence fragments form the resulting snippet.
|
||||
* <li>hl.tag.pre (string) specifies text which appears before a highlighted term.
|
||||
* <li>hl.tag.post (string) specifies text which appears after a highlighted term.
|
||||
* <li>hl.tag.ellipsis (string) specifies text which joins non-adjacent passages.
|
||||
* <li>hl.defaultSummary (bool) specifies if a field should have a default summary.
|
||||
* <li>hl.score.k1 (float) specifies bm25 scoring parameter 'k1'
|
||||
* <li>hl.score.b (float) specifies bm25 scoring parameter 'b'
|
||||
* <li>hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl'
|
||||
* <li>hl.maxAnalyzedChars specifies how many characters at most will be processed in a document.
|
||||
* NOTE: currently hl.maxAnalyzedChars cannot yet be specified per-field
|
||||
* </ul>
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class PostingsSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized {
|
||||
protected PostingsHighlighter highlighter;
|
||||
|
||||
@Override
|
||||
public void initalize(SolrConfig config) {}
|
||||
|
||||
@Override
|
||||
public void init(PluginInfo info) {
|
||||
Map<String,String> attributes = info.attributes;
|
||||
|
||||
// scorer parameters: k1/b/pivot
|
||||
String k1 = attributes.get("k1");
|
||||
if (k1 == null) {
|
||||
k1 = "1.2";
|
||||
}
|
||||
|
||||
String b = attributes.get("b");
|
||||
if (b == null) {
|
||||
b = "0.75";
|
||||
}
|
||||
|
||||
String pivot = attributes.get("pivot");
|
||||
if (pivot == null) {
|
||||
pivot = "87";
|
||||
}
|
||||
final PassageScorer scorer = new PassageScorer(Float.parseFloat(k1), Float.parseFloat(b), Float.parseFloat(pivot));
|
||||
|
||||
// formatter parameters: preTag/postTag/ellipsis
|
||||
String preTag = attributes.get("preTag");
|
||||
if (preTag == null) {
|
||||
preTag = "<em>";
|
||||
}
|
||||
String postTag = attributes.get("postTag");
|
||||
if (postTag == null) {
|
||||
postTag = "</em>";
|
||||
}
|
||||
String ellipsis = attributes.get("ellipsis");
|
||||
if (ellipsis == null) {
|
||||
ellipsis = "... ";
|
||||
}
|
||||
final PassageFormatter formatter = new PassageFormatter(preTag, postTag, ellipsis);
|
||||
|
||||
String summarizeEmpty = attributes.get("summarizeEmpty");
|
||||
final boolean summarizeEmptyBoolean;
|
||||
if (summarizeEmpty == null) {
|
||||
summarizeEmptyBoolean = true;
|
||||
} else {
|
||||
summarizeEmptyBoolean = Boolean.parseBoolean(summarizeEmpty);
|
||||
}
|
||||
|
||||
// maximum content size to process
|
||||
int maxLength = PostingsHighlighter.DEFAULT_MAX_LENGTH;
|
||||
if (attributes.containsKey("maxLength")) {
|
||||
maxLength = Integer.parseInt(attributes.get("maxLength"));
|
||||
}
|
||||
highlighter = new PostingsHighlighter(maxLength) {
|
||||
@Override
|
||||
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
|
||||
if (summarizeEmptyBoolean) {
|
||||
return super.getEmptyHighlight(fieldName, bi, maxPassages);
|
||||
} else {
|
||||
return new Passage[0];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String fieldName) {
|
||||
return formatter;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PassageScorer getScorer(String fieldName) {
|
||||
return scorer;
|
||||
}
|
||||
};
|
||||
}
|
||||
public void init(PluginInfo info) {}
|
||||
|
||||
@Override
|
||||
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
|
||||
SolrParams params = req.getParams();
|
||||
final SolrParams params = req.getParams();
|
||||
|
||||
// if highlighting isnt enabled, then why call doHighlighting?
|
||||
if (isHighlightingEnabled(params)) {
|
||||
|
@ -162,11 +109,41 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn
|
|||
String[] keys = getUniqueKeys(searcher, docIDs);
|
||||
|
||||
// query-time parameters
|
||||
int maxLength = params.getInt(HighlightParams.MAX_CHARS, PostingsHighlighter.DEFAULT_MAX_LENGTH);
|
||||
String[] fieldNames = getHighlightFields(query, req, defaultFields);
|
||||
// TODO: make this per-field
|
||||
int numSnippets = params.getInt(HighlightParams.SNIPPETS, 1);
|
||||
|
||||
int maxPassages[] = new int[fieldNames.length];
|
||||
Arrays.fill(maxPassages, numSnippets);
|
||||
for (int i = 0; i < fieldNames.length; i++) {
|
||||
maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1);
|
||||
}
|
||||
|
||||
PostingsHighlighter highlighter = new PostingsHighlighter(maxLength) {
|
||||
@Override
|
||||
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
|
||||
boolean defaultSummary = params.getFieldBool(fieldName, HighlightParams.DEFAULT_SUMMARY, true);
|
||||
if (defaultSummary) {
|
||||
return super.getEmptyHighlight(fieldName, bi, maxPassages);
|
||||
} else {
|
||||
return new Passage[0];
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String fieldName) {
|
||||
String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, "<em>");
|
||||
String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, "</em>");
|
||||
String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, "... ");
|
||||
return new PassageFormatter(preTag, postTag, ellipsis);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected PassageScorer getScorer(String fieldName) {
|
||||
float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f);
|
||||
float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f);
|
||||
float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f);
|
||||
return new PassageScorer(k1, b, pivot);
|
||||
}
|
||||
};
|
||||
|
||||
Map<String,String[]> snippets = highlighter.highlightFields(fieldNames, query, searcher, docIDs, maxPassages);
|
||||
return encodeSnippets(keys, fieldNames, snippets);
|
||||
|
|
|
@ -19,9 +19,7 @@ package org.apache.solr.highlight;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -38,34 +36,9 @@ public abstract class SolrHighlighter
|
|||
{
|
||||
public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class);
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrFormatter> formatters =
|
||||
new HashMap<String, SolrFormatter>();
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrEncoder> encoders =
|
||||
new HashMap<String, SolrEncoder>();
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String,SolrFragmenter> fragmenters =
|
||||
new HashMap<String, SolrFragmenter>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrFragListBuilder> fragListBuilders =
|
||||
new HashMap<String, SolrFragListBuilder>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrFragmentsBuilder> fragmentsBuilders =
|
||||
new HashMap<String, SolrFragmentsBuilder>() ;
|
||||
|
||||
// Thread safe registry
|
||||
protected final Map<String, SolrBoundaryScanner> boundaryScanners =
|
||||
new HashMap<String, SolrBoundaryScanner>() ;
|
||||
|
||||
@Deprecated
|
||||
public abstract void initalize( SolrConfig config );
|
||||
|
||||
|
||||
/**
|
||||
* Check whether Highlighting is enabled for this request.
|
||||
* @param params The params controlling Highlighting
|
||||
|
|
|
@ -22,9 +22,12 @@
|
|||
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
|
||||
<dataDir>${solr.data.dir:}</dataDir>
|
||||
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler"></requestHandler>
|
||||
|
||||
<requestHandler name="standard" class="solr.StandardRequestHandler">
|
||||
<lst name="defaults">
|
||||
<bool name="hl.defaultSummary">false</bool>
|
||||
</lst>
|
||||
</requestHandler>
|
||||
<searchComponent class="solr.HighlightComponent" name="highlight">
|
||||
<highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter" summarizeEmpty="false"/>
|
||||
<highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
|
||||
</searchComponent>
|
||||
</config>
|
||||
|
|
|
@ -34,7 +34,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 {
|
|||
|
||||
@Test
|
||||
public void testConfig(){
|
||||
SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore());
|
||||
DefaultSolrHighlighter highlighter = (DefaultSolrHighlighter) HighlightComponent.getHighlighter(h.getCore());
|
||||
|
||||
// Make sure we loaded one fragListBuilder
|
||||
SolrFragListBuilder solrFlbNull = highlighter.fragListBuilders.get( null );
|
||||
|
|
|
@ -64,7 +64,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testConfig()
|
||||
{
|
||||
SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore());
|
||||
DefaultSolrHighlighter highlighter = (DefaultSolrHighlighter) HighlightComponent.getHighlighter(h.getCore());
|
||||
|
||||
// Make sure we loaded the one formatter
|
||||
SolrFormatter fmt1 = highlighter.formatters.get( null );
|
||||
|
|
|
@ -71,6 +71,14 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
|
|||
"count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0");
|
||||
}
|
||||
|
||||
public void testDefaultSummary() {
|
||||
assertQ("null snippet test",
|
||||
req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document <em>one</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'");
|
||||
}
|
||||
|
||||
public void testDifferentField() {
|
||||
assertQ("highlighting text3",
|
||||
req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"),
|
||||
|
@ -100,4 +108,22 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
|
|||
}
|
||||
resetExceptionIgnores();
|
||||
}
|
||||
|
||||
public void testTags() {
|
||||
assertQ("different pre/post tags",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'");
|
||||
}
|
||||
|
||||
public void testTagsPerField() {
|
||||
assertQ("highlighting text and text3",
|
||||
req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em> one'",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second <em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,6 +42,7 @@ public interface HighlightParams {
|
|||
public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language";
|
||||
public static final String BS_COUNTRY = HIGHLIGHT+".bs.country";
|
||||
public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch";
|
||||
public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary";
|
||||
public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField";
|
||||
public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength";
|
||||
public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine";
|
||||
|
@ -55,6 +56,7 @@ public interface HighlightParams {
|
|||
public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter";
|
||||
public static final String TAG_PRE = HIGHLIGHT + ".tag.pre";
|
||||
public static final String TAG_POST = HIGHLIGHT + ".tag.post";
|
||||
public static final String TAG_ELLIPSIS = HIGHLIGHT + ".tag.ellipsis";
|
||||
public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit";
|
||||
public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar";
|
||||
|
||||
|
@ -68,4 +70,10 @@ public interface HighlightParams {
|
|||
public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop";
|
||||
public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern";
|
||||
public static final String MAX_RE_CHARS = HIGHLIGHT+"."+REGEX+".maxAnalyzedChars";
|
||||
|
||||
// Scoring parameters
|
||||
public static final String SCORE = "score";
|
||||
public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1";
|
||||
public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b";
|
||||
public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot";
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue