From 57bf4438342ac183d265cf54b6e283286d935537 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sat, 6 Apr 2013 11:48:47 +0000 Subject: [PATCH] SOLR-4675: Improve PostingsHighlighter integration git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1465228 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 4 + .../highlight/DefaultSolrHighlighter.java | 24 +++ .../highlight/PostingsSolrHighlighter.java | 151 ++++++++---------- .../solr/highlight/SolrHighlighter.java | 27 ---- .../conf/solrconfig-postingshighlight.xml | 9 +- .../highlight/FastVectorHighlighterTest.java | 2 +- .../solr/highlight/HighlighterTest.java | 2 +- .../TestPostingsSolrHighlighter.java | 26 +++ .../solr/common/params/HighlightParams.java | 8 + 9 files changed, 134 insertions(+), 119 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ba8a07fac64..6acd82aca3c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -125,6 +125,10 @@ New Features prevent the "best" match from being found if it appears later in the MV list than the cutoff specified by either of these params. (Erick Erickson) +* SOLR-4675: Improve PostingsSolrHighlighter to support per-field/query-time overrides + and add additional configuration parameters. See the javadocs for more details and + examples. 
(Robert Muir) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 3cd41948261..f53c5472907 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -69,6 +69,30 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf this.solrCore = solrCore; } + // Thread safe registry + protected final Map formatters = + new HashMap(); + + // Thread safe registry + protected final Map encoders = + new HashMap(); + + // Thread safe registry + protected final Map fragmenters = + new HashMap() ; + + // Thread safe registry + protected final Map fragListBuilders = + new HashMap() ; + + // Thread safe registry + protected final Map fragmentsBuilders = + new HashMap() ; + + // Thread safe registry + protected final Map boundaryScanners = + new HashMap() ; + @Override public void init(PluginInfo info) { formatters.clear(); diff --git a/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java index 2249c85393b..0f8448b1a64 100644 --- a/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/PostingsSolrHighlighter.java @@ -19,9 +19,7 @@ package org.apache.solr.highlight; import java.io.IOException; import java.text.BreakIterator; -import java.util.Arrays; import java.util.Collections; -import java.util.Locale; import java.util.Map; import java.util.Set; @@ -50,108 +48,57 @@ import org.apache.solr.util.plugin.PluginInfoInitialized; *

* Example configuration: *

+ *   <requestHandler name="standard" class="solr.StandardRequestHandler">
+ *     <lst name="defaults">
+ *       <int name="hl.snippets">1</int>
+ *       <str name="hl.tag.pre">&lt;em&gt;</str>
+ *       <str name="hl.tag.post">&lt;/em&gt;</str>
+ *       <str name="hl.tag.ellipsis">... </str>
+ *       <bool name="hl.defaultSummary">true</bool>
+ *       <float name="hl.score.k1">1.2</float>
+ *       <float name="hl.score.b">0.75</float>
+ *       <float name="hl.score.pivot">87</float>
+ *       <int name="hl.maxAnalyzedChars">10000</int>
+ *     </lst>
+ *   </requestHandler>
+ * 
+ * ... + *
  *   <searchComponent class="solr.HighlightComponent" name="highlight">
- *     <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"
- *                      preTag="&lt;em&gt;"
- *                      postTag="&lt;/em&gt;"
- *                      ellipsis="... "
- *                      k1="1.2"
- *                      b="0.75"
- *                      pivot="87"
- *                      maxLength=10000
- *                      summarizeEmpty=true/>
+ *     <highlighting class="org.apache.solr.highlight.PostingsSolrHighlighter"/>
  *   </searchComponent>
  * 
*

* Notes: *

    *
  • fields to highlight must be configured with storeOffsetsWithPositions="true" - *
  • hl.fl specifies the field list. - *
  • hl.snippets specifies how many underlying sentence fragments form the resulting snippet. + *
  • hl.q (string) can specify the query + *
  • hl.fl (string) specifies the field list. + *
  • hl.snippets (int) specifies how many underlying sentence fragments form the resulting snippet. + *
  • hl.tag.pre (string) specifies text which appears before a highlighted term. + *
  • hl.tag.post (string) specifies text which appears after a highlighted term. + *
  • hl.tag.ellipsis (string) specifies text which joins non-adjacent passages. + *
  • hl.defaultSummary (bool) specifies if a field should have a default summary. + *
  • hl.score.k1 (float) specifies bm25 scoring parameter 'k1' + *
  • hl.score.b (float) specifies bm25 scoring parameter 'b' + *
  • hl.score.pivot (float) specifies bm25 scoring parameter 'avgdl' + *
  • hl.maxAnalyzedChars (int) specifies how many characters at most will be processed in a document. + * NOTE: hl.maxAnalyzedChars cannot currently be specified per-field. *
* * @lucene.experimental */ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginInfoInitialized { - protected PostingsHighlighter highlighter; @Override public void initalize(SolrConfig config) {} @Override - public void init(PluginInfo info) { - Map attributes = info.attributes; - - // scorer parameters: k1/b/pivot - String k1 = attributes.get("k1"); - if (k1 == null) { - k1 = "1.2"; - } - - String b = attributes.get("b"); - if (b == null) { - b = "0.75"; - } - - String pivot = attributes.get("pivot"); - if (pivot == null) { - pivot = "87"; - } - final PassageScorer scorer = new PassageScorer(Float.parseFloat(k1), Float.parseFloat(b), Float.parseFloat(pivot)); - - // formatter parameters: preTag/postTag/ellipsis - String preTag = attributes.get("preTag"); - if (preTag == null) { - preTag = ""; - } - String postTag = attributes.get("postTag"); - if (postTag == null) { - postTag = ""; - } - String ellipsis = attributes.get("ellipsis"); - if (ellipsis == null) { - ellipsis = "... 
"; - } - final PassageFormatter formatter = new PassageFormatter(preTag, postTag, ellipsis); - - String summarizeEmpty = attributes.get("summarizeEmpty"); - final boolean summarizeEmptyBoolean; - if (summarizeEmpty == null) { - summarizeEmptyBoolean = true; - } else { - summarizeEmptyBoolean = Boolean.parseBoolean(summarizeEmpty); - } - - // maximum content size to process - int maxLength = PostingsHighlighter.DEFAULT_MAX_LENGTH; - if (attributes.containsKey("maxLength")) { - maxLength = Integer.parseInt(attributes.get("maxLength")); - } - highlighter = new PostingsHighlighter(maxLength) { - @Override - protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { - if (summarizeEmptyBoolean) { - return super.getEmptyHighlight(fieldName, bi, maxPassages); - } else { - return new Passage[0]; - } - } - - @Override - protected PassageFormatter getFormatter(String fieldName) { - return formatter; - } - - @Override - protected PassageScorer getScorer(String fieldName) { - return scorer; - } - }; - } + public void init(PluginInfo info) {} @Override public NamedList doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException { - SolrParams params = req.getParams(); + final SolrParams params = req.getParams(); // if highlighting isnt enabled, then why call doHighlighting? 
if (isHighlightingEnabled(params)) { @@ -162,11 +109,41 @@ public class PostingsSolrHighlighter extends SolrHighlighter implements PluginIn String[] keys = getUniqueKeys(searcher, docIDs); // query-time parameters + int maxLength = params.getInt(HighlightParams.MAX_CHARS, PostingsHighlighter.DEFAULT_MAX_LENGTH); String[] fieldNames = getHighlightFields(query, req, defaultFields); - // TODO: make this per-field - int numSnippets = params.getInt(HighlightParams.SNIPPETS, 1); + int maxPassages[] = new int[fieldNames.length]; - Arrays.fill(maxPassages, numSnippets); + for (int i = 0; i < fieldNames.length; i++) { + maxPassages[i] = params.getFieldInt(fieldNames[i], HighlightParams.SNIPPETS, 1); + } + + PostingsHighlighter highlighter = new PostingsHighlighter(maxLength) { + @Override + protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { + boolean defaultSummary = params.getFieldBool(fieldName, HighlightParams.DEFAULT_SUMMARY, true); + if (defaultSummary) { + return super.getEmptyHighlight(fieldName, bi, maxPassages); + } else { + return new Passage[0]; + } + } + + @Override + protected PassageFormatter getFormatter(String fieldName) { + String preTag = params.getFieldParam(fieldName, HighlightParams.TAG_PRE, ""); + String postTag = params.getFieldParam(fieldName, HighlightParams.TAG_POST, ""); + String ellipsis = params.getFieldParam(fieldName, HighlightParams.TAG_ELLIPSIS, "... 
"); + return new PassageFormatter(preTag, postTag, ellipsis); + } + + @Override + protected PassageScorer getScorer(String fieldName) { + float k1 = params.getFieldFloat(fieldName, HighlightParams.SCORE_K1, 1.2f); + float b = params.getFieldFloat(fieldName, HighlightParams.SCORE_B, 0.75f); + float pivot = params.getFieldFloat(fieldName, HighlightParams.SCORE_PIVOT, 87f); + return new PassageScorer(k1, b, pivot); + } + }; Map snippets = highlighter.highlightFields(fieldNames, query, searcher, docIDs, maxPassages); return encodeSnippets(keys, fieldNames, snippets); diff --git a/solr/core/src/java/org/apache/solr/highlight/SolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/SolrHighlighter.java index 8bb6ecdcf92..a68c6d70a5b 100644 --- a/solr/core/src/java/org/apache/solr/highlight/SolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/SolrHighlighter.java @@ -19,9 +19,7 @@ package org.apache.solr.highlight; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -38,34 +36,9 @@ public abstract class SolrHighlighter { public static Logger log = LoggerFactory.getLogger(SolrHighlighter.class); - // Thread safe registry - protected final Map formatters = - new HashMap(); - - // Thread safe registry - protected final Map encoders = - new HashMap(); - - // Thread safe registry - protected final Map fragmenters = - new HashMap() ; - - // Thread safe registry - protected final Map fragListBuilders = - new HashMap() ; - - // Thread safe registry - protected final Map fragmentsBuilders = - new HashMap() ; - - // Thread safe registry - protected final Map boundaryScanners = - new HashMap() ; - @Deprecated public abstract void initalize( SolrConfig config ); - /** * Check whether Highlighting is enabled for this request. 
* @param params The params controlling Highlighting diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml index 338c9b139b6..8eb9b6bc4a8 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-postingshighlight.xml @@ -22,9 +22,12 @@ ${tests.luceneMatchVersion:LUCENE_CURRENT} ${solr.data.dir:} - - + + + false + + - + diff --git a/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java index 88459cd7865..7b4fe383f0c 100644 --- a/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/FastVectorHighlighterTest.java @@ -34,7 +34,7 @@ public class FastVectorHighlighterTest extends SolrTestCaseJ4 { @Test public void testConfig(){ - SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore()); + DefaultSolrHighlighter highlighter = (DefaultSolrHighlighter) HighlightComponent.getHighlighter(h.getCore()); // Make sure we loaded one fragListBuilder SolrFragListBuilder solrFlbNull = highlighter.fragListBuilders.get( null ); diff --git a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java index ccea08a738c..7861a5c5922 100755 --- a/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java +++ b/solr/core/src/test/org/apache/solr/highlight/HighlighterTest.java @@ -64,7 +64,7 @@ public class HighlighterTest extends SolrTestCaseJ4 { @Test public void testConfig() { - SolrHighlighter highlighter = HighlightComponent.getHighlighter(h.getCore()); + DefaultSolrHighlighter highlighter = (DefaultSolrHighlighter) HighlightComponent.getHighlighter(h.getCore()); // Make sure we loaded the one 
formatter SolrFormatter fmt1 = highlighter.formatters.get( null ); diff --git a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java index d65042dbf46..0ac39439cae 100644 --- a/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java +++ b/solr/core/src/test/org/apache/solr/highlight/TestPostingsSolrHighlighter.java @@ -71,6 +71,14 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 { "count(//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/*)=0"); } + public void testDefaultSummary() { + assertQ("null snippet test", + req("q", "text:one OR *:*", "sort", "id asc", "hl", "true", "hl.defaultSummary", "true"), + "count(//lst[@name='highlighting']/*)=2", + "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document one'", + "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'"); + } + public void testDifferentField() { assertQ("highlighting text3", req("q", "text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text3"), @@ -100,4 +108,22 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 { } resetExceptionIgnores(); } + + public void testTags() { + assertQ("different pre/post tags", + req("q", "text:document", "sort", "id asc", "hl", "true", "hl.tag.pre", "[", "hl.tag.post", "]"), + "count(//lst[@name='highlighting']/*)=2", + "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='[document] one'", + "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second [document]'"); + } + + public void testTagsPerField() { + assertQ("highlighting text and text3", + req("q", "text:document text3:document", "sort", "id asc", "hl", "true", "hl.fl", "text,text3", "f.text3.hl.tag.pre", "[", "f.text3.hl.tag.post", "]"), + "count(//lst[@name='highlighting']/*)=2", + 
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='document one'", + "//lst[@name='highlighting']/lst[@name='101']/arr[@name='text3']/str='crappy [document]'", + "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='second document'", + "//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'"); + } } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java index 1922112c1a7..717fb85ddf4 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/HighlightParams.java @@ -42,6 +42,7 @@ public interface HighlightParams { public static final String BS_LANGUAGE = HIGHLIGHT+".bs.language"; public static final String BS_COUNTRY = HIGHLIGHT+".bs.country"; public static final String FIELD_MATCH = HIGHLIGHT+".requireFieldMatch"; + public static final String DEFAULT_SUMMARY = HIGHLIGHT + ".defaultSummary"; public static final String ALTERNATE_FIELD = HIGHLIGHT+".alternateField"; public static final String ALTERNATE_FIELD_LENGTH = HIGHLIGHT+".maxAlternateFieldLength"; public static final String MAX_MULTIVALUED_TO_EXAMINE = HIGHLIGHT + ".maxMultiValuedToExamine"; @@ -55,6 +56,7 @@ public interface HighlightParams { public static final String USE_FVH = HIGHLIGHT + ".useFastVectorHighlighter"; public static final String TAG_PRE = HIGHLIGHT + ".tag.pre"; public static final String TAG_POST = HIGHLIGHT + ".tag.post"; + public static final String TAG_ELLIPSIS = HIGHLIGHT + ".tag.ellipsis"; public static final String PHRASE_LIMIT = HIGHLIGHT + ".phraseLimit"; public static final String MULTI_VALUED_SEPARATOR = HIGHLIGHT + ".multiValuedSeparatorChar"; @@ -68,4 +70,10 @@ public interface HighlightParams { public static final String SLOP = HIGHLIGHT+"."+REGEX+".slop"; public static final String PATTERN = HIGHLIGHT+"."+REGEX+".pattern"; public 
static final String MAX_RE_CHARS = HIGHLIGHT+"."+REGEX+".maxAnalyzedChars"; + + // Scoring parameters + public static final String SCORE = "score"; + public static final String SCORE_K1 = HIGHLIGHT +"."+SCORE+".k1"; + public static final String SCORE_B = HIGHLIGHT +"."+SCORE+".b"; + public static final String SCORE_PIVOT = HIGHLIGHT +"."+SCORE+".pivot"; }