mirror of https://github.com/apache/lucene.git
SOLR-9935: Add hl.fragsize support when using the UnifiedHighlighter
This commit is contained in:
parent
ea49989524
commit
570880d3ac
|
@ -109,7 +109,7 @@ Upgrade Notes
|
|||
|
||||
* SOLR-9708: You are encouraged to try out the UnifiedHighlighter by setting hl.method=unified and report feedback. It
|
||||
might become the default in 7.0. It's more efficient/faster than the other highlighters, especially compared to the
|
||||
original Highlighter. That said, some options aren't supported yet, notably hl.fragsize.
|
||||
original Highlighter. That said, some options aren't supported yet.
|
||||
It will get more features in time, especially with your input. See HighlightParams.java
|
||||
for a listing of highlight parameters annotated with which highlighters use them.
|
||||
hl.useFastVectorHighlighter is now considered deprecated in lieu of hl.method=fastVector.
|
||||
|
@ -225,6 +225,9 @@ New Features
|
|||
* SOLR-7466: Enable leading wildcard in complexphrase query parser, optimize it with ReversedWildcardFilterFactory
|
||||
when it's provided (Mikhail Khludnev)
|
||||
|
||||
* SOLR-9935: Add hl.fragsize support when using the UnifiedHighlighter to avoid snippets/Passages that are too small.
|
||||
Defaults to 70. (David Smiley)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
|
||||
import org.apache.lucene.search.uhighlight.DefaultPassageFormatter;
|
||||
import org.apache.lucene.search.uhighlight.LengthGoalBreakIterator;
|
||||
import org.apache.lucene.search.uhighlight.PassageFormatter;
|
||||
import org.apache.lucene.search.uhighlight.PassageScorer;
|
||||
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
|
||||
|
@ -299,7 +300,16 @@ public class UnifiedSolrHighlighter extends SolrHighlighter implements PluginInf
|
|||
String variant = params.getFieldParam(field, HighlightParams.BS_VARIANT);
|
||||
Locale locale = parseLocale(language, country, variant);
|
||||
String type = params.getFieldParam(field, HighlightParams.BS_TYPE);
|
||||
return parseBreakIterator(type, locale);
|
||||
BreakIterator baseBI = parseBreakIterator(type, locale);
|
||||
|
||||
// Use a default fragsize the same as the regex Fragmenter (original Highlighter) since we're
|
||||
// both likely shooting for sentence-like patterns.
|
||||
int fragsize = params.getFieldInt(field, HighlightParams.FRAGSIZE, LuceneRegexFragmenter.DEFAULT_FRAGMENT_SIZE);
|
||||
if (fragsize <= 1 || baseBI instanceof WholeBreakIterator) { // no real minimum size
|
||||
return baseBI;
|
||||
}
|
||||
return LengthGoalBreakIterator.createMinLength(baseBI, fragsize);
|
||||
// TODO option for using createClosestToLength()
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -78,7 +78,8 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
|||
"text2", "document one", "text3", "crappy document", "id", "101"));
|
||||
assertU(commit());
|
||||
assertQ("multiple snippets test",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE"),
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.snippets", "2", "hl.bs.type", "SENTENCE",
|
||||
"hl.fragsize", "0"),
|
||||
"count(//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[1]='<em>Document</em> snippet one. '",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr/str[2]='<em>Document</em> snippet two.'");
|
||||
|
@ -202,22 +203,35 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
|||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text3']/str='crappier [document]'");
|
||||
}
|
||||
|
||||
public void testBreakIterator() {
|
||||
public void testBreakIteratorWord() {
|
||||
assertQ("different breakiterator",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD"),
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WORD", "hl.fragsize", "-1"),
|
||||
"count(//lst[@name='highlighting']/*)=2",
|
||||
"//lst[@name='highlighting']/lst[@name='101']/arr[@name='text']/str='<em>document</em>'",
|
||||
"//lst[@name='highlighting']/lst[@name='102']/arr[@name='text']/str='<em>document</em>'");
|
||||
}
|
||||
|
||||
public void testBreakIterator2() {
|
||||
public void testBreakIteratorWhole() {
|
||||
assertU(adoc("text", "Document one has a first sentence. Document two has a second sentence.", "id", "103"));
|
||||
assertU(commit());
|
||||
assertQ("different breakiterator",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE"),
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.bs.type", "WHOLE", "hl.fragsize", "-1"),
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first sentence. <em>Document</em> two has a second sentence.'");
|
||||
}
|
||||
|
||||
public void testFragsize() {
|
||||
// test default is 70... so make a sentence that is a little less (closer to 70 than end of text)
|
||||
clearIndex();
|
||||
assertU(adoc("id", "10", "text", "This is a sentence just under seventy chars in length blah blah. Next sentence is here."));
|
||||
assertU(commit());
|
||||
assertQ("default fragsize",
|
||||
req("q", "text:seventy", "hl", "true"),
|
||||
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. Next sentence is here.'");
|
||||
assertQ("smaller fragsize",
|
||||
req("q", "text:seventy", "hl", "true", "hl.fragsize", "60"), // a bit smaller
|
||||
"//lst[@name='highlighting']/lst[@name='10']/arr[@name='text']/str='This is a sentence just under <em>seventy</em> chars in length blah blah. '");
|
||||
}
|
||||
|
||||
public void testEncoder() {
|
||||
assertU(adoc("text", "Document one has a first <i>sentence</i>.", "id", "103"));
|
||||
assertU(commit());
|
||||
|
|
|
@ -49,7 +49,7 @@ public interface HighlightParams {
|
|||
public static final String HIGHLIGHT_ALTERNATE = HIGHLIGHT+".highlightAlternate"; // OH, FVH
|
||||
|
||||
// sizing
|
||||
public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH
|
||||
public static final String FRAGSIZE = HIGHLIGHT+".fragsize"; // OH, FVH, UH
|
||||
public static final String FRAGMENTER = HIGHLIGHT+".fragmenter"; // OH
|
||||
public static final String INCREMENT = HIGHLIGHT+".increment"; // OH
|
||||
public static final String REGEX = "regex"; // OH
|
||||
|
|
Loading…
Reference in New Issue