mirror of https://github.com/apache/lucene.git
LUCENE-9431: UnifiedHighlighter WEIGHT_MATCHES is now true by default (#362)
Co-authored-by: Animesh Pandey <apanimesh061@gmail.com>
This commit is contained in:
parent
e3151d6c7d
commit
2719cf6630
|
@ -160,6 +160,9 @@ API Changes
|
|||
|
||||
* LUCENE-9325: Sort is now final, and the `setSort()` method has been removed (Alan Woodward)
|
||||
|
||||
* LUCENE-9431: The UnifiedHighlighter's WEIGHT_MATCHES flag is now set by default, provided its
|
||||
requirements are met. It can be disabled via over-riding getFlags (Animesh Pandey, David Smiley)
|
||||
|
||||
* LUCENE-10158: Add a new interface Unwrappable to the utils package to allow code to
|
||||
unwrap wrappers/delegators that are added by Lucene's testing framework. This will allow
|
||||
testing new MMapDirectory implementation based on JDK Project Panama. (Uwe Schindler)
|
||||
|
|
|
@ -86,7 +86,7 @@ import org.apache.lucene.util.InPlaceMergeSorter;
|
|||
* <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
|
||||
* </ul>
|
||||
*
|
||||
* <p>This is thread-safe.
|
||||
* <p>This is thread-safe, notwithstanding the setters.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
@ -823,6 +823,7 @@ public class UnifiedHighlighter {
|
|||
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
|
||||
}
|
||||
|
||||
/** Customize the highlighting flags to use by field. */
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
|
||||
if (shouldHandleMultiTermQuery(field)) {
|
||||
|
@ -834,6 +835,11 @@ public class UnifiedHighlighter {
|
|||
if (shouldPreferPassageRelevancyOverSpeed(field)) {
|
||||
highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
|
||||
}
|
||||
if (highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||
&& highlightFlags.contains(HighlightFlag.PHRASES)
|
||||
&& highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
|
||||
highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
|
||||
}
|
||||
return highlightFlags;
|
||||
}
|
||||
|
||||
|
@ -1168,9 +1174,11 @@ public class UnifiedHighlighter {
|
|||
|
||||
/**
|
||||
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
|
||||
* more accurate to the query, though might not calculate passage relevancy as well. Use of this
|
||||
* flag requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES}. {@link
|
||||
* #PASSAGE_RELEVANCY_OVER_SPEED} will be ignored. False by default.
|
||||
* more accurate to the query, and the snippets can be a little different for phrases because
|
||||
* the whole phrase is marked up instead of each word. The passage relevancy calculation can be
|
||||
* different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
|
||||
* requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES} and {@link
|
||||
* #PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true by default.
|
||||
*/
|
||||
WEIGHT_MATCHES
|
||||
|
||||
|
|
|
@ -460,6 +460,25 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
ir.close();
|
||||
}
|
||||
|
||||
public void testHighlighterDefaultFlags() throws Exception {
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
|
||||
Document document = new Document();
|
||||
document.add(new Field("body", "test body", fieldType));
|
||||
iw.addDocument(document);
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
Set<HighlightFlag> flags = highlighter.getFlags("body");
|
||||
assertTrue(flags.contains(HighlightFlag.PHRASES));
|
||||
assertTrue(flags.contains(HighlightFlag.MULTI_TERM_QUERY));
|
||||
assertTrue(flags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED));
|
||||
assertTrue(flags.contains(HighlightFlag.WEIGHT_MATCHES));
|
||||
// if more flags are added, bump the number below and add an assertTrue or assertFalse above
|
||||
assertEquals(4, HighlightFlag.values().length);
|
||||
ir.close();
|
||||
}
|
||||
|
||||
public void testCuriousGeorge() throws Exception {
|
||||
String text =
|
||||
"It’s the formula for success for preschoolers—Curious George and fire trucks! "
|
||||
|
|
|
@ -778,10 +778,15 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
assertEquals(1, snippets.length);
|
||||
|
||||
// All flags are enabled.
|
||||
assertEquals(
|
||||
"<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.",
|
||||
"" + highlighter.getFlags("body"),
|
||||
HighlightFlag.values().length,
|
||||
highlighter.getFlags("body").size());
|
||||
assertEquals(
|
||||
"" + highlighter.getFlags("title"),
|
||||
"<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(sentence)</b> document.",
|
||||
snippets[0]);
|
||||
|
||||
ir.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -27,7 +27,6 @@ import java.util.Collections;
|
|||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -87,47 +86,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
|
||||
static UnifiedHighlighter randomUnifiedHighlighter(
|
||||
IndexSearcher searcher, Analyzer indexAnalyzer) {
|
||||
return randomUnifiedHighlighter(
|
||||
return TestUnifiedHighlighter.randomUnifiedHighlighter(
|
||||
searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
|
||||
}
|
||||
|
||||
static UnifiedHighlighter randomUnifiedHighlighter(
|
||||
IndexSearcher searcher,
|
||||
Analyzer indexAnalyzer,
|
||||
EnumSet<HighlightFlag> mandatoryFlags,
|
||||
Boolean requireFieldMatch) {
|
||||
final UnifiedHighlighter uh =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
Set<HighlightFlag> flags; // consistently random set of flags for this test run
|
||||
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
if (flags != null) {
|
||||
return flags;
|
||||
}
|
||||
final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
|
||||
int r = random().nextInt();
|
||||
for (HighlightFlag highlightFlag : HighlightFlag.values()) {
|
||||
if (((1 << highlightFlag.ordinal()) & r) == 0) {
|
||||
result.add(highlightFlag);
|
||||
}
|
||||
}
|
||||
if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
// these two are required for WEIGHT_MATCHES
|
||||
result.add(HighlightFlag.MULTI_TERM_QUERY);
|
||||
result.add(HighlightFlag.PHRASES);
|
||||
}
|
||||
return flags = result;
|
||||
}
|
||||
};
|
||||
uh.setCacheFieldValCharsThreshold(random().nextInt(100));
|
||||
if (requireFieldMatch == Boolean.FALSE
|
||||
|| (requireFieldMatch == null && random().nextBoolean())) {
|
||||
uh.setFieldMatcher(f -> true); // requireFieldMatch==false
|
||||
}
|
||||
return uh;
|
||||
}
|
||||
|
||||
//
|
||||
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer
|
||||
// tests.
|
||||
|
@ -1033,15 +995,15 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
assertEquals(1, topDocs.totalHits.value);
|
||||
String[] snippets = highlighter.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
assertEquals(
|
||||
"" + highlighter.getFlags("title"), "<b>This is the title field</b>.", snippets[0]);
|
||||
} else {
|
||||
assertEquals(
|
||||
"" + highlighter.getFlags("title"),
|
||||
"<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
|
||||
snippets[0]);
|
||||
}
|
||||
// All flags are enabled.
|
||||
assertEquals(
|
||||
"" + highlighter.getFlags("title"),
|
||||
HighlightFlag.values().length,
|
||||
highlighter.getFlags("title").size());
|
||||
assertEquals(
|
||||
"" + highlighter.getFlags("title"),
|
||||
"<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
|
||||
snippets[0]);
|
||||
ir.close();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue