LUCENE-10197: UnifiedHighlighter now has a Builder (#412)

Setters (mutable state) on the UH are deprecated.

Co-authored-by: David Smiley <dsmiley@apache.org>
This commit is contained in:
Animesh Pandey 2021-12-19 12:38:23 -05:00 committed by GitHub
parent 5512786dd9
commit b1b1a890d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 796 additions and 391 deletions

View File

@ -41,6 +41,9 @@ API Changes
* LUCENE-10244: MultiCollector::getCollectors is now public, allowing users to access the wrapped
collectors. (Andriy Redko)
* LUCENE-10197: UnifiedHighlighter now has a Builder to construct it. The UH's setters are now
deprecated. (Animesh Pandey, David Smiley)
New Features
---------------------

View File

@ -288,17 +288,19 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
return;
}
lastSearcher = searcher;
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, analyzer)
.withBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH))
.withMaxLength(maxDocCharsToAnalyze)
.withHighlightPhrasesStrictly(true)
.withHandleMultiTermQuery(true);
highlighter =
new UnifiedHighlighter(searcher, analyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected OffsetSource getOffsetSource(String field) {
return offsetSource != null ? offsetSource : super.getOffsetSource(field);
}
};
highlighter.setBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH));
highlighter.setMaxLength(maxDocCharsToAnalyze);
highlighter.setHighlightPhrasesStrictly(true);
highlighter.setHandleMultiTermQuery(true);
}
@Override

View File

@ -53,7 +53,6 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreDoc;
@ -113,43 +112,52 @@ public class UnifiedHighlighter {
protected static final LabelledCharArrayMatcher[] ZERO_LEN_AUTOMATA_ARRAY =
new LabelledCharArrayMatcher[0];
// All the private defaults will be removed once non-builder based UH is removed.
private static final boolean DEFAULT_ENABLE_MULTI_TERM_QUERY = true;
private static final boolean DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY = true;
private static final boolean DEFAULT_ENABLE_WEIGHT_MATCHES = true;
private static final boolean DEFAULT_ENABLE_RELEVANCY_OVER_SPEED = true;
private static final Supplier<BreakIterator> DEFAULT_BREAK_ITERATOR =
() -> BreakIterator.getSentenceInstance(Locale.ROOT);
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
protected final Analyzer indexAnalyzer;
private boolean defaultHandleMtq = true; // e.g. wildcards
// lazy initialized with double-check locking; protected so subclass can init
protected volatile FieldInfos fieldInfos;
private boolean defaultHighlightPhrasesStrictly = true; // AKA "accuracy" or "query debugging"
private Predicate<String> fieldMatcher;
private Set<HighlightFlag> flags;
// e.g. wildcards
private boolean handleMultiTermQuery = DEFAULT_ENABLE_MULTI_TERM_QUERY;
// AKA "accuracy" or "query debugging"
private boolean highlightPhrasesStrictly = DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY;
private boolean weightMatches = DEFAULT_ENABLE_WEIGHT_MATCHES;
// For analysis, prefer MemoryIndexOffsetStrategy
private boolean defaultPassageRelevancyOverSpeed = true;
private boolean passageRelevancyOverSpeed = DEFAULT_ENABLE_RELEVANCY_OVER_SPEED;
private int maxLength = DEFAULT_MAX_LENGTH;
// BreakIterator is stateful so we use a Supplier factory method
private Supplier<BreakIterator> defaultBreakIterator =
() -> BreakIterator.getSentenceInstance(Locale.ROOT);
private Supplier<BreakIterator> breakIterator = DEFAULT_BREAK_ITERATOR;
private Predicate<String> defaultFieldMatcher;
private PassageScorer scorer = DEFAULT_PASSAGE_SCORER;
private PassageScorer defaultScorer = new PassageScorer();
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
private int defaultMaxNoHighlightPassages = -1;
// lazy initialized with double-check locking; protected so subclass can init
protected volatile FieldInfos fieldInfos;
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
/** Extracts matching terms after rewriting against an empty index */
protected static Set<Term> extractTerms(Query query) throws IOException {
Set<Term> queryTerms = new HashSet<>();
EMPTY_INDEXSEARCHER.rewrite(query).visit(QueryVisitor.termCollector(queryTerms));
return queryTerms;
}
/**
* Constructs the highlighter with the given index searcher and analyzer.
*
@ -157,6 +165,7 @@ public class UnifiedHighlighter {
* String, int)} is used, in which case this needs to be null.
* @param indexAnalyzer Required, even if in some circumstances it isn't used.
*/
@Deprecated
public UnifiedHighlighter(IndexSearcher indexSearcher, Analyzer indexAnalyzer) {
this.searcher = indexSearcher; // TODO: make non nullable
this.indexAnalyzer =
@ -165,14 +174,22 @@ public class UnifiedHighlighter {
"indexAnalyzer is required" + " (even if in some circumstances it isn't used)");
}
@Deprecated
public void setHandleMultiTermQuery(boolean handleMtq) {
this.defaultHandleMtq = handleMtq;
this.handleMultiTermQuery = handleMtq;
}
@Deprecated
public void setHighlightPhrasesStrictly(boolean highlightPhrasesStrictly) {
this.defaultHighlightPhrasesStrictly = highlightPhrasesStrictly;
this.highlightPhrasesStrictly = highlightPhrasesStrictly;
}
/**
* Sets whether passage relevancy is preferred over speed.
*
* @param passageRelevancyOverSpeed the new value of the setting
* @deprecated Use {@link Builder#withPassageRelevancyOverSpeed(boolean)} instead.
*/
@Deprecated
public void setPassageRelevancyOverSpeed(boolean passageRelevancyOverSpeed) {
this.passageRelevancyOverSpeed = passageRelevancyOverSpeed;
}
@Deprecated
public void setMaxLength(int maxLength) {
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
@ -182,36 +199,49 @@ public class UnifiedHighlighter {
this.maxLength = maxLength;
}
@Deprecated
public void setBreakIterator(Supplier<BreakIterator> breakIterator) {
this.defaultBreakIterator = breakIterator;
this.breakIterator = breakIterator;
}
@Deprecated
public void setScorer(PassageScorer scorer) {
this.defaultScorer = scorer;
this.scorer = scorer;
}
@Deprecated
public void setFormatter(PassageFormatter formatter) {
this.defaultFormatter = formatter;
this.formatter = formatter;
}
@Deprecated
public void setMaxNoHighlightPassages(int defaultMaxNoHighlightPassages) {
this.defaultMaxNoHighlightPassages = defaultMaxNoHighlightPassages;
this.maxNoHighlightPassages = defaultMaxNoHighlightPassages;
}
/**
* Replaces the cache-field-value character threshold, which starts out as {@code
* DEFAULT_CACHE_CHARS_THRESHOLD}.
*
* @param cacheFieldValCharsThreshold the new threshold
* @deprecated Use {@link Builder#withCacheFieldValCharsThreshold(int)} instead.
*/
@Deprecated
public void setCacheFieldValCharsThreshold(int cacheFieldValCharsThreshold) {
this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
}
@Deprecated
public void setFieldMatcher(Predicate<String> predicate) {
this.defaultFieldMatcher = predicate;
this.fieldMatcher = predicate;
}
/**
* Sets whether {@link HighlightFlag#WEIGHT_MATCHES} may be enabled. The flag is only applied when
* multi-term query handling, strict phrase highlighting and passage relevancy over speed are all
* enabled as well.
*
* @param weightMatches false to opt out of {@link HighlightFlag#WEIGHT_MATCHES}
* @deprecated Use {@link Builder#withWeightMatches(boolean)} instead.
*/
@Deprecated
public void setWeightMatches(boolean weightMatches) {
this.weightMatches = weightMatches;
}
/**
* Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's
* enabled. MTQ highlighting can be expensive, particularly when using offsets in postings.
* Returns whether {@link org.apache.lucene.search.MultiTermQuery} derivatives will be
* highlighted. By default it's enabled. MTQ highlighting can be expensive, particularly when
* using offsets in postings.
*/
@Deprecated
protected boolean shouldHandleMultiTermQuery(String field) {
return defaultHandleMtq;
return handleMultiTermQuery;
}
/**
@ -219,12 +249,295 @@ public class UnifiedHighlighter {
* highlighted strictly based on query matches (slower) versus any/all occurrences of the
* underlying terms. By default it's enabled, but there's no overhead if such queries aren't used.
*/
@Deprecated
protected boolean shouldHighlightPhrasesStrictly(String field) {
return defaultHighlightPhrasesStrictly;
return highlightPhrasesStrictly;
}
@Deprecated
protected boolean shouldPreferPassageRelevancyOverSpeed(String field) {
return defaultPassageRelevancyOverSpeed;
return passageRelevancyOverSpeed;
}
/**
* Builder for UnifiedHighlighter.
*
* <p>Every {@code with*} method stores its argument and returns this builder so that calls can be
* chained; {@link #build()} then creates the highlighter from this builder.
*/
public static class Builder {
/** If null, can only use highlightWithoutSearcher. */
private final IndexSearcher searcher;
private final Analyzer indexAnalyzer;
private Predicate<String> fieldMatcher;
private Set<HighlightFlag> flags;
private boolean handleMultiTermQuery = DEFAULT_ENABLE_MULTI_TERM_QUERY;
private boolean highlightPhrasesStrictly = DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY;
private boolean passageRelevancyOverSpeed = DEFAULT_ENABLE_RELEVANCY_OVER_SPEED;
private boolean weightMatches = DEFAULT_ENABLE_WEIGHT_MATCHES;
private int maxLength = DEFAULT_MAX_LENGTH;
/** BreakIterator is stateful so we use a Supplier factory method. */
private Supplier<BreakIterator> breakIterator = DEFAULT_BREAK_ITERATOR;
private PassageScorer scorer = DEFAULT_PASSAGE_SCORER;
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
/**
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
* {@link IndexSearcher} object can only be null when {@link #highlightWithoutSearcher(String,
* Query, String, int)} is used.
*
* @param searcher - {@link IndexSearcher}
* @param indexAnalyzer - {@link Analyzer}
*/
public Builder(IndexSearcher searcher, Analyzer indexAnalyzer) {
this.searcher = searcher;
this.indexAnalyzer = indexAnalyzer;
}
/**
* User-defined set of {@link HighlightFlag} values which will override the flags set by {@link
* #withHandleMultiTermQuery(boolean)}, {@link #withHighlightPhrasesStrictly(boolean)}, {@link
* #withPassageRelevancyOverSpeed(boolean)} and {@link #withWeightMatches(boolean)}.
*
* <p>Here the user can either specify the set of {@link HighlightFlag}s to be applied or use
* the boolean flags to populate final list of {@link HighlightFlag}s.
*
* @param values - set of {@link HighlightFlag} values.
*/
public Builder withFlags(Set<HighlightFlag> values) {
this.flags = values;
return this;
}
/**
* Here position sensitive queries (e.g. phrases and {@link SpanQuery}ies) are highlighted
* strictly based on query matches (slower) versus any/all occurrences of the underlying terms.
* By default it's enabled, but there's no overhead if such queries aren't used.
*/
public Builder withHighlightPhrasesStrictly(boolean value) {
this.highlightPhrasesStrictly = value;
return this;
}
/**
* Here {@link org.apache.lucene.search.MultiTermQuery} derivatives will be highlighted. By
* default it's enabled. MTQ highlighting can be expensive, particularly when using offsets in
* postings.
*/
public Builder withHandleMultiTermQuery(boolean value) {
this.handleMultiTermQuery = value;
return this;
}
/** Passage relevancy is more important than speed. True by default. */
public Builder withPassageRelevancyOverSpeed(boolean value) {
this.passageRelevancyOverSpeed = value;
return this;
}
/**
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
* more accurate to the query, and the snippets can be a little different for phrases because
* the whole phrase is marked up instead of each word. The passage relevancy calculation can be
* different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
* requires {@link HighlightFlag#MULTI_TERM_QUERY} and {@link HighlightFlag#PHRASES} and {@link
* HighlightFlag#PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true
* by default.
*/
public Builder withWeightMatches(boolean value) {
this.weightMatches = value;
return this;
}
/**
* The text to be highlighted is effectively truncated by this length.
*
* @throws IllegalArgumentException if {@code value} is negative or equal to {@link
*     Integer#MAX_VALUE}
*/
public Builder withMaxLength(int value) {
if (value < 0 || value == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
}
this.maxLength = value;
return this;
}
/**
* Sets the factory that supplies a {@link BreakIterator} whenever the highlighter asks for one.
* A supplier is used because BreakIterator is stateful. Defaults to a sentence iterator for
* {@link Locale#ROOT}.
*/
public Builder withBreakIterator(Supplier<BreakIterator> value) {
this.breakIterator = value;
return this;
}
/**
* Sets the predicate that the highlighter returns as its field matcher. When this is left unset
* (null), the highlighter falls back to matching only the field being highlighted.
*/
public Builder withFieldMatcher(Predicate<String> value) {
this.fieldMatcher = value;
return this;
}
/** Sets the {@link PassageScorer}. Defaults to a {@code new PassageScorer()}. */
public Builder withScorer(PassageScorer value) {
this.scorer = value;
return this;
}
/** Sets the {@link PassageFormatter}. Defaults to a {@code new DefaultPassageFormatter()}. */
public Builder withFormatter(PassageFormatter value) {
this.formatter = value;
return this;
}
/** Sets the maximum number of no-highlight passages. Defaults to -1. */
public Builder withMaxNoHighlightPassages(int value) {
this.maxNoHighlightPassages = value;
return this;
}
/**
* Sets the cache-field-value character threshold. Defaults to {@code
* DEFAULT_CACHE_CHARS_THRESHOLD}.
*/
public Builder withCacheFieldValCharsThreshold(int value) {
this.cacheFieldValCharsThreshold = value;
return this;
}
/** Creates a {@link UnifiedHighlighter} from this builder. */
public UnifiedHighlighter build() {
return new UnifiedHighlighter(this);
}
/** ... as passed in from the Builder constructor. */
public IndexSearcher getIndexSearcher() {
return searcher;
}
/** ... as passed in from the Builder constructor. */
public Analyzer getIndexAnalyzer() {
return indexAnalyzer;
}
/**
* Returns the set given to {@link #withFlags(Set)}, or null if {@link #withFlags(Set)} was never
* called.
*/
public Set<HighlightFlag> getFlags() {
return flags;
}
}
/**
* Creates a {@link Builder} object for the given {@link IndexSearcher} and {@link Analyzer}.
*
* <p>Neither argument is validated here. A null analyzer is rejected later, when a {@link
* UnifiedHighlighter} is constructed from the builder.
*
* @param searcher - a {@link IndexSearcher} object.
* @param indexAnalyzer - a {@link Analyzer} object.
* @return a {@link Builder} object
*/
public static Builder builder(IndexSearcher searcher, Analyzer indexAnalyzer) {
return new Builder(searcher, indexAnalyzer);
}
/**
* Creates a {@link Builder} object in which you can only use {@link
* UnifiedHighlighter#highlightWithoutSearcher(String, Query, String, int)} for highlighting.
*
* <p>The builder is created with a null {@link IndexSearcher}.
*
* @param indexAnalyzer - a {@link Analyzer} object.
* @return a {@link Builder} object
*/
public static Builder builderWithoutSearcher(Analyzer indexAnalyzer) {
return new Builder(null, indexAnalyzer);
}
/**
* Constructs the highlighter with the given {@link Builder}.
*
* @param builder - a {@link Builder} object.
* @throws NullPointerException if the builder's index analyzer is null.
*/
public UnifiedHighlighter(Builder builder) {
this.searcher = builder.searcher;
this.indexAnalyzer =
Objects.requireNonNull(
builder.indexAnalyzer,
"indexAnalyzer is required (even if in some circumstances it isn't used)");
// NOTE: evaluateFlags is protected and therefore overridable, and it runs before the fields
// below are assigned. An override must not rely on those fields being set yet.
this.flags = evaluateFlags(builder);
this.maxLength = builder.maxLength;
this.breakIterator = builder.breakIterator;
this.fieldMatcher = builder.fieldMatcher;
this.scorer = builder.scorer;
this.formatter = builder.formatter;
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
}
/**
 * Rewrites the query against an empty index and gathers the terms reported by the rewritten
 * query's term-collecting visitor.
 *
 * @param query the query to inspect
 * @return the collected terms
 * @throws IOException if rewriting the query fails
 */
protected static Set<Term> extractTerms(Query query) throws IOException {
  final Query rewritten = EMPTY_INDEXSEARCHER.rewrite(query);
  final Set<Term> collected = new HashSet<>();
  rewritten.visit(QueryVisitor.termCollector(collected));
  return collected;
}
/**
 * Translates four boolean switches into the corresponding set of {@link HighlightFlag}s. The
 * switches come either from the {@link Builder} ({@link
 * Builder#withHandleMultiTermQuery(boolean)}, {@link
 * Builder#withHighlightPhrasesStrictly(boolean)}, {@link
 * Builder#withPassageRelevancyOverSpeed(boolean)}, {@link Builder#withWeightMatches(boolean)}) or
 * from the deprecated setters ({@link #setHandleMultiTermQuery(boolean)}, {@link
 * #setHighlightPhrasesStrictly(boolean)}, {@link #setPassageRelevancyOverSpeed(boolean)}, {@link
 * #setWeightMatches(boolean)}).
 *
 * <p>{@link HighlightFlag#WEIGHT_MATCHES} is only included when all four switches are true.
 *
 * @param shouldHandleMultiTermQuery - adds {@link HighlightFlag#MULTI_TERM_QUERY}
 * @param shouldHighlightPhrasesStrictly - adds {@link HighlightFlag#PHRASES}
 * @param shouldPassageRelevancyOverSpeed - adds {@link
 *     HighlightFlag#PASSAGE_RELEVANCY_OVER_SPEED}
 * @param shouldEnableWeightMatches - allows {@link HighlightFlag#WEIGHT_MATCHES}
 * @return a new, mutable set of {@link HighlightFlag}s.
 */
protected Set<HighlightFlag> evaluateFlags(
    final boolean shouldHandleMultiTermQuery,
    final boolean shouldHighlightPhrasesStrictly,
    final boolean shouldPassageRelevancyOverSpeed,
    final boolean shouldEnableWeightMatches) {
  final Set<HighlightFlag> enabled = EnumSet.noneOf(HighlightFlag.class);
  if (shouldHandleMultiTermQuery) {
    enabled.add(HighlightFlag.MULTI_TERM_QUERY);
  }
  if (shouldHighlightPhrasesStrictly) {
    enabled.add(HighlightFlag.PHRASES);
  }
  if (shouldPassageRelevancyOverSpeed) {
    enabled.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
  }
  // WEIGHT_MATCHES depends on the three flags above and can additionally be opted out of.
  if (shouldEnableWeightMatches
      && shouldHandleMultiTermQuery
      && shouldHighlightPhrasesStrictly
      && shouldPassageRelevancyOverSpeed) {
    enabled.add(HighlightFlag.WEIGHT_MATCHES);
  }
  return enabled;
}
/**
 * Evaluate the highlight flags and set the {@link #flags} variable. This is called only once when
 * the Builder object is used to create a UH object.
 *
 * <p>The result is chosen in this order:
 *
 * <ol>
 *   <li>flags already stored on this highlighter, if any;
 *   <li>the explicit set given to {@link Builder#withFlags(Set)}, which overrides the boolean
 *       switches (the set is copied so later changes to the caller's set have no effect);
 *   <li>the flags derived from the builder's boolean switches.
 * </ol>
 *
 * @param uhBuilder - {@link Builder} object.
 * @return {@link HighlightFlag}s.
 */
protected Set<HighlightFlag> evaluateFlags(Builder uhBuilder) {
  if (flags != null) {
    return flags;
  }
  if (uhBuilder.flags != null) {
    // Previously the builder's explicit flags were never read, so withFlags() had no effect.
    // noneOf + addAll is used instead of EnumSet.copyOf because copyOf rejects an empty
    // collection that is not itself an EnumSet.
    final Set<HighlightFlag> explicitFlags = EnumSet.noneOf(HighlightFlag.class);
    explicitFlags.addAll(uhBuilder.flags);
    return flags = explicitFlags;
  }
  return flags =
      evaluateFlags(
          uhBuilder.handleMultiTermQuery,
          uhBuilder.highlightPhrasesStrictly,
          uhBuilder.passageRelevancyOverSpeed,
          uhBuilder.weightMatches);
}
/**
 * Computes the highlight flags from the current boolean settings of the given highlighter. The
 * result is returned without being stored. {@link #getFlags(String)} calls this each time it
 * finds no stored flag set, which is the case for highlighters configured through the deprecated
 * setters.
 *
 * @param uh - {@link UnifiedHighlighter} object.
 * @return {@link HighlightFlag}s.
 * @deprecated Only needed while the mutable, setter-based configuration still exists; use {@link
 *     Builder} instead.
 */
@Deprecated
protected Set<HighlightFlag> evaluateFlags(UnifiedHighlighter uh) {
  final boolean multiTermEnabled = uh.handleMultiTermQuery;
  final boolean strictPhrasesEnabled = uh.highlightPhrasesStrictly;
  final boolean relevancyPreferred = uh.passageRelevancyOverSpeed;
  final boolean weightMatchesAllowed = uh.weightMatches;
  return evaluateFlags(
      multiTermEnabled, strictPhrasesEnabled, relevancyPreferred, weightMatchesAllowed);
}
/**
@ -232,14 +545,25 @@ public class UnifiedHighlighter {
* only queries that target the current field are kept. (AKA requireFieldMatch)
*/
protected Predicate<String> getFieldMatcher(String field) {
if (defaultFieldMatcher != null) {
return defaultFieldMatcher;
if (fieldMatcher != null) {
return fieldMatcher;
} else {
// requireFieldMatch = true
return (qf) -> field.equals(qf);
}
}
/**
 * Returns the {@link HighlightFlag}s applicable for the current UH instance.
 *
 * @param field the field being highlighted; not consulted by this implementation
 * @return the stored flags when present, otherwise flags recomputed from the current settings
 */
protected Set<HighlightFlag> getFlags(String field) {
  // The Builder-based constructor stores the flags up front. Only the deprecated setter-based
  // path leaves them unset, in which case they are re-evaluated on every call. Once the setters
  // are removed this can simply return the stored flags.
  return flags != null ? flags : evaluateFlags(this);
}
/**
* The maximum content size to process. Content will be truncated to this size before
* highlighting. Typically snippets closer to the beginning of the document better summarize its
@ -258,7 +582,7 @@ public class UnifiedHighlighter {
* preceding} performs poorly.
*/
protected BreakIterator getBreakIterator(String field) {
return defaultBreakIterator.get();
return breakIterator.get();
}
/**
@ -266,7 +590,7 @@ public class UnifiedHighlighter {
* PassageScorer} by default; subclasses can override to customize.
*/
protected PassageScorer getScorer(String field) {
return defaultScorer;
return scorer;
}
/**
@ -274,7 +598,7 @@ public class UnifiedHighlighter {
* This returns a new {@code PassageFormatter} by default; subclasses can override to customize.
*/
protected PassageFormatter getFormatter(String field) {
return defaultFormatter;
return formatter;
}
/**
@ -284,7 +608,7 @@ public class UnifiedHighlighter {
* null (not formatted).
*/
protected int getMaxNoHighlightPassages(String field) {
return defaultMaxNoHighlightPassages;
return maxNoHighlightPassages;
}
/**
@ -823,26 +1147,6 @@ public class UnifiedHighlighter {
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
}
/** Customize the highlighting flags to use by field. */
protected Set<HighlightFlag> getFlags(String field) {
Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
if (shouldHandleMultiTermQuery(field)) {
highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY);
}
if (shouldHighlightPhrasesStrictly(field)) {
highlightFlags.add(HighlightFlag.PHRASES);
}
if (shouldPreferPassageRelevancyOverSpeed(field)) {
highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
}
if (highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
&& highlightFlags.contains(HighlightFlag.PHRASES)
&& highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
}
return highlightFlags;
}
protected PhraseHelper getPhraseHelper(
String field, Query query, Set<HighlightFlag> highlightFlags) {
boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES);
@ -1163,23 +1467,16 @@ public class UnifiedHighlighter {
/** Flags for controlling highlighting behavior. */
public enum HighlightFlag {
/** @see UnifiedHighlighter#setHighlightPhrasesStrictly(boolean) */
/** @see Builder#withHighlightPhrasesStrictly(boolean) */
PHRASES,
/** @see UnifiedHighlighter#setHandleMultiTermQuery(boolean) */
/** @see Builder#withHandleMultiTermQuery(boolean) */
MULTI_TERM_QUERY,
/** Passage relevancy is more important than speed. True by default. */
/** @see Builder#withPassageRelevancyOverSpeed(boolean) */
PASSAGE_RELEVANCY_OVER_SPEED,
/**
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
* more accurate to the query, and the snippets can be a little different for phrases because
* the whole phrase is marked up instead of each word. The passage relevancy calculation can be
* different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
* requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES} and {@link
* #PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true by default.
*/
/** @see Builder#withWeightMatches(boolean) */
WEIGHT_MATCHES
// TODO: useQueryBoosts

View File

@ -218,11 +218,13 @@ public class TestLengthGoalBreakIterator extends LuceneTestCase {
private String highlightClosestToLen(
String content, Query query, int lengthGoal, float fragAlign, int maxPassages, char separator)
throws IOException {
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
highlighter.setBreakIterator(
() ->
LengthGoalBreakIterator.createClosestToLength(
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
UnifiedHighlighter highlighter =
UnifiedHighlighter.builderWithoutSearcher(analyzer)
.withBreakIterator(
() ->
LengthGoalBreakIterator.createClosestToLength(
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign))
.build();
return highlighter.highlightWithoutSearcher(FIELD, query, content, maxPassages).toString();
}
@ -235,11 +237,13 @@ public class TestLengthGoalBreakIterator extends LuceneTestCase {
String content, Query query, int lengthGoal, float fragAlign, char separator)
throws IOException {
// differs from above only by "createMinLength"
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
highlighter.setBreakIterator(
() ->
LengthGoalBreakIterator.createMinLength(
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
UnifiedHighlighter highlighter =
UnifiedHighlighter.builderWithoutSearcher(analyzer)
.withBreakIterator(
() ->
LengthGoalBreakIterator.createMinLength(
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign))
.build();
return highlighter.highlightWithoutSearcher(FIELD, query, content, 1).toString();
}
}

View File

@ -27,6 +27,7 @@ import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.analysis.Analyzer;
@ -90,6 +91,23 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
dir.close();
}
/**
 * Picks a random set of flags that always contains {@code requiredFlags}. One random int is drawn
 * and each flag is added when the bit at the flag's ordinal position is zero. If {@link
 * HighlightFlag#WEIGHT_MATCHES} ends up in the set, {@link HighlightFlag#MULTI_TERM_QUERY} and
 * {@link HighlightFlag#PHRASES} are added as well.
 */
static Set<HighlightFlag> generateRandomHighlightFlags(EnumSet<HighlightFlag> requiredFlags) {
  final EnumSet<HighlightFlag> chosen = EnumSet.copyOf(requiredFlags);
  final int randomBits = random().nextInt();
  for (HighlightFlag candidate : HighlightFlag.values()) {
    final int mask = 1 << candidate.ordinal();
    if ((randomBits & mask) == 0) {
      chosen.add(candidate);
    }
  }
  if (chosen.contains(HighlightFlag.WEIGHT_MATCHES)) {
    // these two are required for WEIGHT_MATCHES
    chosen.addAll(EnumSet.of(HighlightFlag.MULTI_TERM_QUERY, HighlightFlag.PHRASES));
  }
  return chosen;
}
/** This randomized test method uses builder from the UH class. */
static UnifiedHighlighter randomUnifiedHighlighter(
IndexSearcher searcher, Analyzer indexAnalyzer) {
return randomUnifiedHighlighter(
@ -101,36 +119,54 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
Analyzer indexAnalyzer,
EnumSet<HighlightFlag> mandatoryFlags,
Boolean requireFieldMatch) {
final UnifiedHighlighter uh =
new UnifiedHighlighter(searcher, indexAnalyzer) {
Set<HighlightFlag> flags; // consistently random set of flags for this test run
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
return randomUnifiedHighlighter(uhBuilder, mandatoryFlags, requireFieldMatch);
}
@Override
protected Set<HighlightFlag> getFlags(String field) {
if (flags != null) {
return flags;
}
final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
int r = random().nextInt();
for (HighlightFlag highlightFlag : HighlightFlag.values()) {
if (((1 << highlightFlag.ordinal()) & r) == 0) {
result.add(highlightFlag);
}
}
if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
// these two are required for WEIGHT_MATCHES
result.add(HighlightFlag.MULTI_TERM_QUERY);
result.add(HighlightFlag.PHRASES);
}
return flags = result;
}
};
uh.setCacheFieldValCharsThreshold(random().nextInt(100));
/**
* Convenience overload that delegates to the three-argument variant with an empty set of
* mandatory flags and a null {@code requireFieldMatch}.
*/
static UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
return randomUnifiedHighlighter(uhBuilder, EnumSet.noneOf(HighlightFlag.class), null);
}
static UnifiedHighlighter randomUnifiedHighlighter(
UnifiedHighlighter.Builder uhBuilder,
EnumSet<HighlightFlag> mandatoryFlags,
Boolean requireFieldMatch) {
uhBuilder.withCacheFieldValCharsThreshold(random().nextInt(100));
if (requireFieldMatch == Boolean.FALSE
|| (requireFieldMatch == null && random().nextBoolean())) {
uh.setFieldMatcher(f -> true); // requireFieldMatch==false
uhBuilder.withFieldMatcher(f -> true); // requireFieldMatch==false
}
return uh;
return overriddenBuilderForTests(uhBuilder, mandatoryFlags).build();
}
/**
 * Builds a new highlighter that uses {@code value} as its field matcher. From {@code original} it
 * takes the searcher, the analyzer, the flags reported for {@code fieldName} and the
 * cache-field-value character threshold. No other setting of {@code original} is passed to the
 * new builder.
 */
static UnifiedHighlighter overrideFieldMatcherForTests(
    UnifiedHighlighter original, Predicate<String> value, String fieldName) {
  final UnifiedHighlighter.Builder replacement =
      UnifiedHighlighter.builder(original.getIndexSearcher(), original.getIndexAnalyzer());
  replacement.withFlags(original.getFlags(fieldName));
  replacement.withCacheFieldValCharsThreshold(original.getCacheFieldValCharsThreshold());
  replacement.withFieldMatcher(value);
  return replacement.build();
}
/**
* Wraps {@code uhBuilder} in a builder whose {@link UnifiedHighlighter.Builder#build()} produces
* a highlighter with randomly generated flags that always include {@code mandatoryFlags}.
*
* <p>The returned builder is constructed with only the searcher and analyzer of {@code
* uhBuilder}. Its {@code build()} creates the highlighter from the original {@code uhBuilder}, so
* {@code with*} calls made on the returned builder are not used by {@code build()}.
*/
static UnifiedHighlighter.Builder overriddenBuilderForTests(
UnifiedHighlighter.Builder uhBuilder, EnumSet<HighlightFlag> mandatoryFlags) {
return new UnifiedHighlighter.Builder(
uhBuilder.getIndexSearcher(), uhBuilder.getIndexAnalyzer()) {
// Random flags: generated the first time they are needed, then reused.
Set<HighlightFlag> flags;
@Override
public UnifiedHighlighter build() {
// The highlighter is built from the wrapped uhBuilder, not from this anonymous builder.
return new UnifiedHighlighter(uhBuilder) {
@Override
protected Set<HighlightFlag> evaluateFlags(Builder uhBuilder) {
// Ignores the builder's settings and supplies the cached random flags instead.
if (Objects.nonNull(flags)) {
return flags;
}
return flags = generateRandomHighlightFlags(mandatoryFlags);
}
};
}
};
}
//
@ -221,8 +257,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(maxLength);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withMaxLength(maxLength);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs);
ir.close();
@ -301,8 +338,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(value.length() * 2 + 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(value.length() * 2 + 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new TermQuery(new Term("body", "field"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -446,8 +485,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
.build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(false);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
if (highlighter
@ -468,7 +508,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexReader ir = iw.getReader();
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
Set<HighlightFlag> flags = highlighter.getFlags("body");
assertTrue(flags.contains(HighlightFlag.PHRASES));
assertTrue(flags.contains(HighlightFlag.MULTI_TERM_QUERY));
@ -501,8 +541,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
.build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(false);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
@ -532,8 +573,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
.build();
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertTrue(snippets[0].contains("<b>Square</b>"));
@ -595,8 +638,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>both</b>"));
@ -618,14 +663,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
highlighter.setMaxLength(10000);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withBreakIterator(WholeBreakIterator::new)
.withMaxLength(10000);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -690,24 +732,29 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter.Builder(searcher, indexAnalyzer) {
@Override
protected List<CharSequence[]> loadFieldValues(
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
throws IOException {
assert fields.length == 1;
assert docIter.cost() == 1;
docIter.nextDoc();
return Collections.singletonList(new CharSequence[] {text});
}
public UnifiedHighlighter build() {
return new UnifiedHighlighter(uhBuilder) {
@Override
protected List<CharSequence[]> loadFieldValues(
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
throws IOException {
assert fields.length == 1;
assert docIter.cost() == 1;
docIter.nextDoc();
return Collections.singletonList(new CharSequence[] {text});
}
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
}
};
}.build();
Query query = new TermQuery(new Term("body", "test"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -803,8 +850,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxNoHighlightPassages(0); // don't want any default summary
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxNoHighlightPassages(0); // don't want any default summary
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new TermQuery(new Term("body", "highlighting"));
int[] docIDs = new int[] {0};
String[] snippets =
@ -836,12 +885,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withBreakIterator(WholeBreakIterator::new)
.build();
Query query = new TermQuery(new Term("body", "highlighting"));
int[] docIDs = new int[] {0};
String[] snippets =
@ -973,9 +1019,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setCacheFieldValCharsThreshold(
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withCacheFieldValCharsThreshold(
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new TermQuery(new Term("body", "answer"));
TopDocs hits = searcher.search(query, numDocs);
assertEquals(numDocs, hits.totalHits.value);
@ -1047,12 +1095,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
}
};
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withFormatter(new DefaultPassageFormatter("<b>", "</b>", "... ", true))
.build();
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -1080,25 +1125,22 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new PassageFormatter() {
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@Override
public String[] format(Passage[] passages, String content) {
// Just turns the String snippet into a length 2
// array of String
return new String[] {
"blah blah", defaultFormatter.format(passages, content).toString()
};
}
PassageFormatter passageFormatter =
new PassageFormatter() {
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@Override
public String[] format(Passage[] passages, String content) {
// Just turns the String snippet into a length 2
// array of String
return new String[] {
"blah blah", defaultFormatter.format(passages, content).toString()
};
}
};
UnifiedHighlighter highlighter =
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFormatter(passageFormatter).build();
Query query = new TermQuery(new Term("body", "highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -1146,15 +1188,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
// requireFieldMatch=false
return (qf) -> true;
}
};
UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighterFieldMatch.setFieldMatcher(null); // default
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFieldMatcher(qf -> true).build();
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighterFieldMatch =
overrideFieldMatcherForTests(randomUnifiedHighlighter(uhBuilder), null, "text");
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
@ -1179,11 +1217,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
}
// text
@ -1202,11 +1241,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
"<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.",
snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
}
// category
@ -1221,11 +1261,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
}
ir.close();
}
@ -1234,17 +1275,14 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
// requireFieldMatch=false
return (qf) -> true;
}
};
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFieldMatcher(qf -> true).build();
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighterFieldMatch =
randomUnifiedHighlighter(
searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
highlighterFieldMatch.setFieldMatcher(null); // default
overrideFieldMatcherForTests(
randomUnifiedHighlighter(uhBuilder, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null),
null,
"text");
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
@ -1269,11 +1307,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
}
// text
@ -1292,11 +1331,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
"<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.",
snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
}
// category
@ -1311,11 +1351,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
}
ir.close();
}
@ -1323,7 +1364,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
public void testMatchesSlopBug() throws IOException {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
Query query = new PhraseQuery(2, "title", "this", "is", "the", "field");
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -1341,20 +1382,18 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
UnifiedHighlighter.builder(searcher, indexAnalyzer)
// requireFieldMatch=false
return (qf) -> true;
}
};
.withFieldMatcher(qf -> true)
.build();
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighterFieldMatch =
randomUnifiedHighlighter(
searcher,
indexAnalyzer,
EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY),
null);
highlighterFieldMatch.setFieldMatcher(null); // default
overrideFieldMatcherForTests(
randomUnifiedHighlighter(
uhBuilder, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), null),
null,
"text");
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
@ -1388,7 +1427,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
@ -1396,7 +1436,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
} else {
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
}
// text
@ -1430,11 +1470,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
snippets[0]);
}
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
}
// category
@ -1457,7 +1498,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
highlighterFieldMatch =
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
@ -1465,7 +1507,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
} else {
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
}
highlighterFieldMatch.setFieldMatcher(null);
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
}
ir.close();
}

View File

@ -117,22 +117,20 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
Query query = new WildcardQuery(new Term("body", "te*"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
String[] snippets = highlighter.highlight("body", query, topDocs);
String[] snippets = uhBuilder.build().highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a <b>test</b>.", snippets[0]);
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// disable MTQ; won't highlight
highlighter.setHandleMultiTermQuery(false);
snippets = highlighter.highlight("body", query, topDocs);
snippets = uhBuilder.withHandleMultiTermQuery(false).build().highlight("body", query, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
highlighter.setHandleMultiTermQuery(true); // reset
// wrong field
BooleanQuery bq =
@ -142,7 +140,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
snippets = uhBuilder.withHandleMultiTermQuery(true).build().highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
@ -156,6 +154,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
}
/**
 * Convenience overload for this suite: delegates to {@link
 * TestUnifiedHighlighter#randomUnifiedHighlighter} with the supplied builder, a flag set containing
 * only {@link HighlightFlag#MULTI_TERM_QUERY}, and {@code null} as the last argument.
 *
 * @param uhBuilder builder passed through unchanged to the shared helper
 * @return whatever highlighter the shared helper produces
 */
private UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
  // Every test in this class exercises multi-term queries, so that flag is always passed along.
  final EnumSet<HighlightFlag> flags = EnumSet.of(HighlightFlag.MULTI_TERM_QUERY);
  return TestUnifiedHighlighter.randomUnifiedHighlighter(uhBuilder, flags, null);
}
public void testOnePrefix() throws Exception {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
@ -172,7 +175,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
// wrap in a BoostQuery to also show we see inside it
Query query = new BoostQuery(new PrefixQuery(new Term("body", "te")), 2.0f);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@ -183,7 +187,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null); // default
BooleanQuery bq =
new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
@ -191,7 +194,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
@ -215,7 +218,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new RegexpQuery(new Term("body", "te.*"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
@ -225,7 +229,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null); // default
BooleanQuery bq =
new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
@ -233,7 +236,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
@ -257,7 +260,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new FuzzyQuery(new Term("body", "tets"), 1);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
@ -285,7 +289,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null); // default
BooleanQuery bq =
new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
@ -293,7 +296,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
@ -317,7 +320,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
@ -393,7 +397,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
assertEquals("Test a one sentence document.", snippets[1]);
// wrong field
highlighter.setFieldMatcher(null); // default
bq =
new BooleanQuery.Builder()
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
@ -403,7 +406,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
.build();
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
assertEquals(2, topDocs.totalHits.value);
snippets = highlighter.highlight("body", bq, topDocs);
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
assertEquals(2, snippets.length);
assertEquals("This is a test.", snippets[0]);
assertEquals("Test a one sentence document.", snippets[1]);
@ -726,10 +729,12 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
// Default formatter just bolds each hit:
assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
// Now use our own formatter, that also stuffs the
// matching term's text into the result:
highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected PassageFormatter getFormatter(String field) {
@ -809,8 +814,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(25); // a little past first sentence
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(25); // a little past first sentence
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
BooleanQuery query =
new BooleanQuery.Builder()
@ -843,8 +850,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(32); // a little past first sentence
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(32); // a little past first sentence
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
BooleanQuery query =
new BooleanQuery.Builder()
@ -894,11 +903,12 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
};
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, buggyAnalyzer);
highlighter.setHandleMultiTermQuery(true);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, buggyAnalyzer).withHandleMultiTermQuery(true);
if (rarely()) {
highlighter.setMaxLength(25); // a little past first sentence
uhBuilder = uhBuilder.withMaxLength(25); // a little past first sentence
}
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
boolean hasClauses = false;
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
@ -1046,7 +1056,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
@ -1159,8 +1169,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
iw.commit();
try (IndexReader ir = iw.getReader()) {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer);
highlighter.setBreakIterator(WholeBreakIterator::new);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, analyzer)
.withBreakIterator(WholeBreakIterator::new);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
// Test PrefixQuery
Query query = new PrefixQuery(new Term(field, UnicodeUtil.newString(valuePoints, 0, 1)));

View File

@ -116,25 +116,21 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
for (int n = 1; n < maxTopN; n++) {
final FakePassageFormatter f1 = new FakePassageFormatter();
UnifiedHighlighter p1 =
new UnifiedHighlighter(is, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
return f1;
}
};
p1.setMaxLength(Integer.MAX_VALUE - 1);
creatUHObjectForCurrentTestSuite(
is,
indexAnalyzer,
new UnifiedHighlighter.Builder(is, indexAnalyzer)
.withFormatter(f1)
.withMaxLength(Integer.MAX_VALUE - 1));
final FakePassageFormatter f2 = new FakePassageFormatter();
UnifiedHighlighter p2 =
new UnifiedHighlighter(is, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
assertEquals("body", field);
return f2;
}
};
p2.setMaxLength(Integer.MAX_VALUE - 1);
creatUHObjectForCurrentTestSuite(
is,
indexAnalyzer,
new UnifiedHighlighter.Builder(is, indexAnalyzer)
.withFormatter(f2)
.withMaxLength(Integer.MAX_VALUE - 1));
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
queryBuilder.add(query, BooleanClause.Occur.MUST);
@ -278,8 +274,9 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
if (random().nextBoolean()) {
@ -330,8 +327,10 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
if (random().nextBoolean()) {
@ -363,4 +362,22 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
ir.close();
dir.close();
}
/**
 * Creates the highlighter used by this test suite: a {@link UnifiedHighlighter} constructed from
 * {@code uhBuilder} whose {@code getFormatter} asserts that the requested field is {@code "body"}
 * before returning the formatter supplied by the superclass.
 *
 * <p>The highlighter is configured entirely by {@code uhBuilder}. The {@code searcher} and {@code
 * indexAnalyzer} parameters are not consulted here; they are kept only so existing call sites
 * continue to compile.
 *
 * @param searcher unused; retained for call-site compatibility
 * @param indexAnalyzer unused; retained for call-site compatibility
 * @param uhBuilder fully configured builder the highlighter is constructed from
 * @return a highlighter that checks the field name on every formatter lookup
 */
private UnifiedHighlighter creatUHObjectForCurrentTestSuite(
    IndexSearcher searcher, Analyzer indexAnalyzer, UnifiedHighlighter.Builder uhBuilder) {
  // Construct the subclass directly from uhBuilder. The previous version wrapped this in a
  // throwaway anonymous Builder whose build() ignored its own state, which only obscured
  // which builder actually configures the highlighter.
  return new UnifiedHighlighter(uhBuilder) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      assertEquals("body", field);
      return super.getFormatter(field);
    }
  };
}
}

View File

@ -46,7 +46,8 @@ public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD)
.build();
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, indexAnalyzer);
UnifiedHighlighter highlighter =
UnifiedHighlighter.builderWithoutSearcher(indexAnalyzer).build();
String snippet = highlighter.highlightWithoutSearcher("body", query, text, 1).toString();
assertEquals("Just a test <b>highlighting</b> without a searcher. ", snippet);
@ -67,7 +68,7 @@ public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
IndexReader indexReader = indexWriter.getReader()) {
IndexSearcher searcher = newSearcher(indexReader);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
highlighter.highlightWithoutSearcher("body", query, text, 1); // should throw
}
}

View File

@ -73,6 +73,7 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
RandomIndexWriter indexWriter;
IndexSearcher searcher;
UnifiedHighlighter highlighter;
UnifiedHighlighter.Builder uhBuilder;
IndexReader indexReader;
// Is it okay if a match (identified by offset pair) appears multiple times in the passage?
@ -113,41 +114,42 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
/**
 * Opens a reader from {@code indexWriter}, wraps it in a searcher, and (re)creates the
 * {@code highlighter} used by the tests. The highlighter's formatter is wrapped so that every
 * formatted passage is checked for match ordering and unexpected duplicate matches before the
 * call is delegated to the default formatter.
 *
 * @throws IOException if the reader cannot be obtained from the index writer
 */
private void initReaderSearcherHighlighter() throws IOException {
indexReader = indexWriter.getReader();
searcher = newSearcher(indexReader);
uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
highlighter =
TestUnifiedHighlighter.randomUnifiedHighlighter(
searcher,
indexAnalyzer,
EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY),
true);
uhBuilder, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), true);
// intercept the formatter in order to check constraints on the passage.
// The formatter is captured first so the checking wrapper can delegate to it.
final PassageFormatter defaultFormatter = highlighter.getFormatter(null);
highlighter.setFormatter(
new PassageFormatter() {
@Override
public Object format(Passage[] passages, String content) {
boolean thisDupMatchAllowed = dupMatchAllowed.getAndSet(true);
for (Passage passage : passages) {
String prevPair = "";
for (int i = 0; i < passage.getNumMatches(); i++) {
// Zero-pad start/end to 3 digits so that String comparison orders the pairs like
// numbers (this only holds while offsets stay below 1000).
String pair =
String.format(
Locale.ROOT,
"%03d-%03d",
passage.getMatchStarts()[i],
passage.getMatchEnds()[i]);
int cmp = prevPair.compareTo(pair);
if (cmp == 0) {
assertTrue("dup match in passage at offset " + pair, thisDupMatchAllowed);
} else if (cmp > 0) {
fail("bad match order in passage at offset " + pair);
}
prevPair = pair;
}
}
return defaultFormatter.format(passages, content);
}
});
// NOTE(review): the highlighter is rebuilt here from uhBuilder. Whether the randomization
// applied by randomUnifiedHighlighter(...) above carries over depends on whether that helper
// mutates the builder it is given -- confirm against TestUnifiedHighlighter.
highlighter =
uhBuilder
.withFormatter(
new PassageFormatter() {
@Override
public Object format(Passage[] passages, String content) {
// Read the flag for this call; getAndSet(true) leaves it set to true afterwards.
boolean thisDupMatchAllowed = dupMatchAllowed.getAndSet(true);
for (Passage passage : passages) {
// Start below any formatted pair so the first match never trips the checks.
String prevPair = "";
for (int i = 0; i < passage.getNumMatches(); i++) {
// Zero-pad start/end to 3 digits so that String comparison orders the pairs like
// numbers (this only holds while offsets stay below 1000).
String pair =
String.format(
Locale.ROOT,
"%03d-%03d",
passage.getMatchStarts()[i],
passage.getMatchEnds()[i]);
int cmp = prevPair.compareTo(pair);
if (cmp == 0) {
// Same offset pair twice in a row: only acceptable when the test opted in.
assertTrue("dup match in passage at offset " + pair, thisDupMatchAllowed);
} else if (cmp > 0) {
// The previous pair sorts after this one: matches are out of order.
fail("bad match order in passage at offset " + pair);
}
prevPair = pair;
}
}
return defaultFormatter.format(passages, content);
}
})
.build();
}
private PhraseQuery newPhraseQuery(String field, String phrase) {
@ -315,8 +317,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
}
// do again, this time with MTQ disabled. We should only find "alpha bravo".
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
highlighter =
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
.build();
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
snippets = highlighter.highlight("body", query, topDocs);
@ -361,8 +365,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
}
// do again, this time with MTQ disabled. We should only find "alpha bravo".
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
highlighter =
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
.build();
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
snippets = highlighter.highlight("body", query, topDocs);
@ -408,8 +414,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
}
// do again, this time with MTQ disabled.
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
highlighter =
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
.build();
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
snippets = highlighter.highlight("body", query, topDocs);
@ -480,7 +488,7 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
indexWriter.addDocument(
newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
initReaderSearcherHighlighter();
highlighter.setMaxLength(21);
highlighter = uhBuilder.withMaxLength(21).build();
BooleanQuery query =
new BooleanQuery.Builder()
@ -527,8 +535,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
}
public void testMatchNoDocsQuery() throws IOException {
highlighter = new UnifiedHighlighter(null, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(true);
highlighter =
UnifiedHighlighter.builderWithoutSearcher(indexAnalyzer)
.withHighlightPhrasesStrictly(true)
.build();
String content = "whatever";
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
assertEquals(content, o);
@ -540,24 +550,31 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
"There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
initReaderSearcherHighlighter();
highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(true);
UnifiedHighlighter.Builder builder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
final Set<HighlightFlag> flags = super.getFlags(field);
flags.remove(HighlightFlag.WEIGHT_MATCHES); // unsupported
return flags;
}
public UnifiedHighlighter build() {
return new UnifiedHighlighter(uhBuilder) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
final Set<HighlightFlag> flags = super.getFlags(field);
flags.remove(HighlightFlag.WEIGHT_MATCHES); // unsupported
return flags;
}
@Override
protected Collection<Query> preSpanQueryRewrite(Query query) {
if (query instanceof MyQuery) {
return Collections.singletonList(((MyQuery) query).wrapped);
}
return null;
@Override
protected Collection<Query> preSpanQueryRewrite(Query query) {
if (query instanceof MyQuery) {
return Collections.singletonList(((MyQuery) query).wrapped);
}
return null;
}
};
}
};
highlighter.setHighlightPhrasesStrictly(true);
highlighter = builder.build();
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
Query phraseQuery =

View File

@ -90,6 +90,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
}
/**
 * Creates a highlighter from a pre-configured builder by delegating to the shared
 * {@link TestUnifiedHighlighter} helper, passing an empty set of {@link HighlightFlag}s and
 * {@code null} as the final argument.
 *
 * @param uhBuilder builder already carrying the settings the calling test needs
 * @return the highlighter produced by the shared helper
 */
private UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
  final EnumSet<HighlightFlag> noFlags = EnumSet.noneOf(HighlightFlag.class);
  return TestUnifiedHighlighter.randomUnifiedHighlighter(uhBuilder, noFlags, null);
}
//
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer
// tests.
@ -177,8 +182,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(maxLength);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withMaxLength(maxLength);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs);
ir.close();
@ -256,8 +262,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(value.length() * 2 + 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(value.length() * 2 + 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new IntervalQuery("body", Intervals.term("field"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -354,8 +362,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
Query query = new IntervalQuery("body", Intervals.phrase("buddhist", "origins"));
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(false);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
// highlighter.getFlags("body").containsAll(EnumSet.of(HighlightFlag.WEIGHT_MATCHES,
@ -383,8 +392,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
Query query = new IntervalQuery("body", Intervals.phrase("curious", "george"));
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setHighlightPhrasesStrictly(false);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
@ -422,8 +432,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
Intervals.term("massachusetts")));
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertTrue(snippets[0].contains("<b>Square</b>"));
@ -478,8 +490,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
"body", Intervals.notContaining(Intervals.term("terms"), Intervals.term("both")));
TopDocs topDocs = searcher.search(query, 10);
assertEquals(1, topDocs.totalHits.value);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxLength(Integer.MAX_VALUE - 1);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
assertEquals(1, snippets.length);
assertFalse(snippets[0].contains("<b>both</b>"));
@ -502,14 +516,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
highlighter.setMaxLength(10000);
// Configure the builder with the same limits the setter-based version of this test used:
// a max length of 10000 (not 1000) and a WholeBreakIterator supplied per use.
UnifiedHighlighter.Builder uhBuilder =
    new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
        .withMaxLength(10000)
        .withBreakIterator(WholeBreakIterator::new);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new IntervalQuery("body", Intervals.term("test"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits.value);
@ -571,9 +582,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected List<CharSequence[]> loadFieldValues(
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
@ -681,8 +692,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setMaxNoHighlightPassages(0); // don't want any default summary
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withMaxNoHighlightPassages(0); // don't want any default summary
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
int[] docIDs = new int[] {0};
String[] snippets =
@ -713,12 +726,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected BreakIterator getBreakIterator(String field) {
return new WholeBreakIterator();
}
};
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withBreakIterator(WholeBreakIterator::new)
.build();
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
int[] docIDs = new int[] {0};
String[] snippets =
@ -846,9 +856,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setCacheFieldValCharsThreshold(
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
.withCacheFieldValCharsThreshold(
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
Query query = new IntervalQuery("body", Intervals.term("answer"));
TopDocs hits = searcher.search(query, numDocs);
assertEquals(numDocs, hits.totalHits.value);
@ -883,12 +895,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
}
};
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withFormatter(new DefaultPassageFormatter("<b>", "</b>", "... ", true))
.build();
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@ -917,23 +926,21 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected PassageFormatter getFormatter(String field) {
return new PassageFormatter() {
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
UnifiedHighlighter.builder(searcher, indexAnalyzer)
.withFormatter(
new PassageFormatter() {
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@Override
public String[] format(Passage[] passages, String content) {
// Just turns the String snippet into a length 2
// array of String
return new String[] {
"blah blah", defaultFormatter.format(passages, content).toString()
};
}
};
}
};
@Override
public String[] format(Passage[] passages, String content) {
// Just turns the String snippet into a length 2
// array of String
return new String[] {
"blah blah", defaultFormatter.format(passages, content).toString()
};
}
})
.build();
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
@ -980,7 +987,7 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
public void testMatchesSlopBug() throws IOException {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
Query query =
new IntervalQuery(
"title",

View File

@ -107,7 +107,7 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
for (String field : fields) {
queryBuilder.add(new TermQuery(new Term(field, "test")), BooleanClause.Occur.MUST);
@ -192,8 +192,9 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
iw.close();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
UnifiedHighlighter highlighter =
new UnifiedHighlighter(searcher, indexAnalyzer) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected Set<HighlightFlag> getFlags(String field) {
return Collections.emptySet(); // no WEIGHT_MATCHES

View File

@ -97,8 +97,10 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
@Test
public void testUnifiedHighlighterExtensibility() {
final int maxLength = 1000;
UnifiedHighlighter.Builder uhBuilder =
new UnifiedHighlighter.Builder(null, new MockAnalyzer(random()));
UnifiedHighlighter uh =
new UnifiedHighlighter(null, new MockAnalyzer(random())) {
new UnifiedHighlighter(uhBuilder) {
@Override
protected Map<String, Object[]> highlightFieldsAsObjects(
@ -252,7 +254,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
* Tests maintaining extensibility/visibility of {@link
* org.apache.lucene.search.uhighlight.FieldHighlighter} out of package.
*/
private static class CustomFieldHighlighter extends FieldHighlighter {
protected static class CustomFieldHighlighter extends FieldHighlighter {
CustomFieldHighlighter(
String field,
FieldOffsetStrategy fieldOffsetStrategy,