mirror of https://github.com/apache/lucene.git
LUCENE-10197: UnifiedHighlighter now has a Builder (#412)
Setters (mutable state) on the UH are deprecated. Co-authored-by: David Smiley <dsmiley@apache.org>
This commit is contained in:
parent
5512786dd9
commit
b1b1a890d0
|
@ -41,6 +41,9 @@ API Changes
|
|||
|
||||
* LUCENE-10244: MultiCollector::getCollectors is now public, allowing users to access the wrapped
|
||||
collectors. (Andriy Redko)
|
||||
|
||||
* LUCENE-10197: UnifiedHighlighter now has a Builder to construct it. The UH's setters are now
|
||||
deprecated. (Animesh Pandey, David Smiley)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
|
|
@ -288,17 +288,19 @@ public class SearchTravRetHighlightTask extends SearchTravTask {
|
|||
return;
|
||||
}
|
||||
lastSearcher = searcher;
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, analyzer)
|
||||
.withBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH))
|
||||
.withMaxLength(maxDocCharsToAnalyze)
|
||||
.withHighlightPhrasesStrictly(true)
|
||||
.withHandleMultiTermQuery(true);
|
||||
highlighter =
|
||||
new UnifiedHighlighter(searcher, analyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected OffsetSource getOffsetSource(String field) {
|
||||
return offsetSource != null ? offsetSource : super.getOffsetSource(field);
|
||||
}
|
||||
};
|
||||
highlighter.setBreakIterator(() -> BreakIterator.getSentenceInstance(Locale.ENGLISH));
|
||||
highlighter.setMaxLength(maxDocCharsToAnalyze);
|
||||
highlighter.setHighlightPhrasesStrictly(true);
|
||||
highlighter.setHandleMultiTermQuery(true);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -53,7 +53,6 @@ import org.apache.lucene.search.DocIdSetIterator;
|
|||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
|
@ -113,43 +112,52 @@ public class UnifiedHighlighter {
|
|||
protected static final LabelledCharArrayMatcher[] ZERO_LEN_AUTOMATA_ARRAY =
|
||||
new LabelledCharArrayMatcher[0];
|
||||
|
||||
// All the private defaults will be removed once non-builder based UH is removed.
|
||||
private static final boolean DEFAULT_ENABLE_MULTI_TERM_QUERY = true;
|
||||
private static final boolean DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY = true;
|
||||
private static final boolean DEFAULT_ENABLE_WEIGHT_MATCHES = true;
|
||||
private static final boolean DEFAULT_ENABLE_RELEVANCY_OVER_SPEED = true;
|
||||
private static final Supplier<BreakIterator> DEFAULT_BREAK_ITERATOR =
|
||||
() -> BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
private static final PassageScorer DEFAULT_PASSAGE_SCORER = new PassageScorer();
|
||||
private static final PassageFormatter DEFAULT_PASSAGE_FORMATTER = new DefaultPassageFormatter();
|
||||
private static final int DEFAULT_MAX_HIGHLIGHT_PASSAGES = -1;
|
||||
|
||||
protected final IndexSearcher searcher; // if null, can only use highlightWithoutSearcher
|
||||
|
||||
protected final Analyzer indexAnalyzer;
|
||||
|
||||
private boolean defaultHandleMtq = true; // e.g. wildcards
|
||||
// lazy initialized with double-check locking; protected so subclass can init
|
||||
protected volatile FieldInfos fieldInfos;
|
||||
|
||||
private boolean defaultHighlightPhrasesStrictly = true; // AKA "accuracy" or "query debugging"
|
||||
private Predicate<String> fieldMatcher;
|
||||
|
||||
private Set<HighlightFlag> flags;
|
||||
|
||||
// e.g. wildcards
|
||||
private boolean handleMultiTermQuery = DEFAULT_ENABLE_MULTI_TERM_QUERY;
|
||||
|
||||
// AKA "accuracy" or "query debugging"
|
||||
private boolean highlightPhrasesStrictly = DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY;
|
||||
|
||||
private boolean weightMatches = DEFAULT_ENABLE_WEIGHT_MATCHES;
|
||||
|
||||
// For analysis, prefer MemoryIndexOffsetStrategy
|
||||
private boolean defaultPassageRelevancyOverSpeed = true;
|
||||
private boolean passageRelevancyOverSpeed = DEFAULT_ENABLE_RELEVANCY_OVER_SPEED;
|
||||
|
||||
private int maxLength = DEFAULT_MAX_LENGTH;
|
||||
|
||||
// BreakIterator is stateful so we use a Supplier factory method
|
||||
private Supplier<BreakIterator> defaultBreakIterator =
|
||||
() -> BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
private Supplier<BreakIterator> breakIterator = DEFAULT_BREAK_ITERATOR;
|
||||
|
||||
private Predicate<String> defaultFieldMatcher;
|
||||
private PassageScorer scorer = DEFAULT_PASSAGE_SCORER;
|
||||
|
||||
private PassageScorer defaultScorer = new PassageScorer();
|
||||
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
|
||||
|
||||
private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
||||
private int defaultMaxNoHighlightPassages = -1;
|
||||
|
||||
// lazy initialized with double-check locking; protected so subclass can init
|
||||
protected volatile FieldInfos fieldInfos;
|
||||
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
|
||||
|
||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||
|
||||
/** Extracts matching terms after rewriting against an empty index */
|
||||
protected static Set<Term> extractTerms(Query query) throws IOException {
|
||||
Set<Term> queryTerms = new HashSet<>();
|
||||
EMPTY_INDEXSEARCHER.rewrite(query).visit(QueryVisitor.termCollector(queryTerms));
|
||||
return queryTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the highlighter with the given index searcher and analyzer.
|
||||
*
|
||||
|
@ -157,6 +165,7 @@ public class UnifiedHighlighter {
|
|||
* String, int)} is used, in which case this needs to be null.
|
||||
* @param indexAnalyzer Required, even if in some circumstances it isn't used.
|
||||
*/
|
||||
@Deprecated
|
||||
public UnifiedHighlighter(IndexSearcher indexSearcher, Analyzer indexAnalyzer) {
|
||||
this.searcher = indexSearcher; // TODO: make non nullable
|
||||
this.indexAnalyzer =
|
||||
|
@ -165,14 +174,22 @@ public class UnifiedHighlighter {
|
|||
"indexAnalyzer is required" + " (even if in some circumstances it isn't used)");
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setHandleMultiTermQuery(boolean handleMtq) {
|
||||
this.defaultHandleMtq = handleMtq;
|
||||
this.handleMultiTermQuery = handleMtq;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setHighlightPhrasesStrictly(boolean highlightPhrasesStrictly) {
|
||||
this.defaultHighlightPhrasesStrictly = highlightPhrasesStrictly;
|
||||
this.highlightPhrasesStrictly = highlightPhrasesStrictly;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setPassageRelevancyOverSpeed(boolean passageRelevancyOverSpeed) {
|
||||
this.passageRelevancyOverSpeed = passageRelevancyOverSpeed;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setMaxLength(int maxLength) {
|
||||
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
|
||||
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
|
||||
|
@ -182,36 +199,49 @@ public class UnifiedHighlighter {
|
|||
this.maxLength = maxLength;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setBreakIterator(Supplier<BreakIterator> breakIterator) {
|
||||
this.defaultBreakIterator = breakIterator;
|
||||
this.breakIterator = breakIterator;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setScorer(PassageScorer scorer) {
|
||||
this.defaultScorer = scorer;
|
||||
this.scorer = scorer;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setFormatter(PassageFormatter formatter) {
|
||||
this.defaultFormatter = formatter;
|
||||
this.formatter = formatter;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setMaxNoHighlightPassages(int defaultMaxNoHighlightPassages) {
|
||||
this.defaultMaxNoHighlightPassages = defaultMaxNoHighlightPassages;
|
||||
this.maxNoHighlightPassages = defaultMaxNoHighlightPassages;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setCacheFieldValCharsThreshold(int cacheFieldValCharsThreshold) {
|
||||
this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setFieldMatcher(Predicate<String> predicate) {
|
||||
this.defaultFieldMatcher = predicate;
|
||||
this.fieldMatcher = predicate;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public void setWeightMatches(boolean weightMatches) {
|
||||
this.weightMatches = weightMatches;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's
|
||||
* enabled. MTQ highlighting can be expensive, particularly when using offsets in postings.
|
||||
* Returns whether {@link org.apache.lucene.search.MultiTermQuery} derivatives will be
|
||||
* highlighted. By default it's enabled. MTQ highlighting can be expensive, particularly when
|
||||
* using offsets in postings.
|
||||
*/
|
||||
@Deprecated
|
||||
protected boolean shouldHandleMultiTermQuery(String field) {
|
||||
return defaultHandleMtq;
|
||||
return handleMultiTermQuery;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -219,12 +249,295 @@ public class UnifiedHighlighter {
|
|||
* highlighted strictly based on query matches (slower) versus any/all occurrences of the
|
||||
* underlying terms. By default it's enabled, but there's no overhead if such queries aren't used.
|
||||
*/
|
||||
@Deprecated
|
||||
protected boolean shouldHighlightPhrasesStrictly(String field) {
|
||||
return defaultHighlightPhrasesStrictly;
|
||||
return highlightPhrasesStrictly;
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
protected boolean shouldPreferPassageRelevancyOverSpeed(String field) {
|
||||
return defaultPassageRelevancyOverSpeed;
|
||||
return passageRelevancyOverSpeed;
|
||||
}
|
||||
|
||||
/** Builder for UnifiedHighlighter. */
|
||||
public static class Builder {
|
||||
/** If null, can only use highlightWithoutSearcher. */
|
||||
private final IndexSearcher searcher;
|
||||
|
||||
private final Analyzer indexAnalyzer;
|
||||
private Predicate<String> fieldMatcher;
|
||||
private Set<HighlightFlag> flags;
|
||||
private boolean handleMultiTermQuery = DEFAULT_ENABLE_MULTI_TERM_QUERY;
|
||||
private boolean highlightPhrasesStrictly = DEFAULT_ENABLE_HIGHLIGHT_PHRASES_STRICTLY;
|
||||
private boolean passageRelevancyOverSpeed = DEFAULT_ENABLE_RELEVANCY_OVER_SPEED;
|
||||
private boolean weightMatches = DEFAULT_ENABLE_WEIGHT_MATCHES;
|
||||
private int maxLength = DEFAULT_MAX_LENGTH;
|
||||
|
||||
/** BreakIterator is stateful so we use a Supplier factory method. */
|
||||
private Supplier<BreakIterator> breakIterator = DEFAULT_BREAK_ITERATOR;
|
||||
|
||||
private PassageScorer scorer = DEFAULT_PASSAGE_SCORER;
|
||||
private PassageFormatter formatter = DEFAULT_PASSAGE_FORMATTER;
|
||||
private int maxNoHighlightPassages = DEFAULT_MAX_HIGHLIGHT_PASSAGES;
|
||||
private int cacheFieldValCharsThreshold = DEFAULT_CACHE_CHARS_THRESHOLD;
|
||||
|
||||
/**
|
||||
* Constructor for UH builder which accepts {@link IndexSearcher} and {@link Analyzer} objects.
|
||||
* {@link IndexSearcher} object can only be null when {@link #highlightWithoutSearcher(String,
|
||||
* Query, String, int)} is used.
|
||||
*
|
||||
* @param searcher - {@link IndexSearcher}
|
||||
* @param indexAnalyzer - {@link Analyzer}
|
||||
*/
|
||||
public Builder(IndexSearcher searcher, Analyzer indexAnalyzer) {
|
||||
this.searcher = searcher;
|
||||
this.indexAnalyzer = indexAnalyzer;
|
||||
}
|
||||
|
||||
/**
|
||||
* User-defined set of {@link HighlightFlag} values which will override the flags set by {@link
|
||||
* #withHandleMultiTermQuery(boolean)}, {@link #withHighlightPhrasesStrictly(boolean)}, {@link
|
||||
* #withPassageRelevancyOverSpeed(boolean)} and {@link #withWeightMatches(boolean)}.
|
||||
*
|
||||
* <p>Here the user can either specify the set of {@link HighlightFlag}s to be applied or use
|
||||
* the boolean flags to populate final list of {@link HighlightFlag}s.
|
||||
*
|
||||
* @param values - set of {@link HighlightFlag} values.
|
||||
*/
|
||||
public Builder withFlags(Set<HighlightFlag> values) {
|
||||
this.flags = values;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Here position sensitive queries (e.g. phrases and {@link SpanQuery}ies) are highlighted
|
||||
* strictly based on query matches (slower) versus any/all occurrences of the underlying terms.
|
||||
* By default it's enabled, but there's no overhead if such queries aren't used.
|
||||
*/
|
||||
public Builder withHighlightPhrasesStrictly(boolean value) {
|
||||
this.highlightPhrasesStrictly = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Here {@link org.apache.lucene.search.MultiTermQuery} derivatives will be highlighted. By
|
||||
* default it's enabled. MTQ highlighting can be expensive, particularly when using offsets in
|
||||
* postings.
|
||||
*/
|
||||
public Builder withHandleMultiTermQuery(boolean value) {
|
||||
this.handleMultiTermQuery = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** Passage relevancy is more important than speed. True by default. */
|
||||
public Builder withPassageRelevancyOverSpeed(boolean value) {
|
||||
this.passageRelevancyOverSpeed = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
|
||||
* more accurate to the query, and the snippets can be a little different for phrases because
|
||||
* the whole phrase is marked up instead of each word. The passage relevancy calculation can be
|
||||
* different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
|
||||
* requires {@link HighlightFlag#MULTI_TERM_QUERY} and {@link HighlightFlag#PHRASES} and {@link
|
||||
* HighlightFlag#PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true
|
||||
* by default.
|
||||
*/
|
||||
public Builder withWeightMatches(boolean value) {
|
||||
this.weightMatches = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** The text to be highlighted is effectively truncated by this length. */
|
||||
public Builder withMaxLength(int value) {
|
||||
if (value < 0 || value == Integer.MAX_VALUE) {
|
||||
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
|
||||
// our sentinel in the offsets queue uses this value to terminate.
|
||||
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
|
||||
}
|
||||
this.maxLength = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withBreakIterator(Supplier<BreakIterator> value) {
|
||||
this.breakIterator = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withFieldMatcher(Predicate<String> value) {
|
||||
this.fieldMatcher = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withScorer(PassageScorer value) {
|
||||
this.scorer = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withFormatter(PassageFormatter value) {
|
||||
this.formatter = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withMaxNoHighlightPassages(int value) {
|
||||
this.maxNoHighlightPassages = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withCacheFieldValCharsThreshold(int value) {
|
||||
this.cacheFieldValCharsThreshold = value;
|
||||
return this;
|
||||
}
|
||||
|
||||
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(this);
|
||||
}
|
||||
|
||||
/** ... as passed in from the Builder constructor. */
|
||||
public IndexSearcher getIndexSearcher() {
|
||||
return searcher;
|
||||
}
|
||||
|
||||
/** ... as passed in from the Builder constructor. */
|
||||
public Analyzer getIndexAnalyzer() {
|
||||
return indexAnalyzer;
|
||||
}
|
||||
|
||||
public Set<HighlightFlag> getFlags() {
|
||||
return flags;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link Builder} object where {@link IndexSearcher} and {@link Analyzer} are not null.
|
||||
*
|
||||
* @param searcher - a {@link IndexSearcher} object.
|
||||
* @param indexAnalyzer - a {@link Analyzer} object.
|
||||
* @return a {@link Builder} object
|
||||
*/
|
||||
public static Builder builder(IndexSearcher searcher, Analyzer indexAnalyzer) {
|
||||
return new Builder(searcher, indexAnalyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link Builder} object in which you can only use {@link
|
||||
* UnifiedHighlighter#highlightWithoutSearcher(String, Query, String, int)} for highlighting.
|
||||
*
|
||||
* @param indexAnalyzer - a {@link Analyzer} object.
|
||||
* @return a {@link Builder} object
|
||||
*/
|
||||
public static Builder builderWithoutSearcher(Analyzer indexAnalyzer) {
|
||||
return new Builder(null, indexAnalyzer);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs the highlighter with the given {@link Builder}.
|
||||
*
|
||||
* @param builder - a {@link Builder} object.
|
||||
*/
|
||||
public UnifiedHighlighter(Builder builder) {
|
||||
this.searcher = builder.searcher;
|
||||
this.indexAnalyzer =
|
||||
Objects.requireNonNull(
|
||||
builder.indexAnalyzer,
|
||||
"indexAnalyzer is required (even if in some circumstances it isn't used)");
|
||||
this.flags = evaluateFlags(builder);
|
||||
this.maxLength = builder.maxLength;
|
||||
this.breakIterator = builder.breakIterator;
|
||||
this.fieldMatcher = builder.fieldMatcher;
|
||||
this.scorer = builder.scorer;
|
||||
this.formatter = builder.formatter;
|
||||
this.maxNoHighlightPassages = builder.maxNoHighlightPassages;
|
||||
this.cacheFieldValCharsThreshold = builder.cacheFieldValCharsThreshold;
|
||||
}
|
||||
|
||||
/** Extracts matching terms after rewriting against an empty index */
|
||||
protected static Set<Term> extractTerms(Query query) throws IOException {
|
||||
Set<Term> queryTerms = new HashSet<>();
|
||||
EMPTY_INDEXSEARCHER.rewrite(query).visit(QueryVisitor.termCollector(queryTerms));
|
||||
return queryTerms;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method returns the set of {@link HighlightFlag}s, which will be applied to the UH
|
||||
* object. The output depends on the values provided to {@link
|
||||
* Builder#withHandleMultiTermQuery(boolean)}, {@link
|
||||
* Builder#withHighlightPhrasesStrictly(boolean)}, {@link
|
||||
* Builder#withPassageRelevancyOverSpeed(boolean)} and {@link Builder#withWeightMatches(boolean)}
|
||||
* OR {@link #setHandleMultiTermQuery(boolean)}, {@link #setHighlightPhrasesStrictly(boolean)},
|
||||
* {@link #setPassageRelevancyOverSpeed(boolean)} and {@link #setWeightMatches(boolean)}
|
||||
*
|
||||
* @param shouldHandleMultiTermQuery - flag for adding Multi-term query
|
||||
* @param shouldHighlightPhrasesStrictly - flag for adding phrase highlighting
|
||||
* @param shouldPassageRelevancyOverSpeed - flag for adding passage relevancy
|
||||
* @param shouldEnableWeightMatches - flag for enabling weight matches
|
||||
* @return a set of {@link HighlightFlag}s.
|
||||
*/
|
||||
protected Set<HighlightFlag> evaluateFlags(
|
||||
final boolean shouldHandleMultiTermQuery,
|
||||
final boolean shouldHighlightPhrasesStrictly,
|
||||
final boolean shouldPassageRelevancyOverSpeed,
|
||||
final boolean shouldEnableWeightMatches) {
|
||||
Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
|
||||
if (shouldHandleMultiTermQuery) {
|
||||
highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY);
|
||||
}
|
||||
if (shouldHighlightPhrasesStrictly) {
|
||||
highlightFlags.add(HighlightFlag.PHRASES);
|
||||
}
|
||||
if (shouldPassageRelevancyOverSpeed) {
|
||||
highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
|
||||
}
|
||||
|
||||
// Evaluate if WEIGHT_MATCHES can be added as a flag.
|
||||
final boolean applyWeightMatches =
|
||||
highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||
&& highlightFlags.contains(HighlightFlag.PHRASES)
|
||||
&& highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)
|
||||
// User can also opt-out of WEIGHT_MATCHES.
|
||||
&& shouldEnableWeightMatches;
|
||||
|
||||
if (applyWeightMatches) {
|
||||
highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
|
||||
}
|
||||
return highlightFlags;
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate the highlight flags and set the {@link #flags} variable. This is called only once when
|
||||
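* the Builder object is used to create a UH object.
*
* <p>NOTE(review): the {@code flags} tested below is this highlighter's own field, not the set
* given to {@link Builder#withFlags(Set)}; it looks like builder-supplied flags are never
* consulted here -- confirm whether that is intended.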
|
||||
*
|
||||
* @param uhBuilder - {@link Builder} object.
|
||||
* @return {@link HighlightFlag}s.
|
||||
*/
|
||||
protected Set<HighlightFlag> evaluateFlags(Builder uhBuilder) {
|
||||
if (flags != null) {
|
||||
return flags;
|
||||
}
|
||||
return flags =
|
||||
evaluateFlags(
|
||||
uhBuilder.handleMultiTermQuery,
|
||||
uhBuilder.highlightPhrasesStrictly,
|
||||
uhBuilder.passageRelevancyOverSpeed,
|
||||
uhBuilder.weightMatches);
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate the highlight flags from the current setter-controlled state of the given highlighter,
|
||||
* without storing the result in {@link #flags}. {@link #getFlags(String)} calls this on every
|
||||
* invocation while {@link #flags} is null, i.e. when no builder was used. It is marked
* deprecated since it is used only for the mutable initialization of a UH object.
|
||||
*
|
||||
* @param uh - {@link UnifiedHighlighter} object.
|
||||
* @return {@link HighlightFlag}s.
|
||||
*/
|
||||
@Deprecated
|
||||
protected Set<HighlightFlag> evaluateFlags(UnifiedHighlighter uh) {
|
||||
return evaluateFlags(
|
||||
uh.handleMultiTermQuery,
|
||||
uh.highlightPhrasesStrictly,
|
||||
uh.passageRelevancyOverSpeed,
|
||||
uh.weightMatches);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -232,14 +545,25 @@ public class UnifiedHighlighter {
|
|||
* only queries that target the current field are kept. (AKA requireFieldMatch)
|
||||
*/
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
if (defaultFieldMatcher != null) {
|
||||
return defaultFieldMatcher;
|
||||
if (fieldMatcher != null) {
|
||||
return fieldMatcher;
|
||||
} else {
|
||||
// requireFieldMatch = true
|
||||
return (qf) -> field.equals(qf);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns the {@link HighlightFlag}s applicable for the current UH instance. */
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
// If a builder is used for initializing a UH object, then flags will never be null.
|
||||
// Once the setters are removed, this method can just return the flags.
|
||||
if (flags != null) {
|
||||
return flags;
|
||||
}
|
||||
// When not using builder, you have to reevaluate the flags.
|
||||
return evaluateFlags(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum content size to process. Content will be truncated to this size before
|
||||
* highlighting. Typically snippets closer to the beginning of the document better summarize its
|
||||
|
@ -258,7 +582,7 @@ public class UnifiedHighlighter {
|
|||
* preceding} performs poorly.
|
||||
*/
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return defaultBreakIterator.get();
|
||||
return breakIterator.get();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -266,7 +590,7 @@ public class UnifiedHighlighter {
|
|||
* PassageScorer} by default; subclasses can override to customize.
|
||||
*/
|
||||
protected PassageScorer getScorer(String field) {
|
||||
return defaultScorer;
|
||||
return scorer;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -274,7 +598,7 @@ public class UnifiedHighlighter {
|
|||
* This returns a new {@code PassageFormatter} by default; subclasses can override to customize.
|
||||
*/
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return defaultFormatter;
|
||||
return formatter;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -284,7 +608,7 @@ public class UnifiedHighlighter {
|
|||
* null (not formatted).
|
||||
*/
|
||||
protected int getMaxNoHighlightPassages(String field) {
|
||||
return defaultMaxNoHighlightPassages;
|
||||
return maxNoHighlightPassages;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -823,26 +1147,6 @@ public class UnifiedHighlighter {
|
|||
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
|
||||
}
|
||||
|
||||
/** Customize the highlighting flags to use by field. */
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
|
||||
if (shouldHandleMultiTermQuery(field)) {
|
||||
highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY);
|
||||
}
|
||||
if (shouldHighlightPhrasesStrictly(field)) {
|
||||
highlightFlags.add(HighlightFlag.PHRASES);
|
||||
}
|
||||
if (shouldPreferPassageRelevancyOverSpeed(field)) {
|
||||
highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
|
||||
}
|
||||
if (highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||
&& highlightFlags.contains(HighlightFlag.PHRASES)
|
||||
&& highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
|
||||
highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
|
||||
}
|
||||
return highlightFlags;
|
||||
}
|
||||
|
||||
protected PhraseHelper getPhraseHelper(
|
||||
String field, Query query, Set<HighlightFlag> highlightFlags) {
|
||||
boolean useWeightMatchesIter = highlightFlags.contains(HighlightFlag.WEIGHT_MATCHES);
|
||||
|
@ -1163,23 +1467,16 @@ public class UnifiedHighlighter {
|
|||
|
||||
/** Flags for controlling highlighting behavior. */
|
||||
public enum HighlightFlag {
|
||||
/** @see UnifiedHighlighter#setHighlightPhrasesStrictly(boolean) */
|
||||
/** @see Builder#withHighlightPhrasesStrictly(boolean) */
|
||||
PHRASES,
|
||||
|
||||
/** @see UnifiedHighlighter#setHandleMultiTermQuery(boolean) */
|
||||
/** @see Builder#withHandleMultiTermQuery(boolean) */
|
||||
MULTI_TERM_QUERY,
|
||||
|
||||
/** Passage relevancy is more important than speed. True by default. */
|
||||
/** @see Builder#withPassageRelevancyOverSpeed(boolean) */
|
||||
PASSAGE_RELEVANCY_OVER_SPEED,
|
||||
|
||||
/**
|
||||
* Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
|
||||
* more accurate to the query, and the snippets can be a little different for phrases because
|
||||
* the whole phrase is marked up instead of each word. The passage relevancy calculation can be
|
||||
* different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
|
||||
* requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES} and {@link
|
||||
* #PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true by default.
|
||||
*/
|
||||
/** @see Builder#withWeightMatches(boolean) */
|
||||
WEIGHT_MATCHES
|
||||
|
||||
// TODO: useQueryBoosts
|
||||
|
|
|
@ -218,11 +218,13 @@ public class TestLengthGoalBreakIterator extends LuceneTestCase {
|
|||
private String highlightClosestToLen(
|
||||
String content, Query query, int lengthGoal, float fragAlign, int maxPassages, char separator)
|
||||
throws IOException {
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
|
||||
highlighter.setBreakIterator(
|
||||
() ->
|
||||
LengthGoalBreakIterator.createClosestToLength(
|
||||
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
|
||||
UnifiedHighlighter highlighter =
|
||||
UnifiedHighlighter.builderWithoutSearcher(analyzer)
|
||||
.withBreakIterator(
|
||||
() ->
|
||||
LengthGoalBreakIterator.createClosestToLength(
|
||||
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign))
|
||||
.build();
|
||||
return highlighter.highlightWithoutSearcher(FIELD, query, content, maxPassages).toString();
|
||||
}
|
||||
|
||||
|
@ -235,11 +237,13 @@ public class TestLengthGoalBreakIterator extends LuceneTestCase {
|
|||
String content, Query query, int lengthGoal, float fragAlign, char separator)
|
||||
throws IOException {
|
||||
// differs from above only by "createMinLength"
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, analyzer);
|
||||
highlighter.setBreakIterator(
|
||||
() ->
|
||||
LengthGoalBreakIterator.createMinLength(
|
||||
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign));
|
||||
UnifiedHighlighter highlighter =
|
||||
UnifiedHighlighter.builderWithoutSearcher(analyzer)
|
||||
.withBreakIterator(
|
||||
() ->
|
||||
LengthGoalBreakIterator.createMinLength(
|
||||
new CustomSeparatorBreakIterator(separator), lengthGoal, fragAlign))
|
||||
.build();
|
||||
return highlighter.highlightWithoutSearcher(FIELD, query, content, 1).toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.Collections;
|
|||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Predicate;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -90,6 +91,23 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
static Set<HighlightFlag> generateRandomHighlightFlags(EnumSet<HighlightFlag> requiredFlags) {
|
||||
final EnumSet<HighlightFlag> result = EnumSet.copyOf(requiredFlags);
|
||||
int r = random().nextInt();
|
||||
for (HighlightFlag highlightFlag : HighlightFlag.values()) {
|
||||
if (((1 << highlightFlag.ordinal()) & r) == 0) {
|
||||
result.add(highlightFlag);
|
||||
}
|
||||
}
|
||||
if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
// these two are required for WEIGHT_MATCHES
|
||||
result.add(HighlightFlag.MULTI_TERM_QUERY);
|
||||
result.add(HighlightFlag.PHRASES);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** This randomized test method uses builder from the UH class. */
|
||||
static UnifiedHighlighter randomUnifiedHighlighter(
|
||||
IndexSearcher searcher, Analyzer indexAnalyzer) {
|
||||
return randomUnifiedHighlighter(
|
||||
|
@ -101,36 +119,54 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
Analyzer indexAnalyzer,
|
||||
EnumSet<HighlightFlag> mandatoryFlags,
|
||||
Boolean requireFieldMatch) {
|
||||
final UnifiedHighlighter uh =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
Set<HighlightFlag> flags; // consistently random set of flags for this test run
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
return randomUnifiedHighlighter(uhBuilder, mandatoryFlags, requireFieldMatch);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
if (flags != null) {
|
||||
return flags;
|
||||
}
|
||||
final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
|
||||
int r = random().nextInt();
|
||||
for (HighlightFlag highlightFlag : HighlightFlag.values()) {
|
||||
if (((1 << highlightFlag.ordinal()) & r) == 0) {
|
||||
result.add(highlightFlag);
|
||||
}
|
||||
}
|
||||
if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
// these two are required for WEIGHT_MATCHES
|
||||
result.add(HighlightFlag.MULTI_TERM_QUERY);
|
||||
result.add(HighlightFlag.PHRASES);
|
||||
}
|
||||
return flags = result;
|
||||
}
|
||||
};
|
||||
uh.setCacheFieldValCharsThreshold(random().nextInt(100));
|
||||
static UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
|
||||
return randomUnifiedHighlighter(uhBuilder, EnumSet.noneOf(HighlightFlag.class), null);
|
||||
}
|
||||
|
||||
static UnifiedHighlighter randomUnifiedHighlighter(
|
||||
UnifiedHighlighter.Builder uhBuilder,
|
||||
EnumSet<HighlightFlag> mandatoryFlags,
|
||||
Boolean requireFieldMatch) {
|
||||
uhBuilder.withCacheFieldValCharsThreshold(random().nextInt(100));
|
||||
if (requireFieldMatch == Boolean.FALSE
|
||||
|| (requireFieldMatch == null && random().nextBoolean())) {
|
||||
uh.setFieldMatcher(f -> true); // requireFieldMatch==false
|
||||
uhBuilder.withFieldMatcher(f -> true); // requireFieldMatch==false
|
||||
}
|
||||
return uh;
|
||||
return overriddenBuilderForTests(uhBuilder, mandatoryFlags).build();
|
||||
}
|
||||
|
||||
/**
 * Builds a new highlighter that uses {@code value} as its field matcher, starting from a fresh
 * builder over {@code original}'s index searcher and index analyzer.
 *
 * <p>Only two settings are copied from {@code original}: the flags it reports for
 * {@code fieldName} and its cache-field-value chars threshold.
 *
 * <p>NOTE(review): no other setting of {@code original} (for example a max length or break
 * iterator) is copied in this chain, so the result presumably falls back to builder defaults for
 * those -- confirm this is acceptable for the calling tests.
 *
 * @param original highlighter whose searcher, analyzer, flags and cache threshold are reused
 * @param value field matcher for the new highlighter; callers in this file also pass
 *     {@code null}
 * @param fieldName field whose flags are read from {@code original}
 * @return the newly built highlighter
 */
static UnifiedHighlighter overrideFieldMatcherForTests(
|
||||
UnifiedHighlighter original, Predicate<String> value, String fieldName) {
|
||||
return UnifiedHighlighter.builder(original.getIndexSearcher(), original.getIndexAnalyzer())
|
||||
.withFlags(original.getFlags(fieldName))
|
||||
.withCacheFieldValCharsThreshold(original.getCacheFieldValCharsThreshold())
|
||||
.withFieldMatcher(value)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
 * Wraps {@code uhBuilder} in an anonymous {@code UnifiedHighlighter.Builder} whose
 * {@code build()} returns a highlighter with randomized flags.
 *
 * <p>The returned builder is constructed from {@code uhBuilder}'s index searcher and analyzer,
 * but its {@code build()} constructs the highlighter from the {@code uhBuilder} passed to this
 * method, not from the returned builder itself.
 *
 * @param uhBuilder builder whose searcher/analyzer seed the wrapper and which is handed to the
 *     {@code UnifiedHighlighter} constructor in {@code build()}
 * @param mandatoryFlags passed to {@code generateRandomHighlightFlags} when the flag set is
 *     first computed
 * @return the wrapping builder
 */
static UnifiedHighlighter.Builder overriddenBuilderForTests(
|
||||
UnifiedHighlighter.Builder uhBuilder, EnumSet<HighlightFlag> mandatoryFlags) {
|
||||
return new UnifiedHighlighter.Builder(
|
||||
uhBuilder.getIndexSearcher(), uhBuilder.getIndexAnalyzer()) {
|
||||
// Cached flag set; lives on this anonymous builder, so every highlighter produced by the same
// wrapper sees the same set once it has been generated.
Set<HighlightFlag> flags;
|
||||
|
||||
@Override
|
||||
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
// The parameter below shadows the enclosing method's uhBuilder and is not used in the body.
protected Set<HighlightFlag> evaluateFlags(Builder uhBuilder) {
|
||||
if (Objects.nonNull(flags)) {
|
||||
return flags;
|
||||
}
|
||||
// First call: generate the random flags and remember them for later calls.
return flags = generateRandomHighlightFlags(mandatoryFlags);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
//
|
||||
|
@ -221,8 +257,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(maxLength);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withMaxLength(maxLength);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs);
|
||||
|
||||
ir.close();
|
||||
|
@ -301,8 +338,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(value.length() * 2 + 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(value.length() * 2 + 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new TermQuery(new Term("body", "field"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -446,8 +485,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
.build();
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
if (highlighter
|
||||
|
@ -468,7 +508,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
Set<HighlightFlag> flags = highlighter.getFlags("body");
|
||||
assertTrue(flags.contains(HighlightFlag.PHRASES));
|
||||
assertTrue(flags.contains(HighlightFlag.MULTI_TERM_QUERY));
|
||||
|
@ -501,8 +541,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
.build();
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
|
||||
|
@ -532,8 +573,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
.build();
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertTrue(snippets[0].contains("<b>Square</b>"));
|
||||
|
@ -595,8 +638,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertFalse(snippets[0].contains("<b>both</b>"));
|
||||
|
@ -618,14 +663,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
highlighter.setMaxLength(10000);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withBreakIterator(WholeBreakIterator::new)
|
||||
.withMaxLength(10000);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new TermQuery(new Term("body", "test"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -690,24 +732,29 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected List<CharSequence[]> loadFieldValues(
|
||||
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
|
||||
throws IOException {
|
||||
assert fields.length == 1;
|
||||
assert docIter.cost() == 1;
|
||||
docIter.nextDoc();
|
||||
return Collections.singletonList(new CharSequence[] {text});
|
||||
}
|
||||
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected List<CharSequence[]> loadFieldValues(
|
||||
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
|
||||
throws IOException {
|
||||
assert fields.length == 1;
|
||||
assert docIter.cost() == 1;
|
||||
docIter.nextDoc();
|
||||
return Collections.singletonList(new CharSequence[] {text});
|
||||
}
|
||||
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
}.build();
|
||||
Query query = new TermQuery(new Term("body", "test"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -803,8 +850,10 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxNoHighlightPassages(0); // don't want any default summary
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxNoHighlightPassages(0); // don't want any default summary
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
int[] docIDs = new int[] {0};
|
||||
String[] snippets =
|
||||
|
@ -836,12 +885,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withBreakIterator(WholeBreakIterator::new)
|
||||
.build();
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
int[] docIDs = new int[] {0};
|
||||
String[] snippets =
|
||||
|
@ -973,9 +1019,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setCacheFieldValCharsThreshold(
|
||||
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withCacheFieldValCharsThreshold(
|
||||
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new TermQuery(new Term("body", "answer"));
|
||||
TopDocs hits = searcher.search(query, numDocs);
|
||||
assertEquals(numDocs, hits.totalHits.value);
|
||||
|
@ -1047,12 +1095,9 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withFormatter(new DefaultPassageFormatter("<b>", "</b>", "... ", true))
|
||||
.build();
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -1080,25 +1125,22 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return new PassageFormatter() {
|
||||
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
||||
@Override
|
||||
public String[] format(Passage[] passages, String content) {
|
||||
// Just turns the String snippet into a length 2
|
||||
// array of String
|
||||
return new String[] {
|
||||
"blah blah", defaultFormatter.format(passages, content).toString()
|
||||
};
|
||||
}
|
||||
PassageFormatter passageFormatter =
|
||||
new PassageFormatter() {
|
||||
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
||||
@Override
|
||||
public String[] format(Passage[] passages, String content) {
|
||||
// Just turns the String snippet into a length 2
|
||||
// array of String
|
||||
return new String[] {
|
||||
"blah blah", defaultFormatter.format(passages, content).toString()
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
UnifiedHighlighter highlighter =
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFormatter(passageFormatter).build();
|
||||
Query query = new TermQuery(new Term("body", "highlighting"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -1146,15 +1188,11 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighterNoFieldMatch =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
// requireFieldMatch=false
|
||||
return (qf) -> true;
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter highlighterFieldMatch = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighterFieldMatch.setFieldMatcher(null); // default
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFieldMatcher(qf -> true).build();
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(randomUnifiedHighlighter(uhBuilder), null, "text");
|
||||
|
||||
BooleanQuery.Builder queryBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
|
||||
|
@ -1179,11 +1217,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
|
||||
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
|
||||
}
|
||||
|
||||
// text
|
||||
|
@ -1202,11 +1241,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
"<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.",
|
||||
snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
|
||||
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
|
||||
}
|
||||
|
||||
// category
|
||||
|
@ -1221,11 +1261,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
|
||||
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
|
||||
}
|
||||
ir.close();
|
||||
}
|
||||
|
@ -1234,17 +1275,14 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighterNoFieldMatch =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
// requireFieldMatch=false
|
||||
return (qf) -> true;
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer).withFieldMatcher(qf -> true).build();
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighterFieldMatch =
|
||||
randomUnifiedHighlighter(
|
||||
searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
|
||||
highlighterFieldMatch.setFieldMatcher(null); // default
|
||||
overrideFieldMatcherForTests(
|
||||
randomUnifiedHighlighter(uhBuilder, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null),
|
||||
null,
|
||||
"text");
|
||||
|
||||
BooleanQuery.Builder queryBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
|
||||
|
@ -1269,11 +1307,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
|
||||
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
|
||||
}
|
||||
|
||||
// text
|
||||
|
@ -1292,11 +1331,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
"<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.",
|
||||
snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
|
||||
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
|
||||
}
|
||||
|
||||
// category
|
||||
|
@ -1311,11 +1351,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
|
||||
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
|
||||
}
|
||||
ir.close();
|
||||
}
|
||||
|
@ -1323,7 +1364,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
public void testMatchesSlopBug() throws IOException {
|
||||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
Query query = new PhraseQuery(2, "title", "this", "is", "the", "field");
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -1341,20 +1382,18 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighterNoFieldMatch =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
// requireFieldMatch=false
|
||||
return (qf) -> true;
|
||||
}
|
||||
};
|
||||
.withFieldMatcher(qf -> true)
|
||||
.build();
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighterFieldMatch =
|
||||
randomUnifiedHighlighter(
|
||||
searcher,
|
||||
indexAnalyzer,
|
||||
EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY),
|
||||
null);
|
||||
highlighterFieldMatch.setFieldMatcher(null); // default
|
||||
overrideFieldMatcherForTests(
|
||||
randomUnifiedHighlighter(
|
||||
uhBuilder, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), null),
|
||||
null,
|
||||
"text");
|
||||
|
||||
BooleanQuery.Builder queryBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
|
||||
|
@ -1388,7 +1427,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
|
||||
}
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
|
||||
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
if (highlighterFieldMatch.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
|
@ -1396,7 +1436,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
} else {
|
||||
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
|
||||
}
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
|
||||
}
|
||||
|
||||
// text
|
||||
|
@ -1430,11 +1470,12 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
snippets[0]);
|
||||
}
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "title"::equals, "title");
|
||||
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "title");
|
||||
}
|
||||
|
||||
// category
|
||||
|
@ -1457,7 +1498,8 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
|
||||
}
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
|
||||
highlighterFieldMatch =
|
||||
overrideFieldMatcherForTests(highlighterFieldMatch, "text"::equals, "text");
|
||||
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
if (highlighterFieldMatch.getFlags("category").contains(HighlightFlag.WEIGHT_MATCHES)) {
|
||||
|
@ -1465,7 +1507,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
} else {
|
||||
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
|
||||
}
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
highlighterFieldMatch = overrideFieldMatcherForTests(highlighterFieldMatch, null, "text");
|
||||
}
|
||||
ir.close();
|
||||
}
|
||||
|
|
|
@ -117,22 +117,20 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
Query query = new WildcardQuery(new Term("body", "te*"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs);
|
||||
String[] snippets = uhBuilder.build().highlight("body", query, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a <b>test</b>.", snippets[0]);
|
||||
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
|
||||
|
||||
// disable MTQ; won't highlight
|
||||
highlighter.setHandleMultiTermQuery(false);
|
||||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
snippets = uhBuilder.withHandleMultiTermQuery(false).build().highlight("body", query, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
highlighter.setHandleMultiTermQuery(true); // reset
|
||||
|
||||
// wrong field
|
||||
BooleanQuery bq =
|
||||
|
@ -142,7 +140,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
.build();
|
||||
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
snippets = highlighter.highlight("body", bq, topDocs);
|
||||
snippets = uhBuilder.withHandleMultiTermQuery(true).build().highlight("body", bq, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
@ -156,6 +154,11 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
searcher, indexAnalyzer, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
|
||||
}
|
||||
|
||||
/**
 * Delegates to {@code TestUnifiedHighlighter.randomUnifiedHighlighter}, passing
 * {@code HighlightFlag.MULTI_TERM_QUERY} as the only mandatory flag and {@code null} for
 * requireFieldMatch.
 *
 * @param uhBuilder builder passed through to the shared helper
 * @return the highlighter returned by the shared helper
 */
private UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
|
||||
return TestUnifiedHighlighter.randomUnifiedHighlighter(
|
||||
uhBuilder, EnumSet.of(HighlightFlag.MULTI_TERM_QUERY), null);
|
||||
}
|
||||
|
||||
public void testOnePrefix() throws Exception {
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
|
||||
|
||||
|
@ -172,7 +175,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
// wrap in a BoostQuery to also show we see inside it
|
||||
Query query = new BoostQuery(new PrefixQuery(new Term("body", "te")), 2.0f);
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
|
@ -183,7 +187,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
|
||||
|
||||
// wrong field
|
||||
highlighter.setFieldMatcher(null); // default
|
||||
BooleanQuery bq =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
|
||||
|
@ -191,7 +194,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
.build();
|
||||
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
snippets = highlighter.highlight("body", bq, topDocs);
|
||||
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
@ -215,7 +218,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new RegexpQuery(new Term("body", "te.*"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
|
@ -225,7 +229,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
|
||||
|
||||
// wrong field
|
||||
highlighter.setFieldMatcher(null); // default
|
||||
BooleanQuery bq =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
|
||||
|
@ -233,7 +236,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
.build();
|
||||
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
snippets = highlighter.highlight("body", bq, topDocs);
|
||||
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
@ -257,7 +260,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new FuzzyQuery(new Term("body", "tets"), 1);
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
|
@ -285,7 +289,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
assertEquals("<b>Test</b> a one sentence document.", snippets[1]);
|
||||
|
||||
// wrong field
|
||||
highlighter.setFieldMatcher(null); // default
|
||||
BooleanQuery bq =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
|
||||
|
@ -293,7 +296,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
.build();
|
||||
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
snippets = highlighter.highlight("body", bq, topDocs);
|
||||
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
@ -317,7 +320,8 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = TermRangeQuery.newStringRange("body", "ta", "tf", true, true);
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
|
@ -393,7 +397,6 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
||||
// wrong field
|
||||
highlighter.setFieldMatcher(null); // default
|
||||
bq =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
|
||||
|
@ -403,7 +406,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
.build();
|
||||
topDocs = searcher.search(bq, 10, Sort.INDEXORDER);
|
||||
assertEquals(2, topDocs.totalHits.value);
|
||||
snippets = highlighter.highlight("body", bq, topDocs);
|
||||
snippets = uhBuilder.withFieldMatcher(null).build().highlight("body", bq, topDocs);
|
||||
assertEquals(2, snippets.length);
|
||||
assertEquals("This is a test.", snippets[0]);
|
||||
assertEquals("Test a one sentence document.", snippets[1]);
|
||||
|
@ -726,10 +729,12 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
// Default formatter just bolds each hit:
|
||||
assertEquals("<b>Test</b> a <b>one</b> <b>sentence</b> document.", snippets[0]);
|
||||
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
|
||||
// Now use our own formatter, that also stuffs the
|
||||
// matching term's text into the result:
|
||||
highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
|
@ -809,8 +814,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(25); // a little past first sentence
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(25); // a little past first sentence
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
|
||||
BooleanQuery query =
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -843,8 +850,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(32); // a little past first sentence
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(32); // a little past first sentence
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
|
||||
BooleanQuery query =
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -894,11 +903,12 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
};
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, buggyAnalyzer);
|
||||
highlighter.setHandleMultiTermQuery(true);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, buggyAnalyzer).withHandleMultiTermQuery(true);
|
||||
if (rarely()) {
|
||||
highlighter.setMaxLength(25); // a little past first sentence
|
||||
uhBuilder = uhBuilder.withMaxLength(25); // a little past first sentence
|
||||
}
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
|
||||
boolean hasClauses = false;
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
|
@ -1046,7 +1056,7 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
|
||||
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
|
||||
|
||||
|
@ -1159,8 +1169,10 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
|||
iw.commit();
|
||||
try (IndexReader ir = iw.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, analyzer);
|
||||
highlighter.setBreakIterator(WholeBreakIterator::new);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, analyzer)
|
||||
.withBreakIterator(WholeBreakIterator::new);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
|
||||
// Test PrefixQuery
|
||||
Query query = new PrefixQuery(new Term(field, UnicodeUtil.newString(valuePoints, 0, 1)));
|
||||
|
|
|
@ -116,25 +116,21 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
|||
for (int n = 1; n < maxTopN; n++) {
|
||||
final FakePassageFormatter f1 = new FakePassageFormatter();
|
||||
UnifiedHighlighter p1 =
|
||||
new UnifiedHighlighter(is, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
assertEquals("body", field);
|
||||
return f1;
|
||||
}
|
||||
};
|
||||
p1.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
creatUHObjectForCurrentTestSuite(
|
||||
is,
|
||||
indexAnalyzer,
|
||||
new UnifiedHighlighter.Builder(is, indexAnalyzer)
|
||||
.withFormatter(f1)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1));
|
||||
|
||||
final FakePassageFormatter f2 = new FakePassageFormatter();
|
||||
UnifiedHighlighter p2 =
|
||||
new UnifiedHighlighter(is, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
assertEquals("body", field);
|
||||
return f2;
|
||||
}
|
||||
};
|
||||
p2.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
creatUHObjectForCurrentTestSuite(
|
||||
is,
|
||||
indexAnalyzer,
|
||||
new UnifiedHighlighter.Builder(is, indexAnalyzer)
|
||||
.withFormatter(f2)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1));
|
||||
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
queryBuilder.add(query, BooleanClause.Occur.MUST);
|
||||
|
@ -278,8 +274,9 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -330,8 +327,10 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -363,4 +362,22 @@ public class TestUnifiedHighlighterRanking extends LuceneTestCase {
|
|||
ir.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/**
 * Builds the {@link UnifiedHighlighter} used by this test suite from the supplied builder.
 *
 * <p>The returned highlighter is an anonymous subclass constructed from {@code uhBuilder}; its
 * {@code getFormatter} asserts that the requested field is {@code "body"} and then defers to the
 * superclass implementation.
 *
 * <p>NOTE(review): the method name looks like a typo for "create..."; renaming it would also
 * require updating its call sites.
 *
 * @param searcher only passed to the wrapper builder created below
 * @param indexAnalyzer only passed to the wrapper builder created below
 * @param uhBuilder the builder the returned highlighter is actually constructed from
 * @return the highlighter produced by the wrapper builder's overridden {@code build()}
 */
private UnifiedHighlighter creatUHObjectForCurrentTestSuite(
|
||||
IndexSearcher searcher, Analyzer indexAnalyzer, UnifiedHighlighter.Builder uhBuilder) {
|
||||
// NOTE(review): this wrapper builder's build() is overridden to construct the highlighter from
// uhBuilder, so the wrapper (and searcher/indexAnalyzer) looks redundant here - confirm whether
// constructing the anonymous UnifiedHighlighter directly would be equivalent.
UnifiedHighlighter.Builder builder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
// Fail the test if a formatter is requested for any field other than "body".
assertEquals("body", field);
|
||||
return super.getFormatter(field);
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
return builder.build();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -46,7 +46,8 @@ public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
|
|||
.add(new TermQuery(new Term("title", "test")), BooleanClause.Occur.SHOULD)
|
||||
.build();
|
||||
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(null, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
UnifiedHighlighter.builderWithoutSearcher(indexAnalyzer).build();
|
||||
String snippet = highlighter.highlightWithoutSearcher("body", query, text, 1).toString();
|
||||
|
||||
assertEquals("Just a test <b>highlighting</b> without a searcher. ", snippet);
|
||||
|
@ -67,7 +68,7 @@ public class TestUnifiedHighlighterReanalysis extends LuceneTestCase {
|
|||
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
|
||||
IndexReader indexReader = indexWriter.getReader()) {
|
||||
IndexSearcher searcher = newSearcher(indexReader);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
highlighter.highlightWithoutSearcher("body", query, text, 1); // should throw
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,6 +73,7 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
RandomIndexWriter indexWriter;
|
||||
IndexSearcher searcher;
|
||||
UnifiedHighlighter highlighter;
|
||||
UnifiedHighlighter.Builder uhBuilder;
|
||||
IndexReader indexReader;
|
||||
|
||||
// Is it okay if a match (identified by offset pair) appears multiple times in the passage?
|
||||
|
@ -113,41 +114,42 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
private void initReaderSearcherHighlighter() throws IOException {
|
||||
indexReader = indexWriter.getReader();
|
||||
searcher = newSearcher(indexReader);
|
||||
uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
highlighter =
|
||||
TestUnifiedHighlighter.randomUnifiedHighlighter(
|
||||
searcher,
|
||||
indexAnalyzer,
|
||||
EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY),
|
||||
true);
|
||||
uhBuilder, EnumSet.of(HighlightFlag.PHRASES, HighlightFlag.MULTI_TERM_QUERY), true);
|
||||
// intercept the formatter in order to check constraints on the passage.
|
||||
final PassageFormatter defaultFormatter = highlighter.getFormatter(null);
|
||||
highlighter.setFormatter(
|
||||
new PassageFormatter() {
|
||||
@Override
|
||||
public Object format(Passage[] passages, String content) {
|
||||
boolean thisDupMatchAllowed = dupMatchAllowed.getAndSet(true);
|
||||
for (Passage passage : passages) {
|
||||
String prevPair = "";
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
// pad each to make comparable
|
||||
String pair =
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"%03d-%03d",
|
||||
passage.getMatchStarts()[i],
|
||||
passage.getMatchEnds()[i]);
|
||||
int cmp = prevPair.compareTo(pair);
|
||||
if (cmp == 0) {
|
||||
assertTrue("dup match in passage at offset " + pair, thisDupMatchAllowed);
|
||||
} else if (cmp > 0) {
|
||||
fail("bad match order in passage at offset " + pair);
|
||||
}
|
||||
prevPair = pair;
|
||||
}
|
||||
}
|
||||
return defaultFormatter.format(passages, content);
|
||||
}
|
||||
});
|
||||
highlighter =
|
||||
uhBuilder
|
||||
.withFormatter(
|
||||
new PassageFormatter() {
|
||||
@Override
|
||||
public Object format(Passage[] passages, String content) {
|
||||
boolean thisDupMatchAllowed = dupMatchAllowed.getAndSet(true);
|
||||
for (Passage passage : passages) {
|
||||
String prevPair = "";
|
||||
for (int i = 0; i < passage.getNumMatches(); i++) {
|
||||
// pad each to make comparable
|
||||
String pair =
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"%03d-%03d",
|
||||
passage.getMatchStarts()[i],
|
||||
passage.getMatchEnds()[i]);
|
||||
int cmp = prevPair.compareTo(pair);
|
||||
if (cmp == 0) {
|
||||
assertTrue("dup match in passage at offset " + pair, thisDupMatchAllowed);
|
||||
} else if (cmp > 0) {
|
||||
fail("bad match order in passage at offset " + pair);
|
||||
}
|
||||
prevPair = pair;
|
||||
}
|
||||
}
|
||||
return defaultFormatter.format(passages, content);
|
||||
}
|
||||
})
|
||||
.build();
|
||||
}
|
||||
|
||||
private PhraseQuery newPhraseQuery(String field, String phrase) {
|
||||
|
@ -315,8 +317,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// do again, this time with MTQ disabled. We should only find "alpha bravo".
|
||||
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
|
||||
highlighter =
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
|
||||
.build();
|
||||
|
||||
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
|
@ -361,8 +365,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// do again, this time with MTQ disabled. We should only find "alpha bravo".
|
||||
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
|
||||
highlighter =
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
|
||||
.build();
|
||||
|
||||
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
|
@ -408,8 +414,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
}
|
||||
|
||||
// do again, this time with MTQ disabled.
|
||||
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHandleMultiTermQuery(false); // disable but leave phrase processing enabled
|
||||
highlighter =
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withHandleMultiTermQuery(false) // disable but leave phrase processing enabled
|
||||
.build();
|
||||
|
||||
topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
snippets = highlighter.highlight("body", query, topDocs);
|
||||
|
@ -480,7 +488,7 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
indexWriter.addDocument(
|
||||
newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
|
||||
initReaderSearcherHighlighter();
|
||||
highlighter.setMaxLength(21);
|
||||
highlighter = uhBuilder.withMaxLength(21).build();
|
||||
|
||||
BooleanQuery query =
|
||||
new BooleanQuery.Builder()
|
||||
|
@ -527,8 +535,10 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testMatchNoDocsQuery() throws IOException {
|
||||
highlighter = new UnifiedHighlighter(null, indexAnalyzer);
|
||||
highlighter.setHighlightPhrasesStrictly(true);
|
||||
highlighter =
|
||||
UnifiedHighlighter.builderWithoutSearcher(indexAnalyzer)
|
||||
.withHighlightPhrasesStrictly(true)
|
||||
.build();
|
||||
String content = "whatever";
|
||||
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
|
||||
assertEquals(content, o);
|
||||
|
@ -540,24 +550,31 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
|||
"There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
|
||||
initReaderSearcherHighlighter();
|
||||
|
||||
highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(true);
|
||||
UnifiedHighlighter.Builder builder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
final Set<HighlightFlag> flags = super.getFlags(field);
|
||||
flags.remove(HighlightFlag.WEIGHT_MATCHES); // unsupported
|
||||
return flags;
|
||||
}
|
||||
public UnifiedHighlighter build() {
|
||||
return new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
final Set<HighlightFlag> flags = super.getFlags(field);
|
||||
flags.remove(HighlightFlag.WEIGHT_MATCHES); // unsupported
|
||||
return flags;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||
if (query instanceof MyQuery) {
|
||||
return Collections.singletonList(((MyQuery) query).wrapped);
|
||||
}
|
||||
return null;
|
||||
@Override
|
||||
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||
if (query instanceof MyQuery) {
|
||||
return Collections.singletonList(((MyQuery) query).wrapped);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
highlighter.setHighlightPhrasesStrictly(true);
|
||||
highlighter = builder.build();
|
||||
|
||||
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
|
||||
Query phraseQuery =
|
||||
|
|
|
@ -90,6 +90,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
|
||||
}
|
||||
|
||||
/**
 * Builder-based convenience overload: delegates to {@code
 * TestUnifiedHighlighter.randomUnifiedHighlighter} with the given builder, an empty set of
 * {@link HighlightFlag}s and {@code null} as the last argument.
 *
 * <p>NOTE(review): presumably the flag set lists flags that must be enabled and {@code null}
 * leaves the last option to be chosen by the delegate - confirm against TestUnifiedHighlighter.
 *
 * @param uhBuilder the pre-configured builder handed to the delegate
 * @return whatever highlighter the delegate creates from {@code uhBuilder}
 */
private UnifiedHighlighter randomUnifiedHighlighter(UnifiedHighlighter.Builder uhBuilder) {
|
||||
return TestUnifiedHighlighter.randomUnifiedHighlighter(
|
||||
uhBuilder, EnumSet.noneOf(HighlightFlag.class), null);
|
||||
}
|
||||
|
||||
//
|
||||
// Tests below were ported from the PostingsHighlighter. Possibly augmented. Far below are newer
|
||||
// tests.
|
||||
|
@ -177,8 +182,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(maxLength);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withMaxLength(maxLength);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs);
|
||||
|
||||
ir.close();
|
||||
|
@ -256,8 +262,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(value.length() * 2 + 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(value.length() * 2 + 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new IntervalQuery("body", Intervals.term("field"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -354,8 +362,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
Query query = new IntervalQuery("body", Intervals.phrase("buddhist", "origins"));
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
// highlighter.getFlags("body").containsAll(EnumSet.of(HighlightFlag.WEIGHT_MATCHES,
|
||||
|
@ -383,8 +392,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
Query query = new IntervalQuery("body", Intervals.phrase("curious", "george"));
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer).withHighlightPhrasesStrictly(false);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertFalse(snippets[0].contains("<b>Curious</b>Curious"));
|
||||
|
@ -422,8 +432,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
Intervals.term("massachusetts")));
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertTrue(snippets[0].contains("<b>Square</b>"));
|
||||
|
@ -478,8 +490,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
"body", Intervals.notContaining(Intervals.term("terms"), Intervals.term("both")));
|
||||
TopDocs topDocs = searcher.search(query, 10);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxLength(Integer.MAX_VALUE - 1);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
|
||||
assertEquals(1, snippets.length);
|
||||
assertFalse(snippets[0].contains("<b>both</b>"));
|
||||
|
@ -502,14 +516,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
highlighter.setMaxLength(10000);
|
||||
// Configure the highlighter through the builder instead of the deprecated setters.
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
// Keep the same limit as the setMaxLength(10000) call this replaces; the migration to the
// builder should not silently shrink the analyzed length.
.withMaxLength(10000)
|
||||
.withBreakIterator(WholeBreakIterator::new);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new IntervalQuery("body", Intervals.term("test"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits.value);
|
||||
|
@ -571,9 +582,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected List<CharSequence[]> loadFieldValues(
|
||||
String[] fields, DocIdSetIterator docIter, int cacheCharsThreshold)
|
||||
|
@ -681,8 +692,10 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setMaxNoHighlightPassages(0); // don't want any default summary
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withMaxNoHighlightPassages(0); // don't want any default summary
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
|
||||
int[] docIDs = new int[] {0};
|
||||
String[] snippets =
|
||||
|
@ -713,12 +726,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected BreakIterator getBreakIterator(String field) {
|
||||
return new WholeBreakIterator();
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withBreakIterator(WholeBreakIterator::new)
|
||||
.build();
|
||||
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
|
||||
int[] docIDs = new int[] {0};
|
||||
String[] snippets =
|
||||
|
@ -846,9 +856,11 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
|
||||
highlighter.setCacheFieldValCharsThreshold(
|
||||
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(searcher, indexAnalyzer)
|
||||
.withCacheFieldValCharsThreshold(
|
||||
random().nextInt(10) * 10); // 0 thru 90 intervals of 10
|
||||
UnifiedHighlighter highlighter = randomUnifiedHighlighter(uhBuilder);
|
||||
Query query = new IntervalQuery("body", Intervals.term("answer"));
|
||||
TopDocs hits = searcher.search(query, numDocs);
|
||||
assertEquals(numDocs, hits.totalHits.value);
|
||||
|
@ -883,12 +895,9 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withFormatter(new DefaultPassageFormatter("<b>", "</b>", "... ", true))
|
||||
.build();
|
||||
|
||||
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
|
@ -917,23 +926,21 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected PassageFormatter getFormatter(String field) {
|
||||
return new PassageFormatter() {
|
||||
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
UnifiedHighlighter.builder(searcher, indexAnalyzer)
|
||||
.withFormatter(
|
||||
new PassageFormatter() {
|
||||
PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
||||
@Override
|
||||
public String[] format(Passage[] passages, String content) {
|
||||
// Just turns the String snippet into a length 2
|
||||
// array of String
|
||||
return new String[] {
|
||||
"blah blah", defaultFormatter.format(passages, content).toString()
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
@Override
|
||||
public String[] format(Passage[] passages, String content) {
|
||||
// Just turns the String snippet into a length 2
|
||||
// array of String
|
||||
return new String[] {
|
||||
"blah blah", defaultFormatter.format(passages, content).toString()
|
||||
};
|
||||
}
|
||||
})
|
||||
.build();
|
||||
|
||||
Query query = new IntervalQuery("body", Intervals.term("highlighting"));
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
|
@ -980,7 +987,7 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
public void testMatchesSlopBug() throws IOException {
|
||||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
Query query =
|
||||
new IntervalQuery(
|
||||
"title",
|
||||
|
|
|
@ -107,7 +107,7 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter = UnifiedHighlighter.builder(searcher, indexAnalyzer).build();
|
||||
BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
|
||||
for (String field : fields) {
|
||||
queryBuilder.add(new TermQuery(new Term(field, "test")), BooleanClause.Occur.MUST);
|
||||
|
@ -192,8 +192,9 @@ public class TestUnifiedHighlighterTermVec extends LuceneTestCase {
|
|||
iw.close();
|
||||
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter.Builder uhBuilder = new UnifiedHighlighter.Builder(searcher, indexAnalyzer);
|
||||
UnifiedHighlighter highlighter =
|
||||
new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
@Override
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
return Collections.emptySet(); // no WEIGHT_MATCHES
|
||||
|
|
|
@ -97,8 +97,10 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
@Test
|
||||
public void testUnifiedHighlighterExtensibility() {
|
||||
final int maxLength = 1000;
|
||||
UnifiedHighlighter.Builder uhBuilder =
|
||||
new UnifiedHighlighter.Builder(null, new MockAnalyzer(random()));
|
||||
UnifiedHighlighter uh =
|
||||
new UnifiedHighlighter(null, new MockAnalyzer(random())) {
|
||||
new UnifiedHighlighter(uhBuilder) {
|
||||
|
||||
@Override
|
||||
protected Map<String, Object[]> highlightFieldsAsObjects(
|
||||
|
@ -252,7 +254,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
* Tests maintaining extensibility/visibility of {@link
|
||||
* org.apache.lucene.search.uhighlight.FieldHighlighter} out of package.
|
||||
*/
|
||||
private static class CustomFieldHighlighter extends FieldHighlighter {
|
||||
protected static class CustomFieldHighlighter extends FieldHighlighter {
|
||||
CustomFieldHighlighter(
|
||||
String field,
|
||||
FieldOffsetStrategy fieldOffsetStrategy,
|
||||
|
|
Loading…
Reference in New Issue