mirror of https://github.com/apache/lucene.git
LUCENE-7575: Add UnifiedHighlighter field matcher predicate (AKA requireFieldMatch=false)
(cherry picked from commit 2e948fe
)
This commit is contained in:
parent
cdce621087
commit
4e7a7dbf9a
|
@ -3,6 +3,57 @@ Lucene Change Log
|
|||
For more information on past and future Lucene versions, please see:
|
||||
http://s.apache.org/luceneversions
|
||||
|
||||
======================= Lucene 7.0.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2605: Classic QueryParser no longer splits on whitespace by default.
|
||||
Use setSplitOnWhitespace(true) to get the old behavior. (Steve Rowe)
|
||||
|
||||
* LUCENE-7369: Similarity.coord and BooleanQuery.disableCoord are removed.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-7368: Removed query normalization. (Adrien Grand)
|
||||
|
||||
* LUCENE-7355: AnalyzingQueryParser has been removed as its functionality has
|
||||
been folded into the classic QueryParser. (Adrien Grand)
|
||||
|
||||
* LUCENE-7407: Doc values APIs have been switched from random access
|
||||
to iterators, enabling future codec compression improvements. (Mike
|
||||
McCandless)
|
||||
|
||||
* LUCENE-7475: Norms now support sparsity, allowing to pay for what is
|
||||
actually used. (Adrien Grand)
|
||||
|
||||
* LUCENE-7494: Points now have a per-field API, like doc values. (Adrien Grand)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7489: Better storage of sparse doc-values fields with the default
|
||||
codec. (Adrien Grand)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7416: BooleanQuery optimizes queries that have queries that occur both
|
||||
in the sets of SHOULD and FILTER clauses, or both in MUST/FILTER and MUST_NOT
|
||||
clauses. (Spyros Kapnissis via Adrien Grand, Uwe Schindler)
|
||||
|
||||
* LUCENE-7506: FastTaxonomyFacetCounts should use CPU in proportion to
|
||||
the size of the intersected set of hits from the query and documents
|
||||
that have a facet value, so sparse faceting works as expected
|
||||
(Adrien Grand via Mike McCandless)
|
||||
|
||||
* LUCENE-7519: Add optimized APIs to compute browse-only top level
|
||||
facets (Mike McCandless)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)
|
||||
|
||||
* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
|
||||
|
||||
======================= Lucene 6.4.0 =======================
|
||||
|
||||
API Changes
|
||||
|
@ -73,6 +124,11 @@ Improvements
|
|||
* LUCENE-7537: Index time sorting now supports multi-valued sorts
|
||||
using selectors (MIN, MAX, etc.) (Jim Ferenczi via Mike McCandless)
|
||||
|
||||
* LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't
|
||||
necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
|
||||
See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
|
||||
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-7568: Optimize merging when index sorting is used but the
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||
|
@ -49,7 +50,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
private final LeafReader leafReader;
|
||||
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
|
||||
|
||||
public MemoryIndexOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
|
||||
public MemoryIndexOffsetStrategy(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
|
||||
CharacterRunAutomaton[] automata, Analyzer analyzer,
|
||||
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||
super(field, extractedTerms, phraseHelper, automata, analyzer);
|
||||
|
@ -57,13 +58,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
|
||||
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
|
||||
// preFilter for MemoryIndex
|
||||
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, phraseHelper, multiTermQueryRewrite);
|
||||
preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite);
|
||||
}
|
||||
|
||||
/**
|
||||
* Build one {@link CharacterRunAutomaton} matching any term the query might match.
|
||||
*/
|
||||
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
|
||||
private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher,
|
||||
BytesRef[] terms,
|
||||
CharacterRunAutomaton[] automata,
|
||||
PhraseHelper strictPhrases,
|
||||
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||
|
@ -74,7 +76,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
|
|||
Collections.addAll(allAutomata, automata);
|
||||
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
|
||||
Collections.addAll(allAutomata,
|
||||
MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
|
||||
MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan
|
||||
}
|
||||
|
||||
if (allAutomata.size() == 1) {
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.util.Collection;
|
|||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
|
@ -56,50 +57,52 @@ class MultiTermHighlighting {
|
|||
}
|
||||
|
||||
/**
|
||||
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
|
||||
* automata that will match terms.
|
||||
* Extracts MultiTermQueries that match the provided field predicate.
|
||||
* Returns equivalent automata that will match terms.
|
||||
*/
|
||||
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
|
||||
public static CharacterRunAutomaton[] extractAutomata(Query query,
|
||||
Predicate<String> fieldMatcher,
|
||||
boolean lookInSpan,
|
||||
Function<Query, Collection<Query>> preRewriteFunc) {
|
||||
List<CharacterRunAutomaton> list = new ArrayList<>();
|
||||
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
|
||||
if (customSubQueries != null) {
|
||||
for (Query sub : customSubQueries) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (query instanceof BooleanQuery) {
|
||||
for (BooleanClause clause : (BooleanQuery) query) {
|
||||
if (!clause.isProhibited()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
}
|
||||
} else if (query instanceof ConstantScoreQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
|
||||
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (query instanceof DisjunctionMaxQuery) {
|
||||
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanOrQuery) {
|
||||
for (Query sub : ((SpanOrQuery) query).getClauses()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanNearQuery) {
|
||||
for (Query sub : ((SpanNearQuery) query).getClauses()) {
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
}
|
||||
} else if (lookInSpan && query instanceof SpanNotQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan,
|
||||
preRewriteFunc)));
|
||||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
|
||||
lookInSpan, preRewriteFunc)));
|
||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
|
||||
fieldMatcher, lookInSpan, preRewriteFunc)));
|
||||
} else if (query instanceof AutomatonQuery) {
|
||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||
if (aq.getField().equals(field)) {
|
||||
if (fieldMatcher.test(aq.getField())) {
|
||||
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
|
||||
@Override
|
||||
public String toString() {
|
||||
|
@ -110,7 +113,7 @@ class MultiTermHighlighting {
|
|||
} else if (query instanceof PrefixQuery) {
|
||||
final PrefixQuery pq = (PrefixQuery) query;
|
||||
Term prefix = pq.getPrefix();
|
||||
if (prefix.field().equals(field)) {
|
||||
if (fieldMatcher.test(prefix.field())) {
|
||||
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
|
||||
Automata.makeAnyString())) {
|
||||
@Override
|
||||
|
@ -121,7 +124,7 @@ class MultiTermHighlighting {
|
|||
}
|
||||
} else if (query instanceof FuzzyQuery) {
|
||||
final FuzzyQuery fq = (FuzzyQuery) query;
|
||||
if (fq.getField().equals(field)) {
|
||||
if (fieldMatcher.test(fq.getField())) {
|
||||
String utf16 = fq.getTerm().text();
|
||||
int termText[] = new int[utf16.codePointCount(0, utf16.length())];
|
||||
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
|
||||
|
@ -142,7 +145,7 @@ class MultiTermHighlighting {
|
|||
}
|
||||
} else if (query instanceof TermRangeQuery) {
|
||||
final TermRangeQuery tq = (TermRangeQuery) query;
|
||||
if (tq.getField().equals(field)) {
|
||||
if (fieldMatcher.test(tq.getField())) {
|
||||
final CharsRef lowerBound;
|
||||
if (tq.getLowerTerm() == null) {
|
||||
lowerBound = null;
|
||||
|
|
|
@ -16,17 +16,50 @@
|
|||
*/
|
||||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.*;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.PriorityQueue;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.Function;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.FilterLeafReader;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.highlight.WeightedSpanTerm;
|
||||
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
|
||||
import org.apache.lucene.search.spans.*;
|
||||
import org.apache.lucene.search.spans.SpanCollector;
|
||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanWeight;
|
||||
import org.apache.lucene.search.spans.Spans;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.function.Function;
|
||||
|
||||
/**
|
||||
* Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly).
|
||||
* This is a stateful class holding information about the query, but it can (and is) re-used across highlighting
|
||||
|
@ -40,7 +73,7 @@ import java.util.function.Function;
|
|||
public class PhraseHelper {
|
||||
|
||||
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
|
||||
spanQuery -> null, query -> null, true);
|
||||
(s) -> false, spanQuery -> null, query -> null, true);
|
||||
|
||||
//TODO it seems this ought to be a general thing on Spans?
|
||||
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
|
||||
|
@ -59,10 +92,11 @@ public class PhraseHelper {
|
|||
}
|
||||
};
|
||||
|
||||
private final String fieldName; // if non-null, only look at queries/terms for this field
|
||||
private final String fieldName;
|
||||
private final Set<Term> positionInsensitiveTerms; // (TermQuery terms)
|
||||
private final Set<SpanQuery> spanQueries;
|
||||
private final boolean willRewrite;
|
||||
private final Predicate<String> fieldMatcher;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
|
@ -73,14 +107,15 @@ public class PhraseHelper {
|
|||
* to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
|
||||
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
|
||||
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
|
||||
* {@code fieldMatcher} The field name predicate to use for extracting the query part that must be highlighted.
|
||||
*/
|
||||
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
|
||||
public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
|
||||
Function<Query, Collection<Query>> preExtractRewriteFunction,
|
||||
boolean ignoreQueriesNeedingRewrite) {
|
||||
this.fieldName = field; // if null then don't require field match
|
||||
this.fieldName = field;
|
||||
this.fieldMatcher = fieldMatcher;
|
||||
// filter terms to those we want
|
||||
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
|
||||
// requireFieldMatch optional
|
||||
positionInsensitiveTerms = new FieldFilteringTermSet();
|
||||
spanQueries = new HashSet<>();
|
||||
|
||||
// TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
|
||||
|
@ -131,11 +166,11 @@ public class PhraseHelper {
|
|||
@Override
|
||||
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery,
|
||||
float boost) throws IOException {
|
||||
if (field != null) {
|
||||
// if this span query isn't for this field, skip it.
|
||||
Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
|
||||
collectSpanQueryFields(spanQuery, fieldNameSet);
|
||||
if (!fieldNameSet.contains(field)) {
|
||||
// if this span query isn't for this field, skip it.
|
||||
Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
|
||||
collectSpanQueryFields(spanQuery, fieldNameSet);
|
||||
for (String spanField : fieldNameSet) {
|
||||
if (!fieldMatcher.test(spanField)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -190,10 +225,11 @@ public class PhraseHelper {
|
|||
if (spanQueries.isEmpty()) {
|
||||
return Collections.emptyMap();
|
||||
}
|
||||
final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
|
||||
// for each SpanQuery, collect the member spans into a map.
|
||||
Map<BytesRef, Spans> result = new HashMap<>();
|
||||
for (SpanQuery spanQuery : spanQueries) {
|
||||
getTermToSpans(spanQuery, leafReader.getContext(), doc, result);
|
||||
getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -203,15 +239,14 @@ public class PhraseHelper {
|
|||
int doc, Map<BytesRef, Spans> result)
|
||||
throws IOException {
|
||||
// note: in WSTE there was some field specific looping that seemed pointless so that isn't here.
|
||||
final IndexSearcher searcher = new IndexSearcher(readerContext);
|
||||
final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
|
||||
searcher.setQueryCache(null);
|
||||
if (willRewrite) {
|
||||
spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done
|
||||
}
|
||||
|
||||
// Get the underlying query terms
|
||||
|
||||
TreeSet<Term> termSet = new TreeSet<>(); // sorted so we can loop over results in order shortly...
|
||||
TreeSet<Term> termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly...
|
||||
searcher.createWeight(spanQuery, false).extractTerms(termSet);//needsScores==false
|
||||
|
||||
// Get Spans by running the query against the reader
|
||||
|
@ -240,9 +275,6 @@ public class PhraseHelper {
|
|||
for (final Term queryTerm : termSet) {
|
||||
// note: we expect that at least one query term will pass these filters. This is because the collected
|
||||
// spanQuery list were already filtered by these conditions.
|
||||
if (fieldName != null && fieldName.equals(queryTerm.field()) == false) {
|
||||
continue;
|
||||
}
|
||||
if (positionInsensitiveTerms.contains(queryTerm)) {
|
||||
continue;
|
||||
}
|
||||
|
@ -375,19 +407,17 @@ public class PhraseHelper {
|
|||
}
|
||||
|
||||
/**
|
||||
* Simple HashSet that filters out Terms not matching a desired field on {@code add()}.
|
||||
* Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}.
|
||||
*/
|
||||
private static class FieldFilteringTermHashSet extends HashSet<Term> {
|
||||
private final String field;
|
||||
|
||||
FieldFilteringTermHashSet(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
private class FieldFilteringTermSet extends TreeSet<Term> {
|
||||
@Override
|
||||
public boolean add(Term term) {
|
||||
if (term.field().equals(field)) {
|
||||
return super.add(term);
|
||||
if (fieldMatcher.test(term.field())) {
|
||||
if (term.field().equals(fieldName)) {
|
||||
return super.add(term);
|
||||
} else {
|
||||
return super.add(new Term(fieldName, term.bytes()));
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
@ -499,6 +529,64 @@ public class PhraseHelper {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This reader will just delegate every call to a single field in the wrapped
|
||||
* LeafReader. This way we ensure that all queries going through this reader target the same field.
|
||||
*/
|
||||
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
|
||||
final String fieldName;
|
||||
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
|
||||
super(in);
|
||||
this.fieldName = fieldName;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FieldInfos getFieldInfos() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
return new FilterFields(super.fields()) {
|
||||
@Override
|
||||
public Terms terms(String field) throws IOException {
|
||||
return super.terms(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<String> iterator() {
|
||||
return Collections.singletonList(fieldName).iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNumericDocValues(String field) throws IOException {
|
||||
return super.getNumericDocValues(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
|
||||
return super.getBinaryDocValues(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SortedDocValues getSortedDocValues(String field) throws IOException {
|
||||
return super.getSortedDocValues(fieldName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public NumericDocValues getNormValues(String field) throws IOException {
|
||||
return super.getNormValues(fieldName);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* A Spans based on a list of cached spans for one doc. It is pre-positioned to this doc.
|
||||
*/
|
||||
|
|
|
@ -24,6 +24,7 @@ import java.util.Arrays;
|
|||
import java.util.Collection;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
@ -31,6 +32,7 @@ import java.util.Objects;
|
|||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
import java.util.TreeSet;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.function.Supplier;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
|
@ -58,7 +60,6 @@ import org.apache.lucene.search.Weight;
|
|||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.UnicodeUtil;
|
||||
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
|
||||
|
||||
/**
|
||||
|
@ -119,13 +120,13 @@ public class UnifiedHighlighter {
|
|||
|
||||
private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy
|
||||
|
||||
// private boolean defaultRequireFieldMatch = true; TODO
|
||||
|
||||
private int maxLength = DEFAULT_MAX_LENGTH;
|
||||
|
||||
// BreakIterator is stateful so we use a Supplier factory method
|
||||
private Supplier<BreakIterator> defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT);
|
||||
|
||||
private Predicate<String> defaultFieldMatcher;
|
||||
|
||||
private PassageScorer defaultScorer = new PassageScorer();
|
||||
|
||||
private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
|
||||
|
@ -140,8 +141,8 @@ public class UnifiedHighlighter {
|
|||
/**
|
||||
* Calls {@link Weight#extractTerms(Set)} on an empty index for the query.
|
||||
*/
|
||||
protected static SortedSet<Term> extractTerms(Query query) throws IOException {
|
||||
SortedSet<Term> queryTerms = new TreeSet<>();
|
||||
protected static Set<Term> extractTerms(Query query) throws IOException {
|
||||
Set<Term> queryTerms = new HashSet<>();
|
||||
EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
|
||||
return queryTerms;
|
||||
}
|
||||
|
@ -197,6 +198,10 @@ public class UnifiedHighlighter {
|
|||
this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
|
||||
}
|
||||
|
||||
public void setFieldMatcher(Predicate<String> predicate) {
|
||||
this.defaultFieldMatcher = predicate;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's enabled. MTQ
|
||||
* highlighting can be expensive, particularly when using offsets in postings.
|
||||
|
@ -220,6 +225,18 @@ public class UnifiedHighlighter {
|
|||
return defaultPassageRelevancyOverSpeed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the predicate to use for extracting the query part that must be highlighted.
|
||||
* By default only queries that target the current field are kept. (AKA requireFieldMatch)
|
||||
*/
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
if (defaultFieldMatcher != null) {
|
||||
return defaultFieldMatcher;
|
||||
} else {
|
||||
// requireFieldMatch = true
|
||||
return (qf) -> field.equals(qf);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The maximum content size to process. Content will be truncated to this size before highlighting. Typically
|
||||
|
@ -548,7 +565,7 @@ public class UnifiedHighlighter {
|
|||
copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params
|
||||
|
||||
// Init field highlighters (where most of the highlight logic lives, and on a per field basis)
|
||||
SortedSet<Term> queryTerms = extractTerms(query);
|
||||
Set<Term> queryTerms = extractTerms(query);
|
||||
FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length];
|
||||
int numTermVectors = 0;
|
||||
int numPostings = 0;
|
||||
|
@ -718,13 +735,13 @@ public class UnifiedHighlighter {
|
|||
getClass().getSimpleName() + " without an IndexSearcher.");
|
||||
}
|
||||
Objects.requireNonNull(content, "content is required");
|
||||
SortedSet<Term> queryTerms = extractTerms(query);
|
||||
Set<Term> queryTerms = extractTerms(query);
|
||||
return getFieldHighlighter(field, query, queryTerms, maxPassages)
|
||||
.highlightFieldForDoc(null, -1, content);
|
||||
}
|
||||
|
||||
protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
|
||||
BytesRef[] terms = filterExtractedTerms(field, allTerms);
|
||||
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
|
||||
BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
|
||||
Set<HighlightFlag> highlightFlags = getFlags(field);
|
||||
PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
|
||||
CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
|
||||
|
@ -738,19 +755,15 @@ public class UnifiedHighlighter {
|
|||
getFormatter(field));
|
||||
}
|
||||
|
||||
protected static BytesRef[] filterExtractedTerms(String field, SortedSet<Term> queryTerms) {
|
||||
// TODO consider requireFieldMatch
|
||||
Term floor = new Term(field, "");
|
||||
Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
|
||||
SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
|
||||
|
||||
// Strip off the redundant field:
|
||||
BytesRef[] terms = new BytesRef[fieldTerms.size()];
|
||||
int termUpto = 0;
|
||||
for (Term term : fieldTerms) {
|
||||
terms[termUpto++] = term.bytes();
|
||||
protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) {
|
||||
// Strip off the redundant field and sort the remaining terms
|
||||
SortedSet<BytesRef> filteredTerms = new TreeSet<>();
|
||||
for (Term term : queryTerms) {
|
||||
if (fieldMatcher.test(term.field())) {
|
||||
filteredTerms.add(term.bytes());
|
||||
}
|
||||
}
|
||||
return terms;
|
||||
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
|
||||
}
|
||||
|
||||
protected Set<HighlightFlag> getFlags(String field) {
|
||||
|
@ -771,14 +784,13 @@ public class UnifiedHighlighter {
|
|||
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
|
||||
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
|
||||
return highlightPhrasesStrictly ?
|
||||
new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
|
||||
PhraseHelper.NONE;
|
||||
new PhraseHelper(query, field, getFieldMatcher(field),
|
||||
this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE;
|
||||
}
|
||||
|
||||
protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
|
||||
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
|
||||
this::preMultiTermQueryRewrite)
|
||||
? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite)
|
||||
: ZERO_LEN_AUTOMATA_ARRAY;
|
||||
}
|
||||
|
||||
|
@ -826,7 +838,7 @@ public class UnifiedHighlighter {
|
|||
//skip using a memory index since it's pure term filtering
|
||||
return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
|
||||
} else {
|
||||
return new MemoryIndexOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
|
||||
return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(),
|
||||
this::preMultiTermQueryRewrite);
|
||||
}
|
||||
case NONE_NEEDED:
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.Arrays;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
|
@ -32,14 +33,17 @@ import org.apache.lucene.analysis.MockTokenizer;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
|
@ -959,4 +963,275 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
ir.close();
|
||||
}
|
||||
|
||||
private IndexReader indexSomeFields() throws IOException {
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
|
||||
FieldType ft = new FieldType();
|
||||
ft.setIndexOptions(IndexOptions.NONE);
|
||||
ft.setTokenized(false);
|
||||
ft.setStored(true);
|
||||
ft.freeze();
|
||||
|
||||
Field title = new Field("title", "", fieldType);
|
||||
Field text = new Field("text", "", fieldType);
|
||||
Field category = new Field("category", "", fieldType);
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(title);
|
||||
doc.add(text);
|
||||
doc.add(category);
|
||||
title.setStringValue("This is the title field.");
|
||||
text.setStringValue("This is the text field. You can put some text if you want.");
|
||||
category.setStringValue("This is the category field.");
|
||||
iw.addDocument(doc);
|
||||
|
||||
IndexReader ir = iw.getReader();
|
||||
iw.close();
|
||||
return ir;
|
||||
}
|
||||
|
||||
public void testFieldMatcherTermQuery() throws Exception {
|
||||
IndexReader ir = indexSomeFields();
|
||||
IndexSearcher searcher = newSearcher(ir);
|
||||
UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||
@Override
|
||||
protected Predicate<String> getFieldMatcher(String field) {
|
||||
// requireFieldMatch=false
|
||||
return (qf) -> true;
|
||||
}
|
||||
};
|
||||
UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
|
||||
BooleanQuery.Builder queryBuilder =
|
||||
new BooleanQuery.Builder()
|
||||
.add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("text", "field")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("text", "this")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("title", "this")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("category", "this")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("category", "some")), BooleanClause.Occur.SHOULD)
|
||||
.add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.SHOULD);
|
||||
Query query = queryBuilder.build();
|
||||
|
||||
// title
|
||||
{
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
|
||||
|
||||
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
|
||||
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
}
|
||||
|
||||
// text
|
||||
{
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
|
||||
|
||||
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
}
|
||||
|
||||
// category
|
||||
{
|
||||
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
|
||||
assertEquals(1, topDocs.totalHits);
|
||||
String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
|
||||
|
||||
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
|
||||
|
||||
|
||||
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
|
||||
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
|
||||
highlighterFieldMatch.setFieldMatcher(null);
|
||||
}
|
||||
ir.close();
|
||||
}
|
||||
|
||||
  /**
   * Same field-matcher scenarios as testFieldMatcherTermQuery, but with
   * multi-term queries (FuzzyQuery / PrefixQuery) so the matcher is exercised
   * on the automaton/rewrite highlighting path as well.
   */
  public void testFieldMatcherMultiTermQuery() throws Exception {
    IndexReader ir = indexSomeFields();
    IndexSearcher searcher = newSearcher(ir);
    // Subclass hook returns an always-true predicate: terms from any query
    // field may highlight, i.e. requireFieldMatch=false.
    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
      @Override
      protected Predicate<String> getFieldMatcher(String field) {
        // requireFieldMatch=false
        return (qf) -> true;
      }
    };
    // Default highlighter: only clauses on the highlighted field match.
    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery.Builder queryBuilder =
        new BooleanQuery.Builder()
            .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
            .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
            .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
    Query query = queryBuilder.build();

    // title
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      // Relaxed matcher: "thi*"/"fie*" clauses from other fields highlight too.
      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);

      // Default: only the "title" clauses highlight.
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);

      // Explicit matcher restricted to the "text" clauses.
      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }

    // text
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);

      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);

      // Explicit matcher restricted to the "title" clauses.
      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }

    // category
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);

      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);

      // Explicit matcher restricted to the "title" clauses.
      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }
    ir.close();
  }
|
||||
|
||||
  /**
   * Same field-matcher scenarios as testFieldMatcherTermQuery, but with
   * {@link PhraseQuery} clauses (including sloppy phrases), exercising the
   * matcher on the position-sensitive highlighting path.
   */
  public void testFieldMatcherPhraseQuery() throws Exception {
    IndexReader ir = indexSomeFields();
    IndexSearcher searcher = newSearcher(ir);
    // Subclass hook returns an always-true predicate: phrases from any query
    // field may highlight, i.e. requireFieldMatch=false.
    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
      @Override
      protected Predicate<String> getFieldMatcher(String field) {
        // requireFieldMatch=false
        return (qf) -> true;
      }
    };
    // Default highlighter: only phrases on the highlighted field match.
    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery.Builder queryBuilder =
        new BooleanQuery.Builder()
            .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
            .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
    Query query = queryBuilder.build();

    // title
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      // Relaxed matcher: phrases declared on other fields highlight too.
      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);

      // Default: only the "title" phrase highlights.
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);

      // Explicit matcher restricted to the "text" phrases.
      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }

    // text
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);

      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);

      // Explicit matcher restricted to "title": no "text" phrase matches, so nothing is highlighted.
      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }

    // category
    {
      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
      assertEquals(1, topDocs.totalHits);
      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);

      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);

      // Explicit matcher restricted to the "text" phrases.
      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
      assertEquals(1, snippets.length);
      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
      highlighterFieldMatch.setFieldMatcher(null); // restore the default matcher
    }
    ir.close();
  }
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.SortedSet;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
|
@ -144,7 +143,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
|
||||
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
|
||||
return super.getFieldHighlighter(field, query, allTerms, maxPassages);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue