mirror of https://github.com/apache/lucene.git
LUCENE-7544: UnifiedHighlighter: extension hooks for custom query handling
This commit is contained in:
parent
b02e7a902e
commit
da841be887
|
@ -67,6 +67,9 @@ Improvements
|
||||||
and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
|
and exclude arguments of SpanNotQuery via negative pre and/or post arguments.
|
||||||
(Marc Morissette via David Smiley)
|
(Marc Morissette via David Smiley)
|
||||||
|
|
||||||
|
* LUCENE-7544: UnifiedHighlighter: add extension points for handling custom queries.
|
||||||
|
(Michael Braun, David Smiley)
|
||||||
|
|
||||||
======================= Lucene 6.3.0 =======================
|
======================= Lucene 6.3.0 =======================
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
|
@ -19,8 +19,10 @@ package org.apache.lucene.search.uhighlight;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.FilteringTokenFilter;
|
import org.apache.lucene.analysis.FilteringTokenFilter;
|
||||||
|
@ -30,6 +32,7 @@ import org.apache.lucene.index.IndexReader;
|
||||||
import org.apache.lucene.index.LeafReader;
|
import org.apache.lucene.index.LeafReader;
|
||||||
import org.apache.lucene.index.Terms;
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.index.memory.MemoryIndex;
|
import org.apache.lucene.index.memory.MemoryIndex;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.automaton.Automata;
|
import org.apache.lucene.util.automaton.Automata;
|
||||||
|
@ -50,7 +53,9 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
||||||
private final LeafReader leafReader;
|
private final LeafReader leafReader;
|
||||||
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
|
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
|
||||||
|
|
||||||
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper, CharacterRunAutomaton[] automata, Analyzer analyzer) {
|
public AnalysisOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
|
||||||
|
CharacterRunAutomaton[] automata, Analyzer analyzer,
|
||||||
|
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||||
super(field, extractedTerms, phraseHelper, automata);
|
super(field, extractedTerms, phraseHelper, automata);
|
||||||
this.analyzer = analyzer;
|
this.analyzer = analyzer;
|
||||||
// Automata (Wildcards / MultiTermQuery):
|
// Automata (Wildcards / MultiTermQuery):
|
||||||
|
@ -68,7 +73,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
||||||
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
|
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
|
||||||
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
|
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader();
|
||||||
// preFilter for MemoryIndex
|
// preFilter for MemoryIndex
|
||||||
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases);
|
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, strictPhrases,
|
||||||
|
multiTermQueryRewrite);
|
||||||
} else {
|
} else {
|
||||||
memoryIndex = null;
|
memoryIndex = null;
|
||||||
leafReader = null;
|
leafReader = null;
|
||||||
|
@ -155,7 +161,8 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
||||||
*/
|
*/
|
||||||
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
|
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
|
||||||
CharacterRunAutomaton[] automata,
|
CharacterRunAutomaton[] automata,
|
||||||
PhraseHelper strictPhrases) {
|
PhraseHelper strictPhrases,
|
||||||
|
Function<Query, Collection<Query>> multiTermQueryRewrite) {
|
||||||
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
|
List<CharacterRunAutomaton> allAutomata = new ArrayList<>();
|
||||||
if (terms.length > 0) {
|
if (terms.length > 0) {
|
||||||
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
|
allAutomata.add(new CharacterRunAutomaton(Automata.makeStringUnion(Arrays.asList(terms))));
|
||||||
|
@ -163,7 +170,7 @@ public class AnalysisOffsetStrategy extends FieldOffsetStrategy {
|
||||||
Collections.addAll(allAutomata, automata);
|
Collections.addAll(allAutomata, automata);
|
||||||
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
|
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
|
||||||
Collections.addAll(allAutomata,
|
Collections.addAll(allAutomata,
|
||||||
MultiTermHighlighting.extractAutomata(spanQuery, field, true));//true==lookInSpan
|
MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allAutomata.size() == 1) {
|
if (allAutomata.size() == 1) {
|
||||||
|
|
|
@ -20,8 +20,10 @@ import java.io.Closeable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.function.Function;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
@ -69,34 +71,44 @@ class MultiTermHighlighting {
|
||||||
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
|
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
|
||||||
* automata that will match terms.
|
* automata that will match terms.
|
||||||
*/
|
*/
|
||||||
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan) {
|
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
|
||||||
|
Function<Query, Collection<Query>> preRewriteFunc) {
|
||||||
List<CharacterRunAutomaton> list = new ArrayList<>();
|
List<CharacterRunAutomaton> list = new ArrayList<>();
|
||||||
if (query instanceof BooleanQuery) {
|
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
|
||||||
|
if (customSubQueries != null) {
|
||||||
|
for (Query sub : customSubQueries) {
|
||||||
|
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||||
|
}
|
||||||
|
} else if (query instanceof BooleanQuery) {
|
||||||
for (BooleanClause clause : (BooleanQuery) query) {
|
for (BooleanClause clause : (BooleanQuery) query) {
|
||||||
if (!clause.isProhibited()) {
|
if (!clause.isProhibited()) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (query instanceof ConstantScoreQuery) {
|
} else if (query instanceof ConstantScoreQuery) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
|
||||||
|
preRewriteFunc)));
|
||||||
} else if (query instanceof DisjunctionMaxQuery) {
|
} else if (query instanceof DisjunctionMaxQuery) {
|
||||||
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
|
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||||
}
|
}
|
||||||
} else if (lookInSpan && query instanceof SpanOrQuery) {
|
} else if (lookInSpan && query instanceof SpanOrQuery) {
|
||||||
for (Query sub : ((SpanOrQuery) query).getClauses()) {
|
for (Query sub : ((SpanOrQuery) query).getClauses()) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||||
}
|
}
|
||||||
} else if (lookInSpan && query instanceof SpanNearQuery) {
|
} else if (lookInSpan && query instanceof SpanNearQuery) {
|
||||||
for (Query sub : ((SpanNearQuery) query).getClauses()) {
|
for (Query sub : ((SpanNearQuery) query).getClauses()) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
|
||||||
}
|
}
|
||||||
} else if (lookInSpan && query instanceof SpanNotQuery) {
|
} else if (lookInSpan && query instanceof SpanNotQuery) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
|
||||||
|
preRewriteFunc)));
|
||||||
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
|
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
|
||||||
|
preRewriteFunc)));
|
||||||
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
|
||||||
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field, lookInSpan)));
|
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
|
||||||
|
lookInSpan, preRewriteFunc)));
|
||||||
} else if (query instanceof AutomatonQuery) {
|
} else if (query instanceof AutomatonQuery) {
|
||||||
final AutomatonQuery aq = (AutomatonQuery) query;
|
final AutomatonQuery aq = (AutomatonQuery) query;
|
||||||
if (aq.getField().equals(field)) {
|
if (aq.getField().equals(field)) {
|
||||||
|
|
|
@ -40,7 +40,7 @@ import java.util.function.Function;
|
||||||
public class PhraseHelper {
|
public class PhraseHelper {
|
||||||
|
|
||||||
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
|
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
|
||||||
spanQuery -> null, true);
|
spanQuery -> null, query -> null, true);
|
||||||
|
|
||||||
//TODO it seems this ought to be a general thing on Spans?
|
//TODO it seems this ought to be a general thing on Spans?
|
||||||
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
|
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
|
||||||
|
@ -69,11 +69,14 @@ public class PhraseHelper {
|
||||||
* {@code rewriteQueryPred} is an extension hook to override the default choice of
|
* {@code rewriteQueryPred} is an extension hook to override the default choice of
|
||||||
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
|
* {@link WeightedSpanTermExtractor#mustRewriteQuery(SpanQuery)}. By default unknown query types are rewritten,
|
||||||
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
|
* so use this to return {@link Boolean#FALSE} if you know the query doesn't need to be rewritten.
|
||||||
|
* Similarly, {@code preExtractRewriteFunction} is also an extension hook for extract to allow different queries
|
||||||
|
* to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
|
||||||
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
|
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
|
||||||
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
|
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
|
||||||
*/
|
*/
|
||||||
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
|
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
|
||||||
boolean ignoreQueriesNeedingRewrite) {
|
Function<Query, Collection<Query>> preExtractRewriteFunction,
|
||||||
|
boolean ignoreQueriesNeedingRewrite) {
|
||||||
this.fieldName = field; // if null then don't require field match
|
this.fieldName = field; // if null then don't require field match
|
||||||
// filter terms to those we want
|
// filter terms to those we want
|
||||||
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
|
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
|
||||||
|
@ -98,6 +101,18 @@ public class PhraseHelper {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void extract(Query query, float boost, Map<String, WeightedSpanTerm> terms) throws IOException {
|
||||||
|
Collection<Query> newQueriesToExtract = preExtractRewriteFunction.apply(query);
|
||||||
|
if (newQueriesToExtract != null) {
|
||||||
|
for (Query newQuery : newQueriesToExtract) {
|
||||||
|
extract(newQuery, boost, terms);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
super.extract(query, boost, terms);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
|
protected boolean isQueryUnsupported(Class<? extends Query> clazz) {
|
||||||
if (clazz.isAssignableFrom(MultiTermQuery.class)) {
|
if (clazz.isAssignableFrom(MultiTermQuery.class)) {
|
||||||
|
|
|
@ -21,6 +21,7 @@ import java.nio.charset.StandardCharsets;
|
||||||
import java.text.BreakIterator;
|
import java.text.BreakIterator;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
import java.util.EnumSet;
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -732,7 +733,8 @@ public class UnifiedHighlighter {
|
||||||
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
|
OffsetSource offsetSource = getOptimizedOffsetSource(field, terms, phraseHelper, automata);
|
||||||
switch (offsetSource) {
|
switch (offsetSource) {
|
||||||
case ANALYSIS:
|
case ANALYSIS:
|
||||||
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
|
return new AnalysisOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
|
||||||
|
this::preMultiTermQueryRewrite);
|
||||||
case NONE_NEEDED:
|
case NONE_NEEDED:
|
||||||
return NoOpOffsetStrategy.INSTANCE;
|
return NoOpOffsetStrategy.INSTANCE;
|
||||||
case TERM_VECTORS:
|
case TERM_VECTORS:
|
||||||
|
@ -776,13 +778,14 @@ public class UnifiedHighlighter {
|
||||||
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
|
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
|
||||||
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
|
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
|
||||||
return highlightPhrasesStrictly ?
|
return highlightPhrasesStrictly ?
|
||||||
new PhraseHelper(query, field, this::requiresRewrite, !handleMultiTermQuery) :
|
new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
|
||||||
PhraseHelper.NONE;
|
PhraseHelper.NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
|
protected CharacterRunAutomaton[] getAutomata(String field, Query query, EnumSet<HighlightFlag> highlightFlags) {
|
||||||
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
|
||||||
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES))
|
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
|
||||||
|
this::preMultiTermQueryRewrite)
|
||||||
: ZERO_LEN_AUTOMATA_ARRAY;
|
: ZERO_LEN_AUTOMATA_ARRAY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -830,6 +833,32 @@ public class UnifiedHighlighter {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When highlighting phrases accurately, we may need to handle custom queries that aren't supported in the
|
||||||
|
* {@link org.apache.lucene.search.highlight.WeightedSpanTermExtractor} as called by the {@code PhraseHelper}.
|
||||||
|
* Should custom query types be needed, this method should be overriden to return a collection of queries if appropriate,
|
||||||
|
* or null if nothing to do. If the query is not custom, simply returning null will allow the default rules to apply.
|
||||||
|
*
|
||||||
|
* @param query Query to be highlighted
|
||||||
|
* @return A Collection of Query object(s) if needs to be rewritten, otherwise null.
|
||||||
|
*/
|
||||||
|
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When dealing with multi term queries / span queries, we may need to handle custom queries that aren't supported
|
||||||
|
* by the default automata extraction in {@code MultiTermHighlighting}. This can be overridden to return a collection
|
||||||
|
* of queries if appropriate, or null if nothing to do. If query is not custom, simply returning null will allow the
|
||||||
|
* default rules to apply.
|
||||||
|
*
|
||||||
|
* @param query Query to be highlighted
|
||||||
|
* @return A Collection of Query object(s) if needst o be rewritten, otherwise null.
|
||||||
|
*/
|
||||||
|
protected Collection<Query> preMultiTermQueryRewrite(Query query) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
|
private DocIdSetIterator asDocIdSetIterator(int[] sortedDocIds) {
|
||||||
return new DocIdSetIterator() {
|
return new DocIdSetIterator() {
|
||||||
int idx = -1;
|
int idx = -1;
|
||||||
|
|
|
@ -20,6 +20,8 @@ package org.apache.lucene.search.uhighlight;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
@ -56,6 +58,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
|
||||||
import org.apache.lucene.search.spans.SpanOrQuery;
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
import org.apache.lucene.search.spans.SpanQuery;
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanWeight;
|
||||||
import org.apache.lucene.store.BaseDirectoryWrapper;
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
|
@ -933,4 +936,89 @@ public class TestUnifiedHighlighterMTQ extends LuceneTestCase {
|
||||||
ir.close();
|
ir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCustomSpanQueryHighlighting() throws Exception {
|
||||||
|
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
|
||||||
|
Document doc = new Document();
|
||||||
|
doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
|
||||||
|
doc.add(newTextField("id", "id", Field.Store.YES));
|
||||||
|
|
||||||
|
iw.addDocument(doc);
|
||||||
|
IndexReader ir = iw.getReader();
|
||||||
|
iw.close();
|
||||||
|
|
||||||
|
IndexSearcher searcher = newSearcher(ir);
|
||||||
|
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||||
|
@Override
|
||||||
|
protected List<Query> preMultiTermQueryRewrite(Query query) {
|
||||||
|
if (query instanceof MyWrapperSpanQuery) {
|
||||||
|
return Collections.singletonList(((MyWrapperSpanQuery) query).originalQuery);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int docId = searcher.search(new TermQuery(new Term("id", "id")), 1).scoreDocs[0].doc;
|
||||||
|
|
||||||
|
WildcardQuery wildcardQuery = new WildcardQuery(new Term("body", "foxtr*"));
|
||||||
|
SpanMultiTermQueryWrapper wildcardQueryWrapper = new SpanMultiTermQueryWrapper<>(wildcardQuery);
|
||||||
|
|
||||||
|
SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);
|
||||||
|
|
||||||
|
BooleanQuery query = new BooleanQuery.Builder()
|
||||||
|
.add(wrappedQuery, BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
int[] docIds = new int[]{docId};
|
||||||
|
|
||||||
|
String snippets[] = highlighter.highlightFields(new String[]{"body"}, query, docIds, new int[]{2}).get("body");
|
||||||
|
assertEquals(1, snippets.length);
|
||||||
|
assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
|
||||||
|
ir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class MyWrapperSpanQuery extends SpanQuery {
|
||||||
|
|
||||||
|
private final SpanQuery originalQuery;
|
||||||
|
|
||||||
|
private MyWrapperSpanQuery(SpanQuery originalQuery) {
|
||||||
|
this.originalQuery = Objects.requireNonNull(originalQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getField() {
|
||||||
|
return originalQuery.getField();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "(Wrapper[" + originalQuery.toString(field)+"])";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
|
return originalQuery.createWeight(searcher, needsScores, boost);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
Query newOriginalQuery = originalQuery.rewrite(reader);
|
||||||
|
if (newOriginalQuery != originalQuery) {
|
||||||
|
return new MyWrapperSpanQuery((SpanQuery)newOriginalQuery);
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null || getClass() != o.getClass()) return false;
|
||||||
|
return originalQuery.equals(((MyWrapperSpanQuery)o).originalQuery);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return originalQuery.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
package org.apache.lucene.search.uhighlight;
|
package org.apache.lucene.search.uhighlight;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
@ -29,14 +31,17 @@ import org.apache.lucene.index.RandomIndexWriter;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BoostQuery;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.MatchNoDocsQuery;
|
import org.apache.lucene.search.MatchNoDocsQuery;
|
||||||
import org.apache.lucene.search.MultiPhraseQuery;
|
import org.apache.lucene.search.MultiPhraseQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.PrefixQuery;
|
import org.apache.lucene.search.PrefixQuery;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Sort;
|
import org.apache.lucene.search.Sort;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
|
import org.apache.lucene.search.Weight;
|
||||||
import org.apache.lucene.search.WildcardQuery;
|
import org.apache.lucene.search.WildcardQuery;
|
||||||
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
|
||||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
@ -401,4 +406,76 @@ public class TestUnifiedHighlighterStrictPhrases extends LuceneTestCase {
|
||||||
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
|
Object o = highlighter.highlightWithoutSearcher("body", new MatchNoDocsQuery(), content, 1);
|
||||||
assertEquals(content, o);
|
assertEquals(content, o);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPreSpanQueryRewrite() throws IOException {
|
||||||
|
indexWriter.addDocument(newDoc("There is no accord and satisfaction with this - Consideration of the accord is arbitrary."));
|
||||||
|
initReaderSearcherHighlighter();
|
||||||
|
|
||||||
|
highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
|
||||||
|
@Override
|
||||||
|
protected Collection<Query> preSpanQueryRewrite(Query query) {
|
||||||
|
if (query instanceof MyQuery) {
|
||||||
|
return Collections.singletonList(((MyQuery)query).wrapped);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
highlighter.setHighlightPhrasesStrictly(true);
|
||||||
|
|
||||||
|
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
|
||||||
|
Query phraseQuery = new BoostQuery(new PhraseQuery("body", "accord", "and", "satisfaction"), 2.0f);
|
||||||
|
Query oredTerms = new BooleanQuery.Builder()
|
||||||
|
.setMinimumNumberShouldMatch(2)
|
||||||
|
.add(new TermQuery(new Term("body", "accord")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("body", "satisfaction")), BooleanClause.Occur.SHOULD)
|
||||||
|
.add(new TermQuery(new Term("body", "consideration")), BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
Query proximityBoostingQuery = new MyQuery(oredTerms);
|
||||||
|
Query totalQuery = bqBuilder
|
||||||
|
.add(phraseQuery, BooleanClause.Occur.SHOULD)
|
||||||
|
.add(proximityBoostingQuery, BooleanClause.Occur.SHOULD)
|
||||||
|
.build();
|
||||||
|
TopDocs topDocs = searcher.search(totalQuery, 10, Sort.INDEXORDER);
|
||||||
|
assertEquals(1, topDocs.totalHits);
|
||||||
|
String[] snippets = highlighter.highlight("body", totalQuery, topDocs);
|
||||||
|
assertArrayEquals(new String[]{"There is no <b>accord</b> <b>and</b> <b>satisfaction</b> with this - <b>Consideration</b> of the <b>accord</b> is arbitrary."}, snippets);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class MyQuery extends Query {
|
||||||
|
|
||||||
|
private final Query wrapped;
|
||||||
|
|
||||||
|
MyQuery(Query wrapped) {
|
||||||
|
this.wrapped = wrapped;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||||
|
return wrapped.createWeight(searcher, needsScores, boost);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
Query newWrapped = wrapped.rewrite(reader);
|
||||||
|
if (newWrapped != wrapped) {
|
||||||
|
return new MyQuery(newWrapped);
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString(String field) {
|
||||||
|
return "[[["+wrapped.toString(field)+"]]]";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
return obj != null && obj.getClass() == getClass() && wrapped.equals(((MyQuery)wrapped).wrapped);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return wrapped.hashCode();
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue