Mirror of https://github.com/apache/lucene.git
Commit 10500c894d: Merge remote-tracking branch 'origin/master'
lucene/CHANGES.txt
@@ -121,6 +121,10 @@ Improvements
   control how text is analyzed and converted into a query (Matt Weber
   via Mike McCandless)
 
+* LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't
+  necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
+  See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
+
 Optimizations
 
 * LUCENE-7568: Optimize merging when index sorting is used but the
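To make the LUCENE-7575 entry concrete, here is a minimal hedged sketch of the new hook. It assumes an existing IndexSearcher (searcher), Analyzer (indexAnalyzer), Query (query), and TopDocs (topDocs); the field name "body" is a placeholder. The highlight(field, query, topDocs, maxPassages) signature is the one exercised by the tests later in this diff.

```java
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;

// requireFieldMatch == false: accept query terms from any field when
// highlighting, rather than only terms that target the highlighted field.
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
highlighter.setFieldMatcher(queryField -> true);
String[] snippets = highlighter.highlight("body", query, topDocs, 2);
```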
MemoryIndexOffsetStrategy.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.function.Function;
+import java.util.function.Predicate;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.FilteringTokenFilter;
@@ -49,7 +50,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
   private final LeafReader leafReader;
   private final CharacterRunAutomaton preMemIndexFilterAutomaton;
 
-  public MemoryIndexOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
+  public MemoryIndexOffsetStrategy(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
                                    CharacterRunAutomaton[] automata, Analyzer analyzer,
                                    Function<Query, Collection<Query>> multiTermQueryRewrite) {
     super(field, extractedTerms, phraseHelper, automata, analyzer);
@@ -57,13 +58,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
     leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
     // preFilter for MemoryIndex
-    preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, phraseHelper, multiTermQueryRewrite);
+    preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite);
   }
 
   /**
    * Build one {@link CharacterRunAutomaton} matching any term the query might match.
    */
-  private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
+  private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher,
+                                                              BytesRef[] terms,
                                                               CharacterRunAutomaton[] automata,
                                                               PhraseHelper strictPhrases,
                                                               Function<Query, Collection<Query>> multiTermQueryRewrite) {
@@ -74,7 +76,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     Collections.addAll(allAutomata, automata);
     for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
       Collections.addAll(allAutomata,
-          MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
+          MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan
     }
 
     if (allAutomata.size() == 1) {
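The hunk above cuts off inside buildCombinedAutomaton, right after the size-1 fast path. Since CharacterRunAutomaton does not expose its underlying Automaton for a true union, one plausible way to combine several of them (an illustrative sketch only, not the commit's verbatim code; allAutomata is the local list from the method above) is a delegating wrapper that tries each automaton in turn:

```java
// Sketch: OR together several CharacterRunAutomaton instances. The wrapped
// Automata.makeEmpty() is a dummy; matching is done by the overridden run().
CharacterRunAutomaton combined = new CharacterRunAutomaton(Automata.makeEmpty()) {
  @Override
  public boolean run(char[] chars, int offset, int length) {
    for (CharacterRunAutomaton automaton : allAutomata) {
      if (automaton.run(chars, offset, length)) {
        return true;
      }
    }
    return false;
  }
};
```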
MultiTermHighlighting.java
@@ -22,6 +22,7 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
 import java.util.function.Function;
+import java.util.function.Predicate;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
@@ -56,50 +57,52 @@ class MultiTermHighlighting {
   }
 
   /**
-   * Extracts all MultiTermQueries for {@code field}, and returns equivalent
-   * automata that will match terms.
+   * Extracts MultiTermQueries that match the provided field predicate.
+   * Returns equivalent automata that will match terms.
    */
-  public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
+  public static CharacterRunAutomaton[] extractAutomata(Query query,
+                                                        Predicate<String> fieldMatcher,
+                                                        boolean lookInSpan,
                                                         Function<Query, Collection<Query>> preRewriteFunc) {
     List<CharacterRunAutomaton> list = new ArrayList<>();
     Collection<Query> customSubQueries = preRewriteFunc.apply(query);
     if (customSubQueries != null) {
       for (Query sub : customSubQueries) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (query instanceof BooleanQuery) {
       for (BooleanClause clause : (BooleanQuery) query) {
         if (!clause.isProhibited()) {
-          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
+          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
         }
       }
     } else if (query instanceof ConstantScoreQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
           preRewriteFunc)));
     } else if (query instanceof DisjunctionMaxQuery) {
       for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanOrQuery) {
       for (Query sub : ((SpanOrQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanNearQuery) {
       for (Query sub : ((SpanNearQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanNotQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan,
          preRewriteFunc)));
     } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan,
          preRewriteFunc)));
     } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
-          lookInSpan, preRewriteFunc)));
+      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
+          fieldMatcher, lookInSpan, preRewriteFunc)));
     } else if (query instanceof AutomatonQuery) {
       final AutomatonQuery aq = (AutomatonQuery) query;
-      if (aq.getField().equals(field)) {
+      if (fieldMatcher.test(aq.getField())) {
         list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
           @Override
           public String toString() {
@@ -110,7 +113,7 @@ class MultiTermHighlighting {
     } else if (query instanceof PrefixQuery) {
       final PrefixQuery pq = (PrefixQuery) query;
       Term prefix = pq.getPrefix();
-      if (prefix.field().equals(field)) {
+      if (fieldMatcher.test(prefix.field())) {
         list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
             Automata.makeAnyString())) {
           @Override
@@ -121,7 +124,7 @@ class MultiTermHighlighting {
       }
     } else if (query instanceof FuzzyQuery) {
       final FuzzyQuery fq = (FuzzyQuery) query;
-      if (fq.getField().equals(field)) {
+      if (fieldMatcher.test(fq.getField())) {
         String utf16 = fq.getTerm().text();
         int termText[] = new int[utf16.codePointCount(0, utf16.length())];
         for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
@@ -142,7 +145,7 @@ class MultiTermHighlighting {
       }
     } else if (query instanceof TermRangeQuery) {
       final TermRangeQuery tq = (TermRangeQuery) query;
-      if (tq.getField().equals(field)) {
+      if (fieldMatcher.test(tq.getField())) {
         final CharsRef lowerBound;
         if (tq.getLowerTerm() == null) {
           lowerBound = null;
PhraseHelper.java
@@ -16,17 +16,50 @@
  */
 package org.apache.lucene.search.uhighlight;
 
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
-import org.apache.lucene.search.spans.*;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.function.Predicate;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.highlight.WeightedSpanTerm;
+import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
+import org.apache.lucene.search.spans.SpanCollector;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanWeight;
+import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-import java.util.*;
-import java.util.function.Function;
-
 /**
  * Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly).
  * This is a stateful class holding information about the query, but it can (and is) re-used across highlighting
@@ -40,7 +73,7 @@ import java.util.function.Function;
 public class PhraseHelper {
 
   public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
-      spanQuery -> null, query -> null, true);
+      (s) -> false, spanQuery -> null, query -> null, true);
 
   //TODO it seems this ought to be a general thing on Spans?
   private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@@ -59,10 +92,11 @@ public class PhraseHelper {
     }
   };
 
-  private final String fieldName; // if non-null, only look at queries/terms for this field
+  private final String fieldName;
   private final Set<Term> positionInsensitiveTerms; // (TermQuery terms)
   private final Set<SpanQuery> spanQueries;
   private final boolean willRewrite;
+  private final Predicate<String> fieldMatcher;
 
   /**
    * Constructor.
@@ -73,14 +107,15 @@ public class PhraseHelper {
   * to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
   * {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
   * usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
+  * {@code fieldMatcher} The field name predicate to use for extracting the query part that must be highlighted.
   */
-  public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
+  public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
                       Function<Query, Collection<Query>> preExtractRewriteFunction,
                       boolean ignoreQueriesNeedingRewrite) {
-    this.fieldName = field; // if null then don't require field match
+    this.fieldName = field;
+    this.fieldMatcher = fieldMatcher;
     // filter terms to those we want
-    positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
+    // requireFieldMatch optional
+    positionInsensitiveTerms = new FieldFilteringTermSet();
     spanQueries = new HashSet<>();
 
     // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
@@ -131,11 +166,11 @@ public class PhraseHelper {
       @Override
       protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery,
                                               float boost) throws IOException {
-        if (field != null) {
-          // if this span query isn't for this field, skip it.
-          Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
-          collectSpanQueryFields(spanQuery, fieldNameSet);
-          if (!fieldNameSet.contains(field)) {
+        // if this span query isn't for this field, skip it.
+        Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
+        collectSpanQueryFields(spanQuery, fieldNameSet);
+        for (String spanField : fieldNameSet) {
+          if (!fieldMatcher.test(spanField)) {
             return;
           }
         }
@@ -190,10 +225,11 @@ public class PhraseHelper {
     if (spanQueries.isEmpty()) {
       return Collections.emptyMap();
     }
+    final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
     // for each SpanQuery, collect the member spans into a map.
     Map<BytesRef, Spans> result = new HashMap<>();
     for (SpanQuery spanQuery : spanQueries) {
-      getTermToSpans(spanQuery, leafReader.getContext(), doc, result);
+      getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
     }
     return result;
   }
@@ -203,15 +239,14 @@ public class PhraseHelper {
                               int doc, Map<BytesRef, Spans> result)
       throws IOException {
     // note: in WSTE there was some field specific looping that seemed pointless so that isn't here.
-    final IndexSearcher searcher = new IndexSearcher(readerContext);
+    final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
     searcher.setQueryCache(null);
     if (willRewrite) {
       spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done
     }
 
     // Get the underlying query terms
-    TreeSet<Term> termSet = new TreeSet<>(); // sorted so we can loop over results in order shortly...
+    TreeSet<Term> termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly...
     searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet);//needsScores==false
 
     // Get Spans by running the query against the reader
@@ -240,9 +275,6 @@ public class PhraseHelper {
     for (final Term queryTerm : termSet) {
       // note: we expect that at least one query term will pass these filters. This is because the collected
       //   spanQuery list were already filtered by these conditions.
-      if (fieldName != null && fieldName.equals(queryTerm.field()) == false) {
-        continue;
-      }
       if (positionInsensitiveTerms.contains(queryTerm)) {
         continue;
       }
@@ -375,19 +407,17 @@ public class PhraseHelper {
   }
 
   /**
-   * Simple HashSet that filters out Terms not matching a desired field on {@code add()}.
+   * Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}.
    */
-  private static class FieldFilteringTermHashSet extends HashSet<Term> {
-    private final String field;
-
-    FieldFilteringTermHashSet(String field) {
-      this.field = field;
-    }
-
+  private class FieldFilteringTermSet extends TreeSet<Term> {
     @Override
     public boolean add(Term term) {
-      if (term.field().equals(field)) {
-        return super.add(term);
+      if (fieldMatcher.test(term.field())) {
+        if (term.field().equals(fieldName)) {
+          return super.add(term);
+        } else {
+          return super.add(new Term(fieldName, term.bytes()));
+        }
       } else {
        return false;
      }
@@ -499,6 +529,64 @@ public class PhraseHelper {
     }
   }
 
+  /**
+   * This reader will just delegate every call to a single field in the wrapped
+   * LeafReader. This way we ensure that all queries going through this reader target the same field.
+   */
+  static final class SingleFieldFilterLeafReader extends FilterLeafReader {
+    final String fieldName;
+    SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
+      super(in);
+      this.fieldName = fieldName;
+    }
+
+    @Override
+    public FieldInfos getFieldInfos() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Fields fields() throws IOException {
+      return new FilterFields(super.fields()) {
+        @Override
+        public Terms terms(String field) throws IOException {
+          return super.terms(fieldName);
+        }
+
+        @Override
+        public Iterator<String> iterator() {
+          return Collections.singletonList(fieldName).iterator();
+        }
+
+        @Override
+        public int size() {
+          return 1;
+        }
+      };
+    }
+
+    @Override
+    public NumericDocValues getNumericDocValues(String field) throws IOException {
+      return super.getNumericDocValues(fieldName);
+    }
+
+    @Override
+    public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+      return super.getBinaryDocValues(fieldName);
+    }
+
+    @Override
+    public SortedDocValues getSortedDocValues(String field) throws IOException {
+      return super.getSortedDocValues(fieldName);
+    }
+
+    @Override
+    public NumericDocValues getNormValues(String field) throws IOException {
+      return super.getNormValues(fieldName);
+    }
+  }
+
   /**
    * A Spans based on a list of cached spans for one doc. It is pre-positioned to this doc.
   */
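One subtlety in the FieldFilteringTermSet above: when the predicate accepts a term from some other field, add() re-keys the term onto fieldName, so later span lookups still run against the highlighted field's content. A hedged illustration with hypothetical field names:

```java
// With fieldName = "body" and a matcher accepting both "body" and "title",
// the query term title:foo ends up stored as body:foo.
Predicate<String> fieldMatcher = f -> f.equals("body") || f.equals("title");
Term queryTerm = new Term("title", "foo");
if (fieldMatcher.test(queryTerm.field())) {
  Term stored = queryTerm.field().equals("body")
      ? queryTerm
      : new Term("body", queryTerm.bytes()); // re-keyed onto the highlighted field
}
```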
UnifiedHighlighter.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -31,6 +32,7 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.function.Predicate;
 import java.util.function.Supplier;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -58,7 +60,6 @@ import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
@@ -119,13 +120,13 @@ public class UnifiedHighlighter {
 
   private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy
 
-  // private boolean defaultRequireFieldMatch = true; TODO
-
   private int maxLength = DEFAULT_MAX_LENGTH;
 
   // BreakIterator is stateful so we use a Supplier factory method
   private Supplier<BreakIterator> defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT);
 
+  private Predicate<String> defaultFieldMatcher;
+
   private PassageScorer defaultScorer = new PassageScorer();
 
   private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@@ -140,8 +141,8 @@ public class UnifiedHighlighter {
   /**
    * Calls {@link Weight#extractTerms(Set)} on an empty index for the query.
    */
-  protected static SortedSet<Term> extractTerms(Query query) throws IOException {
-    SortedSet<Term> queryTerms = new TreeSet<>();
+  protected static Set<Term> extractTerms(Query query) throws IOException {
+    Set<Term> queryTerms = new HashSet<>();
     EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
     return queryTerms;
   }
@@ -197,6 +198,10 @@ public class UnifiedHighlighter {
     this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
   }
 
+  public void setFieldMatcher(Predicate<String> predicate) {
+    this.defaultFieldMatcher = predicate;
+  }
+
   /**
    * Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's enabled. MTQ
    * highlighting can be expensive, particularly when using offsets in postings.
@@ -220,6 +225,18 @@ public class UnifiedHighlighter {
     return defaultPassageRelevancyOverSpeed;
   }
 
+  /**
+   * Returns the predicate to use for extracting the query part that must be highlighted.
+   * By default only queries that target the current field are kept. (AKA requireFieldMatch)
+   */
+  protected Predicate<String> getFieldMatcher(String field) {
+    if (defaultFieldMatcher != null) {
+      return defaultFieldMatcher;
+    } else {
+      // requireFieldMatch = true
+      return (qf) -> field.equals(qf);
+    }
+  }
+
   /**
    * The maximum content size to process. Content will be truncated to this size before highlighting. Typically
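Because getFieldMatcher(String) receives the field being highlighted, a subclass can widen matching per field instead of globally via setFieldMatcher. A hedged sketch; the field names are placeholders, not from this commit:

```java
UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
  @Override
  protected Predicate<String> getFieldMatcher(String field) {
    // When highlighting "body", also honor query terms written against "title";
    // keep the default requireFieldMatch behavior for every other field.
    if (field.equals("body")) {
      return (qf) -> qf.equals("body") || qf.equals("title");
    }
    return super.getFieldMatcher(field);
  }
};
```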
@@ -548,7 +565,7 @@ public class UnifiedHighlighter {
     copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params
 
     // Init field highlighters (where most of the highlight logic lives, and on a per field basis)
-    SortedSet<Term> queryTerms = extractTerms(query);
+    Set<Term> queryTerms = extractTerms(query);
     FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length];
     int numTermVectors = 0;
     int numPostings = 0;
@@ -718,13 +735,13 @@ public class UnifiedHighlighter {
           getClass().getSimpleName() + " without an IndexSearcher.");
     }
     Objects.requireNonNull(content, "content is required");
-    SortedSet<Term> queryTerms = extractTerms(query);
+    Set<Term> queryTerms = extractTerms(query);
     return getFieldHighlighter(field, query, queryTerms, maxPassages)
         .highlightFieldForDoc(null, -1, content);
   }
 
-  protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
-    BytesRef[] terms = filterExtractedTerms(field, allTerms);
+  protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
+    BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
     Set<HighlightFlag> highlightFlags = getFlags(field);
     PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
     CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
@@ -738,19 +755,15 @@ public class UnifiedHighlighter {
         getFormatter(field));
   }
 
-  protected static BytesRef[] filterExtractedTerms(String field, SortedSet<Term> queryTerms) {
-    // TODO consider requireFieldMatch
-    Term floor = new Term(field, "");
-    Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
-    SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
-
-    // Strip off the redundant field:
-    BytesRef[] terms = new BytesRef[fieldTerms.size()];
-    int termUpto = 0;
-    for (Term term : fieldTerms) {
-      terms[termUpto++] = term.bytes();
+  protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) {
+    // Strip off the redundant field and sort the remaining terms
+    SortedSet<BytesRef> filteredTerms = new TreeSet<>();
+    for (Term term : queryTerms) {
+      if (fieldMatcher.test(term.field())) {
+        filteredTerms.add(term.bytes());
+      }
     }
-    return terms;
+    return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
   }
 
   protected Set<HighlightFlag> getFlags(String field) {
@@ -771,14 +784,13 @@ public class UnifiedHighlighter {
     boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
     boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
     return highlightPhrasesStrictly ?
-        new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
-        PhraseHelper.NONE;
+        new PhraseHelper(query, field, getFieldMatcher(field),
+            this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE;
   }
 
   protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
     return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
-        ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
-        this::preMultiTermQueryRewrite)
+        ? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite)
         : ZERO_LEN_AUTOMATA_ARRAY;
   }
 
@@ -826,7 +838,7 @@ public class UnifiedHighlighter {
           //skip using a memory index since it's pure term filtering
           return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
         } else {
-          return new MemoryIndexOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
+          return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(),
              this::preMultiTermQueryRewrite);
         }
       case NONE_NEEDED:
TestUnifiedHighlighter.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -32,14 +33,17 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
@@ -959,4 +963,275 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
     ir.close();
   }
 
+  private IndexReader indexSomeFields() throws IOException {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+    FieldType ft = new FieldType();
+    ft.setIndexOptions(IndexOptions.NONE);
+    ft.setTokenized(false);
+    ft.setStored(true);
+    ft.freeze();
+
+    Field title = new Field("title", "", fieldType);
+    Field text = new Field("text", "", fieldType);
+    Field category = new Field("category", "", fieldType);
+
+    Document doc = new Document();
+    doc.add(title);
+    doc.add(text);
+    doc.add(category);
+    title.setStringValue("This is the title field.");
+    text.setStringValue("This is the text field. You can put some text if you want.");
+    category.setStringValue("This is the category field.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+    return ir;
+  }
+
+  public void testFieldMatcherTermQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("text", "field")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("text", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "some")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
+
+  public void testFieldMatcherMultiTermQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
+
+  public void testFieldMatcherPhraseQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
 }
TestUnifiedHighlighterExtensibility.java
@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.SortedSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -144,7 +143,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
     }
 
     @Override
-    protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
+    protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
       return super.getFieldHighlighter(field, query, allTerms, maxPassages);
     }
 
solr/CHANGES.txt
@@ -152,6 +152,9 @@ New Features
 
 * SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden)
 
+* SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core
+  (re)load delays on systems with misconfigured hostname/DNS (hossman)
+
 Optimizations
 ----------------------
 * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
@@ -220,6 +223,8 @@ Bug Fixes
 
 * SOLR-9616: Solr throws exception when expand=true on empty index (Timo Hund via Ishan Chattopadhyaya)
 
+* SOLR-9832: Schema modifications are not immediately visible on the coordinating node. (Steve Rowe)
+
 Other Changes
 ----------------------
 
@@ -257,6 +262,10 @@ Other Changes
 
 * SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta)
 
+* SOLR-9827: ConcurrentUpdateSolrClient creates a RemoteSolrException if the remote host responded with a non-ok
+  response (instead of a SolrException) and includes the remote error message as part of the exception message
+  (Tomás Fernández Löbbe)
+
 ================== 6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
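For the SOLR-5043 entry: as the SystemInfoHandler diff further down shows, the property is read once when the handler is constructed, and any non-null value disables the lookup. A hedged sketch of setting it programmatically (for example in an embedded test); a normal install would pass it as a JVM flag at startup instead:

```java
// Any non-null value works: the handler checks System.getProperty(...) != null
// rather than parsing the value as a boolean.
System.setProperty("solr.dns.prevent.reverse.lookup", "true");
```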
SolrCore.java
@@ -119,7 +119,6 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
 import org.apache.solr.schema.ManagedIndexSchema;
-import org.apache.solr.schema.SchemaManager;
 import org.apache.solr.schema.SimilarityFactory;
 import org.apache.solr.search.QParserPlugin;
 import org.apache.solr.search.SolrFieldCacheMBean;
@@ -2720,13 +2719,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
         if (checkStale(zkClient, overlayPath, solrConfigversion) ||
             checkStale(zkClient, solrConfigPath, overlayVersion) ||
             checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) {
-
-          try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
-            solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore));
-          } catch (Exception e) {
-            log.warn("", SolrZkClient.checkInterrupted(e));
-          }
-
           log.info("core reload {}", coreName);
           try {
             cc.reload(coreName);
SystemInfoHandler.java
@@ -31,7 +31,6 @@ import java.lang.management.PlatformManagedObject;
 import java.lang.management.RuntimeMXBean;
 import java.lang.reflect.InvocationTargetException;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
 import java.nio.charset.Charset;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
@@ -50,6 +49,8 @@ import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.util.RTimer;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -65,6 +66,20 @@ public class SystemInfoHandler extends RequestHandlerBase
 {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
+  /**
+   * <p>
+   * Undocumented expert level system property to prevent doing a reverse lookup of our hostname.
+   * This property will be logged as a suggested workaround if any problems are noticed when doing reverse
+   * lookup.
+   * </p>
+   *
+   * <p>
+   * TODO: should we refactor this (and the associated logic) into a helper method for any other places
+   * where DNS is used?
+   * </p>
+   * @see #initHostname
+   */
+  private static final String PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP = "solr.dns.prevent.reverse.lookup";
+
   // on some platforms, resolving canonical hostname can cause the thread
   // to block for several seconds if nameservices aren't available
@@ -75,22 +90,42 @@ public class SystemInfoHandler extends RequestHandlerBase
   private CoreContainer cc;
 
   public SystemInfoHandler() {
-    super();
-    init();
+    this(null);
   }
 
   public SystemInfoHandler(CoreContainer cc) {
     super();
     this.cc = cc;
-    init();
+    initHostname();
   }
 
-  private void init() {
+  private void initHostname() {
+    if (null != System.getProperty(PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP, null)) {
+      log.info("Resolving canonical hostname for local host prevented due to '{}' sysprop",
+               PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
+      hostname = null;
+      return;
+    }
+
+    RTimer timer = new RTimer();
     try {
       InetAddress addr = InetAddress.getLocalHost();
       hostname = addr.getCanonicalHostName();
     } catch (UnknownHostException e) {
       //default to null
+    } catch (Exception e) {
+      log.warn("Unable to resolve canonical hostname for local host, possible DNS misconfiguration. " +
+               "Set the '"+PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP+"' sysprop to true on startup to " +
+               "prevent future lookups if DNS can not be fixed.", e);
+      hostname = null;
+      return;
     }
+    timer.stop();
+
+    if (15000D < timer.getTime()) {
+      String readableTime = String.format(Locale.ROOT, "%.3f", (timer.getTime() / 1000));
+      log.warn("Resolving canonical hostname for local host took {} seconds, possible DNS misconfiguration. " +
+               "Set the '{}' sysprop to true on startup to prevent future lookups if DNS can not be fixed.",
+               readableTime, PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
+
+    }
   }
 
ManagedIndexSchemaFactory.java
@@ -377,6 +377,18 @@ public class ManagedIndexSchemaFactory extends IndexSchemaFactory implements SolrCoreAware {
       this.zkIndexSchemaReader = new ZkIndexSchemaReader(this, core);
       ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader)loader;
       zkLoader.setZkIndexSchemaReader(this.zkIndexSchemaReader);
+      try {
+        zkIndexSchemaReader.refreshSchemaFromZk(-1); // update immediately if newer is available
+        core.setLatestSchema(getSchema());
+      } catch (KeeperException e) {
+        String msg = "Error attempting to access " + zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName;
+        log.error(msg, e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
+      } catch (InterruptedException e) {
+        // Restore the interrupted status
+        Thread.currentThread().interrupt();
+        log.warn("", e);
+      }
     } else {
       this.zkIndexSchemaReader = null;
     }
SchemaManager.java
@@ -133,8 +133,8 @@ public class SchemaManager {
     try {
       int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(),
           managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true);
-      req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName());
       waitForOtherReplicasToUpdate(timeOut, latestVersion);
+      core.setLatestSchema(managedIndexSchema);
       return Collections.emptyList();
     } catch (ZkController.ResourceModifiedInZkException e) {
       log.info("Schema was modified by another node. Retrying..");
ManagedSchemaRoundRobinCloudTest.java (new file)
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.schema.SchemaRequest;
+import org.apache.solr.client.solrj.response.schema.SchemaResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.DocCollection;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class ManagedSchemaRoundRobinCloudTest extends SolrCloudTestCase {
+  private static final String COLLECTION = "managed_coll";
+  private static final String CONFIG = "cloud-managed";
+  private static final String FIELD_PREFIX = "NumberedField_";
+  private static final int NUM_SHARDS = 2;
+  private static final int NUM_FIELDS_TO_ADD = 10;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    System.setProperty("managed.schema.mutable", "true");
+    configureCluster(NUM_SHARDS).addConfig(CONFIG, configset(CONFIG)).configure();
+    CollectionAdminRequest.createCollection(COLLECTION, CONFIG, NUM_SHARDS, 1)
+        .setMaxShardsPerNode(1)
+        .process(cluster.getSolrClient());
+    cluster.getSolrClient().waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS,
+        (n, c) -> DocCollection.isFullyActive(n, c, NUM_SHARDS, 1));
+  }
+
+  @AfterClass
+  public static void clearSysProps() throws Exception {
+    System.clearProperty("managed.schema.mutable");
+  }
+
+  @Test
+  public void testAddFieldsRoundRobin() throws Exception {
+    List<HttpSolrClient> clients = new ArrayList<>(NUM_SHARDS);
+    try {
+      for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
+        clients.add(getHttpSolrClient(cluster.getJettySolrRunners().get(shardNum).getBaseUrl().toString()));
+      }
+      int shardNum = 0;
+      for (int fieldNum = 0 ; fieldNum < NUM_FIELDS_TO_ADD ; ++fieldNum) {
+        addField(clients.get(shardNum), keyValueArrayToMap("name", FIELD_PREFIX + fieldNum, "type", "string"));
+        if (++shardNum == NUM_SHARDS) {
+          shardNum = 0;
+        }
+      }
+    } finally {
+      for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
+        clients.get(shardNum).close();
+      }
+    }
+  }
+
+  private void addField(SolrClient client, Map<String,Object> field) throws Exception {
+    SchemaResponse.UpdateResponse addFieldResponse = new SchemaRequest.AddField(field).process(client, COLLECTION);
+    assertNotNull(addFieldResponse);
+    assertEquals(0, addFieldResponse.getStatus());
+    assertNull(addFieldResponse.getResponse().get("errors"));
+    String fieldName = field.get("name").toString();
+    SchemaResponse.FieldResponse fieldResponse = new SchemaRequest.Field(fieldName).process(client, COLLECTION);
+    assertNotNull(fieldResponse);
+    assertEquals(0, fieldResponse.getStatus());
+  }
+
+  private Map<String,Object> keyValueArrayToMap(String... alternatingKeysAndValues) {
+    Map<String,Object> map = new HashMap<>();
+    for (int i = 0 ; i < alternatingKeysAndValues.length ; i += 2)
+      map.put(alternatingKeysAndValues[i], alternatingKeysAndValues[i + 1]);
+    return map;
+  }
+}
ConcurrentUpdateSolrClient.java
@@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.request.RequestWriter;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
@@ -330,7 +329,8 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
           msg.append("\n\n\n\n");
           msg.append("request: ").append(method.getURI());
 
-          SolrException solrExc = new SolrException(ErrorCode.getErrorCode(statusCode), msg.toString());
+          SolrException solrExc;
+          NamedList<String> metadata = null;
           // parse out the metadata from the SolrException
           try {
             String encoding = "UTF-8"; // default
@@ -343,11 +343,21 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
             NamedList<Object> resp = client.parser.processResponse(rspBody, encoding);
             NamedList<Object> error = (NamedList<Object>) resp.get("error");
             if (error != null) {
-              solrExc.setMetadata((NamedList<String>) error.get("metadata"));
+              metadata = (NamedList<String>) error.get("metadata");
+              String remoteMsg = (String) error.get("msg");
+              if (remoteMsg != null) {
+                msg.append("\nRemote error message: ");
+                msg.append(remoteMsg);
+              }
             }
           } catch (Exception exc) {
             // don't want to fail to report error if parsing the response fails
             log.warn("Failed to parse error response from " + client.getBaseURL() + " due to: " + exc);
+          } finally {
+            solrExc = new HttpSolrClient.RemoteSolrException(client.getBaseURL(), statusCode, msg.toString(), null);
+            if (metadata != null) {
+              solrExc.setMetadata(metadata);
+            }
           }
 
           handleError(solrExc);
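A hedged sketch of what SOLR-9827 means for callers: ConcurrentUpdateSolrClient surfaces failures through its handleError(Throwable) hook rather than by throwing from add(), so inspecting the new RemoteSolrException means overriding that hook. The URL, queue size, and thread count below are placeholders:

```java
ConcurrentUpdateSolrClient client =
    new ConcurrentUpdateSolrClient("http://localhost:8983/solr/mycoll", 10, 2) {
      @Override
      public void handleError(Throwable ex) {
        if (ex instanceof HttpSolrClient.RemoteSolrException) {
          // The message now carries the remote error text, e.g.
          // "Remote error message: Document contains multiple values for uniqueKey".
          System.err.println("Remote update failed: " + ex.getMessage());
        }
      }
    };
```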
SolrExampleTests.java
@@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
 import org.apache.solr.client.solrj.embedded.SolrExampleStreamingTest.ErrorTrackingConcurrentUpdateSolrClient;
 import org.apache.solr.client.solrj.impl.BinaryResponseParser;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
 import org.apache.solr.client.solrj.impl.NoOpResponseParser;
 import org.apache.solr.client.solrj.impl.XMLResponseParser;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
@@ -464,6 +465,10 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
       concurrentClient.add(doc);
       concurrentClient.blockUntilFinished();
       assertNotNull("Should throw exception!", concurrentClient.lastError);
+      assertEquals("Unexpected exception type",
+          RemoteSolrException.class, concurrentClient.lastError.getClass());
+      assertTrue("Unexpected exception message: " + concurrentClient.lastError.getMessage(),
+          concurrentClient.lastError.getMessage().contains("Remote error message: Document contains multiple values for uniqueKey"));
     } else {
       log.info("Ignoring update test for client:" + client.getClass().getName());
     }