Merge remote-tracking branch 'origin/master'

Committed by Noble Paul on 2016-12-07 18:42:39 +05:30
commit 10500c894d
15 changed files with 649 additions and 105 deletions

View File

@ -121,6 +121,10 @@ Improvements
control how text is analyzed and converted into a query (Matt Weber
via Mike McCandless)
* LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't
necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
Optimizations
* LUCENE-7568: Optimize merging when index sorting is used but the
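
A minimal sketch of the new LUCENE-7575 knob described above, assuming an existing searcher, analyzer, query, and topDocs (all placeholder names):

UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
// requireFieldMatch == false: query terms against any field may highlight this field
highlighter.setFieldMatcher(fieldName -> true);
String[] snippets = highlighter.highlight("body", query, topDocs);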

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.FilteringTokenFilter;
@ -49,7 +50,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
private final LeafReader leafReader;
private final CharacterRunAutomaton preMemIndexFilterAutomaton;
public MemoryIndexOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
public MemoryIndexOffsetStrategy(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
CharacterRunAutomaton[] automata, Analyzer analyzer,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
super(field, extractedTerms, phraseHelper, automata, analyzer);
@ -57,13 +58,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
// preFilter for MemoryIndex
preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, phraseHelper, multiTermQueryRewrite);
preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite);
}
/**
* Build one {@link CharacterRunAutomaton} matching any term the query might match.
*/
private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher,
BytesRef[] terms,
CharacterRunAutomaton[] automata,
PhraseHelper strictPhrases,
Function<Query, Collection<Query>> multiTermQueryRewrite) {
@ -74,7 +76,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
Collections.addAll(allAutomata, automata);
for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
Collections.addAll(allAutomata,
MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan
}
if (allAutomata.size() == 1) {
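
For context, buildCombinedAutomaton unions the term automata, the multi-term automata, and the span-derived automata into one pre-filter for the MemoryIndex. A self-contained sketch of that union idea, using hypothetical terms "foo" and "bar":

import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.Operations;

Automaton union = Operations.union(Automata.makeString("foo"), Automata.makeString("bar"));
CharacterRunAutomaton preFilter = new CharacterRunAutomaton(union);
// preFilter.run("foo") == true, preFilter.run("bar") == true, preFilter.run("baz") == false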

View File

@ -22,6 +22,7 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
@ -56,50 +57,52 @@ class MultiTermHighlighting {
}
/**
* Extracts all MultiTermQueries for {@code field}, and returns equivalent
* automata that will match terms.
* Extracts MultiTermQueries that match the provided field predicate.
* Returns equivalent automata that will match terms.
*/
public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
public static CharacterRunAutomaton[] extractAutomata(Query query,
Predicate<String> fieldMatcher,
boolean lookInSpan,
Function<Query, Collection<Query>> preRewriteFunc) {
List<CharacterRunAutomaton> list = new ArrayList<>();
Collection<Query> customSubQueries = preRewriteFunc.apply(query);
if (customSubQueries != null) {
for (Query sub : customSubQueries) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (query instanceof BooleanQuery) {
for (BooleanClause clause : (BooleanQuery) query) {
if (!clause.isProhibited()) {
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
}
}
} else if (query instanceof ConstantScoreQuery) {
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (query instanceof DisjunctionMaxQuery) {
for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanOrQuery) {
for (Query sub : ((SpanOrQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNearQuery) {
for (Query sub : ((SpanNearQuery) query).getClauses()) {
list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
}
} else if (lookInSpan && query instanceof SpanNotQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan,
preRewriteFunc)));
} else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
lookInSpan, preRewriteFunc)));
list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
fieldMatcher, lookInSpan, preRewriteFunc)));
} else if (query instanceof AutomatonQuery) {
final AutomatonQuery aq = (AutomatonQuery) query;
if (aq.getField().equals(field)) {
if (fieldMatcher.test(aq.getField())) {
list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
@Override
public String toString() {
@ -110,7 +113,7 @@ class MultiTermHighlighting {
} else if (query instanceof PrefixQuery) {
final PrefixQuery pq = (PrefixQuery) query;
Term prefix = pq.getPrefix();
if (prefix.field().equals(field)) {
if (fieldMatcher.test(prefix.field())) {
list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
Automata.makeAnyString())) {
@Override
@ -121,7 +124,7 @@ class MultiTermHighlighting {
}
} else if (query instanceof FuzzyQuery) {
final FuzzyQuery fq = (FuzzyQuery) query;
if (fq.getField().equals(field)) {
if (fieldMatcher.test(fq.getField())) {
String utf16 = fq.getTerm().text();
int termText[] = new int[utf16.codePointCount(0, utf16.length())];
for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
@ -142,7 +145,7 @@ class MultiTermHighlighting {
}
} else if (query instanceof TermRangeQuery) {
final TermRangeQuery tq = (TermRangeQuery) query;
if (tq.getField().equals(field)) {
if (fieldMatcher.test(tq.getField())) {
final CharsRef lowerBound;
if (tq.getLowerTerm() == null) {
lowerBound = null;
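
The recurring change in this file is mechanical: every exact field.equals(...) check becomes fieldMatcher.test(...). The predicate subsumes the old behavior and enables the new ones; the field names below are hypothetical:

Predicate<String> exact = "body"::equals;        // old behavior, requireFieldMatch == true
Predicate<String> any = f -> true;               // requireFieldMatch == false
Predicate<String> alias = f -> f.equals("body") || f.equals("body_en"); // custom aliasing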

View File

@ -16,17 +16,50 @@
*/
package org.apache.lucene.search.uhighlight;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.function.Predicate;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.highlight.WeightedSpanTerm;
import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.search.spans.SpanCollector;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.*;
import java.util.function.Function;
/**
* Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly).
* This is a stateful class holding information about the query, but it can be (and is) re-used across highlighting
@ -40,7 +73,7 @@ import java.util.function.Function;
public class PhraseHelper {
public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
spanQuery -> null, query -> null, true);
(s) -> false, spanQuery -> null, query -> null, true);
//TODO it seems this ought to be a general thing on Spans?
private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@ -59,10 +92,11 @@ public class PhraseHelper {
}
};
private final String fieldName; // if non-null, only look at queries/terms for this field
private final String fieldName;
private final Set<Term> positionInsensitiveTerms; // (TermQuery terms)
private final Set<SpanQuery> spanQueries;
private final boolean willRewrite;
private final Predicate<String> fieldMatcher;
/**
* Constructor.
@ -73,14 +107,15 @@ public class PhraseHelper {
* to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
* {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
* usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
* {@code fieldMatcher} is the field-name predicate used to select the query parts that are extracted for highlighting.
*/
public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
Function<Query, Collection<Query>> preExtractRewriteFunction,
boolean ignoreQueriesNeedingRewrite) {
this.fieldName = field; // if null then don't require field match
this.fieldName = field;
this.fieldMatcher = fieldMatcher;
// filter terms to those we want
positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
// requireFieldMatch optional
positionInsensitiveTerms = new FieldFilteringTermSet();
spanQueries = new HashSet<>();
// TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
@ -131,11 +166,11 @@ public class PhraseHelper {
@Override
protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery,
float boost) throws IOException {
if (field != null) {
// if this span query isn't for this field, skip it.
Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
collectSpanQueryFields(spanQuery, fieldNameSet);
if (!fieldNameSet.contains(field)) {
// if this span query isn't for this field, skip it.
Set<String> fieldNameSet = new HashSet<>();//TODO reuse. note: almost always size 1
collectSpanQueryFields(spanQuery, fieldNameSet);
for (String spanField : fieldNameSet) {
if (!fieldMatcher.test(spanField)) {
return;
}
}
@ -190,10 +225,11 @@ public class PhraseHelper {
if (spanQueries.isEmpty()) {
return Collections.emptyMap();
}
final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
// for each SpanQuery, collect the member spans into a map.
Map<BytesRef, Spans> result = new HashMap<>();
for (SpanQuery spanQuery : spanQueries) {
getTermToSpans(spanQuery, leafReader.getContext(), doc, result);
getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
}
return result;
}
@ -203,15 +239,14 @@ public class PhraseHelper {
int doc, Map<BytesRef, Spans> result)
throws IOException {
// note: in WSTE there was some field specific looping that seemed pointless so that isn't here.
final IndexSearcher searcher = new IndexSearcher(readerContext);
final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
searcher.setQueryCache(null);
if (willRewrite) {
spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done
}
// Get the underlying query terms
TreeSet<Term> termSet = new TreeSet<>(); // sorted so we can loop over results in order shortly...
TreeSet<Term> termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly...
searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet);//needsScores==false
// Get Spans by running the query against the reader
@ -240,9 +275,6 @@ public class PhraseHelper {
for (final Term queryTerm : termSet) {
// note: we expect that at least one query term will pass these filters. This is because the collected
// spanQuery list was already filtered by these conditions.
if (fieldName != null && fieldName.equals(queryTerm.field()) == false) {
continue;
}
if (positionInsensitiveTerms.contains(queryTerm)) {
continue;
}
@ -375,19 +407,17 @@ public class PhraseHelper {
}
/**
* Simple HashSet that filters out Terms not matching a desired field on {@code add()}.
* Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}.
*/
private static class FieldFilteringTermHashSet extends HashSet<Term> {
private final String field;
FieldFilteringTermHashSet(String field) {
this.field = field;
}
private class FieldFilteringTermSet extends TreeSet<Term> {
@Override
public boolean add(Term term) {
if (term.field().equals(field)) {
return super.add(term);
if (fieldMatcher.test(term.field())) {
if (term.field().equals(fieldName)) {
return super.add(term);
} else {
return super.add(new Term(fieldName, term.bytes()));
}
} else {
return false;
}
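
To make the add() logic above concrete: accepted terms from other fields are re-keyed onto the helper's own fieldName, so downstream code sees a single field. A standalone walk-through, assuming fieldName == "body" and a matcher that also accepts "title":

String fieldName = "body";
Predicate<String> fieldMatcher = f -> f.equals("body") || f.equals("title");
TreeSet<Term> set = new TreeSet<>();
for (Term term : Arrays.asList(new Term("body", "foo"),
                               new Term("title", "foo"),
                               new Term("other", "foo"))) {
  if (fieldMatcher.test(term.field())) { // "other" is rejected here
    set.add(term.field().equals(fieldName) ? term : new Term(fieldName, term.bytes()));
  }
}
// set now holds the single Term("body", "foo")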
@ -499,6 +529,64 @@ public class PhraseHelper {
}
}
/**
* This reader will just delegate every call to a single field in the wrapped
* LeafReader. This way we ensure that all queries going through this reader target the same field.
*/
static final class SingleFieldFilterLeafReader extends FilterLeafReader {
final String fieldName;
SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
super(in);
this.fieldName = fieldName;
}
@Override
public FieldInfos getFieldInfos() {
throw new UnsupportedOperationException();
}
@Override
public Fields fields() throws IOException {
return new FilterFields(super.fields()) {
@Override
public Terms terms(String field) throws IOException {
return super.terms(fieldName);
}
@Override
public Iterator<String> iterator() {
return Collections.singletonList(fieldName).iterator();
}
@Override
public int size() {
return 1;
}
};
}
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
return super.getNumericDocValues(fieldName);
}
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
return super.getBinaryDocValues(fieldName);
}
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
return super.getSortedDocValues(fieldName);
}
@Override
public NumericDocValues getNormValues(String field) throws IOException {
return super.getNormValues(fieldName);
}
}
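
In effect, the wrapper above makes every field lookup resolve to the one configured field, so spans extracted under other field names can still be positioned. Illustrative only, since the class is package-private:

// With fieldName == "body":
LeafReader filtered = new SingleFieldFilterLeafReader(leafReader, "body");
Terms terms = filtered.fields().terms("title"); // actually returns the "body" terms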
/**
* A Spans based on a list of cached spans for one doc. It is pre-positioned to this doc.
*/

View File

@ -24,6 +24,7 @@ import java.util.Arrays;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@ -31,6 +32,7 @@ import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.function.Supplier;
import org.apache.lucene.analysis.Analyzer;
@ -58,7 +60,6 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
/**
@ -119,13 +120,13 @@ public class UnifiedHighlighter {
private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy
// private boolean defaultRequireFieldMatch = true; TODO
private int maxLength = DEFAULT_MAX_LENGTH;
// BreakIterator is stateful so we use a Supplier factory method
private Supplier<BreakIterator> defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT);
private Predicate<String> defaultFieldMatcher;
private PassageScorer defaultScorer = new PassageScorer();
private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@ -140,8 +141,8 @@ public class UnifiedHighlighter {
/**
* Calls {@link Weight#extractTerms(Set)} on an empty index for the query.
*/
protected static SortedSet<Term> extractTerms(Query query) throws IOException {
SortedSet<Term> queryTerms = new TreeSet<>();
protected static Set<Term> extractTerms(Query query) throws IOException {
Set<Term> queryTerms = new HashSet<>();
EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
return queryTerms;
}
@ -197,6 +198,10 @@ public class UnifiedHighlighter {
this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
}
public void setFieldMatcher(Predicate<String> predicate) {
this.defaultFieldMatcher = predicate;
}
/**
* Returns whether {@link MultiTermQuery} derivatives will be highlighted. By default it's enabled. MTQ
* highlighting can be expensive, particularly when using offsets in postings.
@ -220,6 +225,18 @@ public class UnifiedHighlighter {
return defaultPassageRelevancyOverSpeed;
}
/**
* Returns the predicate to use for extracting the query part that must be highlighted.
* By default, only query parts that target the current field are kept (AKA requireFieldMatch).
*/
protected Predicate<String> getFieldMatcher(String field) {
if (defaultFieldMatcher != null) {
return defaultFieldMatcher;
} else {
// requireFieldMatch = true
return (qf) -> field.equals(qf);
}
}
/**
* The maximum content size to process. Content will be truncated to this size before highlighting. Typically
@ -548,7 +565,7 @@ public class UnifiedHighlighter {
copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params
// Init field highlighters (where most of the highlight logic lives, and on a per field basis)
SortedSet<Term> queryTerms = extractTerms(query);
Set<Term> queryTerms = extractTerms(query);
FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length];
int numTermVectors = 0;
int numPostings = 0;
@ -718,13 +735,13 @@ public class UnifiedHighlighter {
getClass().getSimpleName() + " without an IndexSearcher.");
}
Objects.requireNonNull(content, "content is required");
SortedSet<Term> queryTerms = extractTerms(query);
Set<Term> queryTerms = extractTerms(query);
return getFieldHighlighter(field, query, queryTerms, maxPassages)
.highlightFieldForDoc(null, -1, content);
}
protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
BytesRef[] terms = filterExtractedTerms(field, allTerms);
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
Set<HighlightFlag> highlightFlags = getFlags(field);
PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
@ -738,19 +755,15 @@ public class UnifiedHighlighter {
getFormatter(field));
}
protected static BytesRef[] filterExtractedTerms(String field, SortedSet<Term> queryTerms) {
// TODO consider requireFieldMatch
Term floor = new Term(field, "");
Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
// Strip off the redundant field:
BytesRef[] terms = new BytesRef[fieldTerms.size()];
int termUpto = 0;
for (Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) {
// Strip off the redundant field and sort the remaining terms
SortedSet<BytesRef> filteredTerms = new TreeSet<>();
for (Term term : queryTerms) {
if (fieldMatcher.test(term.field())) {
filteredTerms.add(term.bytes());
}
}
return terms;
return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
}
protected Set<HighlightFlag> getFlags(String field) {
@ -771,14 +784,13 @@ public class UnifiedHighlighter {
boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
return highlightPhrasesStrictly ?
new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
PhraseHelper.NONE;
new PhraseHelper(query, field, getFieldMatcher(field),
this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE;
}
protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
this::preMultiTermQueryRewrite)
? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite)
: ZERO_LEN_AUTOMATA_ARRAY;
}
@ -826,7 +838,7 @@ public class UnifiedHighlighter {
//skip using a memory index since it's pure term filtering
return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
} else {
return new MemoryIndexOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(),
this::preMultiTermQueryRewrite);
}
case NONE_NEEDED:
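
A quick worked example of the reshaped filterExtractedTerms above (it is protected static, so the call site is conceptual, and the terms are hypothetical):

Set<Term> queryTerms = new HashSet<>(Arrays.asList(
    new Term("body", "zebra"), new Term("title", "apple"), new Term("body", "apple")));
BytesRef[] terms = filterExtractedTerms(f -> f.equals("body"), queryTerms);
// terms == ["apple", "zebra"]: non-matching fields dropped, field stripped, sorted in BytesRef order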

View File

@ -25,6 +25,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import org.apache.lucene.analysis.MockAnalyzer;
@ -32,14 +33,17 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
@ -959,4 +963,275 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
ir.close();
}
private IndexReader indexSomeFields() throws IOException {
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
FieldType ft = new FieldType();
ft.setIndexOptions(IndexOptions.NONE);
ft.setTokenized(false);
ft.setStored(true);
ft.freeze();
Field title = new Field("title", "", fieldType);
Field text = new Field("text", "", fieldType);
Field category = new Field("category", "", fieldType);
Document doc = new Document();
doc.add(title);
doc.add(text);
doc.add(category);
title.setStringValue("This is the title field.");
text.setStringValue("This is the text field. You can put some text if you want.");
category.setStringValue("This is the category field.");
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
return ir;
}
public void testFieldMatcherTermQuery() throws Exception {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
// requireFieldMatch=false
return (qf) -> true;
}
};
UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("text", "field")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("text", "this")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("title", "this")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("category", "this")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("category", "some")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.SHOULD);
Query query = queryBuilder.build();
// title
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// text
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// category
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
ir.close();
}
public void testFieldMatcherMultiTermQuery() throws Exception {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
// requireFieldMatch=false
return (qf) -> true;
}
};
UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
.add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
.add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
.add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
Query query = queryBuilder.build();
// title
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// text
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// category
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
ir.close();
}
public void testFieldMatcherPhraseQuery() throws Exception {
IndexReader ir = indexSomeFields();
IndexSearcher searcher = newSearcher(ir);
UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
@Override
protected Predicate<String> getFieldMatcher(String field) {
// requireFieldMatch=false
return (qf) -> true;
}
};
UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
BooleanQuery.Builder queryBuilder =
new BooleanQuery.Builder()
.add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
.add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
.add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
.add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
.add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
Query query = queryBuilder.build();
// title
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// text
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
// category
{
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertEquals(1, topDocs.totalHits);
String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
assertEquals(1, snippets.length);
assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
highlighterFieldMatch.setFieldMatcher(null);
}
ir.close();
}
}

View File

@ -23,7 +23,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
@ -144,7 +143,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
}
@Override
protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
return super.getFieldHighlighter(field, query, allTerms, maxPassages);
}

View File

@ -152,6 +152,9 @@ New Features
* SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden)
* SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core
(re)load delays on systems with misconfigured hostname/DNS (hossman)
Optimizations
----------------------
* SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have
@ -220,6 +223,8 @@ Bug Fixes
* SOLR-9616: Solr throws exception when expand=true on empty index (Timo Hund via Ishan Chattopadhyaya)
* SOLR-9832: Schema modifications are not immediately visible on the coordinating node. (Steve Rowe)
Other Changes
----------------------
@ -257,6 +262,10 @@ Other Changes
* SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta)
* SOLR-9827: ConcurrentUpdateSolrClient creates a RemoteSolrException if the remote host responded with a non-ok
response (instead of a SolrException) and includes the remote error message as part of the exception message
(Tomás Fernández Löbbe)
================== 6.3.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -119,7 +119,6 @@ import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IndexSchemaFactory;
import org.apache.solr.schema.ManagedIndexSchema;
import org.apache.solr.schema.SchemaManager;
import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.search.QParserPlugin;
import org.apache.solr.search.SolrFieldCacheMBean;
@ -2720,13 +2719,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
if (checkStale(zkClient, overlayPath, solrConfigversion) ||
checkStale(zkClient, solrConfigPath, overlayVersion) ||
checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) {
try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore));
} catch (Exception e) {
log.warn("", SolrZkClient.checkInterrupted(e));
}
log.info("core reload {}", coreName);
try {
cc.reload(coreName);

View File

@ -31,7 +31,6 @@ import java.lang.management.PlatformManagedObject;
import java.lang.management.RuntimeMXBean;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.Charset;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
@ -50,6 +49,8 @@ import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.util.RTimer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -64,8 +65,22 @@ import static org.apache.solr.common.params.CommonParams.NAME;
public class SystemInfoHandler extends RequestHandlerBase
{
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/**
* <p>
* Undocumented expert level system property to prevent doing a reverse lookup of our hostname.
* This property will be logged as a suggested workaround if any problems are noticed when doing a reverse
* lookup.
* </p>
*
* <p>
* TODO: should we refactor this (and the associated logic) into a helper method for any other places
* where DNS is used?
* </p>
* @see #initHostname
*/
private static final String PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP = "solr.dns.prevent.reverse.lookup";
// on some platforms, resolving canonical hostname can cause the thread
// to block for several seconds if nameservices aren't available
// so resolve this once per handler instance
@ -75,22 +90,42 @@ public class SystemInfoHandler extends RequestHandlerBase
private CoreContainer cc;
public SystemInfoHandler() {
super();
init();
this(null);
}
public SystemInfoHandler(CoreContainer cc) {
super();
this.cc = cc;
init();
initHostname();
}
private void init() {
private void initHostname() {
if (null != System.getProperty(PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP, null)) {
log.info("Resolving canonical hostname for local host prevented due to '{}' sysprop",
PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
hostname = null;
return;
}
RTimer timer = new RTimer();
try {
InetAddress addr = InetAddress.getLocalHost();
hostname = addr.getCanonicalHostName();
} catch (UnknownHostException e) {
//default to null
} catch (Exception e) {
log.warn("Unable to resolve canonical hostname for local host, possible DNS misconfiguration. " +
"Set the '"+PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP+"' sysprop to true on startup to " +
"prevent future lookups if DNS can not be fixed.", e);
hostname = null;
return;
}
timer.stop();
if (15000D < timer.getTime()) {
String readableTime = String.format(Locale.ROOT, "%.3f", (timer.getTime() / 1000));
log.warn("Resolving canonical hostname for local host took {} seconds, possible DNS misconfiguration. " +
"Set the '{}' sysprop to true on startup to prevent future lookups if DNS can not be fixed.",
readableTime, PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
}
}
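
The new guard reads a plain system property, so any standard JVM mechanism enables it; programmatically this is equivalent to passing -Dsolr.dns.prevent.reverse.lookup=true on the command line:

// Must be set before the handler is constructed, since initHostname() runs in the constructor
System.setProperty("solr.dns.prevent.reverse.lookup", "true");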

View File

@ -377,6 +377,18 @@ public class ManagedIndexSchemaFactory extends IndexSchemaFactory implements Sol
this.zkIndexSchemaReader = new ZkIndexSchemaReader(this, core);
ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader)loader;
zkLoader.setZkIndexSchemaReader(this.zkIndexSchemaReader);
try {
zkIndexSchemaReader.refreshSchemaFromZk(-1); // update immediately if newer is available
core.setLatestSchema(getSchema());
} catch (KeeperException e) {
String msg = "Error attempting to access " + zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName;
log.error(msg, e);
throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
} catch (InterruptedException e) {
// Restore the interrupted status
Thread.currentThread().interrupt();
log.warn("", e);
}
} else {
this.zkIndexSchemaReader = null;
}
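
The InterruptedException branch above follows the standard restore-the-interrupt idiom; swallowing the flag is a common bug, so the shape is worth isolating (doBlockingZkCall is a hypothetical stand-in):

try {
  doBlockingZkCall();
} catch (InterruptedException e) {
  Thread.currentThread().interrupt(); // restore the flag so callers can still observe it
  log.warn("interrupted", e);
}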

View File

@ -133,8 +133,8 @@ public class SchemaManager {
try {
int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(),
managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true);
req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName());
waitForOtherReplicasToUpdate(timeOut, latestVersion);
core.setLatestSchema(managedIndexSchema);
return Collections.emptyList();
} catch (ZkController.ResourceModifiedInZkException e) {
log.info("Schema was modified by another node. Retrying..");

View File

@ -0,0 +1,98 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;
import org.apache.solr.client.solrj.request.schema.SchemaRequest;
import org.apache.solr.client.solrj.response.schema.SchemaResponse;
import org.apache.solr.cloud.SolrCloudTestCase;
import org.apache.solr.common.cloud.DocCollection;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class ManagedSchemaRoundRobinCloudTest extends SolrCloudTestCase {
private static final String COLLECTION = "managed_coll";
private static final String CONFIG = "cloud-managed";
private static final String FIELD_PREFIX = "NumberedField_";
private static final int NUM_SHARDS = 2;
private static final int NUM_FIELDS_TO_ADD = 10;
@BeforeClass
public static void setupCluster() throws Exception {
System.setProperty("managed.schema.mutable", "true");
configureCluster(NUM_SHARDS).addConfig(CONFIG, configset(CONFIG)).configure();
CollectionAdminRequest.createCollection(COLLECTION, CONFIG, NUM_SHARDS, 1)
.setMaxShardsPerNode(1)
.process(cluster.getSolrClient());
cluster.getSolrClient().waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS,
(n, c) -> DocCollection.isFullyActive(n, c, NUM_SHARDS, 1));
}
@AfterClass
public static void clearSysProps() throws Exception {
System.clearProperty("managed.schema.mutable");
}
@Test
public void testAddFieldsRoundRobin() throws Exception {
List<HttpSolrClient> clients = new ArrayList<>(NUM_SHARDS);
try {
for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
clients.add(getHttpSolrClient(cluster.getJettySolrRunners().get(shardNum).getBaseUrl().toString()));
}
int shardNum = 0;
for (int fieldNum = 0 ; fieldNum < NUM_FIELDS_TO_ADD ; ++fieldNum) {
addField(clients.get(shardNum), keyValueArrayToMap("name", FIELD_PREFIX + fieldNum, "type", "string"));
if (++shardNum == NUM_SHARDS) {
shardNum = 0;
}
}
} finally {
for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
clients.get(shardNum).close();
}
}
}
private void addField(SolrClient client, Map<String,Object> field) throws Exception {
SchemaResponse.UpdateResponse addFieldResponse = new SchemaRequest.AddField(field).process(client, COLLECTION);
assertNotNull(addFieldResponse);
assertEquals(0, addFieldResponse.getStatus());
assertNull(addFieldResponse.getResponse().get("errors"));
String fieldName = field.get("name").toString();
SchemaResponse.FieldResponse fieldResponse = new SchemaRequest.Field(fieldName).process(client, COLLECTION);
assertNotNull(fieldResponse);
assertEquals(0, fieldResponse.getStatus());
}
private Map<String,Object> keyValueArrayToMap(String... alternatingKeysAndValues) {
Map<String,Object> map = new HashMap<>();
for (int i = 0 ; i < alternatingKeysAndValues.length ; i += 2)
map.put(alternatingKeysAndValues[i], alternatingKeysAndValues[i + 1]);
return map;
}
}

View File

@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.request.RequestWriter;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
@ -330,7 +329,8 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
msg.append("\n\n\n\n");
msg.append("request: ").append(method.getURI());
SolrException solrExc = new SolrException(ErrorCode.getErrorCode(statusCode), msg.toString());
SolrException solrExc;
NamedList<String> metadata = null;
// parse out the metadata from the SolrException
try {
String encoding = "UTF-8"; // default
@ -343,11 +343,21 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
NamedList<Object> resp = client.parser.processResponse(rspBody, encoding);
NamedList<Object> error = (NamedList<Object>) resp.get("error");
if (error != null) {
solrExc.setMetadata((NamedList<String>) error.get("metadata"));
metadata = (NamedList<String>) error.get("metadata");
String remoteMsg = (String) error.get("msg");
if (remoteMsg != null) {
msg.append("\nRemote error message: ");
msg.append(remoteMsg);
}
}
} catch (Exception exc) {
// don't want to fail to report error if parsing the response fails
log.warn("Failed to parse error response from " + client.getBaseURL() + " due to: " + exc);
} finally {
solrExc = new HttpSolrClient.RemoteSolrException(client.getBaseURL(), statusCode, msg.toString(), null);
if (metadata != null) {
solrExc.setMetadata(metadata);
}
}
handleError(solrExc);
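
Callers observe the new exception type through handleError; a sketch mirroring the test's ErrorTrackingConcurrentUpdateSolrClient (the URL and queue sizes are placeholders):

ConcurrentUpdateSolrClient client =
    new ConcurrentUpdateSolrClient("http://localhost:8983/solr/core1", 10, 2) {
      @Override
      public void handleError(Throwable ex) {
        // ex is now an HttpSolrClient.RemoteSolrException whose message includes
        // "Remote error message: ..." from the server, rather than a plain SolrException
        System.err.println("async update failed: " + ex.getMessage());
      }
    };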

View File

@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.embedded.SolrExampleStreamingTest.ErrorTrackingConcurrentUpdateSolrClient;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
import org.apache.solr.client.solrj.impl.NoOpResponseParser;
import org.apache.solr.client.solrj.impl.XMLResponseParser;
import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
@ -463,7 +464,11 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
concurrentClient.lastError = null;
concurrentClient.add(doc);
concurrentClient.blockUntilFinished();
assertNotNull("Should throw exception!", concurrentClient.lastError);
assertNotNull("Should throw exception!", concurrentClient.lastError);
assertEquals("Unexpected exception type",
RemoteSolrException.class, concurrentClient.lastError.getClass());
assertTrue("Unexpected exception message: " + concurrentClient.lastError.getMessage(),
concurrentClient.lastError.getMessage().contains("Remote error message: Document contains multiple values for uniqueKey"));
} else {
log.info("Ignoring update test for client:" + client.getClass().getName());
}