mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
This commit is contained in:
commit
7f52920352
|
@ -36,6 +36,12 @@ Optimizations
|
|||
* LUCENE-7925: Collapse duplicate SHOULD or MUST clauses by summing up their
|
||||
boosts. (Adrien Grand)
|
||||
|
||||
* LUCENE-7939: MinShouldMatchSumScorer now leverages two-phase iteration in
|
||||
order to be faster when used in conjunctions. (Adrien Grand)
|
||||
|
||||
* LUCENE-7827: AnalyzingInfixSuggester doesn't create "textgrams"
|
||||
when minPrefixChars=0 (Mikhail Khludnev)
|
||||
|
||||
Bug Fixes
|
||||
|
||||
* LUCENE-7916: Prevent ArrayIndexOutOfBoundsException if ICUTokenizer is used
|
||||
|
|
|
@ -128,7 +128,12 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return new DocIdSetIterator() {
|
||||
return TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator());
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator twoPhaseIterator() {
|
||||
DocIdSetIterator approximation = new DocIdSetIterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
|
@ -154,6 +159,12 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
}
|
||||
|
||||
setDocAndFreq();
|
||||
// It would be correct to return doNextCandidate() at this point but if you
|
||||
// call nextDoc as opposed to advance, it probably means that you really
|
||||
// need the next match. Returning 'doc' here would lead to a similar
|
||||
// iteration over sub postings overall except that the decision making would
|
||||
// happen at a higher level where more abstractions are involved and
|
||||
// benchmarks suggested it causes a significant performance hit.
|
||||
return doNext();
|
||||
}
|
||||
|
||||
|
@ -181,7 +192,7 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
}
|
||||
|
||||
setDocAndFreq();
|
||||
return doNext();
|
||||
return doNextCandidate();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -189,6 +200,30 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
return cost;
|
||||
}
|
||||
};
|
||||
return new TwoPhaseIterator(approximation) {
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
while (freq < minShouldMatch) {
|
||||
assert freq > 0;
|
||||
if (freq + tailSize >= minShouldMatch) {
|
||||
// a match on doc is still possible, try to
|
||||
// advance scorers from the tail
|
||||
advanceTail();
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
// maximum number of scorers that matches() might advance
|
||||
return tail.length;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
private void addLead(DisiWrapper lead) {
|
||||
|
@ -250,6 +285,18 @@ final class MinShouldMatchSumScorer extends Scorer {
|
|||
return doc;
|
||||
}
|
||||
|
||||
/** Move iterators to the tail until the cumulated size of lead+tail is
|
||||
* greater than or equal to minShouldMatch */
|
||||
private int doNextCandidate() throws IOException {
|
||||
while (freq + tailSize < minShouldMatch) {
|
||||
// no match on doc is possible, move to the next potential match
|
||||
pushBackLeads();
|
||||
setDocAndFreq();
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
/** Advance all entries from the tail to know about all matches on the
|
||||
* current doc. */
|
||||
private void updateFreq() throws IOException {
|
||||
|
|
|
@ -107,6 +107,10 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
|
||||
public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||
|
||||
/** Field name used for the indexed edge n-grams, which allow short prefixes to be searched without a PrefixQuery;
|
||||
* this field is only populated when {@linkplain #minPrefixChars} is greater than zero. */
|
||||
protected final static String TEXTGRAMS_FIELD_NAME = "textgrams";
|
||||
|
||||
/** Field name used for the indexed text. */
|
||||
protected final static String TEXT_FIELD_NAME = "text";
|
||||
|
||||
|
@ -353,7 +357,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
|
||||
@Override
|
||||
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
|
||||
if (fieldName.equals("textgrams") && minPrefixChars > 0) {
|
||||
assert !(fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars == 0)
|
||||
: "no need \"textgrams\" when minPrefixChars="+minPrefixChars;
|
||||
if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
|
||||
// TODO: should use an EdgeNGramTokenFilterFactory here
|
||||
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars);
|
||||
return new TokenStreamComponents(components.getTokenizer(), filter);
|
||||
|
@ -410,7 +416,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
Document doc = new Document();
|
||||
FieldType ft = getTextFieldType();
|
||||
doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
|
||||
doc.add(new Field("textgrams", textString, ft));
|
||||
if (minPrefixChars>0) {
|
||||
doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
|
||||
}
|
||||
doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
|
||||
doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
|
||||
doc.add(new NumericDocValuesField("weight", weight));
|
||||
|
@ -474,7 +482,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
|||
protected Query getLastTokenQuery(String token) throws IOException {
|
||||
if (token.length() < minPrefixChars) {
|
||||
// The leading ngram was directly indexed:
|
||||
return new TermQuery(new Term("textgrams", token));
|
||||
return new TermQuery(new Term(TEXTGRAMS_FIELD_NAME, token));
|
||||
}
|
||||
|
||||
return new PrefixQuery(new Term(TEXT_FIELD_NAME, token));
|
||||
|
|
|
@ -57,6 +57,8 @@ Upgrade Notes
|
|||
If a reporter configures the group="cluster" attribute then please also configure the
|
||||
class="org.apache.solr.metrics.reporters.solr.SolrClusterReporter" attribute.
|
||||
|
||||
* SOLR-11254: the abstract DocTransformer class now has an abstract score-less transform method variant.
|
||||
|
||||
New Features
|
||||
----------------------
|
||||
|
||||
|
@ -158,6 +160,8 @@ Other Changes
|
|||
|
||||
* SOLR-11240: Raise UnInvertedField internal limit. (Toke Eskildsen)
|
||||
|
||||
* SOLR-11254: Add score-less (abstract) DocTransformer.transform method. (Christine Poerschke)
|
||||
|
||||
================== 7.0.0 ==================
|
||||
|
||||
Versions of Major Components
|
||||
|
|
|
@ -255,13 +255,24 @@ public class LTRFeatureLoggerTransformerFactory extends TransformerFactory {
|
|||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score)
|
||||
throws IOException {
|
||||
implTransform(doc, docid, new Float(score));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid)
|
||||
throws IOException {
|
||||
implTransform(doc, docid, 0.0f);
|
||||
}
|
||||
|
||||
private void implTransform(SolrDocument doc, int docid, Float score)
|
||||
throws IOException {
|
||||
Object fv = featureLogger.getFeatureVector(docid, scoringQuery, searcher);
|
||||
if (fv == null) { // FV for this document was not in the cache
|
||||
fv = featureLogger.makeFeatureVector(
|
||||
LTRRescorer.extractFeaturesInfo(
|
||||
modelWeight,
|
||||
docid,
|
||||
(docsWereNotReranked ? new Float(score) : null),
|
||||
(docsWereNotReranked ? score : null),
|
||||
leafContexts));
|
||||
}
|
||||
|
||||
|
|
|
@ -265,7 +265,7 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
throw new SolrException(ErrorCode.INVALID_STATE, "Expected ADD or UPDATE_INPLACE. Got: " + oper);
|
||||
}
|
||||
if (transformer!=null) {
|
||||
transformer.transform(doc, -1, 0); // unknown docID
|
||||
transformer.transform(doc, -1); // unknown docID
|
||||
}
|
||||
docList.add(doc);
|
||||
break;
|
||||
|
@ -314,7 +314,7 @@ public class RealTimeGetComponent extends SearchComponent
|
|||
resultContext = new RTGResultContext(rsp.getReturnFields(), searcherInfo.getSearcher(), req);
|
||||
transformer.setContext(resultContext);
|
||||
}
|
||||
transformer.transform(doc, docid, 0);
|
||||
transformer.transform(doc, docid);
|
||||
}
|
||||
docList.add(doc);
|
||||
}
|
||||
|
|
|
@ -613,7 +613,7 @@ public class TermsComponent extends SearchComponent {
|
|||
continue;
|
||||
}
|
||||
|
||||
// Since splitTerms is already sorted, this array will also be sorted
|
||||
// Since splitTerms is already sorted, this array will also be sorted. NOTE: this may not be true, it depends on readableToIndexed.
|
||||
Term[] terms = new Term[splitTerms.length];
|
||||
for (int i = 0; i < splitTerms.length; i++) {
|
||||
terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
|
||||
|
|
|
@ -167,7 +167,11 @@ public class DocsStreamer implements Iterator<SolrDocument> {
|
|||
if (transformer != null) {
|
||||
boolean doScore = rctx.wantsScores();
|
||||
try {
|
||||
transformer.transform(sdoc, id, doScore ? docIterator.score() : 0);
|
||||
if (doScore) {
|
||||
transformer.transform(sdoc, id, docIterator.score());
|
||||
} else {
|
||||
transformer.transform(sdoc, id);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error applying transformer", e);
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ public abstract class BaseEditorialTransformer extends DocTransformer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
//this only gets added if QueryElevationParams.MARK_EXCLUDED is true
|
||||
Set<String> ids = getIdSet();
|
||||
if (ids != null && ids.isEmpty() == false) {
|
||||
|
|
|
@ -123,7 +123,7 @@ class ChildDocTransformer extends DocTransformer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
|
||||
FieldType idFt = idField.getType();
|
||||
Object parentIdField = doc.getFirstValue(idField.getName());
|
||||
|
|
|
@ -51,7 +51,7 @@ class DocIdAugmenter extends DocTransformer
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
assert -1 <= docid;
|
||||
doc.setField( name, docid );
|
||||
}
|
||||
|
|
|
@ -73,13 +73,29 @@ public abstract class DocTransformer {
|
|||
* If implementations require a valid docId and index access, the {@link #needsSolrIndexSearcher}
|
||||
* method must return true
|
||||
*
|
||||
* Default implementation calls {@link #transform(SolrDocument, int)}.
|
||||
*
|
||||
* @param doc The document to alter
|
||||
* @param docid The Lucene internal doc id, or -1 in cases where the <code>doc</code> did not come from the index
|
||||
* @param score the score for this document
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
* @see #needsSolrIndexSearcher
|
||||
*/
|
||||
public abstract void transform(SolrDocument doc, int docid, float score) throws IOException;
|
||||
public void transform(SolrDocument doc, int docid, float score) throws IOException {
|
||||
transform(doc, docid);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is where implementations do the actual work.
|
||||
* If implementations require a valid docId and index access, the {@link #needsSolrIndexSearcher}
|
||||
* method must return true
|
||||
*
|
||||
* @param doc The document to alter
|
||||
* @param docid The Lucene internal doc id, or -1 in cases where the <code>doc</code> did not come from the index
|
||||
* @throws IOException If there is a low-level I/O error.
|
||||
* @see #needsSolrIndexSearcher
|
||||
*/
|
||||
public abstract void transform(SolrDocument doc, int docid) throws IOException;
|
||||
|
||||
/**
|
||||
* When a transformer needs access to fields that are not automatically derived from the
|
||||
|
|
|
@ -77,6 +77,13 @@ public class DocTransformers extends DocTransformer
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid) throws IOException {
|
||||
for( DocTransformer a : children ) {
|
||||
a.transform( doc, docid);
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if and only if at least 1 child transformer returns true */
|
||||
@Override
|
||||
public boolean needsSolrIndexSearcher() {
|
||||
|
|
|
@ -107,7 +107,7 @@ public class ExplainAugmenterFactory extends TransformerFactory
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
if( context != null && context.getQuery() != null ) {
|
||||
try {
|
||||
Explanation exp = context.getSearcher().explain(context.getQuery(), docid);
|
||||
|
|
|
@ -131,7 +131,7 @@ public class GeoTransformerFactory extends TransformerFactory
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) throws IOException {
|
||||
public void transform(SolrDocument doc, int docid) throws IOException {
|
||||
int leafOrd = ReaderUtil.subIndex(docid, context.getSearcher().getTopReaderContext().leaves());
|
||||
LeafReaderContext ctx = context.getSearcher().getTopReaderContext().leaves().get(leafOrd);
|
||||
ShapeValues values = shapes.getValues(ctx);
|
||||
|
@ -148,7 +148,7 @@ public class GeoTransformerFactory extends TransformerFactory
|
|||
return new DocTransformer() {
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) throws IOException {
|
||||
public void transform(SolrDocument doc, int docid) throws IOException {
|
||||
Object val = doc.remove(updater.field);
|
||||
if(val!=null) {
|
||||
updater.setValue(doc, val);
|
||||
|
|
|
@ -107,7 +107,7 @@ public class RawValueTransformerFactory extends TransformerFactory
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
Object val = doc.remove(field);
|
||||
if(val==null) {
|
||||
return;
|
||||
|
|
|
@ -44,7 +44,7 @@ public class RenameFieldTransformer extends DocTransformer
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
Object v = (copy)?doc.get(from) : doc.remove( from );
|
||||
if( v != null ) {
|
||||
doc.setField(to, v);
|
||||
|
|
|
@ -44,4 +44,9 @@ public class ScoreAugmenter extends DocTransformer {
|
|||
doc.setField( name, score );
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
transform(doc, docid, 0.0f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -321,7 +321,7 @@ class SubQueryAugmenter extends DocTransformer {
|
|||
public boolean needsSolrIndexSearcher() { return false; }
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
|
||||
final SolrParams docWithDeprefixed = SolrParams.wrapDefaults(
|
||||
new DocRowParams(doc, prefix, separator), baseSubParams);
|
||||
|
|
|
@ -96,7 +96,7 @@ class ValueAugmenter extends DocTransformer
|
|||
}
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
doc.setField( name, value );
|
||||
}
|
||||
}
|
||||
|
|
|
@ -77,7 +77,7 @@ public class ValueSourceAugmenter extends DocTransformer
|
|||
List<LeafReaderContext> readerContexts;
|
||||
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) {
|
||||
public void transform(SolrDocument doc, int docid) {
|
||||
// This is only good for random-access functions
|
||||
|
||||
try {
|
||||
|
|
|
@ -105,7 +105,7 @@ public class TestCustomDocTransformer extends SolrTestCaseJ4 {
|
|||
* This transformer simply concatenates the values of multiple fields
|
||||
*/
|
||||
@Override
|
||||
public void transform(SolrDocument doc, int docid, float score) throws IOException {
|
||||
public void transform(SolrDocument doc, int docid) throws IOException {
|
||||
str.setLength(0);
|
||||
for(String s : extra) {
|
||||
String v = getAsString(s, doc);
|
||||
|
|
|
@ -59,14 +59,14 @@ If you do not specify the type of replica when it is created, it will be NRT typ
|
|||
There are three combinations of replica types that are recommended:
|
||||
|
||||
* All NRT replicas
|
||||
* All PULL replicas
|
||||
* All TLOG replicas
|
||||
* TLOG replicas with PULL replicas
|
||||
|
||||
==== All NRT Replicas
|
||||
|
||||
Use this for small to medium clusters, or even big clusters where the update (index) throughput is not too high. NRT is the only type of replica that supports soft-commits, so also use this combination when NearRealTime is needed.
|
||||
|
||||
==== All PULL Replicas
|
||||
==== All TLOG Replicas
|
||||
|
||||
Use this combination if NearRealTime is not needed and the number of replicas per shard is high, but you still want all replicas to be able to handle update requests.
|
||||
|
||||
|
|
Loading…
Reference in New Issue