SOLR-8867: fix frange/FunctionValues.getRangeScorer to not match missing values, getRangeScorer refactored to take LeafReaderContext

This commit is contained in:
yonik 2016-03-17 16:58:27 -04:00
parent faa0586b31
commit 5ea86b14c3
15 changed files with 57 additions and 47 deletions

View File

@ -154,6 +154,10 @@ API Changes
FilterCollector, FilterDirectory. And some Filter* classes in
lucene-test-framework too. (David Smiley)
* SOLR-8867: FunctionValues.getRangeScorer now takes a LeafReaderContext instead
of an IndexReader, and avoids matching documents without a value in the field
for numeric fields. (yonik)
Optimizations
* LUCENE-6891: Use prefix coding when writing points in

View File

@ -127,7 +127,7 @@ public class TestExpressionValueSource extends LuceneTestCase {
FunctionValues values = vs.getValues(new HashMap<String,Object>(), leaf);
// everything
ValueSourceScorer scorer = values.getRangeScorer(leaf.reader(), "4", "40", true, true);
ValueSourceScorer scorer = values.getRangeScorer(leaf, "4", "40", true, true);
DocIdSetIterator iter = scorer.iterator();
assertEquals(-1, iter.docID());
assertEquals(0, iter.nextDoc());
@ -136,7 +136,7 @@ public class TestExpressionValueSource extends LuceneTestCase {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iter.nextDoc());
// just the first doc
scorer = values.getRangeScorer(leaf.reader(), "4", "40", false, false);
scorer = values.getRangeScorer(leaf, "4", "40", false, false);
iter = scorer.iterator();
assertEquals(-1, scorer.docID());
assertEquals(0, iter.nextDoc());

View File

@ -21,7 +21,6 @@ import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation;
@ -34,7 +33,7 @@ import org.apache.lucene.search.Weight;
* range. The score is the float value. This can be a slow query if run by itself since it must visit all docs;
* ideally it's combined with other queries.
* It's mostly a wrapper around
* {@link FunctionValues#getRangeScorer(IndexReader, String, String, boolean, boolean)}.
* {@link FunctionValues#getRangeScorer(LeafReaderContext, String, String, boolean, boolean)}.
*
* A similar class is {@code org.apache.lucene.search.DocValuesRangeQuery} in the sandbox module. That one is
* constant scoring.
@ -162,7 +161,7 @@ public class FunctionRangeQuery extends Query {
public ValueSourceScorer scorer(LeafReaderContext context) throws IOException {
FunctionValues functionValues = valueSource.getValues(vsContext, context);
// getRangeScorer takes String args and parses them. Weird.
return functionValues.getRangeScorer(context.reader(), lowerVal, upperVal, includeLower, includeUpper);
return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
}
}
}

View File

@ -16,7 +16,7 @@
*/
package org.apache.lucene.queries.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRefBuilder;
@ -140,8 +140,8 @@ public abstract class FunctionValues {
* Yields a {@link Scorer} that matches all documents,
* and that which produces scores equal to {@link #floatVal(int)}.
*/
public ValueSourceScorer getScorer(IndexReader reader) {
return new ValueSourceScorer(reader, this) {
public ValueSourceScorer getScorer(LeafReaderContext readerContext) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
return true;
@ -157,7 +157,7 @@ public abstract class FunctionValues {
// because it needs different behavior depending on the type of fields. There is also
// a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex.
// TODO: change "reader" to LeafReaderContext
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
float lower;
float upper;
@ -176,36 +176,40 @@ public abstract class FunctionValues {
final float u = upper;
if (includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc);
return docVal >= l && docVal <= u;
}
};
}
else if (includeLower && !includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc);
return docVal >= l && docVal < u;
}
};
}
else if (!includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc);
return docVal > l && docVal <= u;
}
};
}
else {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc);
return docVal > l && docVal < u;
}

View File

@ -18,7 +18,7 @@ package org.apache.lucene.queries.function;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
@ -43,11 +43,10 @@ public abstract class ValueSourceScorer extends Scorer {
private final TwoPhaseIterator twoPhaseIterator;
private final DocIdSetIterator disi;
//TODO use LeafReaderContext not IndexReader?
protected ValueSourceScorer(IndexReader reader, FunctionValues values) {
protected ValueSourceScorer(LeafReaderContext readerContext, FunctionValues values) {
super(null);//no weight
this.values = values;
final DocIdSetIterator approximation = DocIdSetIterator.all(reader.maxDoc()); // no approximation!
final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc()); // no approximation!
this.twoPhaseIterator = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() throws IOException {

View File

@ -19,7 +19,6 @@ package org.apache.lucene.queries.function.docvalues;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues;
@ -93,7 +92,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
public abstract Object objectVal(int doc); // force subclasses to override
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
// TODO: are lowerVal and upperVal in indexed form or not?
lowerVal = lowerVal == null ? null : toTerm(lowerVal);
upperVal = upperVal == null ? null : toTerm(upperVal);
@ -121,7 +120,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
final int ll = lower;
final int uu = upper;
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
int ord = termsIndex.getOrd(doc);

View File

@ -16,7 +16,7 @@
*/
package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer;
@ -83,7 +83,7 @@ public abstract class DoubleDocValues extends FunctionValues {
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
double lower,upper;
if (lowerVal==null) {
@ -103,36 +103,40 @@ public abstract class DoubleDocValues extends FunctionValues {
if (includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc);
return docVal >= l && docVal <= u;
}
};
}
else if (includeLower && !includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc);
return docVal >= l && docVal < u;
}
};
}
else if (!includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc);
return docVal > l && docVal <= u;
}
};
}
else {
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc);
return docVal > l && docVal < u;
}

View File

@ -16,7 +16,7 @@
*/
package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer;
@ -78,7 +78,7 @@ public abstract class IntDocValues extends FunctionValues {
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
int lower,upper;
// instead of using separate comparison functions, adjust the endpoints.
@ -100,12 +100,11 @@ public abstract class IntDocValues extends FunctionValues {
final int ll = lower;
final int uu = upper;
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
int val = intVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu;
}
};

View File

@ -16,7 +16,7 @@
*/
package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer;
@ -87,7 +87,7 @@ public abstract class LongDocValues extends FunctionValues {
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
long lower,upper;
// instead of using separate comparison functions, adjust the endpoints.
@ -109,12 +109,11 @@ public abstract class LongDocValues extends FunctionValues {
final long ll = lower;
final long uu = upper;
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
long val = longVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu;
}
};

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues;
@ -119,7 +118,7 @@ public class EnumFieldSource extends FieldCacheSource {
}
@Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
Integer lower = stringValueToIntValue(lowerVal);
Integer upper = stringValueToIntValue(upperVal);
@ -140,12 +139,11 @@ public class EnumFieldSource extends FieldCacheSource {
final int ll = lower;
final int uu = upper;
return new ValueSourceScorer(reader, this) {
return new ValueSourceScorer(readerContext, this) {
@Override
public boolean matches(int doc) {
if (!exists(doc)) return false;
int val = intVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu;
}
};

View File

@ -335,6 +335,10 @@ Bug Fixes
* SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.
(Ishan Chattopadhyaya, Steve Rowe)
* SOLR-8867: {!frange} queries will now avoid matching documents without a value in the
numeric field. For more complex functions, FunctionValues.exists() must also return true
for the document to match. (yonik)
Optimizations
----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@ -63,7 +63,7 @@ public class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFi
super.doSetNextReader(context);
maxdoc = context.reader().maxDoc();
FunctionValues dv = rangeFilt.getValueSource().getValues(fcontext, context);
scorer = dv.getRangeScorer(context.reader(), rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper());
scorer = dv.getRangeScorer(context, rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper());
}
}
}

View File

@ -78,7 +78,7 @@ public class ValueSourceRangeFilter extends SolrFilter {
return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override
public DocIdSetIterator iterator() throws IOException {
Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader(), lowerVal, upperVal, includeLower, includeUpper);
Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext, lowerVal, upperVal, includeLower, includeUpper);
return scorer == null ? null : scorer.iterator();
}
@Override

View File

@ -409,9 +409,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
bq.add(q, Occur.MUST);
SchemaField sf = ulog.getVersionInfo().getVersionField();
ValueSource vs = sf.getType().getValueSource(sf, null);
ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, null, Long.toString(Math.abs(cmd.getVersion())), true, true);
ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, Long.toString(Math.abs(cmd.getVersion())), null, true, true);
FunctionRangeQuery range = new FunctionRangeQuery(filt);
bq.add(range, Occur.MUST);
bq.add(range, Occur.MUST_NOT); // formulated in the "MUST_NOT" sense so we can delete docs w/o a version (some tests depend on this...)
q = bq.build();
}

View File

@ -201,14 +201,15 @@ public class TestRangeQuery extends SolrTestCaseJ4 {
final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d"
,"foo_ti","foo_tl","foo_tf","foo_td"
};
final int l=5;
final int l=-5;
final int u=25;
createIndex(15, new DocProcessor() {
@Override
public void process(SolrInputDocument doc) {
addInt(doc, l,u, fields);
// 10% of the docs have missing values
if (r.nextInt(10)!=0) addInt(doc, l,u, fields);
}
});
assertU(commit());