SOLR-8867: fix frange/FunctionValues.getRangeScorer to not match missing values, getRangeScorer refactored to take LeafReaderContext

This commit is contained in:
yonik 2016-03-17 16:58:27 -04:00
parent faa0586b31
commit 5ea86b14c3
15 changed files with 57 additions and 47 deletions

View File

@ -154,6 +154,10 @@ API Changes
FilterCollector, FilterDirectory. And some Filter* classes in FilterCollector, FilterDirectory. And some Filter* classes in
lucene-test-framework too. (David Smiley) lucene-test-framework too. (David Smiley)
* SOLR-8867: FunctionValues.getRangeScorer now takes a LeafReaderContext instead
of an IndexReader, and avoids matching documents without a value in the field
for numeric fields. (yonik)
Optimizations Optimizations
* LUCENE-6891: Use prefix coding when writing points in * LUCENE-6891: Use prefix coding when writing points in

View File

@ -127,7 +127,7 @@ public class TestExpressionValueSource extends LuceneTestCase {
FunctionValues values = vs.getValues(new HashMap<String,Object>(), leaf); FunctionValues values = vs.getValues(new HashMap<String,Object>(), leaf);
// everything // everything
ValueSourceScorer scorer = values.getRangeScorer(leaf.reader(), "4", "40", true, true); ValueSourceScorer scorer = values.getRangeScorer(leaf, "4", "40", true, true);
DocIdSetIterator iter = scorer.iterator(); DocIdSetIterator iter = scorer.iterator();
assertEquals(-1, iter.docID()); assertEquals(-1, iter.docID());
assertEquals(0, iter.nextDoc()); assertEquals(0, iter.nextDoc());
@ -136,7 +136,7 @@ public class TestExpressionValueSource extends LuceneTestCase {
assertEquals(DocIdSetIterator.NO_MORE_DOCS, iter.nextDoc()); assertEquals(DocIdSetIterator.NO_MORE_DOCS, iter.nextDoc());
// just the first doc // just the first doc
scorer = values.getRangeScorer(leaf.reader(), "4", "40", false, false); scorer = values.getRangeScorer(leaf, "4", "40", false, false);
iter = scorer.iterator(); iter = scorer.iterator();
assertEquals(-1, scorer.docID()); assertEquals(-1, scorer.docID());
assertEquals(0, iter.nextDoc()); assertEquals(0, iter.nextDoc());

View File

@ -21,7 +21,6 @@ import java.util.Map;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
@ -34,7 +33,7 @@ import org.apache.lucene.search.Weight;
* range. The score is the float value. This can be a slow query if run by itself since it must visit all docs; * range. The score is the float value. This can be a slow query if run by itself since it must visit all docs;
* ideally it's combined with other queries. * ideally it's combined with other queries.
* It's mostly a wrapper around * It's mostly a wrapper around
* {@link FunctionValues#getRangeScorer(IndexReader, String, String, boolean, boolean)}. * {@link FunctionValues#getRangeScorer(LeafReaderContext, String, String, boolean, boolean)}.
* *
* A similar class is {@code org.apache.lucene.search.DocValuesRangeQuery} in the sandbox module. That one is * A similar class is {@code org.apache.lucene.search.DocValuesRangeQuery} in the sandbox module. That one is
* constant scoring. * constant scoring.
@ -162,7 +161,7 @@ public class FunctionRangeQuery extends Query {
public ValueSourceScorer scorer(LeafReaderContext context) throws IOException { public ValueSourceScorer scorer(LeafReaderContext context) throws IOException {
FunctionValues functionValues = valueSource.getValues(vsContext, context); FunctionValues functionValues = valueSource.getValues(vsContext, context);
// getRangeScorer takes String args and parses them. Weird. // getRangeScorer takes String args and parses them. Weird.
return functionValues.getRangeScorer(context.reader(), lowerVal, upperVal, includeLower, includeUpper); return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper);
} }
} }
} }

View File

@ -16,7 +16,7 @@
*/ */
package org.apache.lucene.queries.function; package org.apache.lucene.queries.function;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
@ -140,8 +140,8 @@ public abstract class FunctionValues {
* Yields a {@link Scorer} that matches all documents, * Yields a {@link Scorer} that matches all documents,
* and that which produces scores equal to {@link #floatVal(int)}. * and that which produces scores equal to {@link #floatVal(int)}.
*/ */
public ValueSourceScorer getScorer(IndexReader reader) { public ValueSourceScorer getScorer(LeafReaderContext readerContext) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
return true; return true;
@ -157,7 +157,7 @@ public abstract class FunctionValues {
// because it needs different behavior depending on the type of fields. There is also // because it needs different behavior depending on the type of fields. There is also
// a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex. // a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex.
// TODO: change "reader" to LeafReaderContext // TODO: change "reader" to LeafReaderContext
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
float lower; float lower;
float upper; float upper;
@ -176,36 +176,40 @@ public abstract class FunctionValues {
final float u = upper; final float u = upper;
if (includeLower && includeUpper) { if (includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc); float docVal = floatVal(doc);
return docVal >= l && docVal <= u; return docVal >= l && docVal <= u;
} }
}; };
} }
else if (includeLower && !includeUpper) { else if (includeLower && !includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc); float docVal = floatVal(doc);
return docVal >= l && docVal < u; return docVal >= l && docVal < u;
} }
}; };
} }
else if (!includeLower && includeUpper) { else if (!includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc); float docVal = floatVal(doc);
return docVal > l && docVal <= u; return docVal > l && docVal <= u;
} }
}; };
} }
else { else {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
float docVal = floatVal(doc); float docVal = floatVal(doc);
return docVal > l && docVal < u; return docVal > l && docVal < u;
} }

View File

@ -18,7 +18,7 @@ package org.apache.lucene.queries.function;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator; import org.apache.lucene.search.TwoPhaseIterator;
@ -43,11 +43,10 @@ public abstract class ValueSourceScorer extends Scorer {
private final TwoPhaseIterator twoPhaseIterator; private final TwoPhaseIterator twoPhaseIterator;
private final DocIdSetIterator disi; private final DocIdSetIterator disi;
//TODO use LeafReaderContext not IndexReader? protected ValueSourceScorer(LeafReaderContext readerContext, FunctionValues values) {
protected ValueSourceScorer(IndexReader reader, FunctionValues values) {
super(null);//no weight super(null);//no weight
this.values = values; this.values = values;
final DocIdSetIterator approximation = DocIdSetIterator.all(reader.maxDoc()); // no approximation! final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc()); // no approximation!
this.twoPhaseIterator = new TwoPhaseIterator(approximation) { this.twoPhaseIterator = new TwoPhaseIterator(approximation) {
@Override @Override
public boolean matches() throws IOException { public boolean matches() throws IOException {

View File

@ -19,7 +19,6 @@ package org.apache.lucene.queries.function.docvalues;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
@ -93,7 +92,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
public abstract Object objectVal(int doc); // force subclasses to override public abstract Object objectVal(int doc); // force subclasses to override
@Override @Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
// TODO: are lowerVal and upperVal in indexed form or not? // TODO: are lowerVal and upperVal in indexed form or not?
lowerVal = lowerVal == null ? null : toTerm(lowerVal); lowerVal = lowerVal == null ? null : toTerm(lowerVal);
upperVal = upperVal == null ? null : toTerm(upperVal); upperVal = upperVal == null ? null : toTerm(upperVal);
@ -121,7 +120,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
final int ll = lower; final int ll = lower;
final int uu = upper; final int uu = upper;
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
int ord = termsIndex.getOrd(doc); int ord = termsIndex.getOrd(doc);

View File

@ -16,7 +16,7 @@
*/ */
package org.apache.lucene.queries.function.docvalues; package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer; import org.apache.lucene.queries.function.ValueSourceScorer;
@ -83,7 +83,7 @@ public abstract class DoubleDocValues extends FunctionValues {
} }
@Override @Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
double lower,upper; double lower,upper;
if (lowerVal==null) { if (lowerVal==null) {
@ -103,36 +103,40 @@ public abstract class DoubleDocValues extends FunctionValues {
if (includeLower && includeUpper) { if (includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc); double docVal = doubleVal(doc);
return docVal >= l && docVal <= u; return docVal >= l && docVal <= u;
} }
}; };
} }
else if (includeLower && !includeUpper) { else if (includeLower && !includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc); double docVal = doubleVal(doc);
return docVal >= l && docVal < u; return docVal >= l && docVal < u;
} }
}; };
} }
else if (!includeLower && includeUpper) { else if (!includeLower && includeUpper) {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc); double docVal = doubleVal(doc);
return docVal > l && docVal <= u; return docVal > l && docVal <= u;
} }
}; };
} }
else { else {
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
double docVal = doubleVal(doc); double docVal = doubleVal(doc);
return docVal > l && docVal < u; return docVal > l && docVal < u;
} }

View File

@ -16,7 +16,7 @@
*/ */
package org.apache.lucene.queries.function.docvalues; package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer; import org.apache.lucene.queries.function.ValueSourceScorer;
@ -78,7 +78,7 @@ public abstract class IntDocValues extends FunctionValues {
} }
@Override @Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
int lower,upper; int lower,upper;
// instead of using separate comparison functions, adjust the endpoints. // instead of using separate comparison functions, adjust the endpoints.
@ -100,12 +100,11 @@ public abstract class IntDocValues extends FunctionValues {
final int ll = lower; final int ll = lower;
final int uu = upper; final int uu = upper;
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
int val = intVal(doc); int val = intVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu; return val >= ll && val <= uu;
} }
}; };

View File

@ -16,7 +16,7 @@
*/ */
package org.apache.lucene.queries.function.docvalues; package org.apache.lucene.queries.function.docvalues;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.ValueSourceScorer; import org.apache.lucene.queries.function.ValueSourceScorer;
@ -87,7 +87,7 @@ public abstract class LongDocValues extends FunctionValues {
} }
@Override @Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
long lower,upper; long lower,upper;
// instead of using separate comparison functions, adjust the endpoints. // instead of using separate comparison functions, adjust the endpoints.
@ -109,12 +109,11 @@ public abstract class LongDocValues extends FunctionValues {
final long ll = lower; final long ll = lower;
final long uu = upper; final long uu = upper;
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
long val = longVal(doc); long val = longVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu; return val >= ll && val <= uu;
} }
}; };

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.util.Map; import java.util.Map;
import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.FunctionValues;
@ -119,7 +118,7 @@ public class EnumFieldSource extends FieldCacheSource {
} }
@Override @Override
public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) {
Integer lower = stringValueToIntValue(lowerVal); Integer lower = stringValueToIntValue(lowerVal);
Integer upper = stringValueToIntValue(upperVal); Integer upper = stringValueToIntValue(upperVal);
@ -140,12 +139,11 @@ public class EnumFieldSource extends FieldCacheSource {
final int ll = lower; final int ll = lower;
final int uu = upper; final int uu = upper;
return new ValueSourceScorer(reader, this) { return new ValueSourceScorer(readerContext, this) {
@Override @Override
public boolean matches(int doc) { public boolean matches(int doc) {
if (!exists(doc)) return false;
int val = intVal(doc); int val = intVal(doc);
// only check for deleted if it's the default value
// if (val==0 && reader.isDeleted(doc)) return false;
return val >= ll && val <= uu; return val >= ll && val <= uu;
} }
}; };

View File

@ -335,6 +335,10 @@ Bug Fixes
* SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles. * SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles.
(Ishan Chattopadhyaya, Steve Rowe) (Ishan Chattopadhyaya, Steve Rowe)
* SOLR-8867: {!frange} queries will now avoid matching documents without a value in the
numeric field. For more complex functions, FunctionValues.exists() must also return true
for the document to match. (yonik)
Optimizations Optimizations
---------------------- ----------------------
* SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been

View File

@ -63,7 +63,7 @@ public class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFi
super.doSetNextReader(context); super.doSetNextReader(context);
maxdoc = context.reader().maxDoc(); maxdoc = context.reader().maxDoc();
FunctionValues dv = rangeFilt.getValueSource().getValues(fcontext, context); FunctionValues dv = rangeFilt.getValueSource().getValues(fcontext, context);
scorer = dv.getRangeScorer(context.reader(), rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper()); scorer = dv.getRangeScorer(context, rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper());
} }
} }
} }

View File

@ -78,7 +78,7 @@ public class ValueSourceRangeFilter extends SolrFilter {
return BitsFilteredDocIdSet.wrap(new DocIdSet() { return BitsFilteredDocIdSet.wrap(new DocIdSet() {
@Override @Override
public DocIdSetIterator iterator() throws IOException { public DocIdSetIterator iterator() throws IOException {
Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader(), lowerVal, upperVal, includeLower, includeUpper); Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext, lowerVal, upperVal, includeLower, includeUpper);
return scorer == null ? null : scorer.iterator(); return scorer == null ? null : scorer.iterator();
} }
@Override @Override

View File

@ -409,9 +409,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState
bq.add(q, Occur.MUST); bq.add(q, Occur.MUST);
SchemaField sf = ulog.getVersionInfo().getVersionField(); SchemaField sf = ulog.getVersionInfo().getVersionField();
ValueSource vs = sf.getType().getValueSource(sf, null); ValueSource vs = sf.getType().getValueSource(sf, null);
ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, null, Long.toString(Math.abs(cmd.getVersion())), true, true); ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, Long.toString(Math.abs(cmd.getVersion())), null, true, true);
FunctionRangeQuery range = new FunctionRangeQuery(filt); FunctionRangeQuery range = new FunctionRangeQuery(filt);
bq.add(range, Occur.MUST); bq.add(range, Occur.MUST_NOT); // formulated in the "MUST_NOT" sense so we can delete docs w/o a version (some tests depend on this...)
q = bq.build(); q = bq.build();
} }

View File

@ -201,14 +201,15 @@ public class TestRangeQuery extends SolrTestCaseJ4 {
final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d" final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d"
,"foo_ti","foo_tl","foo_tf","foo_td" ,"foo_ti","foo_tl","foo_tf","foo_td"
}; };
final int l=5; final int l=-5;
final int u=25; final int u=25;
createIndex(15, new DocProcessor() { createIndex(15, new DocProcessor() {
@Override @Override
public void process(SolrInputDocument doc) { public void process(SolrInputDocument doc) {
addInt(doc, l,u, fields); // 10% of the docs have missing values
if (r.nextInt(10)!=0) addInt(doc, l,u, fields);
} }
}); });
assertU(commit()); assertU(commit());