From 5ea86b14c36bc38366888a208368ac241d254baf Mon Sep 17 00:00:00 2001 From: yonik Date: Thu, 17 Mar 2016 16:58:27 -0400 Subject: [PATCH] SOLR-8867: fix frange/FunctionValues.getRangeScorer to not match missing values, getRangeScorer refactored to take LeafReaderContext --- lucene/CHANGES.txt | 4 ++++ .../TestExpressionValueSource.java | 4 ++-- .../queries/function/FunctionRangeQuery.java | 5 ++--- .../queries/function/FunctionValues.java | 20 +++++++++++-------- .../queries/function/ValueSourceScorer.java | 7 +++---- .../docvalues/DocTermsIndexDocValues.java | 5 ++--- .../function/docvalues/DoubleDocValues.java | 16 +++++++++------ .../function/docvalues/IntDocValues.java | 9 ++++----- .../function/docvalues/LongDocValues.java | 9 ++++----- .../function/valuesource/EnumFieldSource.java | 8 +++----- solr/CHANGES.txt | 4 ++++ .../solr/search/FunctionRangeQuery.java | 2 +- .../function/ValueSourceRangeFilter.java | 2 +- .../solr/update/DirectUpdateHandler2.java | 4 ++-- .../apache/solr/search/TestRangeQuery.java | 5 +++-- 15 files changed, 57 insertions(+), 47 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index be28e340736..29bbab8d76d 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -154,6 +154,10 @@ API Changes FilterCollector, FilterDirectory. And some Filter* classes in lucene-test-framework too. (David Smiley) +* SOLR-8867: FunctionValues.getRangeScorer now takes a LeafReaderContext instead + of an IndexReader, and avoids matching documents without a value in the field + for numeric fields. (yonik) + Optimizations * LUCENE-6891: Use prefix coding when writing points in diff --git a/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java b/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java index 6bf73d1d67c..3129d8ccee0 100644 --- a/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java +++ b/lucene/expressions/src/test/org/apache/lucene/expressions/TestExpressionValueSource.java @@ -127,7 +127,7 @@ public class TestExpressionValueSource extends LuceneTestCase { FunctionValues values = vs.getValues(new HashMap(), leaf); // everything - ValueSourceScorer scorer = values.getRangeScorer(leaf.reader(), "4", "40", true, true); + ValueSourceScorer scorer = values.getRangeScorer(leaf, "4", "40", true, true); DocIdSetIterator iter = scorer.iterator(); assertEquals(-1, iter.docID()); assertEquals(0, iter.nextDoc()); @@ -136,7 +136,7 @@ public class TestExpressionValueSource extends LuceneTestCase { assertEquals(DocIdSetIterator.NO_MORE_DOCS, iter.nextDoc()); // just the first doc - scorer = values.getRangeScorer(leaf.reader(), "4", "40", false, false); + scorer = values.getRangeScorer(leaf, "4", "40", false, false); iter = scorer.iterator(); assertEquals(-1, scorer.docID()); assertEquals(0, iter.nextDoc()); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java index 73f0ee7c9a8..65215a3dda4 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionRangeQuery.java @@ -21,7 +21,6 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation; @@ -34,7 +33,7 @@ import org.apache.lucene.search.Weight; * range. The score is the float value. This can be a slow query if run by itself since it must visit all docs; * ideally it's combined with other queries. * It's mostly a wrapper around - * {@link FunctionValues#getRangeScorer(IndexReader, String, String, boolean, boolean)}. + * {@link FunctionValues#getRangeScorer(LeafReaderContext, String, String, boolean, boolean)}. * * A similar class is {@code org.apache.lucene.search.DocValuesRangeQuery} in the sandbox module. That one is * constant scoring. @@ -162,7 +161,7 @@ public class FunctionRangeQuery extends Query { public ValueSourceScorer scorer(LeafReaderContext context) throws IOException { FunctionValues functionValues = valueSource.getValues(vsContext, context); // getRangeScorer takes String args and parses them. Weird. - return functionValues.getRangeScorer(context.reader(), lowerVal, upperVal, includeLower, includeUpper); + return functionValues.getRangeScorer(context, lowerVal, upperVal, includeLower, includeUpper); } } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionValues.java index 31ecd3dffc6..1e7590dc74c 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/FunctionValues.java @@ -16,7 +16,7 @@ */ package org.apache.lucene.queries.function; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRefBuilder; @@ -140,8 +140,8 @@ public abstract class FunctionValues { * Yields a {@link Scorer} that matches all documents, * and that which produces scores equal to {@link #floatVal(int)}. */ - public ValueSourceScorer getScorer(IndexReader reader) { - return new ValueSourceScorer(reader, this) { + public ValueSourceScorer getScorer(LeafReaderContext readerContext) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { return true; @@ -157,7 +157,7 @@ public abstract class FunctionValues { // because it needs different behavior depending on the type of fields. There is also // a setup cost - parsing and normalizing params, and doing a binary search on the StringIndex. // TODO: change "reader" to LeafReaderContext - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { float lower; float upper; @@ -176,36 +176,40 @@ public abstract class FunctionValues { final float u = upper; if (includeLower && includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; float docVal = floatVal(doc); return docVal >= l && docVal <= u; } }; } else if (includeLower && !includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; float docVal = floatVal(doc); return docVal >= l && docVal < u; } }; } else if (!includeLower && includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; float docVal = floatVal(doc); return docVal > l && docVal <= u; } }; } else { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; float docVal = floatVal(doc); return docVal > l && docVal < u; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java index d05e0308a9a..035327b5c04 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/ValueSourceScorer.java @@ -18,7 +18,7 @@ package org.apache.lucene.queries.function; import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.TwoPhaseIterator; @@ -43,11 +43,10 @@ public abstract class ValueSourceScorer extends Scorer { private final TwoPhaseIterator twoPhaseIterator; private final DocIdSetIterator disi; - //TODO use LeafReaderContext not IndexReader? - protected ValueSourceScorer(IndexReader reader, FunctionValues values) { + protected ValueSourceScorer(LeafReaderContext readerContext, FunctionValues values) { super(null);//no weight this.values = values; - final DocIdSetIterator approximation = DocIdSetIterator.all(reader.maxDoc()); // no approximation! + final DocIdSetIterator approximation = DocIdSetIterator.all(readerContext.reader().maxDoc()); // no approximation! this.twoPhaseIterator = new TwoPhaseIterator(approximation) { @Override public boolean matches() throws IOException { diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java index 4cbcd68fd78..194969d9bd1 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java @@ -19,7 +19,6 @@ package org.apache.lucene.queries.function.docvalues; import java.io.IOException; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; @@ -93,7 +92,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { public abstract Object objectVal(int doc); // force subclasses to override @Override - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { // TODO: are lowerVal and upperVal in indexed form or not? lowerVal = lowerVal == null ? null : toTerm(lowerVal); upperVal = upperVal == null ? null : toTerm(upperVal); @@ -121,7 +120,7 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { final int ll = lower; final int uu = upper; - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { int ord = termsIndex.getOrd(doc); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java index 1cb691fc901..91d1e6f8bd2 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DoubleDocValues.java @@ -16,7 +16,7 @@ */ package org.apache.lucene.queries.function.docvalues; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSourceScorer; @@ -83,7 +83,7 @@ public abstract class DoubleDocValues extends FunctionValues { } @Override - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { double lower,upper; if (lowerVal==null) { @@ -103,36 +103,40 @@ public abstract class DoubleDocValues extends FunctionValues { if (includeLower && includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; double docVal = doubleVal(doc); return docVal >= l && docVal <= u; } }; } else if (includeLower && !includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; double docVal = doubleVal(doc); return docVal >= l && docVal < u; } }; } else if (!includeLower && includeUpper) { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; double docVal = doubleVal(doc); return docVal > l && docVal <= u; } }; } else { - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; double docVal = doubleVal(doc); return docVal > l && docVal < u; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java index d8a77e38931..aff38ac4b65 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/IntDocValues.java @@ -16,7 +16,7 @@ */ package org.apache.lucene.queries.function.docvalues; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSourceScorer; @@ -78,7 +78,7 @@ public abstract class IntDocValues extends FunctionValues { } @Override - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { int lower,upper; // instead of using separate comparison functions, adjust the endpoints. @@ -100,12 +100,11 @@ public abstract class IntDocValues extends FunctionValues { final int ll = lower; final int uu = upper; - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; int val = intVal(doc); - // only check for deleted if it's the default value - // if (val==0 && reader.isDeleted(doc)) return false; return val >= ll && val <= uu; } }; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java index 626b2e510f0..d48bb451cf0 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/LongDocValues.java @@ -16,7 +16,7 @@ */ package org.apache.lucene.queries.function.docvalues; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.ValueSourceScorer; @@ -87,7 +87,7 @@ public abstract class LongDocValues extends FunctionValues { } @Override - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { long lower,upper; // instead of using separate comparison functions, adjust the endpoints. @@ -109,12 +109,11 @@ public abstract class LongDocValues extends FunctionValues { final long ll = lower; final long uu = upper; - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; long val = longVal(doc); - // only check for deleted if it's the default value - // if (val==0 && reader.isDeleted(doc)) return false; return val >= ll && val <= uu; } }; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java index cc7df232986..d0f42a55e08 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/EnumFieldSource.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.Map; import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.queries.function.FunctionValues; @@ -119,7 +118,7 @@ public class EnumFieldSource extends FieldCacheSource { } @Override - public ValueSourceScorer getRangeScorer(IndexReader reader, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { + public ValueSourceScorer getRangeScorer(LeafReaderContext readerContext, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) { Integer lower = stringValueToIntValue(lowerVal); Integer upper = stringValueToIntValue(upperVal); @@ -140,12 +139,11 @@ public class EnumFieldSource extends FieldCacheSource { final int ll = lower; final int uu = upper; - return new ValueSourceScorer(reader, this) { + return new ValueSourceScorer(readerContext, this) { @Override public boolean matches(int doc) { + if (!exists(doc)) return false; int val = intVal(doc); - // only check for deleted if it's the default value - // if (val==0 && reader.isDeleted(doc)) return false; return val >= ll && val <= uu; } }; diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 6f86b520c16..75dd73a12b0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -335,6 +335,10 @@ Bug Fixes * SOLR-8838: Returning non-stored docValues is incorrect for negative floats and doubles. (Ishan Chattopadhyaya, Steve Rowe) +* SOLR-8867: {!frange} queries will now avoid matching documents without a value in the + numeric field. For more complex functions, FunctionValues.exists() must also return true + for the document to match. (yonik) + Optimizations ---------------------- * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been diff --git a/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java b/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java index a53d8ddebd4..d64060fe8b5 100644 --- a/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java +++ b/solr/core/src/java/org/apache/solr/search/FunctionRangeQuery.java @@ -63,7 +63,7 @@ public class FunctionRangeQuery extends SolrConstantScoreQuery implements PostFi super.doSetNextReader(context); maxdoc = context.reader().maxDoc(); FunctionValues dv = rangeFilt.getValueSource().getValues(fcontext, context); - scorer = dv.getRangeScorer(context.reader(), rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper()); + scorer = dv.getRangeScorer(context, rangeFilt.getLowerVal(), rangeFilt.getUpperVal(), rangeFilt.isIncludeLower(), rangeFilt.isIncludeUpper()); } } } diff --git a/solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java b/solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java index 3a0502c478c..64211d2fb99 100644 --- a/solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java +++ b/solr/core/src/java/org/apache/solr/search/function/ValueSourceRangeFilter.java @@ -78,7 +78,7 @@ public class ValueSourceRangeFilter extends SolrFilter { return BitsFilteredDocIdSet.wrap(new DocIdSet() { @Override public DocIdSetIterator iterator() throws IOException { - Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext.reader(), lowerVal, upperVal, includeLower, includeUpper); + Scorer scorer = valueSource.getValues(context, readerContext).getRangeScorer(readerContext, lowerVal, upperVal, includeLower, includeUpper); return scorer == null ? null : scorer.iterator(); } @Override diff --git a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java index 6fe4884812d..31eb94c81e7 100644 --- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java +++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java @@ -409,9 +409,9 @@ public class DirectUpdateHandler2 extends UpdateHandler implements SolrCoreState bq.add(q, Occur.MUST); SchemaField sf = ulog.getVersionInfo().getVersionField(); ValueSource vs = sf.getType().getValueSource(sf, null); - ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, null, Long.toString(Math.abs(cmd.getVersion())), true, true); + ValueSourceRangeFilter filt = new ValueSourceRangeFilter(vs, Long.toString(Math.abs(cmd.getVersion())), null, true, true); FunctionRangeQuery range = new FunctionRangeQuery(filt); - bq.add(range, Occur.MUST); + bq.add(range, Occur.MUST_NOT); // formulated in the "MUST_NOT" sense so we can delete docs w/o a version (some tests depend on this...) q = bq.build(); } diff --git a/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java b/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java index 9816efffbb8..b471a754a5a 100644 --- a/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java +++ b/solr/core/src/test/org/apache/solr/search/TestRangeQuery.java @@ -201,14 +201,15 @@ public class TestRangeQuery extends SolrTestCaseJ4 { final String[] fields = {"foo_s","foo_i","foo_l","foo_f","foo_d" ,"foo_ti","foo_tl","foo_tf","foo_td" }; - final int l=5; + final int l=-5; final int u=25; createIndex(15, new DocProcessor() { @Override public void process(SolrInputDocument doc) { - addInt(doc, l,u, fields); + // 10% of the docs have missing values + if (r.nextInt(10)!=0) addInt(doc, l,u, fields); } }); assertU(commit());