From d8146460f0eca4641f0e48793b6053a88be5c7d1 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Thu, 16 Oct 2014 19:05:20 +0000 Subject: [PATCH] LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to behave properly when wrapping other ValueSources which do not exist for the specified document git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1632414 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + lucene/MIGRATE.txt | 8 + .../valuesource/DualFloatFunction.java | 9 +- .../function/valuesource/IfFunction.java | 4 +- .../valuesource/LinearFloatFunction.java | 4 + .../valuesource/MaxFloatFunction.java | 18 +- .../valuesource/MinFloatFunction.java | 19 +- .../valuesource/MultiFloatFunction.java | 31 +- .../function/valuesource/MultiFunction.java | 29 ++ .../valuesource/ReciprocalFloatFunction.java | 4 + .../valuesource/ScaleFloatFunction.java | 32 +- .../queries/function/TestValueSources.java | 427 ++++++++++++++---- solr/CHANGES.txt | 6 + .../handler/component/StatsComponentTest.java | 14 +- .../search/function/TestFunctionQuery.java | 11 +- 15 files changed, 490 insertions(+), 130 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9e08556773e..dcd9a4f8394 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -184,6 +184,10 @@ Bug Fixes * LUCENE-5980: Don't let document length overflow. (Robert Muir) +* LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to + behave properly when wrapping other ValueSources which do not exist for the specified document + (hossman) + Documentation * LUCENE-5392: Add/improve analysis package documentation to reflect diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index bea3f0ccb0d..689ccc85247 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -6,3 +6,11 @@ The API of oal.document was restructured to differentiate between stored documents and indexed documents. IndexReader.document(int) now returns StoredDocument instead of Document. In most cases a simple replacement of the return type is enough to upgrade. + +## FunctionValues.exist() Behavior Changes due to ValueSource bug fixes (LUCENE-5961) + +Bugs fixed in several ValueSource functions may result in different behavior in +situations where some documents do not have values for fields wrapped in other +ValueSources. Users who want to preserve the previous behavior may need to wrap +their ValueSources in a "DefFunction" along with a ConstValueSource of "0.0". + diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java index 3f3649a1c32..35593fbb0b4 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/DualFloatFunction.java @@ -60,7 +60,14 @@ public abstract class DualFloatFunction extends ValueSource { public float floatVal(int doc) { return func(doc, aVals, bVals); } - + /** + * True if and only if all of the wrapped {@link FunctionValues} + * exists for the specified doc + */ + @Override + public boolean exists(int doc) { + return MultiFunction.allExists(doc, aVals, bVals); + } @Override public String toString(int doc) { return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')'; diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java index 84efbf856c7..756b4f5e87c 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/IfFunction.java @@ -102,7 +102,7 @@ public class IfFunction extends BoolFunction { @Override public boolean exists(int doc) { - return true; // TODO: flow through to any sub-sources? + return ifVals.boolVal(doc) ? trueVals.exists(doc) : falseVals.exists(doc); } @Override @@ -148,4 +148,4 @@ public class IfFunction extends BoolFunction { trueSource.createWeight(context, searcher); falseSource.createWeight(context, searcher); } -} \ No newline at end of file +} diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java index 72c33e2b184..3b25ee58474 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/LinearFloatFunction.java @@ -59,6 +59,10 @@ public class LinearFloatFunction extends ValueSource { return vals.floatVal(doc) * slope + intercept; } @Override + public boolean exists(int doc) { + return vals.exists(doc); + } + @Override public String toString(int doc) { return slope + "*float(" + vals.toString(doc) + ")+" + intercept; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxFloatFunction.java index d1e1f4fd30b..a38cbb0a4d6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MaxFloatFunction.java @@ -35,11 +35,25 @@ public class MaxFloatFunction extends MultiFloatFunction { @Override protected float func(int doc, FunctionValues[] valsArr) { - if (valsArr.length == 0) return 0.0f; + if ( ! exists(doc, valsArr) ) return 0.0f; + float val = Float.NEGATIVE_INFINITY; for (FunctionValues vals : valsArr) { - val = Math.max(vals.floatVal(doc), val); + if (vals.exists(doc)) { + val = Math.max(vals.floatVal(doc), val); + } } return val; } + + /** + * True if any of the specified values + * {@link FunctionValues#exists} for the specified doc, else false. + * + * @see MultiFunction#anyExists + */ + @Override + protected boolean exists(int doc, FunctionValues[] valsArr) { + return MultiFunction.anyExists(doc, valsArr); + } } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MinFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MinFloatFunction.java index 4cf35877443..f1c426b2834 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MinFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MinFloatFunction.java @@ -35,11 +35,26 @@ public class MinFloatFunction extends MultiFloatFunction { @Override protected float func(int doc, FunctionValues[] valsArr) { - if (valsArr.length == 0) return 0.0f; + if ( ! exists(doc, valsArr) ) return 0.0f; + float val = Float.POSITIVE_INFINITY; for (FunctionValues vals : valsArr) { - val = Math.min(vals.floatVal(doc), val); + if (vals.exists(doc)) { + val = Math.min(vals.floatVal(doc), val); + } } return val; } + + /** + * True if any of the specified values + * {@link FunctionValues#exists} for the specified doc, else false. + * + * @see MultiFunction#anyExists + */ + @Override + protected boolean exists(int doc, FunctionValues[] valsArr) { + return MultiFunction.anyExists(doc, valsArr); + } + } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java index 47b9ec1be7e..53600284e32 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFloatFunction.java @@ -40,6 +40,18 @@ public abstract class MultiFloatFunction extends ValueSource { abstract protected String name(); abstract protected float func(int doc, FunctionValues[] valsArr); + /** + * Called by {@link FunctionValues#exists} for each document. + * + * Default impl returns true if all of the specified values + * {@link FunctionValues#exists} for the specified doc, else false. + * + * @see FunctionValues#exists + * @see MultiFunction#allExists + */ + protected boolean exists(int doc, FunctionValues[] valsArr) { + return MultiFunction.allExists(doc, valsArr); + } @Override public String description() { @@ -70,21 +82,12 @@ public abstract class MultiFloatFunction extends ValueSource { public float floatVal(int doc) { return func(doc, valsArr); } - @Override + public boolean exists(int doc) { + return MultiFloatFunction.this.exists(doc, valsArr); + } + @Override public String toString(int doc) { - StringBuilder sb = new StringBuilder(); - sb.append(name()).append('('); - boolean firstTime=true; - for (FunctionValues vals : valsArr) { - if (firstTime) { - firstTime=false; - } else { - sb.append(','); - } - sb.append(vals.toString(doc)); - } - sb.append(')'); - return sb.toString(); + return MultiFunction.toString(name(), valsArr, doc); } }; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java index bfa7a2c0c33..053031d173f 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/MultiFunction.java @@ -43,6 +43,35 @@ public abstract class MultiFunction extends ValueSource { return description(name(), sources); } + /** + * Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues} + * + * @return true if all of the specified values + * {@link FunctionValues#exists} for the specified doc, else false. + */ + public static boolean allExists(int doc, FunctionValues... values) { + for (FunctionValues v : values) { + if ( ! v.exists(doc) ) { + return false; + } + } + return true; + } + /** + * Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues} + * + * @return true if any of the specified values + * {@link FunctionValues#exists} for the specified doc, else false. + */ + public static boolean anyExists(int doc, FunctionValues... values) { + for (FunctionValues v : values) { + if ( v.exists(doc) ) { + return true; + } + } + return false; + } + public static String description(String name, List sources) { StringBuilder sb = new StringBuilder(); sb.append(name).append('('); diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java index e93985036e7..9c876b37790 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ReciprocalFloatFunction.java @@ -69,6 +69,10 @@ public class ReciprocalFloatFunction extends ValueSource { return a/(m*vals.floatVal(doc) + b); } @Override + public boolean exists(int doc) { + return vals.exists(doc); + } + @Override public String toString(int doc) { return Float.toString(a) + "/(" + m + "*float(" + vals.toString(doc) + ')' diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java index 90013075b44..e346b946a5d 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/ScaleFloatFunction.java @@ -70,20 +70,22 @@ public class ScaleFloatFunction extends ValueSource { int maxDoc = leaf.reader().maxDoc(); FunctionValues vals = source.getValues(context, leaf); for (int i=0; i maxVal) { + maxVal = val; + } } - if (val < minVal) { - minVal = val; - } - if (val > maxVal) { - maxVal = val; - } - } } if (minVal == Float.POSITIVE_INFINITY) { @@ -113,6 +115,10 @@ public class ScaleFloatFunction extends ValueSource { final FunctionValues vals = source.getValues(context, readerContext); return new FloatDocValues(this) { + @Override + public boolean exists(int doc) { + return vals.exists(doc); + } @Override public float floatVal(int doc) { return (vals.floatVal(doc) - minSource) * scale + min; diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java index 5a80e8437da..d5391f47b11 100644 --- a/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestValueSources.java @@ -19,6 +19,8 @@ package org.apache.lucene.queries.function; import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -31,9 +33,12 @@ import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; +import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.queries.function.docvalues.FloatDocValues; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; import org.apache.lucene.queries.function.valuesource.ConstValueSource; import org.apache.lucene.queries.function.valuesource.DivFloatFunction; @@ -51,6 +56,7 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource; import org.apache.lucene.queries.function.valuesource.MaxDocValueSource; import org.apache.lucene.queries.function.valuesource.MaxFloatFunction; import org.apache.lucene.queries.function.valuesource.MinFloatFunction; +import org.apache.lucene.queries.function.valuesource.MultiFloatFunction; import org.apache.lucene.queries.function.valuesource.NormValueSource; import org.apache.lucene.queries.function.valuesource.NumDocsValueSource; import org.apache.lucene.queries.function.valuesource.PowFloatFunction; @@ -71,6 +77,7 @@ import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; @@ -88,6 +95,11 @@ public class TestValueSources extends LuceneTestCase { static IndexReader reader; static IndexSearcher searcher; + static final ValueSource BOGUS_FLOAT_VS = new FloatFieldSource("bogus_field"); + static final ValueSource BOGUS_DOUBLE_VS = new DoubleFieldSource("bogus_field"); + static final ValueSource BOGUS_INT_VS = new IntFieldSource("bogus_field"); + static final ValueSource BOGUS_LONG_VS = new LongFieldSource("bogus_field"); + static final List documents = Arrays.asList(new String[][] { /* id, double, float, int, long, string, text */ new String[] { "0", "3.63", "5.2", "35", "4343", "test", "this is a test test test" }, @@ -160,100 +172,165 @@ public class TestValueSources extends LuceneTestCase { } public void testConst() throws Exception { - assertHits(new FunctionQuery(new ConstValueSource(0.3f)), - new float[] { 0.3f, 0.3f }); + ValueSource vs = new ConstValueSource(0.3f); + assertHits(new FunctionQuery(vs), + new float[] { 0.3f, 0.3f }); + assertAllExist(vs); } public void testDiv() throws Exception { - assertHits(new FunctionQuery(new DivFloatFunction( - new ConstValueSource(10f), new ConstValueSource(5f))), - new float[] { 2f, 2f }); + ValueSource vs = new DivFloatFunction(new ConstValueSource(10f), new ConstValueSource(5f)); + assertHits(new FunctionQuery(vs), + new float[] { 2f, 2f }); + assertAllExist(vs); + vs = new DivFloatFunction(new ConstValueSource(10f), BOGUS_FLOAT_VS); + assertNoneExist(vs); + vs = new DivFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(10f)); + assertNoneExist(vs); } public void testDocFreq() throws Exception { - assertHits(new FunctionQuery( - new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), - new float[] { 2f, 2f }); + ValueSource vs = new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test")); + assertHits(new FunctionQuery(vs), new float[] { 2f, 2f }); + assertAllExist(vs); } public void testDoubleConst() throws Exception { - assertHits(new FunctionQuery(new DoubleConstValueSource(0.3d)), - new float[] { 0.3f, 0.3f }); + ValueSource vs = new DoubleConstValueSource(0.3d); + assertHits(new FunctionQuery(vs), new float[] { 0.3f, 0.3f }); + assertAllExist(vs); } public void testDouble() throws Exception { - assertHits(new FunctionQuery(new DoubleFieldSource("double")), - new float[] { 3.63f, 5.65f }); + ValueSource vs = new DoubleFieldSource("double"); + assertHits(new FunctionQuery(vs), new float[] { 3.63f, 5.65f }); + assertAllExist(vs); + assertNoneExist(BOGUS_DOUBLE_VS); } public void testFloat() throws Exception { - assertHits(new FunctionQuery(new FloatFieldSource("float")), - new float[] { 5.2f, 9.3f }); + ValueSource vs = new FloatFieldSource("float"); + assertHits(new FunctionQuery(vs), new float[] { 5.2f, 9.3f }); + assertAllExist(vs); + assertNoneExist(BOGUS_FLOAT_VS); } public void testIDF() throws Exception { Similarity saved = searcher.getSimilarity(); try { searcher.setSimilarity(new DefaultSimilarity()); - assertHits(new FunctionQuery( - new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"))), - new float[] { 0.5945349f, 0.5945349f }); + ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test")); + assertHits(new FunctionQuery(vs), new float[] { 0.5945349f, 0.5945349f }); + assertAllExist(vs); } finally { searcher.setSimilarity(saved); } } public void testIf() throws Exception { - assertHits(new FunctionQuery(new IfFunction( - new BytesRefFieldSource("id"), - new ConstValueSource(1.0f), - new ConstValueSource(2.0f) - )), - new float[] { 1f, 1f }); - // true just if a value exists... - assertHits(new FunctionQuery(new IfFunction( - new LiteralValueSource("false"), - new ConstValueSource(1.0f), - new ConstValueSource(2.0f) - )), - new float[] { 1f, 1f }); + ValueSource vs = new IfFunction(new BytesRefFieldSource("id"), + new ConstValueSource(1.0f), // match + new ConstValueSource(2.0f)); + assertHits(new FunctionQuery(vs), new float[] { 1f, 1f }); + assertAllExist(vs); + + // true just if a test value exists... + vs = new IfFunction(new LiteralValueSource("false"), + new ConstValueSource(1.0f), // match + new ConstValueSource(2.0f)); + assertHits(new FunctionQuery(vs), new float[] { 1f, 1f }); + assertAllExist(vs); + + // false value if tests value does not exist + vs = new IfFunction(BOGUS_FLOAT_VS, + new ConstValueSource(1.0f), + new ConstValueSource(2.0f)); // match + assertHits(new FunctionQuery(vs), new float[] { 2F, 2F }); + assertAllExist(vs); + + // final value may still not exist + vs = new IfFunction(new BytesRefFieldSource("id"), + BOGUS_FLOAT_VS, // match + new ConstValueSource(1.0f)); + assertNoneExist(vs); } public void testInt() throws Exception { - assertHits(new FunctionQuery(new IntFieldSource("int")), - new float[] { 35f, 54f }); + ValueSource vs = new IntFieldSource("int"); + assertHits(new FunctionQuery(vs), new float[] { 35f, 54f }); + assertAllExist(vs); + assertNoneExist(BOGUS_INT_VS); + } public void testJoinDocFreq() throws Exception { - assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")), - new float[] { 2f, 0f }); + assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")), + new float[] { 2f, 0f }); + + // TODO: what *should* the rules be for exist() ? } public void testLinearFloat() throws Exception { - assertHits(new FunctionQuery(new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1)), - new float[] { 7f, 7f }); + ValueSource vs = new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1); + assertHits(new FunctionQuery(vs), new float[] { 7f, 7f }); + assertAllExist(vs); + vs = new LinearFloatFunction(BOGUS_FLOAT_VS, 3, 1); + assertNoneExist(vs); } public void testLong() throws Exception { - assertHits(new FunctionQuery(new LongFieldSource("long")), - new float[] { 4343f, 1954f }); + ValueSource vs = new LongFieldSource("long"); + assertHits(new FunctionQuery(vs), new float[] { 4343f, 1954f }); + assertAllExist(vs); + assertNoneExist(BOGUS_LONG_VS); } public void testMaxDoc() throws Exception { - assertHits(new FunctionQuery(new MaxDocValueSource()), - new float[] { 2f, 2f }); + ValueSource vs = new MaxDocValueSource(); + assertHits(new FunctionQuery(vs), new float[] { 2f, 2f }); + assertAllExist(vs); } public void testMaxFloat() throws Exception { - assertHits(new FunctionQuery(new MaxFloatFunction(new ValueSource[] { - new ConstValueSource(1f), new ConstValueSource(2f)})), - new float[] { 2f, 2f }); + ValueSource vs = new MaxFloatFunction(new ValueSource[] { + new ConstValueSource(1f), new ConstValueSource(2f)}); + + assertHits(new FunctionQuery(vs), new float[] { 2f, 2f }); + assertAllExist(vs); + + // as long as one value exists, then max exists + vs = new MaxFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(2F)}); + assertAllExist(vs); + vs = new MaxFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS}); + assertAllExist(vs); + // if none exist, then max doesn't exist + vs = new MaxFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS}); + assertNoneExist(vs); } public void testMinFloat() throws Exception { - assertHits(new FunctionQuery(new MinFloatFunction(new ValueSource[] { - new ConstValueSource(1f), new ConstValueSource(2f)})), - new float[] { 1f, 1f }); + ValueSource vs = new MinFloatFunction(new ValueSource[] { + new ConstValueSource(1f), new ConstValueSource(2f)}); + + assertHits(new FunctionQuery(vs), new float[] { 1f, 1f }); + assertAllExist(vs); + + // as long as one value exists, then min exists + vs = new MinFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(2F)}); + assertHits(new FunctionQuery(vs), new float[] { 2F, 2F }); + assertAllExist(vs); + vs = new MinFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS}); + assertAllExist(vs); + + // if none exist, then min doesn't exist + vs = new MinFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS}); + assertNoneExist(vs); } public void testNorm() throws Exception { @@ -261,35 +338,95 @@ public class TestValueSources extends LuceneTestCase { try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new DefaultSimilarity()); - assertHits(new FunctionQuery( - new NormValueSource("byte")), - new float[] { 0f, 0f }); + ValueSource vs = new NormValueSource("byte"); + assertHits(new FunctionQuery(vs), new float[] { 0f, 0f }); + + // regardless of wether norms exist, value source exists == 0 + assertAllExist(vs); + + vs = new NormValueSource("text"); + assertAllExist(vs); + } finally { searcher.setSimilarity(saved); } } public void testNumDocs() throws Exception { - assertHits(new FunctionQuery(new NumDocsValueSource()), - new float[] { 2f, 2f }); + ValueSource vs = new NumDocsValueSource(); + assertHits(new FunctionQuery(vs), new float[] { 2f, 2f }); + assertAllExist(vs); } public void testPow() throws Exception { - assertHits(new FunctionQuery(new PowFloatFunction( - new ConstValueSource(2f), new ConstValueSource(3f))), - new float[] { 8f, 8f }); + ValueSource vs = new PowFloatFunction(new ConstValueSource(2f), new ConstValueSource(3f)); + assertHits(new FunctionQuery(vs), new float[] { 8f, 8f }); + assertAllExist(vs); + vs = new PowFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(3f)); + assertNoneExist(vs); + vs = new PowFloatFunction(new ConstValueSource(3f), BOGUS_FLOAT_VS); + assertNoneExist(vs); } public void testProduct() throws Exception { - assertHits(new FunctionQuery(new ProductFloatFunction(new ValueSource[] { - new ConstValueSource(2f), new ConstValueSource(3f)})), - new float[] { 6f, 6f }); + ValueSource vs = new ProductFloatFunction(new ValueSource[] { + new ConstValueSource(2f), new ConstValueSource(3f)}); + assertHits(new FunctionQuery(vs), new float[] { 6f, 6f }); + assertAllExist(vs); + + vs = new ProductFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(3f)}); + assertNoneExist(vs); } + public void testQueryWrapedFuncWrapedQuery() throws Exception { + ValueSource vs = new QueryValueSource(new FunctionQuery(new ConstValueSource(2f)), 0f); + assertHits(new FunctionQuery(vs), new float[] { 2f, 2f }); + assertAllExist(vs); + } + public void testQuery() throws Exception { - assertHits(new FunctionQuery(new QueryValueSource( - new FunctionQuery(new ConstValueSource(2f)), 0f)), - new float[] { 2f, 2f }); + Similarity saved = searcher.getSimilarity(); + + try { + searcher.setSimilarity(new DefaultSimilarity()); + + ValueSource vs = new QueryValueSource(new TermQuery(new Term("string","bar")), 42F); + assertHits(new FunctionQuery(vs), new float[] { 42F, 1F }); + + // valuesource should exist only for things matching the term query + // sanity check via quick & dirty wrapper arround tf + ValueSource expected = new MultiFloatFunction(new ValueSource[] { + new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))}) { + + @Override + protected String name() { return "tf_based_exists"; } + + @Override + protected float func(int doc, FunctionValues[] valsArr) { + return valsArr[0].floatVal(doc); + } + @Override + protected boolean exists(int doc, FunctionValues[] valsArr) { + // if tf > 0, then it should exist + return 0 < func(doc, valsArr); + } + }; + + assertExists(expected, vs); + + + // Query matches all docs, func exists for all docs + vs = new QueryValueSource(new TermQuery(new Term("text","test")), 0F); + assertAllExist(vs); + + // Query matches no docs, func exists for no docs + vs = new QueryValueSource(new TermQuery(new Term("bogus","does not exist")), 0F); + assertNoneExist(vs); + + } finally { + searcher.setSimilarity(saved); + } } public void testRangeMap() throws Exception { @@ -300,37 +437,59 @@ public class TestValueSources extends LuceneTestCase { 5, 6, new SumFloatFunction(new ValueSource[] {new ConstValueSource(1f), new ConstValueSource(2f)}), new ConstValueSource(11f))), new float[] { 3f, 11f }); + + // TODO: what *should* the rules be for exist() ? + // ((source exists && source in range && target exists) OR (source not in range && default exists)) ? } public void testReciprocal() throws Exception { - assertHits(new FunctionQuery(new ReciprocalFloatFunction(new ConstValueSource(2f), - 3, 1, 4)), - new float[] { 0.1f, 0.1f }); + ValueSource vs = new ReciprocalFloatFunction(new ConstValueSource(2f), 3, 1, 4); + assertHits(new FunctionQuery(vs), new float[] { 0.1f, 0.1f }); + assertAllExist(vs); + + vs = new ReciprocalFloatFunction(BOGUS_FLOAT_VS, 3, 1, 4); + assertNoneExist(vs); } public void testScale() throws Exception { - assertHits(new FunctionQuery(new ScaleFloatFunction(new IntFieldSource("int"), 0, 1)), - new float[] { 0.0f, 1.0f }); + ValueSource vs = new ScaleFloatFunction(new IntFieldSource("int"), 0, 1); + assertHits(new FunctionQuery(vs), new float[] { 0.0f, 1.0f }); + assertAllExist(vs); + + vs = new ScaleFloatFunction(BOGUS_INT_VS, 0, 1); + assertNoneExist(vs); } public void testSumFloat() throws Exception { - assertHits(new FunctionQuery(new SumFloatFunction(new ValueSource[] { - new ConstValueSource(1f), new ConstValueSource(2f)})), - new float[] { 3f, 3f }); + ValueSource vs = new SumFloatFunction(new ValueSource[] { + new ConstValueSource(1f), new ConstValueSource(2f)}); + assertHits(new FunctionQuery(vs), new float[] { 3f, 3f }); + assertAllExist(vs); + + vs = new SumFloatFunction(new ValueSource[] { + BOGUS_FLOAT_VS, new ConstValueSource(2f)}); + assertNoneExist(vs); } public void testSumTotalTermFreq() throws Exception { - assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")), - new float[] { 8f, 8f }); + ValueSource vs = new SumTotalTermFreqValueSource("text"); + assertHits(new FunctionQuery(vs), new float[] { 8f, 8f }); + assertAllExist(vs); } public void testTermFreq() throws Exception { - assertHits(new FunctionQuery( - new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), - new float[] { 3f, 1f }); - assertHits(new FunctionQuery( - new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar"))), - new float[] { 0f, 1f }); + ValueSource vs = new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test")); + assertHits(new FunctionQuery(vs), new float[] { 3f, 1f }); + assertAllExist(vs); + + vs = new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar")); + assertHits(new FunctionQuery(vs), new float[] { 0f, 1f }); + assertAllExist(vs); + + // regardless of wether norms exist, value source exists == 0 + vs = new TermFreqValueSource("bogus", "bogus", "bogus", new BytesRef("bogus")); + assertHits(new FunctionQuery(vs), new float[] { 0F, 0F }); + assertAllExist(vs); } public void testTF() throws Exception { @@ -338,23 +497,71 @@ public class TestValueSources extends LuceneTestCase { try { // no norm field (so agnostic to indexed similarity) searcher.setSimilarity(new DefaultSimilarity()); - assertHits(new FunctionQuery( - new TFValueSource("bogus", "bogus", "text", new BytesRef("test"))), - new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) }); - assertHits(new FunctionQuery( - new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))), - new float[] { 0f, 1f }); + + ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test")); + assertHits(new FunctionQuery(vs), + new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) }); + assertAllExist(vs); + + vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar")); + assertHits(new FunctionQuery(vs), new float[] { 0f, 1f }); + assertAllExist(vs); + + // regardless of wether norms exist, value source exists == 0 + vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus")); + assertHits(new FunctionQuery(vs), new float[] { 0F, 0F }); + assertAllExist(vs); + } finally { searcher.setSimilarity(saved); } } public void testTotalTermFreq() throws Exception { - assertHits(new FunctionQuery( - new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), - new float[] { 4f, 4f }); + ValueSource vs = new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test")); + assertHits(new FunctionQuery(vs), new float[] { 4f, 4f }); + assertAllExist(vs); } + /** + * Asserts that for every doc, the {@link FunctionValues#exists} value + * from the {@link ValueSource} is true. + */ + void assertAllExist(ValueSource vs) { + assertExists(ALL_EXIST_VS, vs); + } + /** + * Asserts that for every doc, the {@link FunctionValues#exists} value + * from the {@link ValueSource} is false. + */ + void assertNoneExist(ValueSource vs) { + assertExists(NONE_EXIST_VS, vs); + } + /** + * Asserts that for every doc, the {@link FunctionValues#exists} value from the + * actual {@link ValueSource} matches the {@link FunctionValues#exists} + * value from the expected {@link ValueSource} + */ + void assertExists(ValueSource expected, ValueSource actual) { + Map context = ValueSource.newContext(searcher); + try { + expected.createWeight(context, searcher); + actual.createWeight(context, searcher); + + for (org.apache.lucene.index.LeafReaderContext leaf : reader.leaves()) { + final FunctionValues expectedVals = expected.getValues(context, leaf); + final FunctionValues actualVals = actual.getValues(context, leaf); + + String msg = expected.toString() + " ?= " + actual.toString() + " -> "; + for (int i = 0; i < leaf.reader().maxDoc(); ++i) { + assertEquals(msg + i, expectedVals.exists(i), actualVals.exists(i)); + } + } + } catch (IOException e) { + throw new RuntimeException(actual.toString(), e); + } + } + void assertHits(Query q, float scores[]) throws Exception { ScoreDoc expected[] = new ScoreDoc[scores.length]; int expectedDocs[] = new int[scores.length]; @@ -368,4 +575,54 @@ public class TestValueSources extends LuceneTestCase { CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs); CheckHits.checkExplanations(q, "", searcher); } + + /** + * Simple test value source that implements {@link FunctionValues#exists} as a constant + * @see #ALL_EXIST_VS + * @see #NONE_EXIST_VS + */ + private static final class ExistsValueSource extends ValueSource { + + final boolean expected; + final int value; + + public ExistsValueSource(boolean expected) { + this.expected = expected; + this.value = expected ? 1 : 0; + } + + @Override + public boolean equals(Object o) { + return o == this; + } + + @Override + public int hashCode() { + return value; + } + + @Override + public String description() { + return expected ? "ALL_EXIST" : "NONE_EXIST"; + } + + @Override + public FunctionValues getValues(Map context, LeafReaderContext readerContext) { + return new FloatDocValues(this) { + @Override + public float floatVal(int doc) { + return (float)value; + } + @Override + public boolean exists(int doc) { + return expected; + } + }; + } + }; + + /** @see ExistsValueSource */ + private static final ValueSource ALL_EXIST_VS = new ExistsValueSource(true); + /** @see ExistsValueSource */ + private static final ValueSource NONE_EXIST_VS = new ExistsValueSource(false); } diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bfb1c0707e7..e7bf18ce0ae 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -94,6 +94,12 @@ Upgrading from Solr 4.x If you need the old functionality back , please add an extra parameter f=/** example: /update/json/docs?f=/** +* Bugs fixed in several ValueSource functions may result in different behavior in + situations where some documents do not have values for fields wrapped in other value + sources. Users who want to preserve the previous behavior may need to wrap fields + in the "def()" function. Example: changing "fl=sum(fieldA,fieldB)" to + "fl=sum(def(fieldA,0.0),def(fieldB,0.0))". See LUCENE-5961 for more details. + Detailed Change List ---------------------- diff --git a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java index 0c601e07238..0381a3c1c9e 100644 --- a/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java +++ b/solr/core/src/test/org/apache/solr/handler/component/StatsComponentTest.java @@ -469,10 +469,9 @@ public class StatsComponentTest extends AbstractSolrTestCase { "{!type=func key="+key+" ex=key_ex_tag}field("+f+")", "{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}", "{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}", - - // identity math functions don't work as expected due to LUCENE-5961 - // "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}", - // "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}", + // identity math functions + "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}", + "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}", }) { assertQ("test statistics over field specified as a function: " + param, @@ -555,10 +554,9 @@ public class StatsComponentTest extends AbstractSolrTestCase { "{!type=func key="+f+" ex=key_ex_tag}field("+f+")", "{!type=func key="+f+" ex=key_ex_tag v=field("+f+")}", "{!type=func key="+f+" ex=key_ex_tag v='field("+f+")'}", - - // identity math functions don't work as expected due to LUCENE-5961 - // "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}", - // "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}", + // identity math functions + "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}", + "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}", }) { assertQ("test statis & stats.facet over field specified as a function: " + param, req("q", "*:*", "stats", "true", "stats.calcdistinct", "true", diff --git a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java index 9755390e22c..cfe0d07aa37 100644 --- a/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java +++ b/solr/core/src/test/org/apache/solr/search/function/TestFunctionQuery.java @@ -731,12 +731,17 @@ public class TestFunctionQuery extends SolrTestCaseJ4 { @Test public void testPseudoFieldFunctions() throws Exception { - assertU(adoc("id", "1", "text", "hello", "foo_s","A")); + assertU(adoc("id", "1", "text", "hello", "foo_s","A", "yak_i", "32")); assertU(adoc("id", "2")); assertU(commit()); - assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s") // if exists() is false, no pseudo-field should be added - , "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}"); + // if exists() is false, no pseudo-field should be added + assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s") + , "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}"); + assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,bog_i),b:mul(yak_i,bog_i),c:min(yak_i,bog_i)") + , "/response/docs/[0]=={ 'c':32.0 }"); + assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,def(bog_i,42)), b:max(yak_i,bog_i)") + , "/response/docs/[0]=={ 'a': 74.0, 'b':32.0 }"); } public void testMissingFieldFunctionBehavior() throws Exception {