LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to behave properly when wrapping other ValueSources which do not exist for the specified document

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1632414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-10-16 19:05:20 +00:00
parent 6697abbb28
commit d8146460f0
15 changed files with 490 additions and 130 deletions

View File

@ -184,6 +184,10 @@ Bug Fixes
* LUCENE-5980: Don't let document length overflow. (Robert Muir) * LUCENE-5980: Don't let document length overflow. (Robert Muir)
* LUCENE-5961: Fix the exists() method for FunctionValues returned by many ValueSoures to
behave properly when wrapping other ValueSources which do not exist for the specified document
(hossman)
Documentation Documentation
* LUCENE-5392: Add/improve analysis package documentation to reflect * LUCENE-5392: Add/improve analysis package documentation to reflect

View File

@ -6,3 +6,11 @@ The API of oal.document was restructured to differentiate between stored
documents and indexed documents. IndexReader.document(int) now returns documents and indexed documents. IndexReader.document(int) now returns
StoredDocument instead of Document. In most cases a simple replacement StoredDocument instead of Document. In most cases a simple replacement
of the return type is enough to upgrade. of the return type is enough to upgrade.
## FunctionValues.exist() Behavior Changes due to ValueSource bug fixes (LUCENE-5961)
Bugs fixed in several ValueSource functions may result in different behavior in
situations where some documents do not have values for fields wrapped in other
ValueSources. Users who want to preserve the previous behavior may need to wrap
their ValueSources in a "DefFunction" along with a ConstValueSource of "0.0".

View File

@ -60,7 +60,14 @@ public abstract class DualFloatFunction extends ValueSource {
public float floatVal(int doc) { public float floatVal(int doc) {
return func(doc, aVals, bVals); return func(doc, aVals, bVals);
} }
/**
* True if and only if <em>all</em> of the wrapped {@link FunctionValues}
* <code>exists</code> for the specified doc
*/
@Override
public boolean exists(int doc) {
return MultiFunction.allExists(doc, aVals, bVals);
}
@Override @Override
public String toString(int doc) { public String toString(int doc) {
return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')'; return name() + '(' + aVals.toString(doc) + ',' + bVals.toString(doc) + ')';

View File

@ -102,7 +102,7 @@ public class IfFunction extends BoolFunction {
@Override @Override
public boolean exists(int doc) { public boolean exists(int doc) {
return true; // TODO: flow through to any sub-sources? return ifVals.boolVal(doc) ? trueVals.exists(doc) : falseVals.exists(doc);
} }
@Override @Override

View File

@ -59,6 +59,10 @@ public class LinearFloatFunction extends ValueSource {
return vals.floatVal(doc) * slope + intercept; return vals.floatVal(doc) * slope + intercept;
} }
@Override @Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override
public String toString(int doc) { public String toString(int doc) {
return slope + "*float(" + vals.toString(doc) + ")+" + intercept; return slope + "*float(" + vals.toString(doc) + ")+" + intercept;
} }

View File

@ -35,11 +35,25 @@ public class MaxFloatFunction extends MultiFloatFunction {
@Override @Override
protected float func(int doc, FunctionValues[] valsArr) { protected float func(int doc, FunctionValues[] valsArr) {
if (valsArr.length == 0) return 0.0f; if ( ! exists(doc, valsArr) ) return 0.0f;
float val = Float.NEGATIVE_INFINITY; float val = Float.NEGATIVE_INFINITY;
for (FunctionValues vals : valsArr) { for (FunctionValues vals : valsArr) {
val = Math.max(vals.floatVal(doc), val); if (vals.exists(doc)) {
val = Math.max(vals.floatVal(doc), val);
}
} }
return val; return val;
} }
/**
* True if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see MultiFunction#anyExists
*/
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.anyExists(doc, valsArr);
}
} }

View File

@ -35,11 +35,26 @@ public class MinFloatFunction extends MultiFloatFunction {
@Override @Override
protected float func(int doc, FunctionValues[] valsArr) { protected float func(int doc, FunctionValues[] valsArr) {
if (valsArr.length == 0) return 0.0f; if ( ! exists(doc, valsArr) ) return 0.0f;
float val = Float.POSITIVE_INFINITY; float val = Float.POSITIVE_INFINITY;
for (FunctionValues vals : valsArr) { for (FunctionValues vals : valsArr) {
val = Math.min(vals.floatVal(doc), val); if (vals.exists(doc)) {
val = Math.min(vals.floatVal(doc), val);
}
} }
return val; return val;
} }
/**
* True if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see MultiFunction#anyExists
*/
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.anyExists(doc, valsArr);
}
} }

View File

@ -40,6 +40,18 @@ public abstract class MultiFloatFunction extends ValueSource {
abstract protected String name(); abstract protected String name();
abstract protected float func(int doc, FunctionValues[] valsArr); abstract protected float func(int doc, FunctionValues[] valsArr);
/**
* Called by {@link FunctionValues#exists} for each document.
*
* Default impl returns true if <em>all</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*
* @see FunctionValues#exists
* @see MultiFunction#allExists
*/
protected boolean exists(int doc, FunctionValues[] valsArr) {
return MultiFunction.allExists(doc, valsArr);
}
@Override @Override
public String description() { public String description() {
@ -70,21 +82,12 @@ public abstract class MultiFloatFunction extends ValueSource {
public float floatVal(int doc) { public float floatVal(int doc) {
return func(doc, valsArr); return func(doc, valsArr);
} }
@Override public boolean exists(int doc) {
return MultiFloatFunction.this.exists(doc, valsArr);
}
@Override
public String toString(int doc) { public String toString(int doc) {
StringBuilder sb = new StringBuilder(); return MultiFunction.toString(name(), valsArr, doc);
sb.append(name()).append('(');
boolean firstTime=true;
for (FunctionValues vals : valsArr) {
if (firstTime) {
firstTime=false;
} else {
sb.append(',');
}
sb.append(vals.toString(doc));
}
sb.append(')');
return sb.toString();
} }
}; };
} }

View File

@ -43,6 +43,35 @@ public abstract class MultiFunction extends ValueSource {
return description(name(), sources); return description(name(), sources);
} }
/**
* Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues}
*
* @return true if <em>all</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*/
public static boolean allExists(int doc, FunctionValues... values) {
for (FunctionValues v : values) {
if ( ! v.exists(doc) ) {
return false;
}
}
return true;
}
/**
* Helper utility for {@link FunctionValues} wrapping multiple {@link FunctionValues}
*
* @return true if <em>any</em> of the specified <code>values</code>
* {@link FunctionValues#exists} for the specified doc, else false.
*/
public static boolean anyExists(int doc, FunctionValues... values) {
for (FunctionValues v : values) {
if ( v.exists(doc) ) {
return true;
}
}
return false;
}
public static String description(String name, List<ValueSource> sources) { public static String description(String name, List<ValueSource> sources) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
sb.append(name).append('('); sb.append(name).append('(');

View File

@ -69,6 +69,10 @@ public class ReciprocalFloatFunction extends ValueSource {
return a/(m*vals.floatVal(doc) + b); return a/(m*vals.floatVal(doc) + b);
} }
@Override @Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override
public String toString(int doc) { public String toString(int doc) {
return Float.toString(a) + "/(" return Float.toString(a) + "/("
+ m + "*float(" + vals.toString(doc) + ')' + m + "*float(" + vals.toString(doc) + ')'

View File

@ -70,20 +70,22 @@ public class ScaleFloatFunction extends ValueSource {
int maxDoc = leaf.reader().maxDoc(); int maxDoc = leaf.reader().maxDoc();
FunctionValues vals = source.getValues(context, leaf); FunctionValues vals = source.getValues(context, leaf);
for (int i=0; i<maxDoc; i++) { for (int i=0; i<maxDoc; i++) {
if ( ! vals.exists(i) ) {
float val = vals.floatVal(i); continue;
if ((Float.floatToRawIntBits(val) & (0xff<<23)) == 0xff<<23) { }
// if the exponent in the float is all ones, then this is +Inf, -Inf or NaN float val = vals.floatVal(i);
// which don't make sense to factor into the scale function if ((Float.floatToRawIntBits(val) & (0xff<<23)) == 0xff<<23) {
continue; // if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
// which don't make sense to factor into the scale function
continue;
}
if (val < minVal) {
minVal = val;
}
if (val > maxVal) {
maxVal = val;
}
} }
if (val < minVal) {
minVal = val;
}
if (val > maxVal) {
maxVal = val;
}
}
} }
if (minVal == Float.POSITIVE_INFINITY) { if (minVal == Float.POSITIVE_INFINITY) {
@ -113,6 +115,10 @@ public class ScaleFloatFunction extends ValueSource {
final FunctionValues vals = source.getValues(context, readerContext); final FunctionValues vals = source.getValues(context, readerContext);
return new FloatDocValues(this) { return new FloatDocValues(this) {
@Override
public boolean exists(int doc) {
return vals.exists(doc);
}
@Override @Override
public float floatVal(int doc) { public float floatVal(int doc) {
return (vals.floatVal(doc) - minSource) * scale + min; return (vals.floatVal(doc) - minSource) * scale + min;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.queries.function;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Map;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -31,9 +33,12 @@ import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField; import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.queries.function.docvalues.FloatDocValues;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource; import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.queries.function.valuesource.ConstValueSource; import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.queries.function.valuesource.DivFloatFunction; import org.apache.lucene.queries.function.valuesource.DivFloatFunction;
@ -51,6 +56,7 @@ import org.apache.lucene.queries.function.valuesource.LongFieldSource;
import org.apache.lucene.queries.function.valuesource.MaxDocValueSource; import org.apache.lucene.queries.function.valuesource.MaxDocValueSource;
import org.apache.lucene.queries.function.valuesource.MaxFloatFunction; import org.apache.lucene.queries.function.valuesource.MaxFloatFunction;
import org.apache.lucene.queries.function.valuesource.MinFloatFunction; import org.apache.lucene.queries.function.valuesource.MinFloatFunction;
import org.apache.lucene.queries.function.valuesource.MultiFloatFunction;
import org.apache.lucene.queries.function.valuesource.NormValueSource; import org.apache.lucene.queries.function.valuesource.NormValueSource;
import org.apache.lucene.queries.function.valuesource.NumDocsValueSource; import org.apache.lucene.queries.function.valuesource.NumDocsValueSource;
import org.apache.lucene.queries.function.valuesource.PowFloatFunction; import org.apache.lucene.queries.function.valuesource.PowFloatFunction;
@ -71,6 +77,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
@ -88,6 +95,11 @@ public class TestValueSources extends LuceneTestCase {
static IndexReader reader; static IndexReader reader;
static IndexSearcher searcher; static IndexSearcher searcher;
static final ValueSource BOGUS_FLOAT_VS = new FloatFieldSource("bogus_field");
static final ValueSource BOGUS_DOUBLE_VS = new DoubleFieldSource("bogus_field");
static final ValueSource BOGUS_INT_VS = new IntFieldSource("bogus_field");
static final ValueSource BOGUS_LONG_VS = new LongFieldSource("bogus_field");
static final List<String[]> documents = Arrays.asList(new String[][] { static final List<String[]> documents = Arrays.asList(new String[][] {
/* id, double, float, int, long, string, text */ /* id, double, float, int, long, string, text */
new String[] { "0", "3.63", "5.2", "35", "4343", "test", "this is a test test test" }, new String[] { "0", "3.63", "5.2", "35", "4343", "test", "this is a test test test" },
@ -160,100 +172,165 @@ public class TestValueSources extends LuceneTestCase {
} }
public void testConst() throws Exception { public void testConst() throws Exception {
assertHits(new FunctionQuery(new ConstValueSource(0.3f)), ValueSource vs = new ConstValueSource(0.3f);
new float[] { 0.3f, 0.3f }); assertHits(new FunctionQuery(vs),
new float[] { 0.3f, 0.3f });
assertAllExist(vs);
} }
public void testDiv() throws Exception { public void testDiv() throws Exception {
assertHits(new FunctionQuery(new DivFloatFunction( ValueSource vs = new DivFloatFunction(new ConstValueSource(10f), new ConstValueSource(5f));
new ConstValueSource(10f), new ConstValueSource(5f))), assertHits(new FunctionQuery(vs),
new float[] { 2f, 2f }); new float[] { 2f, 2f });
assertAllExist(vs);
vs = new DivFloatFunction(new ConstValueSource(10f), BOGUS_FLOAT_VS);
assertNoneExist(vs);
vs = new DivFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(10f));
assertNoneExist(vs);
} }
public void testDocFreq() throws Exception { public void testDocFreq() throws Exception {
assertHits(new FunctionQuery( ValueSource vs = new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
new DocFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
new float[] { 2f, 2f }); assertAllExist(vs);
} }
public void testDoubleConst() throws Exception { public void testDoubleConst() throws Exception {
assertHits(new FunctionQuery(new DoubleConstValueSource(0.3d)), ValueSource vs = new DoubleConstValueSource(0.3d);
new float[] { 0.3f, 0.3f }); assertHits(new FunctionQuery(vs), new float[] { 0.3f, 0.3f });
assertAllExist(vs);
} }
public void testDouble() throws Exception { public void testDouble() throws Exception {
assertHits(new FunctionQuery(new DoubleFieldSource("double")), ValueSource vs = new DoubleFieldSource("double");
new float[] { 3.63f, 5.65f }); assertHits(new FunctionQuery(vs), new float[] { 3.63f, 5.65f });
assertAllExist(vs);
assertNoneExist(BOGUS_DOUBLE_VS);
} }
public void testFloat() throws Exception { public void testFloat() throws Exception {
assertHits(new FunctionQuery(new FloatFieldSource("float")), ValueSource vs = new FloatFieldSource("float");
new float[] { 5.2f, 9.3f }); assertHits(new FunctionQuery(vs), new float[] { 5.2f, 9.3f });
assertAllExist(vs);
assertNoneExist(BOGUS_FLOAT_VS);
} }
public void testIDF() throws Exception { public void testIDF() throws Exception {
Similarity saved = searcher.getSimilarity(); Similarity saved = searcher.getSimilarity();
try { try {
searcher.setSimilarity(new DefaultSimilarity()); searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery( ValueSource vs = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"))), assertHits(new FunctionQuery(vs), new float[] { 0.5945349f, 0.5945349f });
new float[] { 0.5945349f, 0.5945349f }); assertAllExist(vs);
} finally { } finally {
searcher.setSimilarity(saved); searcher.setSimilarity(saved);
} }
} }
public void testIf() throws Exception { public void testIf() throws Exception {
assertHits(new FunctionQuery(new IfFunction( ValueSource vs = new IfFunction(new BytesRefFieldSource("id"),
new BytesRefFieldSource("id"), new ConstValueSource(1.0f), // match
new ConstValueSource(1.0f), new ConstValueSource(2.0f));
new ConstValueSource(2.0f) assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
)), assertAllExist(vs);
new float[] { 1f, 1f });
// true just if a value exists... // true just if a test value exists...
assertHits(new FunctionQuery(new IfFunction( vs = new IfFunction(new LiteralValueSource("false"),
new LiteralValueSource("false"), new ConstValueSource(1.0f), // match
new ConstValueSource(1.0f), new ConstValueSource(2.0f));
new ConstValueSource(2.0f) assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
)), assertAllExist(vs);
new float[] { 1f, 1f });
// false value if tests value does not exist
vs = new IfFunction(BOGUS_FLOAT_VS,
new ConstValueSource(1.0f),
new ConstValueSource(2.0f)); // match
assertHits(new FunctionQuery(vs), new float[] { 2F, 2F });
assertAllExist(vs);
// final value may still not exist
vs = new IfFunction(new BytesRefFieldSource("id"),
BOGUS_FLOAT_VS, // match
new ConstValueSource(1.0f));
assertNoneExist(vs);
} }
public void testInt() throws Exception { public void testInt() throws Exception {
assertHits(new FunctionQuery(new IntFieldSource("int")), ValueSource vs = new IntFieldSource("int");
new float[] { 35f, 54f }); assertHits(new FunctionQuery(vs), new float[] { 35f, 54f });
assertAllExist(vs);
assertNoneExist(BOGUS_INT_VS);
} }
public void testJoinDocFreq() throws Exception { public void testJoinDocFreq() throws Exception {
assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")), assertHits(new FunctionQuery(new JoinDocFreqValueSource("string", "text")),
new float[] { 2f, 0f }); new float[] { 2f, 0f });
// TODO: what *should* the rules be for exist() ?
} }
public void testLinearFloat() throws Exception { public void testLinearFloat() throws Exception {
assertHits(new FunctionQuery(new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1)), ValueSource vs = new LinearFloatFunction(new ConstValueSource(2.0f), 3, 1);
new float[] { 7f, 7f }); assertHits(new FunctionQuery(vs), new float[] { 7f, 7f });
assertAllExist(vs);
vs = new LinearFloatFunction(BOGUS_FLOAT_VS, 3, 1);
assertNoneExist(vs);
} }
public void testLong() throws Exception { public void testLong() throws Exception {
assertHits(new FunctionQuery(new LongFieldSource("long")), ValueSource vs = new LongFieldSource("long");
new float[] { 4343f, 1954f }); assertHits(new FunctionQuery(vs), new float[] { 4343f, 1954f });
assertAllExist(vs);
assertNoneExist(BOGUS_LONG_VS);
} }
public void testMaxDoc() throws Exception { public void testMaxDoc() throws Exception {
assertHits(new FunctionQuery(new MaxDocValueSource()), ValueSource vs = new MaxDocValueSource();
new float[] { 2f, 2f }); assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
} }
public void testMaxFloat() throws Exception { public void testMaxFloat() throws Exception {
assertHits(new FunctionQuery(new MaxFloatFunction(new ValueSource[] { ValueSource vs = new MaxFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})), new ConstValueSource(1f), new ConstValueSource(2f)});
new float[] { 2f, 2f });
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
// as long as one value exists, then max exists
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F)});
assertAllExist(vs);
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS});
assertAllExist(vs);
// if none exist, then max doesn't exist
vs = new MaxFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS});
assertNoneExist(vs);
} }
public void testMinFloat() throws Exception { public void testMinFloat() throws Exception {
assertHits(new FunctionQuery(new MinFloatFunction(new ValueSource[] { ValueSource vs = new MinFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})), new ConstValueSource(1f), new ConstValueSource(2f)});
new float[] { 1f, 1f });
assertHits(new FunctionQuery(vs), new float[] { 1f, 1f });
assertAllExist(vs);
// as long as one value exists, then min exists
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F)});
assertHits(new FunctionQuery(vs), new float[] { 2F, 2F });
assertAllExist(vs);
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2F), BOGUS_DOUBLE_VS});
assertAllExist(vs);
// if none exist, then min doesn't exist
vs = new MinFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, BOGUS_INT_VS, BOGUS_DOUBLE_VS});
assertNoneExist(vs);
} }
public void testNorm() throws Exception { public void testNorm() throws Exception {
@ -261,35 +338,95 @@ public class TestValueSources extends LuceneTestCase {
try { try {
// no norm field (so agnostic to indexed similarity) // no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new DefaultSimilarity()); searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery( ValueSource vs = new NormValueSource("byte");
new NormValueSource("byte")), assertHits(new FunctionQuery(vs), new float[] { 0f, 0f });
new float[] { 0f, 0f });
// regardless of wether norms exist, value source exists == 0
assertAllExist(vs);
vs = new NormValueSource("text");
assertAllExist(vs);
} finally { } finally {
searcher.setSimilarity(saved); searcher.setSimilarity(saved);
} }
} }
public void testNumDocs() throws Exception { public void testNumDocs() throws Exception {
assertHits(new FunctionQuery(new NumDocsValueSource()), ValueSource vs = new NumDocsValueSource();
new float[] { 2f, 2f }); assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
} }
public void testPow() throws Exception { public void testPow() throws Exception {
assertHits(new FunctionQuery(new PowFloatFunction( ValueSource vs = new PowFloatFunction(new ConstValueSource(2f), new ConstValueSource(3f));
new ConstValueSource(2f), new ConstValueSource(3f))), assertHits(new FunctionQuery(vs), new float[] { 8f, 8f });
new float[] { 8f, 8f }); assertAllExist(vs);
vs = new PowFloatFunction(BOGUS_FLOAT_VS, new ConstValueSource(3f));
assertNoneExist(vs);
vs = new PowFloatFunction(new ConstValueSource(3f), BOGUS_FLOAT_VS);
assertNoneExist(vs);
} }
public void testProduct() throws Exception { public void testProduct() throws Exception {
assertHits(new FunctionQuery(new ProductFloatFunction(new ValueSource[] { ValueSource vs = new ProductFloatFunction(new ValueSource[] {
new ConstValueSource(2f), new ConstValueSource(3f)})), new ConstValueSource(2f), new ConstValueSource(3f)});
new float[] { 6f, 6f }); assertHits(new FunctionQuery(vs), new float[] { 6f, 6f });
assertAllExist(vs);
vs = new ProductFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(3f)});
assertNoneExist(vs);
}
public void testQueryWrapedFuncWrapedQuery() throws Exception {
ValueSource vs = new QueryValueSource(new FunctionQuery(new ConstValueSource(2f)), 0f);
assertHits(new FunctionQuery(vs), new float[] { 2f, 2f });
assertAllExist(vs);
} }
public void testQuery() throws Exception { public void testQuery() throws Exception {
assertHits(new FunctionQuery(new QueryValueSource( Similarity saved = searcher.getSimilarity();
new FunctionQuery(new ConstValueSource(2f)), 0f)),
new float[] { 2f, 2f }); try {
searcher.setSimilarity(new DefaultSimilarity());
ValueSource vs = new QueryValueSource(new TermQuery(new Term("string","bar")), 42F);
assertHits(new FunctionQuery(vs), new float[] { 42F, 1F });
// valuesource should exist only for things matching the term query
// sanity check via quick & dirty wrapper arround tf
ValueSource expected = new MultiFloatFunction(new ValueSource[] {
new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))}) {
@Override
protected String name() { return "tf_based_exists"; }
@Override
protected float func(int doc, FunctionValues[] valsArr) {
return valsArr[0].floatVal(doc);
}
@Override
protected boolean exists(int doc, FunctionValues[] valsArr) {
// if tf > 0, then it should exist
return 0 < func(doc, valsArr);
}
};
assertExists(expected, vs);
// Query matches all docs, func exists for all docs
vs = new QueryValueSource(new TermQuery(new Term("text","test")), 0F);
assertAllExist(vs);
// Query matches no docs, func exists for no docs
vs = new QueryValueSource(new TermQuery(new Term("bogus","does not exist")), 0F);
assertNoneExist(vs);
} finally {
searcher.setSimilarity(saved);
}
} }
public void testRangeMap() throws Exception { public void testRangeMap() throws Exception {
@ -300,37 +437,59 @@ public class TestValueSources extends LuceneTestCase {
5, 6, new SumFloatFunction(new ValueSource[] {new ConstValueSource(1f), new ConstValueSource(2f)}), 5, 6, new SumFloatFunction(new ValueSource[] {new ConstValueSource(1f), new ConstValueSource(2f)}),
new ConstValueSource(11f))), new ConstValueSource(11f))),
new float[] { 3f, 11f }); new float[] { 3f, 11f });
// TODO: what *should* the rules be for exist() ?
// ((source exists && source in range && target exists) OR (source not in range && default exists)) ?
} }
public void testReciprocal() throws Exception { public void testReciprocal() throws Exception {
assertHits(new FunctionQuery(new ReciprocalFloatFunction(new ConstValueSource(2f), ValueSource vs = new ReciprocalFloatFunction(new ConstValueSource(2f), 3, 1, 4);
3, 1, 4)), assertHits(new FunctionQuery(vs), new float[] { 0.1f, 0.1f });
new float[] { 0.1f, 0.1f }); assertAllExist(vs);
vs = new ReciprocalFloatFunction(BOGUS_FLOAT_VS, 3, 1, 4);
assertNoneExist(vs);
} }
public void testScale() throws Exception { public void testScale() throws Exception {
assertHits(new FunctionQuery(new ScaleFloatFunction(new IntFieldSource("int"), 0, 1)), ValueSource vs = new ScaleFloatFunction(new IntFieldSource("int"), 0, 1);
new float[] { 0.0f, 1.0f }); assertHits(new FunctionQuery(vs), new float[] { 0.0f, 1.0f });
assertAllExist(vs);
vs = new ScaleFloatFunction(BOGUS_INT_VS, 0, 1);
assertNoneExist(vs);
} }
public void testSumFloat() throws Exception { public void testSumFloat() throws Exception {
assertHits(new FunctionQuery(new SumFloatFunction(new ValueSource[] { ValueSource vs = new SumFloatFunction(new ValueSource[] {
new ConstValueSource(1f), new ConstValueSource(2f)})), new ConstValueSource(1f), new ConstValueSource(2f)});
new float[] { 3f, 3f }); assertHits(new FunctionQuery(vs), new float[] { 3f, 3f });
assertAllExist(vs);
vs = new SumFloatFunction(new ValueSource[] {
BOGUS_FLOAT_VS, new ConstValueSource(2f)});
assertNoneExist(vs);
} }
public void testSumTotalTermFreq() throws Exception { public void testSumTotalTermFreq() throws Exception {
assertHits(new FunctionQuery(new SumTotalTermFreqValueSource("text")), ValueSource vs = new SumTotalTermFreqValueSource("text");
new float[] { 8f, 8f }); assertHits(new FunctionQuery(vs), new float[] { 8f, 8f });
assertAllExist(vs);
} }
public void testTermFreq() throws Exception { public void testTermFreq() throws Exception {
assertHits(new FunctionQuery( ValueSource vs = new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
new TermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), assertHits(new FunctionQuery(vs), new float[] { 3f, 1f });
new float[] { 3f, 1f }); assertAllExist(vs);
assertHits(new FunctionQuery(
new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar"))), vs = new TermFreqValueSource("bogus", "bogus", "string", new BytesRef("bar"));
new float[] { 0f, 1f }); assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
assertAllExist(vs);
// regardless of wether norms exist, value source exists == 0
vs = new TermFreqValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
assertAllExist(vs);
} }
public void testTF() throws Exception { public void testTF() throws Exception {
@ -338,21 +497,69 @@ public class TestValueSources extends LuceneTestCase {
try { try {
// no norm field (so agnostic to indexed similarity) // no norm field (so agnostic to indexed similarity)
searcher.setSimilarity(new DefaultSimilarity()); searcher.setSimilarity(new DefaultSimilarity());
assertHits(new FunctionQuery(
new TFValueSource("bogus", "bogus", "text", new BytesRef("test"))), ValueSource vs = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) }); assertHits(new FunctionQuery(vs),
assertHits(new FunctionQuery( new float[] { (float)Math.sqrt(3d), (float)Math.sqrt(1d) });
new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"))), assertAllExist(vs);
new float[] { 0f, 1f });
vs = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
assertHits(new FunctionQuery(vs), new float[] { 0f, 1f });
assertAllExist(vs);
// regardless of wether norms exist, value source exists == 0
vs = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
assertHits(new FunctionQuery(vs), new float[] { 0F, 0F });
assertAllExist(vs);
} finally { } finally {
searcher.setSimilarity(saved); searcher.setSimilarity(saved);
} }
} }
public void testTotalTermFreq() throws Exception { public void testTotalTermFreq() throws Exception {
assertHits(new FunctionQuery( ValueSource vs = new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"));
new TotalTermFreqValueSource("bogus", "bogus", "text", new BytesRef("test"))), assertHits(new FunctionQuery(vs), new float[] { 4f, 4f });
new float[] { 4f, 4f }); assertAllExist(vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value
* from the {@link ValueSource} is <b>true</b>.
*/
void assertAllExist(ValueSource vs) {
assertExists(ALL_EXIST_VS, vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value
* from the {@link ValueSource} is <b>false</b>.
*/
void assertNoneExist(ValueSource vs) {
assertExists(NONE_EXIST_VS, vs);
}
/**
* Asserts that for every doc, the {@link FunctionValues#exists} value from the
* <code>actual</code> {@link ValueSource} matches the {@link FunctionValues#exists}
* value from the <code>expected</code> {@link ValueSource}
*/
void assertExists(ValueSource expected, ValueSource actual) {
Map context = ValueSource.newContext(searcher);
try {
expected.createWeight(context, searcher);
actual.createWeight(context, searcher);
for (org.apache.lucene.index.LeafReaderContext leaf : reader.leaves()) {
final FunctionValues expectedVals = expected.getValues(context, leaf);
final FunctionValues actualVals = actual.getValues(context, leaf);
String msg = expected.toString() + " ?= " + actual.toString() + " -> ";
for (int i = 0; i < leaf.reader().maxDoc(); ++i) {
assertEquals(msg + i, expectedVals.exists(i), actualVals.exists(i));
}
}
} catch (IOException e) {
throw new RuntimeException(actual.toString(), e);
}
} }
void assertHits(Query q, float scores[]) throws Exception { void assertHits(Query q, float scores[]) throws Exception {
@ -368,4 +575,54 @@ public class TestValueSources extends LuceneTestCase {
CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs); CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs);
CheckHits.checkExplanations(q, "", searcher); CheckHits.checkExplanations(q, "", searcher);
} }
/**
* Simple test value source that implements {@link FunctionValues#exists} as a constant
* @see #ALL_EXIST_VS
* @see #NONE_EXIST_VS
*/
private static final class ExistsValueSource extends ValueSource {
final boolean expected;
final int value;
public ExistsValueSource(boolean expected) {
this.expected = expected;
this.value = expected ? 1 : 0;
}
@Override
public boolean equals(Object o) {
return o == this;
}
@Override
public int hashCode() {
return value;
}
@Override
public String description() {
return expected ? "ALL_EXIST" : "NONE_EXIST";
}
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) {
return new FloatDocValues(this) {
@Override
public float floatVal(int doc) {
return (float)value;
}
@Override
public boolean exists(int doc) {
return expected;
}
};
}
};
/** @see ExistsValueSource */
private static final ValueSource ALL_EXIST_VS = new ExistsValueSource(true);
/** @see ExistsValueSource */
private static final ValueSource NONE_EXIST_VS = new ExistsValueSource(false);
} }

View File

@ -94,6 +94,12 @@ Upgrading from Solr 4.x
If you need the old functionality back , please add an extra parameter f=/** If you need the old functionality back , please add an extra parameter f=/**
example: /update/json/docs?f=/** example: /update/json/docs?f=/**
* Bugs fixed in several ValueSource functions may result in different behavior in
situations where some documents do not have values for fields wrapped in other value
sources. Users who want to preserve the previous behavior may need to wrap fields
in the "def()" function. Example: changing "fl=sum(fieldA,fieldB)" to
"fl=sum(def(fieldA,0.0),def(fieldB,0.0))". See LUCENE-5961 for more details.
Detailed Change List Detailed Change List
---------------------- ----------------------

View File

@ -469,10 +469,9 @@ public class StatsComponentTest extends AbstractSolrTestCase {
"{!type=func key="+key+" ex=key_ex_tag}field("+f+")", "{!type=func key="+key+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}", "{!type=func key="+key+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}", "{!type=func key="+key+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions
// identity math functions don't work as expected due to LUCENE-5961 "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}",
// "{!type=func key="+key+" ex=key_ex_tag v='sum(0,"+f+")'}", "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
// "{!type=func key="+key+" ex=key_ex_tag v='product(1,"+f+")'}",
}) { }) {
assertQ("test statistics over field specified as a function: " + param, assertQ("test statistics over field specified as a function: " + param,
@ -555,10 +554,9 @@ public class StatsComponentTest extends AbstractSolrTestCase {
"{!type=func key="+f+" ex=key_ex_tag}field("+f+")", "{!type=func key="+f+" ex=key_ex_tag}field("+f+")",
"{!type=func key="+f+" ex=key_ex_tag v=field("+f+")}", "{!type=func key="+f+" ex=key_ex_tag v=field("+f+")}",
"{!type=func key="+f+" ex=key_ex_tag v='field("+f+")'}", "{!type=func key="+f+" ex=key_ex_tag v='field("+f+")'}",
// identity math functions
// identity math functions don't work as expected due to LUCENE-5961 "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}",
// "{!type=func key="+f+" ex=key_ex_tag v='sum(0,"+f+")'}", "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}",
// "{!type=func key="+f+" ex=key_ex_tag v='product(1,"+f+")'}",
}) { }) {
assertQ("test statis & stats.facet over field specified as a function: " + param, assertQ("test statis & stats.facet over field specified as a function: " + param,
req("q", "*:*", "stats", "true", "stats.calcdistinct", "true", req("q", "*:*", "stats", "true", "stats.calcdistinct", "true",

View File

@ -731,12 +731,17 @@ public class TestFunctionQuery extends SolrTestCaseJ4 {
@Test @Test
public void testPseudoFieldFunctions() throws Exception { public void testPseudoFieldFunctions() throws Exception {
assertU(adoc("id", "1", "text", "hello", "foo_s","A")); assertU(adoc("id", "1", "text", "hello", "foo_s","A", "yak_i", "32"));
assertU(adoc("id", "2")); assertU(adoc("id", "2"));
assertU(commit()); assertU(commit());
assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s") // if exists() is false, no pseudo-field should be added // if exists() is false, no pseudo-field should be added
, "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}"); assertJQ(req("q", "id:1", "fl", "a:1,b:2.0,c:'X',d:{!func}foo_s,e:{!func}bar_s")
, "/response/docs/[0]=={'a':1, 'b':2.0,'c':'X','d':'A'}");
assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,bog_i),b:mul(yak_i,bog_i),c:min(yak_i,bog_i)")
, "/response/docs/[0]=={ 'c':32.0 }");
assertJQ(req("q", "id:1", "fl", "a:sum(yak_i,def(bog_i,42)), b:max(yak_i,bog_i)")
, "/response/docs/[0]=={ 'a': 74.0, 'b':32.0 }");
} }
public void testMissingFieldFunctionBehavior() throws Exception { public void testMissingFieldFunctionBehavior() throws Exception {