LUCENE-446: Added Solr's search.function for scores based on field

values, plus CustomScoreQuery for simple score (post) customization.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@544546 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2007-06-05 16:29:35 +00:00
parent db78c85c21
commit 125fed32d8
21 changed files with 2819 additions and 2 deletions

View File

@ -186,6 +186,10 @@ New features
on the remote side of the RMI connection. on the remote side of the RMI connection.
(Matt Ericson via Otis Gospodnetic) (Matt Ericson via Otis Gospodnetic)
8. LUCENE-446: Added Solr's search.function for scores based on field
values, plus CustomScoreQuery for simple score (post) customization.
(Yonik Seeley, Doron Cohen)
Optimizations Optimizations
1. LUCENE-761: The proxStream is now cloned lazily in SegmentTermPositions 1. LUCENE-761: The proxStream is now cloned lazily in SegmentTermPositions

View File

@ -53,6 +53,22 @@ public interface FieldCache {
} }
} }
/** Interface to parse bytes from document fields.
* An implementation converts the textual value of a field into a <code>byte</code>.
* @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
*/
public interface ByteParser {
/** Return a single byte representation of this field's value.
* @param string the field value, as text, to convert.
* @return the <code>byte</code> that <code>string</code> represents.
*/
public byte parseByte(String string);
}
/** Interface to parse shorts from document fields.
* An implementation converts the textual value of a field into a <code>short</code>.
* @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
*/
public interface ShortParser {
/** Return a short representation of this field's value.
* @param string the field value, as text, to convert.
* @return the <code>short</code> that <code>string</code> represents.
*/
public short parseShort(String string);
}
/** Interface to parse ints from document fields. /** Interface to parse ints from document fields.
* @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser) * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
*/ */
@ -72,6 +88,56 @@ public interface FieldCache {
/** Expert: The cache used internally by sorting and range query classes. */ /** Expert: The cache used internally by sorting and range query classes. */
public static FieldCache DEFAULT = new FieldCacheImpl(); public static FieldCache DEFAULT = new FieldCacheImpl();
/** Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as a single byte and returns an array
* of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
* <p>
* This is the default-parser variant of
* {@link #getBytes(IndexReader, String, FieldCache.ByteParser)}; in
* <code>FieldCacheImpl</code> the default parser is <code>Byte.parseByte</code>.
* @param reader Used to get field values.
* @param field Which field contains the single byte values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
* @see #getBytes(IndexReader, String, FieldCache.ByteParser)
*/
public byte[] getBytes (IndexReader reader, String field)
throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as bytes and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
* <p>
* In <code>FieldCacheImpl</code>, a document that has no term in the field
* keeps the array's initial value of <code>0</code>.
* @param reader Used to get field values.
* @param field Which field contains the bytes.
* @param parser Computes byte for string values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
* @see FieldCache.ByteParser
*/
public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as shorts and returns an array
* of size <code>reader.maxDoc()</code> of the value each document
* has in the given field.
* <p>
* This is the default-parser variant of
* {@link #getShorts(IndexReader, String, FieldCache.ShortParser)}; in
* <code>FieldCacheImpl</code> the default parser is <code>Short.parseShort</code>.
* @param reader Used to get field values.
* @param field Which field contains the shorts.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
* @see #getShorts(IndexReader, String, FieldCache.ShortParser)
*/
public short[] getShorts (IndexReader reader, String field)
throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is found,
* reads the terms in <code>field</code> as shorts and returns an array of
* size <code>reader.maxDoc()</code> of the value each document has in the
* given field.
* <p>
* In <code>FieldCacheImpl</code>, a document that has no term in the field
* keeps the array's initial value of <code>0</code>.
* @param reader Used to get field values.
* @param field Which field contains the shorts.
* @param parser Computes short for string values.
* @return The values in the given field for each document.
* @throws IOException If any error occurs.
* @see FieldCache.ShortParser
*/
public short[] getShorts (IndexReader reader, String field, ShortParser parser)
throws IOException;
/** Checks the internal cache for an appropriate entry, and if none is /** Checks the internal cache for an appropriate entry, and if none is
* found, reads the terms in <code>field</code> as integers and returns an array * found, reads the terms in <code>field</code> as integers and returns an array
* of size <code>reader.maxDoc()</code> of the value each document * of size <code>reader.maxDoc()</code> of the value each document

View File

@ -131,18 +131,108 @@ implements FieldCache {
} }
} }
// Default parser used by getBytes(reader, field): delegates to Byte.parseByte,
// so a value that is not a valid byte makes Byte.parseByte throw NumberFormatException.
private static final ByteParser BYTE_PARSER = new ByteParser() {
public byte parseByte(String value) {
return Byte.parseByte(value);
}
};
// Default parser used by getShorts(reader, field): delegates to Short.parseShort,
// so a value that is not a valid short makes Short.parseShort throw NumberFormatException.
private static final ShortParser SHORT_PARSER = new ShortParser() {
public short parseShort(String value) {
return Short.parseShort(value);
}
};
private static final IntParser INT_PARSER = new IntParser() { private static final IntParser INT_PARSER = new IntParser() {
public int parseInt(String value) { public int parseInt(String value) {
return Integer.parseInt(value); return Integer.parseInt(value);
} }
}; };
private static final FloatParser FLOAT_PARSER = new FloatParser() { private static final FloatParser FLOAT_PARSER = new FloatParser() {
public float parseFloat(String value) { public float parseFloat(String value) {
return Float.parseFloat(value); return Float.parseFloat(value);
} }
}; };
// inherit javadocs
// Convenience overload: same as the three-argument getBytes with the default BYTE_PARSER.
public byte[] getBytes (IndexReader reader, String field) throws IOException {
return getBytes(reader, field, BYTE_PARSER);
}
// inherit javadocs
// Asks bytesCache for the array keyed by (field, parser) for this reader.
// NOTE(review): Cache.get is defined outside this view; presumably it calls
// bytesCache.createValue on a miss and returns the cached array otherwise - confirm.
public byte[] getBytes(IndexReader reader, String field, ByteParser parser)
throws IOException {
return (byte[]) bytesCache.get(reader, new Entry(field, parser));
}
Cache bytesCache = new Cache() {

  /**
   * Builds the per-document byte array for the field/parser pair carried by
   * <code>entryKey</code>: every term of the field is parsed once and the
   * result is stored for each document containing that term.
   */
  protected Object createValue(IndexReader reader, Object entryKey)
      throws IOException {
    final Entry key = (Entry) entryKey;
    final String fieldName = key.field;
    final ByteParser byteParser = (ByteParser) key.custom;
    final byte[] values = new byte[reader.maxDoc()];
    TermDocs docs = reader.termDocs();
    TermEnum terms = reader.terms(new Term(fieldName, ""));
    try {
      boolean more = true;
      while (more) {
        final Term current = terms.term();
        // identity comparison on the field name, exactly as the other caches do
        if (current == null || current.field() != fieldName) {
          break;
        }
        final byte parsed = byteParser.parseByte(current.text());
        docs.seek(terms);
        while (docs.next()) {
          values[docs.doc()] = parsed;
        }
        more = terms.next();
      }
    } finally {
      docs.close();
      terms.close();
    }
    return values;
  }
};
// inherit javadocs
// Convenience overload: same as the three-argument getShorts with the default SHORT_PARSER.
public short[] getShorts (IndexReader reader, String field) throws IOException {
return getShorts(reader, field, SHORT_PARSER);
}
// inherit javadocs
// Asks shortsCache for the array keyed by (field, parser) for this reader.
// NOTE(review): Cache.get is defined outside this view; presumably it calls
// shortsCache.createValue on a miss and returns the cached array otherwise - confirm.
public short[] getShorts(IndexReader reader, String field, ShortParser parser)
throws IOException {
return (short[]) shortsCache.get(reader, new Entry(field, parser));
}
Cache shortsCache = new Cache() {

  /**
   * Builds the per-document short array for the field/parser pair carried by
   * <code>entryKey</code>: every term of the field is parsed once and the
   * result is stored for each document containing that term.
   */
  protected Object createValue(IndexReader reader, Object entryKey)
      throws IOException {
    final Entry key = (Entry) entryKey;
    final String fieldName = key.field;
    final ShortParser shortParser = (ShortParser) key.custom;
    final short[] values = new short[reader.maxDoc()];
    TermDocs docs = reader.termDocs();
    TermEnum terms = reader.terms(new Term(fieldName, ""));
    try {
      boolean more = true;
      while (more) {
        final Term current = terms.term();
        // identity comparison on the field name, exactly as the other caches do
        if (current == null || current.field() != fieldName) {
          break;
        }
        final short parsed = shortParser.parseShort(current.text());
        docs.seek(terms);
        while (docs.next()) {
          values[docs.doc()] = parsed;
        }
        more = terms.next();
      }
    } finally {
      docs.close();
      terms.close();
    }
    return values;
  }
};
// inherit javadocs // inherit javadocs
public int[] getInts (IndexReader reader, String field) throws IOException { public int[] getInts (IndexReader reader, String field) throws IOException {
return getInts(reader, field, INT_PARSER); return getInts(reader, field, INT_PARSER);

View File

@ -0,0 +1,105 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.DocValues;
import java.io.IOException;
/**
 * Expert: obtains single byte field values from the
 * {@link org.apache.lucene.search.FieldCache FieldCache}
 * using <code>getBytes()</code> and makes those values
 * available as other numeric types, casting as needed.
 *
 * <p><font color="#FF0000">
 * WARNING: The status of the <b>search.function</b> package is experimental.
 * The APIs introduced here might change in the future and will not be
 * supported anymore in such a case.</font>
 *
 * @see org.apache.lucene.search.function.FieldCacheSource for requirements
 * on the field.
 */
public class ByteFieldSource extends FieldCacheSource {

  /** Optional custom parser; <code>null</code> means "use the FieldCache default". */
  private FieldCache.ByteParser parser;

  /**
   * Create a cached byte field source with default string-to-byte parser.
   */
  public ByteFieldSource(String field) {
    this(field, null);
  }

  /**
   * Create a cached byte field source with a specific string-to-byte parser.
   * @param field name of the field whose values are read.
   * @param parser parser to use, or <code>null</code> for the default one.
   */
  public ByteFieldSource(String field, FieldCache.ByteParser parser) {
    super(field);
    this.parser = parser;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "byte(" + super.description() + ')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
  public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
    // pick the FieldCache overload that matches whether a custom parser was supplied
    final byte[] arr = (parser==null) ?
      cache.getBytes(reader, field) :
      cache.getBytes(reader, field, parser);
    return new DocValues(reader.maxDoc()) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return (float) arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
      public int intVal(int doc) {
        return arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + intVal(doc);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Two byte field sources are equal when both use the default parser, or
   * when both use a custom parser of the same class.
   * A source with a custom parser is never equal to one without; previously
   * that combination threw a NullPointerException.
   * @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource)
   */
  public boolean cachedFieldSourceEquals(FieldCacheSource o) {
    if (o.getClass() != ByteFieldSource.class) {
      return false;
    }
    ByteFieldSource other = (ByteFieldSource)o;
    if (this.parser == null || other.parser == null) {
      // equal only if neither side has a custom parser
      return this.parser == other.parser;
    }
    return this.parser.getClass() == other.parser.getClass();
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
  public int cachedFieldSourceHashCode() {
    return parser==null ?
      Byte.class.hashCode() : parser.getClass().hashCode();
  }

}

View File

@ -0,0 +1,344 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.ToStringUtils;
/**
 * Query that sets document score as a programmatic function of (up to) two (sub) scores.
 * <ol>
 *    <li>the score of its subQuery (any query)</li>
 *    <li>(optional) the score of its ValueSourceQuery,
 *        for most simple/convenient use case this query would be a
 *        {@link org.apache.lucene.search.function.FieldScoreQuery FieldScoreQuery}</li>
 * </ol>
 * Subclasses can modify the computation by overriding {@link #customScore(int, float, float)}.
 *
 * <p><font color="#FF0000">
 * WARNING: The status of the <b>search.function</b> package is experimental.
 * The APIs introduced here might change in the future and will not be
 * supported anymore in such a case.</font>
 */
public class CustomScoreQuery extends Query {

  private Query subQuery;
  private ValueSourceQuery valSrcQuery; // optional, can be null
  private boolean strict = false; // if true, valueSource part of query does not take part in weights normalization.

  /**
   * Create a CustomScoreQuery over input subQuery.
   * @param subQuery the sub query whose score is being customized. Must not be null.
   */
  public CustomScoreQuery(Query subQuery) {
    this(subQuery,null);
  }

  /**
   * Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}.
   * @param subQuery the sub query whose score is being customized. Must not be null.
   * @param valSrcQuery a value source query whose scores are used in the custom score
   * computation. For most simple/convenient use case this would be a
   * {@link org.apache.lucene.search.function.FieldScoreQuery FieldScoreQuery}.
   * This parameter is optional - it can be null.
   * @throws IllegalArgumentException if <code>subQuery</code> is null.
   */
  public CustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
    super();
    this.subQuery = subQuery;
    this.valSrcQuery = valSrcQuery;
    if (subQuery == null) throw new IllegalArgumentException("<subqyery> must not be null!");
  }

  /**
   * Rewrites the sub query and, when present, the value source query.
   * Note that the rewritten parts are stored back into this instance and
   * <code>this</code> is returned.
   * @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader)
   */
  public Query rewrite(IndexReader reader) throws IOException {
    subQuery = subQuery.rewrite(reader);
    if (valSrcQuery!=null) {
      valSrcQuery = (ValueSourceQuery) valSrcQuery.rewrite(reader);
    }
    return this;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.Query#extractTerms(java.util.Set) */
  public void extractTerms(Set terms) {
    subQuery.extractTerms(terms);
    if (valSrcQuery!=null) {
      valSrcQuery.extractTerms(terms);
    }
  }

  /*(non-Javadoc) @see org.apache.lucene.search.Query#clone() */
  public Object clone() {
    CustomScoreQuery clone = (CustomScoreQuery)super.clone();
    clone.subQuery = (Query) subQuery.clone();
    if (valSrcQuery!=null) {
      clone.valSrcQuery = (ValueSourceQuery) valSrcQuery.clone();
    }
    return clone;
  }

  /* (non-Javadoc) @see org.apache.lucene.search.Query#toString(java.lang.String) */
  public String toString(String field) {
    StringBuffer sb = new StringBuffer(name()).append("(");
    sb.append(subQuery.toString(field));
    if (valSrcQuery!=null) {
      sb.append(", ").append(valSrcQuery.toString(field));
    }
    sb.append(")");
    sb.append(strict?" STRICT" : "");
    return sb.toString() + ToStringUtils.boost(getBoost());
  }

  /**
   * Returns true if <code>o</code> is equal to this.
   * Two custom score queries are equal when they are of the same class and have
   * the same boost, the same strict mode, equal sub queries and equal
   * (or both absent) value source queries.
   * Returns false, rather than throwing, when <code>o</code> is null.
   */
  public boolean equals(Object o) {
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    CustomScoreQuery other = (CustomScoreQuery)o;
    return this.getBoost() == other.getBoost()
           && this.strict == other.strict // strict mode changes scoring, so it is part of identity
           && this.subQuery.equals(other.subQuery)
           && (this.valSrcQuery==null ? other.valSrcQuery==null
               : this.valSrcQuery.equals(other.valSrcQuery));
  }

  /** Returns a hash code value for this object, consistent with {@link #equals(Object)}. */
  public int hashCode() {
    int valSrcHash = valSrcQuery==null ? 0 : valSrcQuery.hashCode();
    return (getClass().hashCode() + subQuery.hashCode() + valSrcHash)
           ^ Float.floatToIntBits(getBoost())
           ^ (strict ? 1234 : 4321);
  }

  /**
   * Compute a custom score by the subQuery score and the ValueSourceQuery score.
   * <p>
   * Subclasses can override this method to modify the custom score.
   * <p>
   * The default computation herein is:
   * <pre>
   *     ModifiedScore = valSrcScore * subQueryScore.
   * </pre>
   *
   * @param doc id of scored doc.
   * @param subQueryScore score of that doc by the subQuery.
   * @param valSrcScore score of that doc by the ValueSourceQuery
   *        (the scorer passes 1 when there is no ValueSourceQuery).
   * @return custom score.
   */
  public float customScore(int doc, float subQueryScore, float valSrcScore) {
    return valSrcScore * subQueryScore;
  }

  /**
   * Explain the custom score.
   * Whenever overriding {@link #customScore(int, float, float)},
   * this method should also be overridden to provide the correct explanation
   * for the part of the custom scoring.
   * @param doc doc being explained.
   * @param subQueryExpl explanation for the sub-query part.
   * @param valSrcExpl explanation for the value source part, or null if there is none.
   * @return an explanation for the custom score
   */
  public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
    // a missing value source part contributes a neutral factor of 1
    float valSrcScore = valSrcExpl==null ? 1 : valSrcExpl.getValue();
    Explanation exp = new Explanation( valSrcScore * subQueryExpl.getValue(), "custom score: product of:");
    exp.addDetail(subQueryExpl);
    if (valSrcExpl != null) {
      exp.addDetail(valSrcExpl);
    }
    return exp;
  }

  //=========================== W E I G H T ============================

  /** Weight combining the sub query weight with the (optional) value source weight. */
  private class CustomWeight implements Weight {
    Searcher searcher;
    Weight subQueryWeight;
    Weight valSrcWeight; // optional
    boolean qStrict;

    public CustomWeight(Searcher searcher) throws IOException {
      this.searcher = searcher;
      this.subQueryWeight = subQuery.weight(searcher);
      if (valSrcQuery!=null) {
        this.valSrcWeight = valSrcQuery.createWeight(searcher);
      }
      // snapshot the strict flag at weight creation time
      this.qStrict = strict;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#getQuery() */
    public Query getQuery() {
      return CustomScoreQuery.this;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#getValue() */
    public float getValue() {
      return getBoost();
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
    public float sumOfSquaredWeights() throws IOException {
      float sum = subQueryWeight.sumOfSquaredWeights();
      if (valSrcWeight!=null) {
        if (qStrict) {
          valSrcWeight.sumOfSquaredWeights(); // do not include ValueSource part in the query normalization
        } else {
          sum += valSrcWeight.sumOfSquaredWeights();
        }
      }
      sum *= getBoost() * getBoost(); // boost each sub-weight
      return sum ;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
    public void normalize(float norm) {
      norm *= getBoost(); // incorporate boost
      subQueryWeight.normalize(norm);
      if (valSrcWeight!=null) {
        if (qStrict) {
          valSrcWeight.normalize(1); // do not normalize the ValueSource part
        } else {
          valSrcWeight.normalize(norm);
        }
      }
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader) */
    public Scorer scorer(IndexReader reader) throws IOException {
      Scorer subQueryScorer = subQueryWeight.scorer(reader);
      Scorer valSrcScorer = (valSrcWeight==null ? null : valSrcWeight.scorer(reader));
      return new CustomScorer(getSimilarity(searcher), reader, this, subQueryScorer, valSrcScorer);
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int) */
    public Explanation explain(IndexReader reader, int doc) throws IOException {
      return scorer(reader).explain(doc);
    }
  }


  //=========================== S C O R E R ============================

  /**
   * A scorer that applies a (callback) function on scores of the subQuery.
   * Iteration is driven by the sub query scorer; the value source scorer, when
   * present, is positioned on the same document.
   */
  private class CustomScorer extends Scorer {
    private final CustomWeight weight;
    private final float qWeight;
    private Scorer subQueryScorer;
    private Scorer valSrcScorer; // optional
    private IndexReader reader;

    // constructor
    private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w,
        Scorer subQueryScorer, Scorer valSrcScorer) throws IOException {
      super(similarity);
      this.weight = w;
      this.qWeight = w.getValue();
      this.subQueryScorer = subQueryScorer;
      this.valSrcScorer = valSrcScorer;
      this.reader = reader;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Scorer#next() */
    public boolean next() throws IOException {
      boolean hasNext = subQueryScorer.next();
      if (valSrcScorer!=null && hasNext) {
        // keep the value source scorer on the same doc as the sub query scorer
        valSrcScorer.skipTo(subQueryScorer.doc());
      }
      return hasNext;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Scorer#doc() */
    public int doc() {
      return subQueryScorer.doc();
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
    public float score() throws IOException {
      float valSrcScore = (valSrcScorer==null ? 1 : valSrcScorer.score());
      return qWeight * customScore(subQueryScorer.doc(), subQueryScorer.score(), valSrcScore);
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Scorer#skipTo(int) */
    public boolean skipTo(int target) throws IOException {
      boolean hasNext = subQueryScorer.skipTo(target);
      if (valSrcScorer!=null && hasNext) {
        // keep the value source scorer on the same doc as the sub query scorer
        valSrcScorer.skipTo(subQueryScorer.doc());
      }
      return hasNext;
    }

    /*(non-Javadoc) @see org.apache.lucene.search.Scorer#explain(int) */
    public Explanation explain(int doc) throws IOException {
      Explanation subQueryExpl = weight.subQueryWeight.explain(reader,doc);
      if (!subQueryExpl.isMatch()) {
        // no match by the sub query means no match at all
        return subQueryExpl;
      }
      // match
      Explanation valSrcExpl = valSrcScorer==null ? null : valSrcScorer.explain(doc);
      Explanation customExp = customExplain(doc,subQueryExpl,valSrcExpl);
      float sc = qWeight * customExp.getValue();
      Explanation res = new ComplexExplanation(
        true, sc, CustomScoreQuery.this.toString() + ", product of:");
      res.addDetail(customExp);
      res.addDetail(new Explanation(qWeight, "queryBoost")); // actually using the q boost as q weight (== weight value)
      return res;
    }
  }

  /*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */
  protected Weight createWeight(Searcher searcher) throws IOException {
    return new CustomWeight(searcher);
  }

  /**
   * Checks if this is strict custom scoring.
   * In strict custom scoring, the ValueSource part does not participate in weight normalization.
   * This may be useful when one wants full control over how scores are modified, and does
   * not care about normalizing by the ValueSource part.
   * One particular case where this is useful is for testing this query.
   * <P>
   * Note: only has effect when the ValueSource part is not null.
   */
  public boolean isStrict() {
    return strict;
  }

  /**
   * Set the strict mode of this query.
   * @param strict The strict mode to set.
   * @see #isStrict()
   */
  public void setStrict(boolean strict) {
    this.strict = strict;
  }

  /**
   * A short name of this query, used in {@link #toString(String)}.
   */
  public String name() {
    return "custom";
  }

}

View File

@ -0,0 +1,176 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.Explanation;
/**
 * Expert: represents field values as different types.
 * Normally created via a
 * {@link org.apache.lucene.search.function.ValueSource ValueSource}
 * for a particular field and reader.
 *
 * <p><font color="#FF0000">
 * WARNING: The status of the <b>search.function</b> package is experimental.
 * The APIs introduced here might change in the future and will not be
 * supported anymore in such a case.</font>
 *
 * @author yonik
 */
public abstract class DocValues {
  /*
   * DocValues is distinct from ValueSource because
   * there needs to be an object created at query evaluation time that
   * is not referenced by the query itself because:
   * - Query objects should be MT safe
   * - For caching, Query objects are often used as keys... you don't
   *   want the Query carrying around big objects
   */

  /** Number of values (docs); used as the upper bound when computing statistics. */
  private int nVals;

  /**
   * Constructor with input number of values(docs).
   * @param nVals number of values; doc ids <code>0 .. nVals-1</code> are read
   *        when the min/max/average statistics are computed.
   */
  public DocValues (int nVals) {
    this.nVals = nVals;
  }

  // prevent using this constructor
  private DocValues () {
  }

  /**
   * Return doc value as a float.
   * <P>Mandatory: every DocValues implementation must implement at least this method.
   * @param doc document whose float value is requested.
   */
  public abstract float floatVal(int doc);

  /**
   * Return doc value as an int.
   * <P>Optional: DocValues implementation can (but don't have to) override this method.
   * @param doc document whose int value is requested.
   */
  public int intVal(int doc) {
    return (int) floatVal(doc);
  }

  /**
   * Return doc value as a long.
   * <P>Optional: DocValues implementation can (but don't have to) override this method.
   * @param doc document whose long value is requested.
   */
  public long longVal(int doc) {
    return (long) floatVal(doc);
  }

  /**
   * Return doc value as a double.
   * <P>Optional: DocValues implementation can (but don't have to) override this method.
   * @param doc document whose double value is requested.
   */
  public double doubleVal(int doc) {
    return (double) floatVal(doc);
  }

  /**
   * Return doc value as a string.
   * <P>Optional: DocValues implementation can (but don't have to) override this method.
   * @param doc document whose string value is requested.
   */
  public String strVal(int doc) {
    return Float.toString(floatVal(doc));
  }

  /**
   * Return a string representation of a doc value, as required for Explanations.
   */
  public abstract String toString(int doc);

  /**
   * Explain the scoring value for the input doc.
   */
  public Explanation explain(int doc) {
    return new Explanation(floatVal(doc), toString(doc));
  }

  /**
   * Expert: for test purposes only, return the inner array of values.
   * The default implementation returns an empty <code>Object</code> array,
   * meaning "not applicable".
   * <p>
   * Allows tests to verify that loaded values are:
   * <ol>
   *   <li>indeed cached/reused.</li>
   *   <li>stored in the expected size/type (byte/short/int/float).</li>
   * </ol>
   * Note: Tested implementations of DocValues must override this method for the test to pass!
   */
  Object getInnerArray() {
    return new Object[0];
  }

  // --- some simple statistics on values
  private float minVal;
  private float maxVal;
  private float avgVal;
  private boolean computed=false;

  // Compute the optional statistics once, on first request.
  private void compute () {
    if (computed) {
      return;
    }
    // Values reported when there are no docs at all (unchanged from before).
    minVal = Float.MAX_VALUE;
    maxVal = 0;
    float sum = 0;
    for (int i=0; i<nVals; i++) {
      float val = floatVal(i);
      sum += val;
      if (i == 0) {
        // Seed both extremes from the first value: starting the maximum at 0
        // reported a wrong maximum whenever all values were negative.
        minVal = val;
        maxVal = val;
      } else {
        minVal = Math.min(minVal,val);
        maxVal = Math.max(maxVal,val);
      }
    }
    avgVal = sum / nVals; // NaN when nVals is 0
    computed = true;
  }

  /**
   * Optional op.
   * Returns the minimum of all values.
   */
  public float getMinValue () {
    compute();
    return minVal;
  }

  /**
   * Optional op.
   * Returns the maximum of all values.
   */
  public float getMaxValue () {
    compute();
    return maxVal;
  }

  /**
   * Returns the average of all values
   * (<code>NaN</code> when there are no values).
   */
  public float getAverageValue () {
    compute();
    return avgVal;
  }

}

View File

@ -0,0 +1,105 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
/**
 * Expert: A base class for ValueSource implementations that retrieve values for
 * a single field from the {@link org.apache.lucene.search.FieldCache FieldCache}.
 * <p>
 * Fields used herein must be indexed (doesn't matter if these fields are stored or not).
 * <p>
 * It is assumed that each such indexed field is untokenized, or at least has a single token in a document.
 * For documents with multiple tokens of the same field, behavior is undefined (It is likely that current
 * code would use the value of one of these tokens, but this is not guaranteed).
 * <p>
 * Document with no tokens in this field are assigned the <code>Zero</code> value.
 *
 * <p><font color="#FF0000">
 * WARNING: The status of the <b>search.function</b> package is experimental.
 * The APIs introduced here might change in the future and will not be
 * supported anymore in such a case.</font>
 *
 * @author yonik
 */
public abstract class FieldCacheSource extends ValueSource {
  private String field;
  private FieldCache cache = FieldCache.DEFAULT;

  /**
   * Create a cached field source for the input field.
   */
  public FieldCacheSource(String field) {
    this.field=field;
  }

  /* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
  public final DocValues getValues(IndexReader reader) throws IOException {
    return getCachedFieldValues(cache, field, reader);
  }

  /* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return field;
  }

  /**
   * Return cached DocValues for input field and reader.
   * @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing)
   * @param field Field for which values are required.
   * @param reader reader whose documents the values belong to.
   * @see ValueSource
   */
  public abstract DocValues getCachedFieldValues(FieldCache cache, String field, IndexReader reader) throws IOException;

  /**
   * Equal when the other object is a FieldCacheSource over the same cache
   * instance and an equal field name, and the subclass-specific check agrees.
   * @see java.lang.Object#equals(java.lang.Object)
   */
  public final boolean equals(Object o) {
    if (o instanceof FieldCacheSource) {
      FieldCacheSource that = (FieldCacheSource) o;
      if (this.cache != that.cache) {
        return false;
      }
      if (!this.field.equals(that.field)) {
        return false;
      }
      return cachedFieldSourceEquals(that);
    }
    return false;
  }

  /**
   * Sum of the cache's, the field's and the subclass-specific hash codes.
   * @see java.lang.Object#hashCode()
   */
  public final int hashCode() {
    int hash = cache.hashCode();
    hash += field.hashCode();
    hash += cachedFieldSourceHashCode();
    return hash;
  }

  /**
   * Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal.
   * @see Object#equals(java.lang.Object)
   */
  public abstract boolean cachedFieldSourceEquals(FieldCacheSource other);

  /**
   * Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field
   * and the cache (those are taken care of elsewhere).
   * @see Object#hashCode()
   */
  public abstract int cachedFieldSourceHashCode();
}

View File

@ -0,0 +1,127 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A query that scores each document as the value of the numeric input field.
* <p>
* The query matches all documents, and scores each document according to the numeric
* value of that field.
* <p>
* It is assumed, and expected, that:
* <ul>
* <li>The field used here is indexed, and has exactly
* one token in every scored document.</li>
* <li>Best if this field is un_tokenized.</li>
* <li>That token is parsable to the selected type.</li>
* </ul>
* <p>
* Combining this query in a FunctionQuery allows much freedom in affecting document scores.
* Note, that with this freedom comes responsibility: it is more than likely that the
* default Lucene scoring is superior in quality to scoring modified as explained here.
* However, in some cases, and certainly for research experiments, this capability may turn useful.
* <p>
* When constructing this query, select the appropriate type. That type should match the data stored in the
* field. So in fact the "right" type should be selected before indexing. Type selection
* has effect on the RAM usage:
* <ul>
* <li>{@link Type#BYTE} consumes 1 * maxDocs bytes.</li>
* <li>{@link Type#SHORT} consumes 2 * maxDocs bytes.</li>
* <li>{@link Type#INT} consumes 4 * maxDocs bytes.</li>
* <li>{@link Type#FLOAT} consumes 4 * maxDocs bytes.</li>
* </ul>
* <p>
* <b>Caching:</b>
* Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader.
* To take advantage of this, it is extremely important to reuse index-readers or index-searchers,
* otherwise, for instance if for each query a new index reader is opened, large penalties would be
* paid for loading the field values into memory over and over again!
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*/
public class FieldScoreQuery extends ValueSourceQuery {

  /**
   * Type of score field, indicating how field values are interpreted/parsed.
   * <p>
   * The type selected at search time should match the data stored in the field.
   * Different types have different RAM requirements:
   * <ul>
   * <li>{@link #BYTE} consumes 1 * maxDocs bytes.</li>
   * <li>{@link #SHORT} consumes 2 * maxDocs bytes.</li>
   * <li>{@link #INT} consumes 4 * maxDocs bytes.</li>
   * <li>{@link #FLOAT} consumes 4 * maxDocs bytes.</li>
   * </ul>
   */
  public static class Type {

    /** field values are interpreted as numeric byte values. */
    public static final Type BYTE = new Type("byte");

    /** field values are interpreted as numeric short values. */
    public static final Type SHORT = new Type("short");

    /** field values are interpreted as numeric int values. */
    public static final Type INT = new Type("int");

    /** field values are interpreted as numeric float values. */
    public static final Type FLOAT = new Type("float");

    // name of the type, used only by toString(); never changes after construction
    private final String typeName;

    // private: the four constants above are the only instances
    private Type(String name) {
      this.typeName = name;
    }

    /** Returns the class name followed by "::" and the type name. */
    public String toString() {
      return getClass().getName() + "::" + typeName;
    }
  }

  /**
   * Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
   * <p>
   * The <code>type</code> param tells how to parse the field string values into a numeric score value.
   * @param field the numeric field to be used.
   * @param type the type of the field: either
   * {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}.
   * @throws IllegalArgumentException if <code>type</code> is not one of the four known types.
   */
  public FieldScoreQuery(String field, Type type) {
    super(getValueSource(field, type));
  }

  // create the appropriate (cached) field value source.
  // Types are compared by identity since the constants in Type are the only instances.
  private static ValueSource getValueSource(String field, Type type) {
    if (type == Type.BYTE) {
      return new ByteFieldSource(field);
    }
    if (type == Type.SHORT) {
      return new ShortFieldSource(field);
    }
    if (type == Type.INT) {
      return new IntFieldSource(field);
    }
    if (type == Type.FLOAT) {
      return new FloatFieldSource(field);
    }
    throw new IllegalArgumentException(type+" is not a known Field Score Query Type!");
  }
}

View File

@ -0,0 +1,102 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.DocValues;
import java.io.IOException;
/**
* Expert: obtains float field values from the
* {@link org.apache.lucene.search.FieldCache FieldCache}
* using <code>getFloats()</code> and makes those values
* available as other numeric types, casting as needed.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
* @author yonik
*/
public class FloatFieldSource extends FieldCacheSource {
  // optional custom parser; null means the cache's default string-to-float parsing is used
  private final FieldCache.FloatParser parser;

  /**
   * Create a cached float field source with default string-to-float parser.
   */
  public FloatFieldSource(String field) {
    this(field, null);
  }

  /**
   * Create a cached float field source with a specific string-to-float parser.
   * @param field name of the field to read.
   * @param parser parser to use, or null for the default one.
   */
  public FloatFieldSource(String field, FieldCache.FloatParser parser) {
    super(field);
    this.parser = parser;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "float(" + super.description() + ')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
  public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
    // pick the cache entry point that matches whether a custom parser was supplied
    final float[] arr = (parser==null) ?
      cache.getFloats(reader, field) :
      cache.getFloats(reader, field, parser);
    return new DocValues(reader.maxDoc()) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Two float field sources are equal when both use the default parser or both use
   * parsers of the same class.
   * A source with a custom parser is never equal to one without.
   */
  public boolean cachedFieldSourceEquals(FieldCacheSource o) {
    if (o == null || o.getClass() != FloatFieldSource.class) {
      return false;
    }
    FloatFieldSource other = (FloatFieldSource)o;
    if (this.parser == null || other.parser == null) {
      // at least one side uses the default parser: equal only if both do
      return this.parser == other.parser;
    }
    return this.parser.getClass() == other.parser.getClass();
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
  public int cachedFieldSourceHashCode() {
    return parser==null ?
      Float.class.hashCode() : parser.getClass().hashCode();
  }
}

View File

@ -0,0 +1,107 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.DocValues;
import java.io.IOException;
/**
* Expert: obtains int field values from the
* {@link org.apache.lucene.search.FieldCache FieldCache}
* using <code>getInts()</code> and makes those values
* available as other numeric types, casting as needed.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*
* @author yonik
*/
public class IntFieldSource extends FieldCacheSource {
  // optional custom parser; null means the cache's default string-to-int parsing is used
  private final FieldCache.IntParser parser;

  /**
   * Create a cached int field source with default string-to-int parser.
   */
  public IntFieldSource(String field) {
    this(field, null);
  }

  /**
   * Create a cached int field source with a specific string-to-int parser.
   * @param field name of the field to read.
   * @param parser parser to use, or null for the default one.
   */
  public IntFieldSource(String field, FieldCache.IntParser parser) {
    super(field);
    this.parser = parser;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "int(" + super.description() + ')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
  public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
    // pick the cache entry point that matches whether a custom parser was supplied
    final int[] arr = (parser==null) ?
      cache.getInts(reader, field) :
      cache.getInts(reader, field, parser);
    return new DocValues(reader.maxDoc()) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return (float) arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
      public int intVal(int doc) {
        return arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + intVal(doc);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Two int field sources are equal when both use the default parser or both use
   * parsers of the same class.
   * A source with a custom parser is never equal to one without.
   */
  public boolean cachedFieldSourceEquals(FieldCacheSource o) {
    if (o == null || o.getClass() != IntFieldSource.class) {
      return false;
    }
    IntFieldSource other = (IntFieldSource)o;
    if (this.parser == null || other.parser == null) {
      // at least one side uses the default parser: equal only if both do
      return this.parser == other.parser;
    }
    return this.parser.getClass() == other.parser.getClass();
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
  public int cachedFieldSourceHashCode() {
    return parser==null ?
      Integer.class.hashCode() : parser.getClass().hashCode();
  }
}

View File

@ -0,0 +1,103 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import java.io.IOException;
/**
* Expert: obtains the ordinal of the field value from the default Lucene
* {@link org.apache.lucene.search.FieldCache Fieldcache} using getStringIndex().
* <p>
* The native lucene index order is used to assign an ordinal value for each field value.
* <p>
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
* <p>
* Example:
* <br>If there were only three field values: "apple","banana","pear"
* <br>then ord("apple")=1, ord("banana")=2, ord("pear")=3
* <p>
* WARNING:
* ord() depends on the position in an index and can thus change
* when other documents are inserted or deleted,
* or if a MultiSearcher is used.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @author yonik
*/
public class OrdFieldSource extends ValueSource {
  protected String field;

  /**
   * Constructor for a certain field.
   * @param field field whose values order is used.
   */
  public OrdFieldSource(String field) {
    this.field = field;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "ord(" + field + ')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
  public DocValues getValues(IndexReader reader) throws IOException {
    // order[doc] is the ordinal of the field value of that document
    final int[] arr = FieldCache.DEFAULT.getStringIndex(reader, field).order;
    return new DocValues(arr.length) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return (float)arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
      public int intVal(int doc) {
        // return the ordinal directly, as ReverseOrdFieldSource does,
        // instead of relying on the inherited implementation
        return arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
      public String strVal(int doc) {
        // the string value of the ordinal, not the string itself
        return Integer.toString(arr[doc]);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + intVal(doc);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Equal only to another OrdFieldSource (exact class) over the same field.
   * Returns false for null, as required by {@link Object#equals(Object)}.
   */
  public boolean equals(Object o) {
    if (o == null || o.getClass() != OrdFieldSource.class) return false;
    OrdFieldSource other = (OrdFieldSource)o;
    return this.field.equals(other.field);
  }

  // class-specific part of the hash code
  private static final int hcode = OrdFieldSource.class.hashCode();

  /*(non-Javadoc) @see java.lang.Object#hashCode() */
  public int hashCode() {
    return hcode + field.hashCode();
  }
}

View File

@ -0,0 +1,112 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.function;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import java.io.IOException;
/**
* Expert: obtains the ordinal of the field value from the default Lucene
* {@link org.apache.lucene.search.FieldCache FieldCache} using getStringIndex()
* and reverses the order.
* <p>
* The native lucene index order is used to assign an ordinal value for each field value.
* <p>
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
* <br>
* Example of reverse ordinal (rord):
* <br>If there were only three field values: "apple","banana","pear"
* <br>then rord("apple")=3, rord("banana")=2, rord("pear")=1
* <p>
* WARNING:
* rord() depends on the position in an index and can thus change
* when other documents are inserted or deleted,
* or if a MultiSearcher is used.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @author yonik
*/
public class ReverseOrdFieldSource extends ValueSource {
  public String field;

  /**
   * Constructor for a certain field.
   * @param field field whose values reverse order is used.
   */
  public ReverseOrdFieldSource(String field) {
    this.field = field;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "rord("+field+')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
  public DocValues getValues(IndexReader reader) throws IOException {
    final FieldCache.StringIndex sindex = FieldCache.DEFAULT.getStringIndex(reader, field);
    final int arr[] = sindex.order;
    // the reverse ordinal of a document is computed as (end - ordinal)
    final int end = sindex.lookup.length;
    return new DocValues(arr.length) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return (float)(end - arr[doc]);
      }
      /* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
      public int intVal(int doc) {
        return end - arr[doc];
      }
      /* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
      public String strVal(int doc) {
        // the string value of the ordinal, not the string itself
        return Integer.toString(intVal(doc));
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + strVal(doc);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Equal only to another ReverseOrdFieldSource (exact class) over the same field.
   * Returns false for null, as required by {@link Object#equals(Object)}.
   */
  public boolean equals(Object o) {
    if (o == null || o.getClass() != ReverseOrdFieldSource.class) return false;
    ReverseOrdFieldSource other = (ReverseOrdFieldSource)o;
    return this.field.equals(other.field);
  }

  // class-specific part of the hash code
  private static final int hcode = ReverseOrdFieldSource.class.hashCode();

  /*(non-Javadoc) @see java.lang.Object#hashCode() */
  public int hashCode() {
    return hcode + field.hashCode();
  }
}

View File

@ -0,0 +1,105 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.DocValues;
import java.io.IOException;
/**
* Expert: obtains short field values from the
* {@link org.apache.lucene.search.FieldCache FieldCache}
* using <code>getShorts()</code> and makes those values
* available as other numeric types, casting as needed.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
* on the field.
*/
public class ShortFieldSource extends FieldCacheSource {
  // optional custom parser; null means the cache's default string-to-short parsing is used
  private final FieldCache.ShortParser parser;

  /**
   * Create a cached short field source with default string-to-short parser.
   */
  public ShortFieldSource(String field) {
    this(field, null);
  }

  /**
   * Create a cached short field source with a specific string-to-short parser.
   * @param field name of the field to read.
   * @param parser parser to use, or null for the default one.
   */
  public ShortFieldSource(String field, FieldCache.ShortParser parser) {
    super(field);
    this.parser = parser;
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
  public String description() {
    return "short(" + super.description() + ')';
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedFieldValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
  public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
    // pick the cache entry point that matches whether a custom parser was supplied
    final short[] arr = (parser==null) ?
      cache.getShorts(reader, field) :
      cache.getShorts(reader, field, parser);
    return new DocValues(reader.maxDoc()) {
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
      public float floatVal(int doc) {
        return (float) arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
      public int intVal(int doc) {
        return arr[doc];
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
      public String toString(int doc) {
        return description() + '=' + intVal(doc);
      }
      /*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
      Object getInnerArray() {
        return arr;
      }
    };
  }

  /**
   * Two short field sources are equal when both use the default parser or both use
   * parsers of the same class.
   * A source with a custom parser is never equal to one without.
   */
  public boolean cachedFieldSourceEquals(FieldCacheSource o) {
    if (o == null || o.getClass() != ShortFieldSource.class) {
      return false;
    }
    ShortFieldSource other = (ShortFieldSource)o;
    if (this.parser == null || other.parser == null) {
      // at least one side uses the default parser: equal only if both do
      return this.parser == other.parser;
    }
    return this.parser.getClass() == other.parser.getClass();
  }

  /*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
  public int cachedFieldSourceHashCode() {
    return parser==null ?
      Short.class.hashCode() : parser.getClass().hashCode();
  }
}

View File

@ -0,0 +1,74 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.function.DocValues;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.io.Serializable;
/**
* Expert: source of values for basic function queries.
* <P>At its default/simplest form, values - one per doc - are used as the score of that doc.
* <P>Values are instantiated as
* {@link org.apache.lucene.search.function.DocValues DocValues} for a particular reader.
* <P>ValueSource implementations differ in RAM requirements: it would always be a factor
* of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @author yonik
*/
public abstract class ValueSource implements Serializable {

  /**
   * Produce the per-document values that the function query scores with.
   * @param reader the IndexReader the values are read from; any caching
   * an implementation performs is expected to be keyed on this reader as well.
   * @return the values for the documents of <code>reader</code>.
   * @throws IOException for any error.
   */
  public abstract DocValues getValues(IndexReader reader) throws IOException;

  /**
   * Short description of this source, used in explain().
   */
  public abstract String description();

  /**
   * The string form of a value source is its {@link #description()}.
   */
  public String toString() {
    final String text = description();
    return text;
  }

  /**
   * Implementations must define equality, since it is needed for possible caching of
   * query results - used by {@link ValueSourceQuery#equals(Object)}.
   * @see Object#equals(Object)
   */
  public abstract boolean equals(Object o);

  /**
   * Implementations must define a hash code consistent with {@link #equals(Object)},
   * since it is needed for possible caching of query results - used by
   * {@link ValueSourceQuery#hashCode()}.
   * @see Object#hashCode()
   */
  public abstract int hashCode();
}

View File

@ -0,0 +1,201 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.*;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
import java.util.Set;
/**
* Expert: A Query that sets the scores of document to the
* values obtained from a {@link org.apache.lucene.search.function.ValueSource ValueSource}.
* <p>
* The value source can be based on a (cached) value of an indexed field, but it
* can also be based on an external source, e.g. values read from an external database.
* <p>
* Score is set as: Score(doc,query) = query.getBoost()<sup>2</sup> * valueSource(doc).
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>search.function</b> package is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @author yonik
*/
public class ValueSourceQuery extends Query {
ValueSource valSrc;
/**
* Create a value source query
* @param valSrc provides the values defines the function to be used for scoring
*/
public ValueSourceQuery(ValueSource valSrc) {
this.valSrc=valSrc;
}
/*(non-Javadoc) @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader) */
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
/*(non-Javadoc) @see org.apache.lucene.search.Query#extractTerms(java.util.Set) */
public void extractTerms(Set terms) {
// no terms involved here
}
private class ValueSourceWeight implements Weight {
Searcher searcher;
float queryNorm;
float queryWeight;
public ValueSourceWeight(Searcher searcher) {
this.searcher = searcher;
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getQuery() */
public Query getQuery() {
return ValueSourceQuery.this;
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getValue() */
public float getValue() {
return queryWeight;
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
public float sumOfSquaredWeights() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
public void normalize(float norm) {
this.queryNorm = norm;
queryWeight *= this.queryNorm;
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader) */
public Scorer scorer(IndexReader reader) throws IOException {
return new ValueSourceScorer(getSimilarity(searcher), reader, this);
}
/*(non-Javadoc) @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int) */
public Explanation explain(IndexReader reader, int doc) throws IOException {
return scorer(reader).explain(doc);
}
}
/**
* A scorer that (simply) matches all documents, and scores each document with
* the value of the value soure in effect. As an example, if the value source
* is a (cached) field source, then value of that field in that document will
* be used. (assuming field is indexed for this doc, with a single token.)
*/
private class ValueSourceScorer extends Scorer {
private final IndexReader reader;
private final ValueSourceWeight weight;
private final int maxDoc;
private final float qWeight;
private int doc=-1;
private final DocValues vals;
// constructor
private ValueSourceScorer(Similarity similarity, IndexReader reader, ValueSourceWeight w) throws IOException {
super(similarity);
this.weight = w;
this.qWeight = w.getValue();
this.reader = reader;
this.maxDoc = reader.maxDoc();
// this is when/where the values are first created.
vals = valSrc.getValues(reader);
}
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#next() */
public boolean next() throws IOException {
for(;;) {
++doc;
if (doc>=maxDoc) {
return false;
}
if (reader.isDeleted(doc)) {
continue;
}
return true;
}
}
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#doc()
*/
public int doc() {
return doc;
}
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
public float score() throws IOException {
return qWeight * vals.floatVal(doc);
}
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#skipTo(int) */
public boolean skipTo(int target) throws IOException {
doc=target-1;
return next();
}
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#explain(int) */
public Explanation explain(int doc) throws IOException {
float sc = qWeight * vals.floatVal(doc);
Explanation result = new ComplexExplanation(
true, sc, ValueSourceQuery.this.toString() + ", product of:");
result.addDetail(vals.explain(doc));
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(weight.queryNorm,"queryNorm"));
return result;
}
}
/*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */
protected Weight createWeight(Searcher searcher) {
return new ValueSourceQuery.ValueSourceWeight(searcher);
}
/* (non-Javadoc) @see org.apache.lucene.search.Query#toString(java.lang.String) */
public String toString(String field) {
  // The field argument is not consulted: the text is the value source's own
  // description followed by the boost suffix produced by ToStringUtils.
  String description = valSrc.toString();
  String boostSuffix = ToStringUtils.boost(getBoost());
  return description + boostSuffix;
}
/** Returns true if <code>o</code> is equal to this. */
public boolean equals(Object o) {
  // equals(null) must answer false; without the null test the class comparison
  // below would dereference the argument and throw a NullPointerException.
  if (o == null || getClass() != o.getClass()) {
    return false;
  }
  // Same runtime class: equal when both the boost and the value source match.
  ValueSourceQuery other = (ValueSourceQuery)o;
  return this.getBoost() == other.getBoost()
      && this.valSrc.equals(other.valSrc);
}
/** Returns a hash code value for this object. */
public int hashCode() {
  // Combine the runtime class and the value source first, then mix in the boost bits.
  int classAndSource = getClass().hashCode() + valSrc.hashCode();
  int boostBits = Float.floatToIntBits(getBoost());
  return classAndSource ^ boostBits;
}
}

View File

@ -0,0 +1,197 @@
<HTML>
<!--
/**
* Copyright 2005 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-->
<HEAD>
<TITLE>org.apache.lucene.search.function</TITLE>
</HEAD>
<BODY>
<DIV>
Programmatic control over document scores.
</DIV>
<DIV>
The <code>function</code> package provides tight control over document scores.
</DIV>
<DIV>
<font color="#FF0000">
WARNING: The status of the <b>search.function</b> package is experimental. The APIs
introduced here might change in the future and will not be supported anymore
in such a case.
</font>
</DIV>
<DIV>
Two types of queries are available in this package:
</DIV>
<DIV>
<ol>
<li>
<b>Custom Score queries</b> - allow setting the score
of a matching document as a mathematical expression over the scores
assigned to that document by the contained (sub) queries.
</li>
<li>
<b>Field score queries</b> - allow basing the score of a
document on <b>numeric values</b> of <b>indexed fields</b>.
</li>
</ol>
</DIV>
<DIV>&nbsp;</DIV>
<DIV>
<b>Some possible uses of these queries:</b>
</DIV>
<DIV>
<ol>
<li>
Normalizing the document scores by values indexed in a special field -
for instance, experimenting with a different doc length normalization.
</li>
<li>
Introducing some static scoring element into the score of a document -
for instance using some topological attribute of the links to/from a document.
</li>
<li>
Computing the score of a matching document as an arbitrary odd function of
its score by a certain query.
</li>
</ol>
</DIV>
<DIV>
<b>Performance and Quality Considerations:</b>
</DIV>
<DIV>
<ol>
<li>
When scoring by values of indexed fields,
these values are loaded into memory.
Unlike the regular scoring, where the required information is read from
disk as necessary, here field values are loaded once and cached by Lucene in memory
for further use, anticipating reuse by further queries. While all this is carefully
cached with performance in mind, it is recommended to
use these features only when the default Lucene scoring does
not match your "special" application needs.
</li>
<li>
Use only with carefully selected fields, because in most cases,
search quality with regular Lucene scoring
would outperform that of scoring by field values.
</li>
<li>
Values of fields used for scoring should match.
Do not apply on a field containing arbitrary (long) text.
Do not mix values in the same field if that field is used for scoring.
</li>
<li>
Smaller (shorter) field tokens mean less RAM (something always desired).
When using <a href="FieldScoreQuery.html">FieldScoreQuery</a>,
select the shortest <a href="FieldScoreQuery.html#Type">FieldScoreQuery.Type</a>
that is sufficient for the used field values.
</li>
<li>
Reusing IndexReaders/IndexSearchers is essential, because the caching of field tokens
is based on an IndexReader. Whenever a new IndexReader is used, values currently in the cache
cannot be used and new values must be loaded from disk. So replace/refresh readers/searchers in
a controlled manner.
</li>
</ol>
</DIV>
<DIV>
<b>History and Credits:</b>
<ul>
<li>
A large part of the code of this package originated from Yonik's FunctionQuery code that was
imported from <a href="http://lucene.apache.org/solr">Solr</a>
(see <a href="http://issues.apache.org/jira/browse/LUCENE-446">LUCENE-446</a>).
</li>
<li>
The idea behind CustomScoreQuery is borrowed from
the "Easily create queries that transform sub-query scores arbitrarily" contribution by Mike Klaas
(see <a href="http://issues.apache.org/jira/browse/LUCENE-850">LUCENE-850</a>)
though the implementation and API here are different.
</li>
</ul>
</DIV>
<DIV>
<b>Code sample:</b>
<P>
Note: code snippets here should work, but they were never really compiled... so
the test sources under TestCustomScoreQuery, TestFieldScoreQuery and TestOrdValues
may also be useful.
<ol>
<li>
Using field (byte) values as scores:
<p>
Indexing:
<pre>
f = new Field("score", "7", Field.Store.NO, Field.Index.UN_TOKENIZED);
f.setOmitNorms(true);
d1.add(f);
</pre>
<p>
Search:
<pre>
Query q = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
</pre>
Document d1 above would get a score of 7.
</li>
<p>
<li>
Manipulating scores
<p>
Dividing the original score of each document by a square root of its docid
(just to demonstrate what it takes to manipulate scores this way)
<pre>
Query q = queryParser.parse("my query text");
CustomScoreQuery customQ = new CustomScoreQuery(q) {
public float customScore(int doc, float subQueryScore, float valSrcScore) {
return (float) (subQueryScore / Math.sqrt(doc));
}
};
</pre>
<p>
For more informative debug info on the custom query, also override the name() method:
<pre>
CustomScoreQuery customQ = new CustomScoreQuery(q) {
public float customScore(int doc, float subQueryScore, float valSrcScore) {
return (float) (subQueryScore / Math.sqrt(doc));
}
public String name() {
return "1/sqrt(docid)";
}
};
</pre>
<p>
Taking the square root of the original score and multiplying it by a "short field driven score", ie, the
short value that was indexed for the scored doc in a certain field:
<pre>
Query q = queryParser.parse("my query text");
FieldScoreQuery qf = new FieldScoreQuery("shortScore", FieldScoreQuery.Type.SHORT);
CustomScoreQuery customQ = new CustomScoreQuery(q,qf) {
public float customScore(int doc, float subQueryScore, float valSrcScore) {
return (float) (Math.sqrt(subQueryScore) * valSrcScore);
}
public String name() {
return "shortVal*sqrt(score)";
}
};
</pre>
</li>
</ol>
</DIV>
</BODY>
</HTML>

View File

@ -18,9 +18,11 @@ package org.apache.lucene.util;
*/ */
public class ToStringUtils { public class ToStringUtils {
/** for printing boost only if not 1.0 */
public static String boost(float boost) { public static String boost(float boost) {
if (boost != 1.0f) { if (boost != 1.0f) {
return "^" + Float.toString(boost); return "^" + Float.toString(boost);
} else return ""; } else return "";
} }
} }

View File

@ -0,0 +1,152 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import junit.framework.TestCase;
/**
* Setup for function tests
*/
public abstract class FunctionTestSetup extends TestCase {

  /**
   * Actual score computation order is slightly different than assumptions;
   * this allows for a small amount of variation.
   */
  public static float TEST_SCORE_TOLERANCE_DELTA = 0.00005f;

  protected static final boolean DBG = false; // change to true for logging to print

  // Number of documents indexed by setUp(). Select a prime number > 2: setUp()
  // walks the doc numbers in steps of 4 modulo N_DOCS and relies on that walk
  // visiting every number exactly once.
  protected static final int N_DOCS = 17;

  protected static final String ID_FIELD = "id";
  protected static final String TEXT_FIELD = "text";
  protected static final String INT_FIELD = "iii";
  protected static final String FLOAT_FIELD = "fff";

  // Text lines appended to the TEXT_FIELD of the documents (reused cyclically).
  private static final String DOC_TEXT_LINES[] = {
    // from a public first aid info at http://firstaid.ie.eu.org
    "Well it may be a little dramatic but sometimes it true. ",
    "If you call the emergency medical services to an incident, ",
    "your actions have started the chain of survival. ",
    "You have acted to help someone you may not even know. ",
    "First aid is helping, first aid is making that call, ",
    "putting a Band-Aid on a small wound, controlling bleeding in large ",
    "wounds or providing CPR for a collapsed person whose not breathing ",
    "and heart has stopped beating. You can help yourself, your loved ",
    "ones and the stranger whose life may depend on you being in the ",
    "right place at the right time with the right knowledge.",
  };

  // In-memory index directory, created in setUp() and released in tearDown().
  protected Directory dir;
  // Analyzer used for indexing (and available to subclasses), created in setUp().
  protected Analyzer anlzr;

  /* @override constructor */
  public FunctionTestSetup(String name) {
    super(name);
  }

  /* @override */
  protected void tearDown() throws Exception {
    super.tearDown();
    // drop the references so the index and analyzer of this test can be collected.
    dir = null;
    anlzr = null;
  }

  /* @override */
  protected void setUp() throws Exception {
    // prepare a small index with just a few documents.
    super.setUp();
    dir = new RAMDirectory();
    anlzr = new StandardAnalyzer();
    IndexWriter iw = new IndexWriter(dir,anlzr);
    try {
      // add docs not exactly in natural ID order, to verify we do check the order of docs by scores
      int remaining = N_DOCS;
      boolean done[] = new boolean[N_DOCS];
      int i = 0;
      while (remaining>0) {
        if (done[i]) {
          // revisiting a number means the step-by-4 walk does not cover all docs.
          throw new Exception("to set this test correctly N_DOCS="+N_DOCS+" must be primary and greater than 2!");
        }
        addDoc(iw,i);
        done[i] = true;
        i = (i+4)%N_DOCS;
        remaining --;
      }
    } finally {
      // always close the writer, also when adding a doc or the sanity check fails.
      iw.close();
    }
  }

  // Adds document number i to the index. The same number (i+1) is written as the
  // zero-padded stored id, as part of the searchable text, and as the value of
  // both the int and the float scoring fields. Norms are omitted on every field.
  private void addDoc(IndexWriter iw, int i) throws Exception {
    Document d = new Document();
    Fieldable f;
    int scoreAndID = i+1;

    f = new Field(ID_FIELD,id2String(scoreAndID),Field.Store.YES,Field.Index.UN_TOKENIZED); // for debug purposes
    f.setOmitNorms(true);
    d.add(f);

    f = new Field(TEXT_FIELD,"text of doc"+scoreAndID+textLine(i),Field.Store.NO,Field.Index.TOKENIZED); // for regular search
    f.setOmitNorms(true);
    d.add(f);

    f = new Field(INT_FIELD,""+scoreAndID,Field.Store.NO,Field.Index.UN_TOKENIZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);

    f = new Field(FLOAT_FIELD,scoreAndID+".000",Field.Store.NO,Field.Index.UN_TOKENIZED); // for function scoring
    f.setOmitNorms(true);
    d.add(f);

    iw.addDocument(d);
    log("added: "+d);
  }

  // 17 --> ID00017
  // The number is left-padded with zeros to (number of digits of N_DOCS) + 3 digits.
  protected String id2String(int scoreAndID) {
    String s = "000000000"+scoreAndID;
    int n = (""+N_DOCS).length() + 3;
    int k = s.length() - n;
    return "ID"+s.substring(k);
  }

  // some text line for regular search
  private String textLine(int docNum) {
    return DOC_TEXT_LINES[docNum % DOC_TEXT_LINES.length];
  }

  // extract expected doc score from its ID Field: "ID7" --> 7.0
  // (the two leading characters are dropped and the rest is parsed as a float)
  protected float expectedFieldScore(String docIDFieldVal) {
    return Float.parseFloat(docIDFieldVal.substring(2));
  }

  // debug messages (change DBG to true for anything to print)
  protected void log (Object o) {
    if (DBG) {
      System.out.println(o.toString());
    }
  }
}

View File

@ -0,0 +1,240 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.TopDocs;
/**
* Test CustomScoreQuery search.
*/
public class TestCustomScoreQuery extends FunctionTestSetup {
/* @override constructor */
public TestCustomScoreQuery(String name) {
super(name);
}
/* @override */
protected void tearDown() throws Exception {
super.tearDown();
}
/* @override */
protected void setUp() throws Exception {
// prepare a small index with just a few documents.
super.setUp();
}
/** Test that CustomScoreQuery of Type.BYTE returns the expected scores. */
public void testCustomScoreByte () throws CorruptIndexException, Exception {
// INT field values are small enough to be parsed as byte
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.BYTE,1.0);
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.BYTE,2.0);
}
/** Test that CustomScoreQuery of Type.SHORT returns the expected scores. */
public void testCustomScoreShort () throws CorruptIndexException, Exception {
// INT field values are small enough to be parsed as short
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.SHORT,1.0);
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.SHORT,3.0);
}
/** Test that CustomScoreQuery of Type.INT returns the expected scores. */
public void testCustomScoreInt () throws CorruptIndexException, Exception {
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.INT,1.0);
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.INT,4.0);
}
/** Test that CustomScoreQuery of Type.FLOAT returns the expected scores. */
public void testCustomScoreFloat () throws CorruptIndexException, Exception {
// INT field can be parsed as float
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.FLOAT,1.0);
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.FLOAT,5.0);
// same values, but in flot format
doTestCustomScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT,1.0);
doTestCustomScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT,6.0);
}
// Test that FieldScoreQuery returns docs with expected score.
private void doTestCustomScore (String field, FieldScoreQuery.Type tp, double dboost) throws CorruptIndexException, Exception {
float boost = (float) dboost;
IndexSearcher s = new IndexSearcher(dir);
FieldScoreQuery qValSrc = new FieldScoreQuery(field,tp); // a query that would score by the field
QueryParser qp = new QueryParser(TEXT_FIELD,anlzr);
String qtxt = "bleeding person chain knowledge"; // from the doc texts in FunctionQuerySetup.
// regular (boolean) query.
Query q1 = qp.parse(qtxt);
log(q1);
// custom query, that should score the same as q1.
CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
q2CustomNeutral.setBoost(boost);
log(q2CustomNeutral);
// custom query, that should (by default) multiply the scores of q1 by that of the field
CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1,qValSrc);
q3CustomMul.setStrict(true);
q3CustomMul.setBoost(boost);
log(q3CustomMul);
// custom query, that should add the scores of q1 to that of the field
CustomScoreQuery q4CustomAdd = new CustomScoreQuery(q1,qValSrc) {
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */
public String name() {
return "customAdd";
}
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#customScore(int, float, float) */
public float customScore(int doc, float subQueryScore, float valSrcScore) {
return subQueryScore + valSrcScore;
}
/* (non-Javadoc)@see org.apache.lucene.search.function.CustomScoreQuery#customExplain(int, org.apache.lucene.search.Explanation, org.apache.lucene.search.Explanation)*/
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
float valSrcScore = valSrcExpl==null ? 0 : valSrcExpl.getValue();
Explanation exp = new Explanation( valSrcScore + subQueryExpl.getValue(), "custom score: sum of:");
exp.addDetail(subQueryExpl);
if (valSrcExpl != null) {
exp.addDetail(valSrcExpl);
}
return exp;
}
};
q4CustomAdd.setStrict(true);
q4CustomAdd.setBoost(boost);
log(q4CustomAdd);
// custom query, that multiplies and adds the field score to that of q1
CustomScoreQuery q5CustomMulAdd = new CustomScoreQuery(q1,qValSrc) {
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */
public String name() {
return "customMulAdd";
}
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#customScore(int, float, float) */
public float customScore(int doc, float subQueryScore, float valSrcScore) {
return (1 + subQueryScore) * valSrcScore;
}
/* (non-Javadoc)@see org.apache.lucene.search.function.CustomScoreQuery#customExplain(int, org.apache.lucene.search.Explanation, org.apache.lucene.search.Explanation)*/
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
Explanation exp = new Explanation(1 + subQueryExpl.getValue(), "sum of:");
exp.addDetail(subQueryExpl);
exp.addDetail(new Explanation(1,"const 1"));
if (valSrcExpl == null) {
exp.setDescription("CustomMulAdd, sum of:");
return exp;
}
Explanation exp2 = new Explanation(valSrcExpl.getValue() * exp.getValue(), "custom score: product of:");
exp2.addDetail(valSrcExpl);
exp2.addDetail(exp);
return exp2;
}
};
q5CustomMulAdd.setStrict(true);
q5CustomMulAdd.setBoost(boost);
log(q5CustomMulAdd);
// do al the searches
TopDocs td1 = s.search(q1,null,1000);
TopDocs td2CustomNeutral = s.search(q2CustomNeutral,null,1000);
TopDocs td3CustomMul = s.search(q3CustomMul,null,1000);
TopDocs td4CustomAdd = s.search(q4CustomAdd,null,1000);
TopDocs td5CustomMulAdd = s.search(q5CustomMulAdd,null,1000);
// put results in map so we can verify the scores although they have changed
HashMap h1 = topDocsToMap(td1);
HashMap h2CustomNeutral = topDocsToMap(td2CustomNeutral);
HashMap h3CustomMul = topDocsToMap(td3CustomMul);
HashMap h4CustomAdd = topDocsToMap(td4CustomAdd);
HashMap h5CustomMulAdd = topDocsToMap(td5CustomMulAdd);
verifyResults(boost, s,
h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd,
q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
}
// verify results are as expected.
private void verifyResults(float boost, IndexSearcher s,
HashMap h1, HashMap h2customNeutral, HashMap h3CustomMul, HashMap h4CustomAdd, HashMap h5CustomMulAdd,
Query q1, Query q2, Query q3, Query q4, Query q5) throws Exception {
// verify numbers of matches
log("#hits = "+h1.size());
assertEquals("queries should have same #hits",h1.size(),h2customNeutral.size());
assertEquals("queries should have same #hits",h1.size(),h3CustomMul.size());
assertEquals("queries should have same #hits",h1.size(),h4CustomAdd.size());
assertEquals("queries should have same #hits",h1.size(),h5CustomMulAdd.size());
// verify scores ratios
for (Iterator it = h1.keySet().iterator(); it.hasNext();) {
Integer x = (Integer) it.next();
int doc = x.intValue();
log("doc = "+doc);
float fieldScore = expectedFieldScore(s.getIndexReader().document(doc).get(ID_FIELD));
log("fieldScore = "+fieldScore);
assertTrue("fieldScore should not be 0",fieldScore>0);
float score1 = ((Float)h1.get(x)).floatValue();
logResult("score1=", s, q1, doc, score1);
float score2 = ((Float)h2customNeutral.get(x)).floatValue();
logResult("score2=", s, q2, doc, score2);
assertEquals("same score (just boosted) for neutral", boost * score1, score2, TEST_SCORE_TOLERANCE_DELTA);
float score3 = ((Float)h3CustomMul.get(x)).floatValue();
logResult("score3=", s, q3, doc, score3);
assertEquals("new score for custom mul", boost * fieldScore * score1, score3, TEST_SCORE_TOLERANCE_DELTA);
float score4 = ((Float)h4CustomAdd.get(x)).floatValue();
logResult("score4=", s, q4, doc, score4);
assertEquals("new score for custom add", boost * (fieldScore + score1), score4, TEST_SCORE_TOLERANCE_DELTA);
float score5 = ((Float)h5CustomMulAdd.get(x)).floatValue();
logResult("score5=", s, q5, doc, score5);
assertEquals("new score for custom mul add", boost * fieldScore * (score1 + 1), score5, TEST_SCORE_TOLERANCE_DELTA);
}
}
private void logResult(String msg, IndexSearcher s, Query q, int doc, float score1) throws IOException {
QueryUtils.check(q,s);
log(msg+" "+score1);
log("Explain by: "+q);
log(s.explain(q,doc));
}
// since custom scoring modifies the order of docs, map results
// by doc ids so that we can later compare/verify them
private HashMap topDocsToMap(TopDocs td) {
HashMap h = new HashMap();
for (int i=0; i<td.totalHits; i++) {
h.put(new Integer(td.scoreDocs[i].doc), new Float(td.scoreDocs[i].score));
}
return h;
}
}

View File

@ -0,0 +1,203 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
/**
* Test FieldScoreQuery search.
* <p>
* Tests here create an index with a few documents, each having
* an int value indexed field and a float value indexed field.
* The values of these fields are later used for scoring.
* <p>
* The rank tests use Hits to verify that docs are ordered (by score) as expected.
* <p>
* The exact score tests use TopDocs top to verify the exact score.
*/
public class TestFieldScoreQuery extends FunctionTestSetup {

  /* @override constructor */
  public TestFieldScoreQuery(String name) {
    super(name);
  }

  /* @override */
  protected void tearDown() throws Exception {
    super.tearDown();
  }

  /* @override */
  protected void setUp() throws Exception {
    // prepare a small index with just a few documents.
    super.setUp();
  }

  /** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */
  public void testRankByte () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as byte
    doTestRank(INT_FIELD,FieldScoreQuery.Type.BYTE);
  }

  /** Test that FieldScoreQuery of Type.SHORT returns docs in expected order. */
  public void testRankShort () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as short
    doTestRank(INT_FIELD,FieldScoreQuery.Type.SHORT);
  }

  /** Test that FieldScoreQuery of Type.INT returns docs in expected order. */
  public void testRankInt () throws CorruptIndexException, Exception {
    doTestRank(INT_FIELD,FieldScoreQuery.Type.INT);
  }

  /** Test that FieldScoreQuery of Type.FLOAT returns docs in expected order. */
  public void testRankFloat () throws CorruptIndexException, Exception {
    // INT field can be parsed as float
    doTestRank(INT_FIELD,FieldScoreQuery.Type.FLOAT);
    // same values, but in float format
    doTestRank(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
  }

  // Test that FieldScoreQuery returns docs in expected order:
  // all docs must match, and the stored ids must be strictly decreasing
  // along the hits.
  private void doTestRank (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
    IndexSearcher s = new IndexSearcher(dir);
    Query q = new FieldScoreQuery(field,tp);
    log("test: "+q);
    QueryUtils.check(q,s);
    Hits h = s.search(q);
    assertEquals("All docs should be matched!",N_DOCS,h.length());
    String prevID = "ID"+(N_DOCS+1); // greater than all ids of docs in this test
    for (int i=0; i<h.length(); i++) {
      String resID = h.doc(i).get(ID_FIELD);
      log(i+". score="+h.score(i)+" - "+resID);
      log(s.explain(q,h.id(i)));
      assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
      prevID = resID;
    }
  }

  /** Test that FieldScoreQuery of Type.BYTE returns the expected scores. */
  public void testExactScoreByte () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as byte
    doTestExactScore(INT_FIELD,FieldScoreQuery.Type.BYTE);
  }

  /** Test that FieldScoreQuery of Type.SHORT returns the expected scores. */
  public void testExactScoreShort () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as short
    doTestExactScore(INT_FIELD,FieldScoreQuery.Type.SHORT);
  }

  /** Test that FieldScoreQuery of Type.INT returns the expected scores. */
  public void testExactScoreInt () throws CorruptIndexException, Exception {
    doTestExactScore(INT_FIELD,FieldScoreQuery.Type.INT);
  }

  /** Test that FieldScoreQuery of Type.FLOAT returns the expected scores. */
  public void testExactScoreFloat () throws CorruptIndexException, Exception {
    // INT field can be parsed as float
    doTestExactScore(INT_FIELD,FieldScoreQuery.Type.FLOAT);
    // same values, but in float format
    doTestExactScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
  }

  // Test that FieldScoreQuery returns docs with expected score:
  // the score of every returned doc must equal (within the tolerance) the number
  // encoded in its stored id.
  private void doTestExactScore (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
    IndexSearcher s = new IndexSearcher(dir);
    Query q = new FieldScoreQuery(field,tp);
    TopDocs td = s.search(q,null,1000);
    assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
    ScoreDoc sd[] = td.scoreDocs;
    for (int i=0; i<sd.length; i++) {
      float score = sd[i].score;
      log(s.explain(q,sd[i].doc));
      String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD);
      float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
      assertEquals("score of "+id+" shuould be "+expectedScore+" != "+score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
    }
  }

  /** Test that FieldScoreQuery of Type.BYTE caches/reuses loaded values and consumes the proper RAM resources. */
  public void testCachingByte () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as byte
    doTestCaching(INT_FIELD,FieldScoreQuery.Type.BYTE);
  }

  /** Test that FieldScoreQuery of Type.SHORT caches/reuses loaded values and consumes the proper RAM resources. */
  public void testCachingShort () throws CorruptIndexException, Exception {
    // INT field values are small enough to be parsed as short
    doTestCaching(INT_FIELD,FieldScoreQuery.Type.SHORT);
  }

  /** Test that FieldScoreQuery of Type.INT caches/reuses loaded values and consumes the proper RAM resources. */
  public void testCachingInt () throws CorruptIndexException, Exception {
    doTestCaching(INT_FIELD,FieldScoreQuery.Type.INT);
  }

  /** Test that FieldScoreQuery of Type.FLOAT caches/reuses loaded values and consumes the proper RAM resources. */
  public void testCachingFloat () throws CorruptIndexException, Exception {
    // INT field values can be parsed as float
    doTestCaching(INT_FIELD,FieldScoreQuery.Type.FLOAT);
    // same values, but in float format
    doTestCaching(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
  }

  // Test that values loaded for FieldScoreQuery are cached properly and consume the proper RAM resources.
  // With one searcher, the inner array behind the query's value source must have
  // the array type matching the requested Type and must be the very same object
  // for every new query; with a new searcher it must be a different object.
  private void doTestCaching (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
    // prepare expected array types for comparison
    HashMap expectedArrayTypes = new HashMap();
    expectedArrayTypes.put(FieldScoreQuery.Type.BYTE, new byte[0]);
    expectedArrayTypes.put(FieldScoreQuery.Type.SHORT, new short[0]);
    expectedArrayTypes.put(FieldScoreQuery.Type.INT, new int[0]);
    expectedArrayTypes.put(FieldScoreQuery.Type.FLOAT, new float[0]);

    IndexSearcher s = new IndexSearcher(dir);
    Object innerArray = null;

    for (int i=0; i<10; i++) {
      FieldScoreQuery q = new FieldScoreQuery(field,tp);
      Hits h = s.search(q);
      assertEquals("All docs should be matched!",N_DOCS,h.length());
      if (i==0) {
        innerArray = q.valSrc.getValues(s.getIndexReader()).getInnerArray();
        log(i+". compare: "+innerArray.getClass()+" to "+expectedArrayTypes.get(tp).getClass());
        // expected value first, actual second, so a failure is reported the right way round.
        assertEquals("field values should be cached in the correct array type!", expectedArrayTypes.get(tp).getClass(),innerArray.getClass());
      } else {
        log(i+". compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
        assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
      }
    }

    // verify new values are reloaded (not reused) for a new reader
    s = new IndexSearcher(dir);
    FieldScoreQuery q = new FieldScoreQuery(field,tp);
    Hits h = s.search(q);
    assertEquals("All docs should be matched!",N_DOCS,h.length());
    log("compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
    assertNotSame("cached field values should not be reused if reader as changed!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
  }
}

View File

@ -0,0 +1,202 @@
package org.apache.lucene.search.function;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
/**
* Test search based on OrdFieldSource and ReverseOrdFieldSource.
* <p>
* Tests here create an index with a few documents, each having
* an indexed "id" field.
* The ord values of this field are later used for scoring.
* <p>
* The order tests use Hits to verify that docs are ordered as expected.
* <p>
* The exact score tests use TopDocs top to verify the exact score.
*/
public class TestOrdValues extends FunctionTestSetup {
/* @override constructor */
public TestOrdValues(String name) {
// Passes the test name straight through to the FunctionTestSetup constructor.
super(name);
}
/* @override */
protected void tearDown() throws Exception {
// No cleanup of its own: everything is left to the superclass tearDown().
super.tearDown();
}
/* @override */
protected void setUp() throws Exception {
// Adds no fixtures of its own: the superclass setUp() is relied on to
// prepare a small index with just a few documents.
super.setUp();
}
/** Test OrdFieldSource */
public void testOrdFieldRank () throws CorruptIndexException, Exception {
  // true selects OrdFieldSource in doTestRank().
  final boolean inOrder = true;
  doTestRank(ID_FIELD, inOrder);
}
/** Test ReverseOrdFieldSource */
public void testReverseOrdFieldRank () throws CorruptIndexException, Exception {
  // false selects ReverseOrdFieldSource in doTestRank().
  final boolean inOrder = false;
  doTestRank(ID_FIELD, inOrder);
}
// Test that queries based on reverse/ordFieldScore scores correctly
private void doTestRank (String field, boolean inOrder) throws CorruptIndexException, Exception {
IndexSearcher s = new IndexSearcher(dir);
ValueSource vs;
if (inOrder) {
vs = new OrdFieldSource(field);
} else {
vs = new ReverseOrdFieldSource(field);
}
Query q = new ValueSourceQuery(vs);
log("test: "+q);
QueryUtils.check(q,s);
Hits h = s.search(q);
assertEquals("All docs should be matched!",N_DOCS,h.length());
String prevID = inOrder
? "IE" // greater than all ids of docs in this test ("ID0001", etc.)
: "IC"; // smaller than all ids of docs in this test ("ID0001", etc.)
for (int i=0; i<h.length(); i++) {
String resID = h.doc(i).get(ID_FIELD);
log(i+". score="+h.score(i)+" - "+resID);
log(s.explain(q,h.id(i)));
if (inOrder) {
assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
} else {
assertTrue("res id "+resID+" should be > prev res id "+prevID, resID.compareTo(prevID)>0);
}
prevID = resID;
}
}
/** Test exact score for OrdFieldSource */
public void testOrdFieldExactScore () throws CorruptIndexException, Exception {
doTestExactScore(ID_FIELD,true);
}
/** Test exact score for ReverseOrdFieldSource */
public void testReverseOrdFieldExactScore () throws CorruptIndexException, Exception {
doTestExactScore(ID_FIELD,false);
}
// Test that queries based on reverse/ordFieldScore returns docs with expected score.
private void doTestExactScore (String field, boolean inOrder) throws CorruptIndexException, Exception {
IndexSearcher s = new IndexSearcher(dir);
ValueSource vs;
if (inOrder) {
vs = new OrdFieldSource(field);
} else {
vs = new ReverseOrdFieldSource(field);
}
Query q = new ValueSourceQuery(vs);
TopDocs td = s.search(q,null,1000);
assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
ScoreDoc sd[] = td.scoreDocs;
for (int i=0; i<sd.length; i++) {
float score = sd[i].score;
String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD);
log("-------- "+i+". Explain doc "+id);
log(s.explain(q,sd[i].doc));
float expectedScore = N_DOCS-i;
assertEquals("score of result "+i+" shuould be "+expectedScore+" != "+score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
String expectedId = inOrder
? id2String(N_DOCS-i) // in-order ==> larger values first
: id2String(i+1); // reverse ==> smaller values first
assertTrue("id of result "+i+" shuould be "+expectedId+" != "+score, expectedId.equals(id));
}
}
/** Test caching OrdFieldSource */
public void testCachingOrd () throws CorruptIndexException, Exception {
doTestCaching(ID_FIELD,true);
}
/** Test caching for ReverseOrdFieldSource */
public void tesCachingReverseOrd () throws CorruptIndexException, Exception {
doTestCaching(ID_FIELD,false);
}
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
private void doTestCaching (String field, boolean inOrder) throws CorruptIndexException, Exception {
IndexSearcher s = new IndexSearcher(dir);
Object innerArray = null;
for (int i=0; i<10; i++) {
ValueSource vs;
if (inOrder) {
vs = new OrdFieldSource(field);
} else {
vs = new ReverseOrdFieldSource(field);
}
ValueSourceQuery q = new ValueSourceQuery(vs);
Hits h = s.search(q);
assertEquals("All docs should be matched!",N_DOCS,h.length());
if (i==0) {
innerArray = q.valSrc.getValues(s.getIndexReader()).getInnerArray();
} else {
log(i+". compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
}
}
ValueSource vs;
ValueSourceQuery q;
Hits h;
// verify that different values are loaded for a different field
String field2 = INT_FIELD;
assertFalse(field.equals(field2)); // otherwise this test is meaningless.
if (inOrder) {
vs = new OrdFieldSource(field2);
} else {
vs = new ReverseOrdFieldSource(field2);
}
q = new ValueSourceQuery(vs);
h = s.search(q);
assertEquals("All docs should be matched!",N_DOCS,h.length());
log("compare (should differ): "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
assertNotSame("different values shuold be loaded for a different field!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
// verify new values are reloaded (not reused) for a new reader
s = new IndexSearcher(dir);
if (inOrder) {
vs = new OrdFieldSource(field);
} else {
vs = new ReverseOrdFieldSource(field);
}
q = new ValueSourceQuery(vs);
h = s.search(q);
assertEquals("All docs should be matched!",N_DOCS,h.length());
log("compare (should differ): "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
assertNotSame("cached field values should not be reused if reader as changed!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
}
}