mirror of https://github.com/apache/lucene.git
LUCENE-446: Added Solr's search.function for scores based on field
values, plus CustomScoreQuery for simple score (post) customization. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@544546 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
db78c85c21
commit
125fed32d8
|
@ -186,6 +186,10 @@ New features
|
|||
on the remote side of the RMI connection.
|
||||
(Matt Ericson via Otis Gospodnetic)
|
||||
|
||||
8. LUCENE-446: Added Solr's search.function for scores based on field
|
||||
values, plus CustomScoreQuery for simple score (post) customization.
|
||||
(Yonik Seeley, Doron Cohen)
|
||||
|
||||
Optimizations
|
||||
|
||||
1. LUCENE-761: The proxStream is now cloned lazily in SegmentTermPositions
|
||||
|
|
|
@ -53,6 +53,22 @@ public interface FieldCache {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Converts the string form of a document field value into a byte.
 * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
 */
public interface ByteParser {
  /** Returns the single byte value that <code>string</code> represents. */
  public byte parseByte(String string);
}
|
||||
|
||||
/**
 * Converts the string form of a document field value into a short.
 * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
 */
public interface ShortParser {
  /** Returns the short value that <code>string</code> represents. */
  public short parseShort(String string);
}
|
||||
|
||||
/** Interface to parse ints from document fields.
|
||||
* @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
|
||||
*/
|
||||
|
@ -72,6 +88,56 @@ public interface FieldCache {
|
|||
/** Expert: The cache used internally by sorting and range query classes. */
|
||||
public static FieldCache DEFAULT = new FieldCacheImpl();
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is
|
||||
* found, reads the terms in <code>field</code> as a single byte and returns an array
|
||||
* of size <code>reader.maxDoc()</code> of the value each document
|
||||
* has in the given field.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the single byte values.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public byte[] getBytes (IndexReader reader, String field)
|
||||
throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||
* reads the terms in <code>field</code> as bytes and returns an array of
|
||||
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||
* given field.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the bytes.
|
||||
* @param parser Computes byte for string values.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
|
||||
throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is
|
||||
* found, reads the terms in <code>field</code> as shorts and returns an array
|
||||
* of size <code>reader.maxDoc()</code> of the value each document
|
||||
* has in the given field.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the shorts.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public short[] getShorts (IndexReader reader, String field)
|
||||
throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is found,
|
||||
* reads the terms in <code>field</code> as shorts and returns an array of
|
||||
* size <code>reader.maxDoc()</code> of the value each document has in the
|
||||
* given field.
|
||||
* @param reader Used to get field values.
|
||||
* @param field Which field contains the shorts.
|
||||
* @param parser Computes short for string values.
|
||||
* @return The values in the given field for each document.
|
||||
* @throws IOException If any error occurs.
|
||||
*/
|
||||
public short[] getShorts (IndexReader reader, String field, ShortParser parser)
|
||||
throws IOException;
|
||||
|
||||
/** Checks the internal cache for an appropriate entry, and if none is
|
||||
* found, reads the terms in <code>field</code> as integers and returns an array
|
||||
* of size <code>reader.maxDoc()</code> of the value each document
|
||||
|
|
|
@ -131,18 +131,108 @@ implements FieldCache {
|
|||
}
|
||||
}
|
||||
|
||||
private static final ByteParser BYTE_PARSER = new ByteParser() {
|
||||
public byte parseByte(String value) {
|
||||
return Byte.parseByte(value);
|
||||
}
|
||||
};
|
||||
|
||||
private static final ShortParser SHORT_PARSER = new ShortParser() {
|
||||
public short parseShort(String value) {
|
||||
return Short.parseShort(value);
|
||||
}
|
||||
};
|
||||
|
||||
private static final IntParser INT_PARSER = new IntParser() {
|
||||
public int parseInt(String value) {
|
||||
return Integer.parseInt(value);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
private static final FloatParser FLOAT_PARSER = new FloatParser() {
|
||||
public float parseFloat(String value) {
|
||||
return Float.parseFloat(value);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// inherit javadocs
|
||||
public byte[] getBytes (IndexReader reader, String field) throws IOException {
|
||||
return getBytes(reader, field, BYTE_PARSER);
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
public byte[] getBytes(IndexReader reader, String field, ByteParser parser)
|
||||
throws IOException {
|
||||
return (byte[]) bytesCache.get(reader, new Entry(field, parser));
|
||||
}
|
||||
|
||||
Cache bytesCache = new Cache() {
|
||||
|
||||
protected Object createValue(IndexReader reader, Object entryKey)
|
||||
throws IOException {
|
||||
Entry entry = (Entry) entryKey;
|
||||
String field = entry.field;
|
||||
ByteParser parser = (ByteParser) entry.custom;
|
||||
final byte[] retArray = new byte[reader.maxDoc()];
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
TermEnum termEnum = reader.terms (new Term (field, ""));
|
||||
try {
|
||||
do {
|
||||
Term term = termEnum.term();
|
||||
if (term==null || term.field() != field) break;
|
||||
byte termval = parser.parseByte(term.text());
|
||||
termDocs.seek (termEnum);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = termval;
|
||||
}
|
||||
} while (termEnum.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
termEnum.close();
|
||||
}
|
||||
return retArray;
|
||||
}
|
||||
};
|
||||
|
||||
// inherit javadocs
|
||||
public short[] getShorts (IndexReader reader, String field) throws IOException {
|
||||
return getShorts(reader, field, SHORT_PARSER);
|
||||
}
|
||||
|
||||
// inherit javadocs
|
||||
public short[] getShorts(IndexReader reader, String field, ShortParser parser)
|
||||
throws IOException {
|
||||
return (short[]) shortsCache.get(reader, new Entry(field, parser));
|
||||
}
|
||||
|
||||
Cache shortsCache = new Cache() {
|
||||
|
||||
protected Object createValue(IndexReader reader, Object entryKey)
|
||||
throws IOException {
|
||||
Entry entry = (Entry) entryKey;
|
||||
String field = entry.field;
|
||||
ShortParser parser = (ShortParser) entry.custom;
|
||||
final short[] retArray = new short[reader.maxDoc()];
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
TermEnum termEnum = reader.terms (new Term (field, ""));
|
||||
try {
|
||||
do {
|
||||
Term term = termEnum.term();
|
||||
if (term==null || term.field() != field) break;
|
||||
short termval = parser.parseShort(term.text());
|
||||
termDocs.seek (termEnum);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = termval;
|
||||
}
|
||||
} while (termEnum.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
termEnum.close();
|
||||
}
|
||||
return retArray;
|
||||
}
|
||||
};
|
||||
|
||||
// inherit javadocs
|
||||
public int[] getInts (IndexReader reader, String field) throws IOException {
|
||||
return getInts(reader, field, INT_PARSER);
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains single byte field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getBytes()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*/
|
||||
public class ByteFieldSource extends FieldCacheSource {
|
||||
private FieldCache.ByteParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached byte field source with default string-to-byte parser.
|
||||
*/
|
||||
public ByteFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached byte field source with a specific string-to-byte parser.
|
||||
*/
|
||||
public ByteFieldSource(String field, FieldCache.ByteParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "byte(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final byte[] arr = (parser==null) ?
|
||||
cache.getBytes(reader, field) :
|
||||
cache.getBytes(reader, field, parser);
|
||||
return new DocValues(reader.maxDoc()) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return (float) arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != ByteFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
ByteFieldSource other = (ByteFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Byte.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,344 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.ComplexExplanation;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.search.Similarity;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/**
|
||||
* Query that sets document score as a programmatic function of (up to) two (sub) scores.
|
||||
* <ol>
|
||||
* <li>the score of its subQuery (any query)</li>
|
||||
* <li>(optional) the score of its ValueSourtceQuery,
|
||||
* for most simple/convineient use case this query would be a
|
||||
* {@link org.apache.lucene.search.function.FieldScoreQuery FieldScoreQuery}</li>
|
||||
* </ol>
|
||||
* Subclasses can modify the computation by overriding {@link #customScore(int, float, float)}.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*/
|
||||
public class CustomScoreQuery extends Query {
|
||||
|
||||
private Query subQuery;
|
||||
private ValueSourceQuery valSrcQuery; // optional, can be null
|
||||
private boolean strict = false; // if true, valueSource part of query does not take part in weights normalization.
|
||||
|
||||
/**
|
||||
* Create a CustomScoreQuery over input subQuery.
|
||||
* @param subQuery the sub query whose scored is being customed. Must not be null.
|
||||
*/
|
||||
public CustomScoreQuery(Query subQuery) {
|
||||
this(subQuery,null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}.
|
||||
* @param subQuery the sub query whose score is being customed. Must not be null.
|
||||
* @param valSrcQuery a value source query whose scores are used in the custom score
|
||||
* computation. For most simple/convineient use case this would be a
|
||||
* {@link org.apache.lucene.search.function.FieldScoreQuery FieldScoreQuery}.
|
||||
* This parameter is optional - it can be null.
|
||||
*/
|
||||
public CustomScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery) {
|
||||
super();
|
||||
this.subQuery = subQuery;
|
||||
this.valSrcQuery = valSrcQuery;
|
||||
if (subQuery == null) throw new IllegalArgumentException("<subqyery> must not be null!");
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader) */
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
subQuery = subQuery.rewrite(reader);
|
||||
if (valSrcQuery!=null) {
|
||||
valSrcQuery = (ValueSourceQuery) valSrcQuery.rewrite(reader);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#extractTerms(java.util.Set) */
|
||||
public void extractTerms(Set terms) {
|
||||
subQuery.extractTerms(terms);
|
||||
if (valSrcQuery!=null) {
|
||||
valSrcQuery.extractTerms(terms);
|
||||
}
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#clone() */
|
||||
public Object clone() {
|
||||
CustomScoreQuery clone = (CustomScoreQuery)super.clone();
|
||||
clone.subQuery = (Query) subQuery.clone();
|
||||
if (valSrcQuery!=null) {
|
||||
clone.valSrcQuery = (ValueSourceQuery) valSrcQuery.clone();
|
||||
}
|
||||
return clone;
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.Query#toString(java.lang.String) */
|
||||
public String toString(String field) {
|
||||
StringBuffer sb = new StringBuffer(name()).append("(");
|
||||
sb.append(subQuery.toString(field));
|
||||
if (valSrcQuery!=null) {
|
||||
sb.append(", ").append(valSrcQuery.toString(field));
|
||||
}
|
||||
sb.append(")");
|
||||
sb.append(strict?" STRICT" : "");
|
||||
return sb.toString() + ToStringUtils.boost(getBoost());
|
||||
}
|
||||
|
||||
/** Returns true if <code>o</code> is equal to this. */
|
||||
public boolean equals(Object o) {
|
||||
if (getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
CustomScoreQuery other = (CustomScoreQuery)o;
|
||||
return this.getBoost() == other.getBoost()
|
||||
&& this.subQuery.equals(other.subQuery)
|
||||
&& (this.valSrcQuery==null ? other.valSrcQuery==null
|
||||
: this.valSrcQuery.equals(other.valSrcQuery));
|
||||
}
|
||||
|
||||
/** Returns a hash code value for this object. */
|
||||
public int hashCode() {
|
||||
int valSrcHash = valSrcQuery==null ? 0 : valSrcQuery.hashCode();
|
||||
return (getClass().hashCode() + subQuery.hashCode() + valSrcHash) ^ Float.floatToIntBits(getBoost());
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a custom score by the subQuery score and the ValueSourceQuery score.
|
||||
* <p>
|
||||
* Subclasses can override this method to modify the custom score.
|
||||
* <p>
|
||||
* The default computation herein is:
|
||||
* <pre>
|
||||
* ModifiedScore = valSrcScore * subQueryScore.
|
||||
* </pre>
|
||||
*
|
||||
* @param doc id of scored doc.
|
||||
* @param subQueryScore score of that doc by the subQuery.
|
||||
* @param valSrcScore score of that doc by the ValueSourceQuery.
|
||||
* @return custom score.
|
||||
*/
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return valSrcScore * subQueryScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Explain the custom score.
|
||||
* Whenever overriding {@link #customScore(int, float, float)},
|
||||
* this method should also be overriden to provide the correct explanation
|
||||
* for the part of the custom scoring.
|
||||
* @param doc doc being explained.
|
||||
* @param subQueryExpl explanation for the sub-query part.
|
||||
* @param valSrcExpl explanation for the value source part.
|
||||
* @return an explanation for the custom score
|
||||
*/
|
||||
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
|
||||
float valSrcScore = valSrcExpl==null ? 1 : valSrcExpl.getValue();
|
||||
Explanation exp = new Explanation( valSrcScore * subQueryExpl.getValue(), "custom score: product of:");
|
||||
exp.addDetail(subQueryExpl);
|
||||
if (valSrcExpl != null) {
|
||||
exp.addDetail(valSrcExpl);
|
||||
}
|
||||
return exp;
|
||||
}
|
||||
//=========================== W E I G H T ============================
|
||||
|
||||
private class CustomWeight implements Weight {
|
||||
Searcher searcher;
|
||||
Weight subQueryWeight;
|
||||
Weight valSrcWeight; // optional
|
||||
boolean qStrict;
|
||||
|
||||
public CustomWeight(Searcher searcher) throws IOException {
|
||||
this.searcher = searcher;
|
||||
this.subQueryWeight = subQuery.weight(searcher);
|
||||
if (valSrcQuery!=null) {
|
||||
this.valSrcWeight = valSrcQuery.createWeight(searcher);
|
||||
}
|
||||
this.qStrict = strict;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getQuery() */
|
||||
public Query getQuery() {
|
||||
return CustomScoreQuery.this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getValue() */
|
||||
public float getValue() {
|
||||
return getBoost();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
|
||||
public float sumOfSquaredWeights() throws IOException {
|
||||
float sum = subQueryWeight.sumOfSquaredWeights();
|
||||
if (valSrcWeight!=null) {
|
||||
if (qStrict) {
|
||||
valSrcWeight.sumOfSquaredWeights(); // do not include ValueSource part in the query normalization
|
||||
} else {
|
||||
sum += valSrcWeight.sumOfSquaredWeights();
|
||||
}
|
||||
}
|
||||
sum *= getBoost() * getBoost(); // boost each sub-weight
|
||||
return sum ;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
|
||||
public void normalize(float norm) {
|
||||
norm *= getBoost(); // incorporate boost
|
||||
subQueryWeight.normalize(norm);
|
||||
if (valSrcWeight!=null) {
|
||||
if (qStrict) {
|
||||
valSrcWeight.normalize(1); // do not normalize the ValueSource part
|
||||
} else {
|
||||
valSrcWeight.normalize(norm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader) */
|
||||
public Scorer scorer(IndexReader reader) throws IOException {
|
||||
Scorer subQueryScorer = subQueryWeight.scorer(reader);
|
||||
Scorer valSrcScorer = (valSrcWeight==null ? null : valSrcWeight.scorer(reader));
|
||||
return new CustomScorer(getSimilarity(searcher), reader, this, subQueryScorer, valSrcScorer);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int) */
|
||||
public Explanation explain(IndexReader reader, int doc) throws IOException {
|
||||
return scorer(reader).explain(doc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//=========================== S C O R E R ============================
|
||||
|
||||
/**
|
||||
* A scorer that applies a (callback) function on scores of the subQuery.
|
||||
*/
|
||||
private class CustomScorer extends Scorer {
|
||||
private final CustomWeight weight;
|
||||
private final float qWeight;
|
||||
private Scorer subQueryScorer;
|
||||
private Scorer valSrcScorer; // optional
|
||||
private IndexReader reader;
|
||||
|
||||
// constructor
|
||||
private CustomScorer(Similarity similarity, IndexReader reader, CustomWeight w,
|
||||
Scorer subQueryScorer, Scorer valSrcScorer) throws IOException {
|
||||
super(similarity);
|
||||
this.weight = w;
|
||||
this.qWeight = w.getValue();
|
||||
this.subQueryScorer = subQueryScorer;
|
||||
this.valSrcScorer = valSrcScorer;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#next() */
|
||||
public boolean next() throws IOException {
|
||||
boolean hasNext = subQueryScorer.next();
|
||||
if (valSrcScorer!=null && hasNext) {
|
||||
valSrcScorer.skipTo(subQueryScorer.doc());
|
||||
}
|
||||
return hasNext;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#doc() */
|
||||
public int doc() {
|
||||
return subQueryScorer.doc();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
|
||||
public float score() throws IOException {
|
||||
float valSrcScore = (valSrcScorer==null ? 1 : valSrcScorer.score());
|
||||
return qWeight * customScore(subQueryScorer.doc(), subQueryScorer.score(), valSrcScore);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#skipTo(int) */
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
boolean hasNext = subQueryScorer.skipTo(target);
|
||||
if (valSrcScorer!=null && hasNext) {
|
||||
valSrcScorer.skipTo(subQueryScorer.doc());
|
||||
}
|
||||
return hasNext;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#explain(int) */
|
||||
public Explanation explain(int doc) throws IOException {
|
||||
Explanation subQueryExpl = weight.subQueryWeight.explain(reader,doc);
|
||||
if (!subQueryExpl.isMatch()) {
|
||||
return subQueryExpl;
|
||||
}
|
||||
// match
|
||||
Explanation valSrcExpl = valSrcScorer==null ? null : valSrcScorer.explain(doc);
|
||||
Explanation customExp = customExplain(doc,subQueryExpl,valSrcExpl);
|
||||
float sc = qWeight * customExp.getValue();
|
||||
Explanation res = new ComplexExplanation(
|
||||
true, sc, CustomScoreQuery.this.toString() + ", product of:");
|
||||
res.addDetail(customExp);
|
||||
res.addDetail(new Explanation(qWeight, "queryBoost")); // actually using the q boost as q weight (== weight value)
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */
|
||||
protected Weight createWeight(Searcher searcher) throws IOException {
|
||||
return new CustomWeight(searcher);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if this is strict custom scoring.
|
||||
* In strict custom scoring, the ValueSource part of does not participate in weight normalization.
|
||||
* This may be useful when one wants full control over how scores are modified, and does
|
||||
* not care about normalizing by the ValueSource part.
|
||||
* One particular case where this is useful if for testing this query.
|
||||
* <P>
|
||||
* Note: only has effect when the ValueSource part is not null.
|
||||
*/
|
||||
public boolean isStrict() {
|
||||
return strict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the strict mode of this query.
|
||||
* @param strict The strict mode to set.
|
||||
* @see #isStrict()
|
||||
*/
|
||||
public void setStrict(boolean strict) {
|
||||
this.strict = strict;
|
||||
}
|
||||
|
||||
/**
|
||||
* A short name of this query, used in {@link #toString(String)}.
|
||||
*/
|
||||
public String name() {
|
||||
return "custom";
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,176 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
||||
/**
|
||||
* Expert: represents field values as different types.
|
||||
* Normally created via a
|
||||
* {@link org.apache.lucene.search.function.ValueSource ValueSuorce}
|
||||
* for a particular field and reader.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public abstract class DocValues {
|
||||
/*
|
||||
* DocValues is distinct from ValueSource because
|
||||
* there needs to be an object created at query evaluation time that
|
||||
* is not referenced by the query itself because:
|
||||
* - Query objects should be MT safe
|
||||
* - For caching, Query objects are often used as keys... you don't
|
||||
* want the Query carrying around big objects
|
||||
*/
|
||||
|
||||
private int nVals;
|
||||
|
||||
/**
|
||||
* Constructor with input number of values(docs).
|
||||
* @param nVals
|
||||
*/
|
||||
public DocValues (int nVals) {
|
||||
this.nVals = nVals;
|
||||
}
|
||||
|
||||
// prevent using this constructor
|
||||
private DocValues () {
|
||||
|
||||
}
|
||||
/**
|
||||
* Return doc value as a float.
|
||||
* <P>Mandatory: every DocValues implementation must implement at least this method.
|
||||
* @param doc document whose float value is requested.
|
||||
*/
|
||||
public abstract float floatVal(int doc);
|
||||
|
||||
/**
|
||||
* Return doc value as an int.
|
||||
* <P>Optional: DocValues implementation can (but don't have to) override this method.
|
||||
* @param doc document whose int value is requested.
|
||||
*/
|
||||
public int intVal(int doc) {
|
||||
return (int) floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a long.
|
||||
* <P>Optional: DocValues implementation can (but don't have to) override this method.
|
||||
* @param doc document whose long value is requested.
|
||||
*/
|
||||
public long longVal(int doc) {
|
||||
return (long) floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a double.
|
||||
* <P>Optional: DocValues implementation can (but don't have to) override this method.
|
||||
* @param doc document whose double value is requested.
|
||||
*/
|
||||
public double doubleVal(int doc) {
|
||||
return (double) floatVal(doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return doc value as a string.
|
||||
* <P>Optional: DocValues implementation can (but don't have to) override this method.
|
||||
* @param doc document whose string value is requested.
|
||||
*/
|
||||
public String strVal(int doc) {
|
||||
return Float.toString(floatVal(doc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a string representation of a doc value, as reuired for Explanations.
|
||||
*/
|
||||
public abstract String toString(int doc);
|
||||
|
||||
/**
|
||||
* Explain the scoring value for the input doc.
|
||||
*/
|
||||
public Explanation explain(int doc) {
|
||||
return new Explanation(floatVal(doc), toString(doc));
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: for test purposes only, return the inner array of values, or null if not applicable.
|
||||
* <p>
|
||||
* Allows tests to verify that loaded values are:
|
||||
* <ol>
|
||||
* <li>indeed cached/reused.</li>
|
||||
* <li>stored in the expected size/type (byte/short/int/float).</li>
|
||||
* </ol>
|
||||
* Note: Tested implementations of DocValues must override this method for the test to pass!
|
||||
*/
|
||||
Object getInnerArray() {
|
||||
return new Object[0];
|
||||
}
|
||||
|
||||
// --- some simple statistics on values
|
||||
private float minVal;
|
||||
private float maxVal;
|
||||
private float avgVal;
|
||||
private boolean computed=false;
|
||||
// compute optional values
|
||||
private void compute () {
|
||||
if (computed) {
|
||||
return;
|
||||
}
|
||||
minVal = Float.MAX_VALUE;
|
||||
maxVal = 0;
|
||||
float sum = 0;
|
||||
for (int i=0; i<nVals; i++) {
|
||||
float val = floatVal(i);
|
||||
sum += val;
|
||||
minVal = Math.min(minVal,val);
|
||||
maxVal = Math.max(maxVal,val);
|
||||
}
|
||||
avgVal = sum / nVals;
|
||||
computed = true;
|
||||
}
|
||||
/**
|
||||
* Optional op.
|
||||
* Returns the minimum of all values.
|
||||
*/
|
||||
public float getMinValue () {
|
||||
compute();
|
||||
return minVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Optional op.
|
||||
* Returns the maximum of all values.
|
||||
*/
|
||||
public float getMaxValue () {
|
||||
compute();
|
||||
return maxVal;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the average of all values.
|
||||
*/
|
||||
public float getAverageValue () {
|
||||
compute();
|
||||
return avgVal;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
/**
|
||||
* Expert: A base class for ValueSource implementations that retrieve values for
|
||||
* a single field from the {@link org.apache.lucene.search.FieldCache FieldCache}.
|
||||
* <p>
|
||||
* Fields used herein must be indexed (it does not matter whether these fields are stored or not).
|
||||
* <p>
|
||||
* It is assumed that each such indexed field is untokenized, or at least has a single token in a document.
|
||||
* For documents with multiple tokens of the same field, behavior is undefined (It is likely that current
|
||||
* code would use the value of one of these tokens, but this is not guaranteed).
|
||||
* <p>
|
||||
* Documents with no tokens in this field are assigned the <code>Zero</code> value.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public abstract class FieldCacheSource extends ValueSource {
|
||||
private String field;
|
||||
private FieldCache cache = FieldCache.DEFAULT;
|
||||
|
||||
/**
|
||||
* Create a cached field source for the input field.
|
||||
*/
|
||||
public FieldCacheSource(String field) {
|
||||
this.field=field;
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
|
||||
public final DocValues getValues(IndexReader reader) throws IOException {
|
||||
return getCachedFieldValues(cache, field, reader);
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return field;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return cached DocValues for input field and reader.
|
||||
* @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing)
|
||||
* @param field Field for which values are required.
|
||||
* @see ValueSource
|
||||
*/
|
||||
public abstract DocValues getCachedFieldValues(FieldCache cache, String field, IndexReader reader) throws IOException;
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
public final boolean equals(Object o) {
|
||||
if (!(o instanceof FieldCacheSource)) {
|
||||
return false;
|
||||
}
|
||||
FieldCacheSource other = (FieldCacheSource) o;
|
||||
return
|
||||
this.cache == other.cache &&
|
||||
this.field.equals(other.field) &&
|
||||
cachedFieldSourceEquals(other);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
public final int hashCode() {
|
||||
return
|
||||
cache.hashCode() +
|
||||
field.hashCode() +
|
||||
cachedFieldSourceHashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal.
|
||||
* @see Object#equals(java.lang.Object)
|
||||
*/
|
||||
public abstract boolean cachedFieldSourceEquals(FieldCacheSource other);
|
||||
|
||||
/**
|
||||
* Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field
|
||||
* and the cache (those are taken care of elsewhere).
|
||||
* @see Object#hashCode()
|
||||
*/
|
||||
public abstract int cachedFieldSourceHashCode();
|
||||
}
|
|
@ -0,0 +1,127 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A query that scores each document as the value of the numeric input field.
|
||||
* <p>
|
||||
* The query matches all documents, and scores each document according to the numeric
|
||||
* value of that field.
|
||||
* <p>
|
||||
* It is assumed, and expected, that:
|
||||
* <ul>
|
||||
* <li>The field used here is indexed, and has exactly
|
||||
* one token in every scored document.</li>
|
||||
* <li>Best if this field is un_tokenized.</li>
|
||||
* <li>That token is parsable to the selected type.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Combining this query in a FunctionQuery allows much freedom in affecting document scores.
|
||||
* Note, that with this freedom comes responsibility: it is more than likely that the
|
||||
* default Lucene scoring is superior in quality to scoring modified as explained here.
|
||||
* However, in some cases, and certainly for research experiments, this capability may turn useful.
|
||||
* <p>
|
||||
* When constructing this query, select the appropriate type. That type should match the data stored in the
|
||||
* field. So in fact the "right" type should be selected before indexing. Type selection
|
||||
* has effect on the RAM usage:
|
||||
* <ul>
|
||||
* <li>{@link Type#BYTE} consumes 1 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#SHORT} consumes 2 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#INT} consumes 4 * maxDocs bytes.</li>
|
||||
* <li>{@link Type#FLOAT} consumes 4 * maxDocs bytes.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <b>Caching:</b>
|
||||
* Values for the numeric field are loaded once and cached in memory for further use with the same IndexReader.
|
||||
* To take advantage of this, it is extremely important to reuse index-readers or index-searchers,
|
||||
* otherwise, for instance if for each query a new index reader is opened, large penalties would be
|
||||
* paid for loading the field values into memory over and over again!
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*/
|
||||
public class FieldScoreQuery extends ValueSourceQuery {
|
||||
|
||||
/**
|
||||
* Type of score field, indicating how field values are interpreted/parsed.
|
||||
* <p>
|
||||
* The type selected at search search time should match the data stored in the field.
|
||||
* Different types have different RAM requirements:
|
||||
* <ul>
|
||||
* <li>{@link #BYTE} consumes 1 * maxDocs bytes.</li>
|
||||
* <li>{@link #SHORT} consumes 2 * maxDocs bytes.</li>
|
||||
* <li>{@link #INT} consumes 4 * maxDocs bytes.</li>
|
||||
* <li>{@link #FLOAT} consumes 8 * maxDocs bytes.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public static class Type {
|
||||
|
||||
/** field values are interpreted as numeric byte values. */
|
||||
public static final Type BYTE = new Type("byte");
|
||||
|
||||
/** field values are interpreted as numeric short values. */
|
||||
public static final Type SHORT = new Type("short");
|
||||
|
||||
/** field values are interpreted as numeric int values. */
|
||||
public static final Type INT = new Type("int");
|
||||
|
||||
/** field values are interpreted as numeric float values. */
|
||||
public static final Type FLOAT = new Type("float");
|
||||
|
||||
private String typeName;
|
||||
private Type (String name) {
|
||||
this.typeName = name;
|
||||
}
|
||||
/*(non-Javadoc) @see java.lang.Object#toString() */
|
||||
public String toString() {
|
||||
return getClass().getName()+"::"+typeName;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field.
|
||||
* <p>
|
||||
* The <code>type</code> param tells how to parse the field string values into a numeric score value.
|
||||
* @param field the numeric field to be used.
|
||||
* @param type the type of the field: either
|
||||
* {@link Type#BYTE}, {@link Type#SHORT}, {@link Type#INT}, or {@link Type#FLOAT}.
|
||||
*/
|
||||
public FieldScoreQuery(String field, Type type) {
|
||||
super(getValueSource(field,type));
|
||||
}
|
||||
|
||||
// create the appropriate (cached) field value source.
|
||||
private static ValueSource getValueSource(String field, Type type) {
|
||||
if (type == Type.BYTE) {
|
||||
return new ByteFieldSource(field);
|
||||
}
|
||||
if (type == Type.SHORT) {
|
||||
return new ShortFieldSource(field);
|
||||
}
|
||||
if (type == Type.INT) {
|
||||
return new IntFieldSource(field);
|
||||
}
|
||||
if (type == Type.FLOAT) {
|
||||
return new FloatFieldSource(field);
|
||||
}
|
||||
throw new IllegalArgumentException(type+" is not a known Field Score Query Type!");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,102 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains float field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getFloats()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public class FloatFieldSource extends FieldCacheSource {
|
||||
private FieldCache.FloatParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached float field source with default string-to-float parser.
|
||||
*/
|
||||
public FloatFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached float field source with a specific string-to-float parser.
|
||||
*/
|
||||
public FloatFieldSource(String field, FieldCache.FloatParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "float(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final float[] arr = (parser==null) ?
|
||||
cache.getFloats(reader, field) :
|
||||
cache.getFloats(reader, field, parser);
|
||||
return new DocValues(reader.maxDoc()) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != FloatFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
FloatFieldSource other = (FloatFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Float.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,107 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains int field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getInts()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public class IntFieldSource extends FieldCacheSource {
|
||||
private FieldCache.IntParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached int field source with default string-to-int parser.
|
||||
*/
|
||||
public IntFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached int field source with a specific string-to-int parser.
|
||||
*/
|
||||
public IntFieldSource(String field, FieldCache.IntParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "int(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final int[] arr = (parser==null) ?
|
||||
cache.getInts(reader, field) :
|
||||
cache.getInts(reader, field, parser);
|
||||
return new DocValues(reader.maxDoc()) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return (float) arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != IntFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
IntFieldSource other = (IntFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Integer.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains the ordinal of the field value from the default Lucene
|
||||
* {@link org.apache.lucene.search.FieldCache Fieldcache} using getStringIndex().
|
||||
* <p>
|
||||
* The native lucene index order is used to assign an ordinal value for each field value.
|
||||
* <p>
|
||||
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
|
||||
* <p>
|
||||
* Example:
|
||||
* <br>If there were only three field values: "apple","banana","pear"
|
||||
* <br>then ord("apple")=1, ord("banana")=2, ord("pear")=3
|
||||
* <p>
|
||||
* WARNING:
|
||||
* ord() depends on the position in an index and can thus change
|
||||
* when other documents are inserted or deleted,
|
||||
* or if a MultiSearcher is used.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
|
||||
public class OrdFieldSource extends ValueSource {
|
||||
protected String field;
|
||||
|
||||
/**
|
||||
* Contructor for a certain field.
|
||||
* @param field field whose values order is used.
|
||||
*/
|
||||
public OrdFieldSource(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "ord(" + field + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getValues(IndexReader reader) throws IOException {
|
||||
final int[] arr = FieldCache.DEFAULT.getStringIndex(reader, field).order;
|
||||
return new DocValues(arr.length) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return (float)arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
|
||||
public String strVal(int doc) {
|
||||
// the string value of the ordinal, not the string itself
|
||||
return Integer.toString(arr[doc]);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
public boolean equals(Object o) {
|
||||
if (o.getClass() != OrdFieldSource.class) return false;
|
||||
OrdFieldSource other = (OrdFieldSource)o;
|
||||
return this.field.equals(other.field);
|
||||
}
|
||||
|
||||
private static final int hcode = OrdFieldSource.class.hashCode();
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
public int hashCode() {
|
||||
return hcode + field.hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,112 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.function;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains the ordinal of the field value from the default Lucene
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache} using getStringIndex()
|
||||
* and reverses the order.
|
||||
* <p>
|
||||
* The native lucene index order is used to assign an ordinal value for each field value.
|
||||
* <p>
|
||||
* Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1.
|
||||
* <br>
|
||||
* Example of reverse ordinal (rord):
|
||||
* <br>If there were only three field values: "apple","banana","pear"
|
||||
* <br>then rord("apple")=3, rord("banana")=2, rord("pear")=1
|
||||
* <p>
|
||||
* WARNING:
|
||||
* rord() depends on the position in an index and can thus change
|
||||
* when other documents are inserted or deleted,
|
||||
* or if a MultiSearcher is used.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
|
||||
public class ReverseOrdFieldSource extends ValueSource {
|
||||
public String field;
|
||||
|
||||
/**
|
||||
* Contructor for a certain field.
|
||||
* @param field field whose values reverse order is used.
|
||||
*/
|
||||
public ReverseOrdFieldSource(String field) {
|
||||
this.field = field;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "rord("+field+')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#getValues(org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getValues(IndexReader reader) throws IOException {
|
||||
final FieldCache.StringIndex sindex = FieldCache.DEFAULT.getStringIndex(reader, field);
|
||||
|
||||
final int arr[] = sindex.order;
|
||||
final int end = sindex.lookup.length;
|
||||
|
||||
return new DocValues(arr.length) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return (float)(end - arr[doc]);
|
||||
}
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
public int intVal(int doc) {
|
||||
return end - arr[doc];
|
||||
}
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.function.DocValues#strVal(int) */
|
||||
public String strVal(int doc) {
|
||||
// the string value of the ordinal, not the string itself
|
||||
return Integer.toString(intVal(doc));
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + strVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#equals(java.lang.Object) */
|
||||
public boolean equals(Object o) {
|
||||
if (o.getClass() != ReverseOrdFieldSource.class) return false;
|
||||
ReverseOrdFieldSource other = (ReverseOrdFieldSource)o;
|
||||
return this.field.equals(other.field);
|
||||
}
|
||||
|
||||
private static final int hcode = ReverseOrdFieldSource.class.hashCode();
|
||||
|
||||
/*(non-Javadoc) @see java.lang.Object#hashCode() */
|
||||
public int hashCode() {
|
||||
return hcode + field.hashCode();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.FieldCache;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: obtains short field values from the
|
||||
* {@link org.apache.lucene.search.FieldCache FieldCache}
|
||||
* using <code>getShorts()</code> and makes those values
|
||||
* available as other numeric types, casting as needed.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @see org.apache.lucene.search.function.FieldCacheSource for requirements
|
||||
* on the field.
|
||||
*/
|
||||
public class ShortFieldSource extends FieldCacheSource {
|
||||
private FieldCache.ShortParser parser;
|
||||
|
||||
/**
|
||||
* Create a cached short field source with default string-to-short parser.
|
||||
*/
|
||||
public ShortFieldSource(String field) {
|
||||
this(field, null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a cached short field source with a specific string-to-short parser.
|
||||
*/
|
||||
public ShortFieldSource(String field, FieldCache.ShortParser parser) {
|
||||
super(field);
|
||||
this.parser = parser;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.ValueSource#description() */
|
||||
public String description() {
|
||||
return "short(" + super.description() + ')';
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#getCachedValues(org.apache.lucene.search.FieldCache, java.lang.String, org.apache.lucene.index.IndexReader) */
|
||||
public DocValues getCachedFieldValues (FieldCache cache, String field, IndexReader reader) throws IOException {
|
||||
final short[] arr = (parser==null) ?
|
||||
cache.getShorts(reader, field) :
|
||||
cache.getShorts(reader, field, parser);
|
||||
return new DocValues(reader.maxDoc()) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#floatVal(int) */
|
||||
public float floatVal(int doc) {
|
||||
return (float) arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#intVal(int) */
|
||||
public int intVal(int doc) {
|
||||
return arr[doc];
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#toString(int) */
|
||||
public String toString(int doc) {
|
||||
return description() + '=' + intVal(doc);
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.DocValues#getInnerArray() */
|
||||
Object getInnerArray() {
|
||||
return arr;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceEquals(org.apache.lucene.search.function.FieldCacheSource) */
|
||||
public boolean cachedFieldSourceEquals(FieldCacheSource o) {
|
||||
if (o.getClass() != ShortFieldSource.class) {
|
||||
return false;
|
||||
}
|
||||
ShortFieldSource other = (ShortFieldSource)o;
|
||||
return this.parser==null ?
|
||||
other.parser==null :
|
||||
this.parser.getClass() == other.parser.getClass();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.FieldCacheSource#cachedFieldSourceHashCode() */
|
||||
public int cachedFieldSourceHashCode() {
|
||||
return parser==null ?
|
||||
Short.class.hashCode() : parser.getClass().hashCode();
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,74 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.function.DocValues;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Expert: source of values for basic function queries.
|
||||
* <P>At its default/simplest form, values - one per doc - are used as the score of that doc.
|
||||
* <P>Values are instantiated as
|
||||
* {@link org.apache.lucene.search.function.DocValues DocValues} for a particular reader.
|
||||
* <P>ValueSource implementations differ in RAM requirements: it would always be a factor
|
||||
* of the number of documents, but for each document the number of bytes can be 1, 2, 4, or 8.
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public abstract class ValueSource implements Serializable {
|
||||
|
||||
/**
|
||||
* Return the DocValues used by the function query.
|
||||
* @param reader the IndexReader used to read these values.
|
||||
* If any caching is involved, that caching would also be IndexReader based.
|
||||
* @throws IOException for any error.
|
||||
*/
|
||||
public abstract DocValues getValues(IndexReader reader) throws IOException;
|
||||
|
||||
/**
|
||||
* description of field, used in explain()
|
||||
*/
|
||||
public abstract String description();
|
||||
|
||||
/* (non-Javadoc) @see java.lang.Object#toString() */
|
||||
public String toString() {
|
||||
return description();
|
||||
}
|
||||
|
||||
/**
|
||||
* Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(Object)}.
|
||||
* @see Object#equals(Object)
|
||||
*/
|
||||
public abstract boolean equals(Object o);
|
||||
|
||||
/**
|
||||
* Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}.
|
||||
* @see Object#hashCode()
|
||||
*/
|
||||
public abstract int hashCode();
|
||||
|
||||
}
|
|
@ -0,0 +1,201 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Expert: A Query that sets the scores of document to the
|
||||
* values obtained from a {@link org.apache.lucene.search.function.ValueSource ValueSource}.
|
||||
* <p>
|
||||
* The value source can be based on a (cached) value of an indexd field, but it
|
||||
* can also be based on an external source, e.g. values read from an external database.
|
||||
* <p>
|
||||
* Score is set as: Score(doc,query) = query.getBoost()<sup>2</sup> * valueSource(doc).
|
||||
*
|
||||
* <p><font color="#FF0000">
|
||||
* WARNING: The status of the <b>search.function</b> package is experimental.
|
||||
* The APIs introduced here might change in the future and will not be
|
||||
* supported anymore in such a case.</font>
|
||||
*
|
||||
* @author yonik
|
||||
*/
|
||||
public class ValueSourceQuery extends Query {
|
||||
ValueSource valSrc;
|
||||
|
||||
/**
|
||||
* Create a value source query
|
||||
* @param valSrc provides the values defines the function to be used for scoring
|
||||
*/
|
||||
public ValueSourceQuery(ValueSource valSrc) {
|
||||
this.valSrc=valSrc;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#rewrite(org.apache.lucene.index.IndexReader) */
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
return this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#extractTerms(java.util.Set) */
|
||||
public void extractTerms(Set terms) {
|
||||
// no terms involved here
|
||||
}
|
||||
|
||||
private class ValueSourceWeight implements Weight {
|
||||
Searcher searcher;
|
||||
float queryNorm;
|
||||
float queryWeight;
|
||||
|
||||
public ValueSourceWeight(Searcher searcher) {
|
||||
this.searcher = searcher;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getQuery() */
|
||||
public Query getQuery() {
|
||||
return ValueSourceQuery.this;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#getValue() */
|
||||
public float getValue() {
|
||||
return queryWeight;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#sumOfSquaredWeights() */
|
||||
public float sumOfSquaredWeights() throws IOException {
|
||||
queryWeight = getBoost();
|
||||
return queryWeight * queryWeight;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#normalize(float) */
|
||||
public void normalize(float norm) {
|
||||
this.queryNorm = norm;
|
||||
queryWeight *= this.queryNorm;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.IndexReader) */
|
||||
public Scorer scorer(IndexReader reader) throws IOException {
|
||||
return new ValueSourceScorer(getSimilarity(searcher), reader, this);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Weight#explain(org.apache.lucene.index.IndexReader, int) */
|
||||
public Explanation explain(IndexReader reader, int doc) throws IOException {
|
||||
return scorer(reader).explain(doc);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A scorer that (simply) matches all documents, and scores each document with
|
||||
* the value of the value soure in effect. As an example, if the value source
|
||||
* is a (cached) field source, then value of that field in that document will
|
||||
* be used. (assuming field is indexed for this doc, with a single token.)
|
||||
*/
|
||||
private class ValueSourceScorer extends Scorer {
|
||||
private final IndexReader reader;
|
||||
private final ValueSourceWeight weight;
|
||||
private final int maxDoc;
|
||||
private final float qWeight;
|
||||
private int doc=-1;
|
||||
private final DocValues vals;
|
||||
|
||||
// constructor
|
||||
private ValueSourceScorer(Similarity similarity, IndexReader reader, ValueSourceWeight w) throws IOException {
|
||||
super(similarity);
|
||||
this.weight = w;
|
||||
this.qWeight = w.getValue();
|
||||
this.reader = reader;
|
||||
this.maxDoc = reader.maxDoc();
|
||||
// this is when/where the values are first created.
|
||||
vals = valSrc.getValues(reader);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#next() */
|
||||
public boolean next() throws IOException {
|
||||
for(;;) {
|
||||
++doc;
|
||||
if (doc>=maxDoc) {
|
||||
return false;
|
||||
}
|
||||
if (reader.isDeleted(doc)) {
|
||||
continue;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#doc()
|
||||
*/
|
||||
public int doc() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#score() */
|
||||
public float score() throws IOException {
|
||||
return qWeight * vals.floatVal(doc);
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#skipTo(int) */
|
||||
public boolean skipTo(int target) throws IOException {
|
||||
doc=target-1;
|
||||
return next();
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Scorer#explain(int) */
|
||||
public Explanation explain(int doc) throws IOException {
|
||||
float sc = qWeight * vals.floatVal(doc);
|
||||
|
||||
Explanation result = new ComplexExplanation(
|
||||
true, sc, ValueSourceQuery.this.toString() + ", product of:");
|
||||
|
||||
result.addDetail(vals.explain(doc));
|
||||
result.addDetail(new Explanation(getBoost(), "boost"));
|
||||
result.addDetail(new Explanation(weight.queryNorm,"queryNorm"));
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.Query#createWeight(org.apache.lucene.search.Searcher) */
|
||||
protected Weight createWeight(Searcher searcher) {
|
||||
return new ValueSourceQuery.ValueSourceWeight(searcher);
|
||||
}
|
||||
|
||||
/* (non-Javadoc) @see org.apache.lucene.search.Query#toString(java.lang.String) */
|
||||
public String toString(String field) {
|
||||
return valSrc.toString() + ToStringUtils.boost(getBoost());
|
||||
}
|
||||
|
||||
/** Returns true if <code>o</code> is equal to this. */
|
||||
public boolean equals(Object o) {
|
||||
if (getClass() != o.getClass()) {
|
||||
return false;
|
||||
}
|
||||
ValueSourceQuery other = (ValueSourceQuery)o;
|
||||
return this.getBoost() == other.getBoost()
|
||||
&& this.valSrc.equals(other.valSrc);
|
||||
}
|
||||
|
||||
/** Returns a hash code value for this object. */
|
||||
public int hashCode() {
|
||||
return (getClass().hashCode() + valSrc.hashCode()) ^ Float.floatToIntBits(getBoost());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,197 @@
|
|||
<HTML>
|
||||
<!--
|
||||
/**
|
||||
* Copyright 2005 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
-->
|
||||
<HEAD>
|
||||
<TITLE>org.apache.lucene.search.function</TITLE>
|
||||
</HEAD>
|
||||
<BODY>
|
||||
<DIV>
|
||||
Programmatic control over document scores.
|
||||
</DIV>
|
||||
<DIV>
|
||||
The <code>function</code> package provides tight control over document scores.
|
||||
</DIV>
|
||||
<DIV>
|
||||
<font color="#FF0000">
|
||||
WARNING: The status of the <b>search.function</b> package is experimental. The APIs
|
||||
introduced here might change in the future and will not be supported anymore
|
||||
in such a case.
|
||||
</font>
|
||||
</DIV>
|
||||
<DIV>
|
||||
Two types of queries are available in this package:
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
<b>Custom Score queries</b> - allowing to set the score
|
||||
of a matching document as a mathematical expression over scores
|
||||
of that document by contained (sub) queries.
|
||||
</li>
|
||||
<li>
|
||||
<b>Field score queries</b> - allowing to base the score of a
|
||||
document on <b>numeric values</b> of <b>indexed fields</b>.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV> </DIV>
|
||||
<DIV>
|
||||
<b>Some possible uses of these queries:</b>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
Normalizing the document scores by values indexed in a special field -
|
||||
for instance, experimenting with a different doc length normalization.
|
||||
</li>
|
||||
<li>
|
||||
Introducing some static scoring element, to the score of a document, -
|
||||
for instance using some topological attribute of the links to/from a document.
|
||||
</li>
|
||||
<li>
|
||||
Computing the score of a matching document as an arbitrary odd function of
|
||||
its score by a certain query.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>Performance and Quality Considerations:</b>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<ol>
|
||||
<li>
|
||||
When scoring by values of indexed fields,
|
||||
these values are loaded into memory.
|
||||
Unlike the regular scoring, where the required information is read from
|
||||
disk as necessary, here field values are loaded once and cached by Lucene in memory
|
||||
for further use, anticipating reuse by further queries. While all this is carefully
|
||||
cached with performance in mind, it is recommended to
|
||||
use these features only when the default Lucene scoring does
|
||||
not match your "special" application needs.
|
||||
</li>
|
||||
<li>
|
||||
Use only with carefully selected fields, because in most cases,
|
||||
search quality with regular Lucene scoring
|
||||
would outperform that of scoring by field values.
|
||||
</li>
|
||||
<li>
|
||||
Values of fields used for scoring should match.
|
||||
Do not apply on a field containing arbitrary (long) text.
|
||||
Do not mix values in the same field if that field is used for scoring.
|
||||
</li>
|
||||
<li>
|
||||
Smaller (shorter) field tokens means less RAM (something always desired).
|
||||
When using <a href=FieldScoreQuery.html>FieldScoreQuery</a>,
|
||||
select the shortest <a href=FieldScoreQuery.html#Type>FieldScoreQuery.Type</a>
|
||||
that is sufficient for the used field values.
|
||||
</li>
|
||||
<li>
|
||||
Reusing IndexReaders/IndexSearchers is essential, because the caching of field tokens
|
||||
is based on an IndexReader. Whenever a new IndexReader is used, values currently in the cache
|
||||
cannot be used and new values must be loaded from disk. So replace/refresh readers/searchers in
|
||||
a controlled manner.
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>History and Credits:</b>
|
||||
<ul>
|
||||
<li>
|
||||
A large part of the code of this package was originated from Yonik's FunctionQuery code that was
|
||||
imported from <a href="http://lucene.apache.org/solr">Solr</a>
|
||||
(see <a href="http://issues.apache.org/jira/browse/LUCENE-446">LUCENE-446</a>).
|
||||
</li>
|
||||
<li>
|
||||
The idea behind CustomScoreQuery is borrowed from
|
||||
the "Easily create queries that transform sub-query scores arbitrarily" contribution by Mike Klaas
|
||||
(see <a href="http://issues.apache.org/jira/browse/LUCENE-850">LUCENE-850</a>)
|
||||
though the implementation and API here are different.
|
||||
</li>
|
||||
</ul>
|
||||
</DIV>
|
||||
<DIV>
|
||||
<b>Code sample:</b>
|
||||
<P>
|
||||
Note: code snippets here should work, but they were never really compiled... so,
|
||||
tests sources under TestCustomScoreQuery, TestFieldScoreQuery and TestOrdValues
|
||||
may also be useful.
|
||||
<ol>
|
||||
<li>
|
||||
Using field (byte) values as scores:
|
||||
<p>
|
||||
Indexing:
|
||||
<pre>
|
||||
f = new Field("score", "7", Field.Store.NO, Field.Index.UN_TOKENIZED);
|
||||
f.setOmitNorms(true);
|
||||
d1.add(f);
|
||||
</pre>
|
||||
<p>
|
||||
Search:
|
||||
<pre>
|
||||
Query q = new FieldScoreQuery("score", FieldScoreQuery.Type.BYTE);
|
||||
</pre>
|
||||
Document d1 above would get a score of 7.
|
||||
</li>
|
||||
<p>
|
||||
<li>
|
||||
Manipulating scores
|
||||
<p>
|
||||
Dividing the original score of each document by a square root of its docid
|
||||
(just to demonstrate what it takes to manipulate scores this way)
|
||||
<pre>
|
||||
Query q = queryParser.parse("my query text");
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return (float) (subQueryScore / Math.sqrt(doc));
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
For more informative debug info on the custom query, also override the name() method:
|
||||
<pre>
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return (float) (subQueryScore / Math.sqrt(doc));
|
||||
}
|
||||
public String name() {
|
||||
return "1/sqrt(docid)";
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
<p>
|
||||
Taking the square root of the original score and multiplying it by a "short field driven score", ie, the
|
||||
short value that was indexed for the scored doc in a certain field:
|
||||
<pre>
|
||||
Query q = queryParser.parse("my query text");
|
||||
FieldScoreQuery qf = new FieldScoreQuery("shortScore", FieldScoreQuery.Type.SHORT);
|
||||
CustomScoreQuery customQ = new CustomScoreQuery(q,qf) {
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return (float) (Math.sqrt(subQueryScore) * valSrcScore);
|
||||
}
|
||||
public String name() {
|
||||
return "shortVal*sqrt(score)";
|
||||
}
|
||||
};
|
||||
</pre>
|
||||
|
||||
</li>
|
||||
</ol>
|
||||
</DIV>
|
||||
</BODY>
|
||||
</HTML>
|
|
@ -18,9 +18,11 @@ package org.apache.lucene.util;
|
|||
*/
|
||||
|
||||
public class ToStringUtils {
  /**
   * Formats a boost for printing: the empty string when the boost is exactly 1.0,
   * otherwise <code>"^"</code> followed by <code>Float.toString(boost)</code>.
   */
  public static String boost(float boost) {
    return boost == 1.0f ? "" : "^" + Float.toString(boost);
  }

}
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Setup for function tests
|
||||
*/
|
||||
public abstract class FunctionTestSetup extends TestCase {
|
||||
|
||||
/**
|
||||
* Actual score computation order is slightly different than assumptios
|
||||
* this allows for a small amount of variation
|
||||
*/
|
||||
public static float TEST_SCORE_TOLERANCE_DELTA = 0.00005f;
|
||||
|
||||
protected static final boolean DBG = false; // change to true for logging to print
|
||||
|
||||
protected static final int N_DOCS = 17; // select a primary number > 2
|
||||
|
||||
protected static final String ID_FIELD = "id";
|
||||
protected static final String TEXT_FIELD = "text";
|
||||
protected static final String INT_FIELD = "iii";
|
||||
protected static final String FLOAT_FIELD = "fff";
|
||||
|
||||
private static final String DOC_TEXT_LINES[] = {
|
||||
// from a public first aid info at http://firstaid.ie.eu.org
|
||||
"Well it may be a little dramatic but sometimes it true. ",
|
||||
"If you call the emergency medical services to an incident, ",
|
||||
"your actions have started the chain of survival. ",
|
||||
"You have acted to help someone you may not even know. ",
|
||||
"First aid is helping, first aid is making that call, ",
|
||||
"putting a Band-Aid on a small wound, controlling bleeding in large ",
|
||||
"wounds or providing CPR for a collapsed person whose not breathing ",
|
||||
"and heart has stopped beating. You can help yourself, your loved ",
|
||||
"ones and the stranger whose life may depend on you being in the ",
|
||||
"right place at the right time with the right knowledge.",
|
||||
};
|
||||
|
||||
protected Directory dir;
|
||||
protected Analyzer anlzr;
|
||||
|
||||
/* @override constructor */
|
||||
public FunctionTestSetup(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
dir = null;
|
||||
anlzr = null;
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void setUp() throws Exception {
|
||||
// prepare a small index with just a few documents.
|
||||
super.setUp();
|
||||
dir = new RAMDirectory();
|
||||
anlzr = new StandardAnalyzer();
|
||||
IndexWriter iw = new IndexWriter(dir,anlzr);
|
||||
// add docs not exactly in natural ID order, to verify we do check the order of docs by scores
|
||||
int remaining = N_DOCS;
|
||||
boolean done[] = new boolean[N_DOCS];
|
||||
int i = 0;
|
||||
while (remaining>0) {
|
||||
if (done[i]) {
|
||||
throw new Exception("to set this test correctly N_DOCS="+N_DOCS+" must be primary and greater than 2!");
|
||||
}
|
||||
addDoc(iw,i);
|
||||
done[i] = true;
|
||||
i = (i+4)%N_DOCS;
|
||||
remaining --;
|
||||
}
|
||||
iw.close();
|
||||
}
|
||||
|
||||
private void addDoc(IndexWriter iw, int i) throws Exception {
|
||||
Document d = new Document();
|
||||
Fieldable f;
|
||||
int scoreAndID = i+1;
|
||||
|
||||
f = new Field(ID_FIELD,id2String(scoreAndID),Field.Store.YES,Field.Index.UN_TOKENIZED); // for debug purposes
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = new Field(TEXT_FIELD,"text of doc"+scoreAndID+textLine(i),Field.Store.NO,Field.Index.TOKENIZED); // for regular search
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = new Field(INT_FIELD,""+scoreAndID,Field.Store.NO,Field.Index.UN_TOKENIZED); // for function scoring
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
f = new Field(FLOAT_FIELD,scoreAndID+".000",Field.Store.NO,Field.Index.UN_TOKENIZED); // for function scoring
|
||||
f.setOmitNorms(true);
|
||||
d.add(f);
|
||||
|
||||
iw.addDocument(d);
|
||||
log("added: "+d);
|
||||
}
|
||||
|
||||
// 17 --> ID00017
|
||||
protected String id2String(int scoreAndID) {
|
||||
String s = "000000000"+scoreAndID;
|
||||
int n = (""+N_DOCS).length() + 3;
|
||||
int k = s.length() - n;
|
||||
return "ID"+s.substring(k);
|
||||
}
|
||||
|
||||
// some text line for regular search
|
||||
private String textLine(int docNum) {
|
||||
return DOC_TEXT_LINES[docNum % DOC_TEXT_LINES.length];
|
||||
}
|
||||
|
||||
// extract expected doc score from its ID Field: "ID7" --> 7.0
|
||||
protected float expectedFieldScore(String docIDFieldVal) {
|
||||
return Float.parseFloat(docIDFieldVal.substring(2));
|
||||
}
|
||||
|
||||
// debug messages (change DBG to true for anything to print)
|
||||
protected void log (Object o) {
|
||||
if (DBG) {
|
||||
System.out.println(o.toString());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,240 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
||||
/**
|
||||
* Test CustomScoreQuery search.
|
||||
*/
|
||||
public class TestCustomScoreQuery extends FunctionTestSetup {
|
||||
|
||||
/** Creates the named test case; the name is passed through to FunctionTestSetup. */
public TestCustomScoreQuery(String name) {
  super(name);
}
|
||||
|
||||
/* @override */
|
||||
protected void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void setUp() throws Exception {
|
||||
// prepare a small index with just a few documents.
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
/** Test that CustomScoreQuery of Type.BYTE returns the expected scores. */
|
||||
public void testCustomScoreByte () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.BYTE,1.0);
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.BYTE,2.0);
|
||||
}
|
||||
|
||||
/** Test that CustomScoreQuery of Type.SHORT returns the expected scores. */
|
||||
public void testCustomScoreShort () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.SHORT,1.0);
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.SHORT,3.0);
|
||||
}
|
||||
|
||||
/** Test that CustomScoreQuery of Type.INT returns the expected scores. */
|
||||
public void testCustomScoreInt () throws CorruptIndexException, Exception {
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.INT,1.0);
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.INT,4.0);
|
||||
}
|
||||
|
||||
/** Test that CustomScoreQuery of Type.FLOAT returns the expected scores. */
|
||||
public void testCustomScoreFloat () throws CorruptIndexException, Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.FLOAT,1.0);
|
||||
doTestCustomScore(INT_FIELD,FieldScoreQuery.Type.FLOAT,5.0);
|
||||
// same values, but in flot format
|
||||
doTestCustomScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT,1.0);
|
||||
doTestCustomScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT,6.0);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs with expected score.
|
||||
private void doTestCustomScore (String field, FieldScoreQuery.Type tp, double dboost) throws CorruptIndexException, Exception {
|
||||
float boost = (float) dboost;
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
FieldScoreQuery qValSrc = new FieldScoreQuery(field,tp); // a query that would score by the field
|
||||
QueryParser qp = new QueryParser(TEXT_FIELD,anlzr);
|
||||
String qtxt = "bleeding person chain knowledge"; // from the doc texts in FunctionQuerySetup.
|
||||
|
||||
// regular (boolean) query.
|
||||
Query q1 = qp.parse(qtxt);
|
||||
log(q1);
|
||||
|
||||
// custom query, that should score the same as q1.
|
||||
CustomScoreQuery q2CustomNeutral = new CustomScoreQuery(q1);
|
||||
q2CustomNeutral.setBoost(boost);
|
||||
log(q2CustomNeutral);
|
||||
|
||||
// custom query, that should (by default) multiply the scores of q1 by that of the field
|
||||
CustomScoreQuery q3CustomMul = new CustomScoreQuery(q1,qValSrc);
|
||||
q3CustomMul.setStrict(true);
|
||||
q3CustomMul.setBoost(boost);
|
||||
log(q3CustomMul);
|
||||
|
||||
// custom query, that should add the scores of q1 to that of the field
|
||||
CustomScoreQuery q4CustomAdd = new CustomScoreQuery(q1,qValSrc) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */
|
||||
public String name() {
|
||||
return "customAdd";
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#customScore(int, float, float) */
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return subQueryScore + valSrcScore;
|
||||
}
|
||||
/* (non-Javadoc)@see org.apache.lucene.search.function.CustomScoreQuery#customExplain(int, org.apache.lucene.search.Explanation, org.apache.lucene.search.Explanation)*/
|
||||
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
|
||||
float valSrcScore = valSrcExpl==null ? 0 : valSrcExpl.getValue();
|
||||
Explanation exp = new Explanation( valSrcScore + subQueryExpl.getValue(), "custom score: sum of:");
|
||||
exp.addDetail(subQueryExpl);
|
||||
if (valSrcExpl != null) {
|
||||
exp.addDetail(valSrcExpl);
|
||||
}
|
||||
return exp;
|
||||
}
|
||||
};
|
||||
q4CustomAdd.setStrict(true);
|
||||
q4CustomAdd.setBoost(boost);
|
||||
log(q4CustomAdd);
|
||||
|
||||
// custom query, that multiplies and adds the field score to that of q1
|
||||
CustomScoreQuery q5CustomMulAdd = new CustomScoreQuery(q1,qValSrc) {
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#name() */
|
||||
public String name() {
|
||||
return "customMulAdd";
|
||||
}
|
||||
/*(non-Javadoc) @see org.apache.lucene.search.function.CustomScoreQuery#customScore(int, float, float) */
|
||||
public float customScore(int doc, float subQueryScore, float valSrcScore) {
|
||||
return (1 + subQueryScore) * valSrcScore;
|
||||
}
|
||||
/* (non-Javadoc)@see org.apache.lucene.search.function.CustomScoreQuery#customExplain(int, org.apache.lucene.search.Explanation, org.apache.lucene.search.Explanation)*/
|
||||
public Explanation customExplain(int doc, Explanation subQueryExpl, Explanation valSrcExpl) {
|
||||
Explanation exp = new Explanation(1 + subQueryExpl.getValue(), "sum of:");
|
||||
exp.addDetail(subQueryExpl);
|
||||
exp.addDetail(new Explanation(1,"const 1"));
|
||||
if (valSrcExpl == null) {
|
||||
exp.setDescription("CustomMulAdd, sum of:");
|
||||
return exp;
|
||||
}
|
||||
Explanation exp2 = new Explanation(valSrcExpl.getValue() * exp.getValue(), "custom score: product of:");
|
||||
exp2.addDetail(valSrcExpl);
|
||||
exp2.addDetail(exp);
|
||||
return exp2;
|
||||
}
|
||||
};
|
||||
q5CustomMulAdd.setStrict(true);
|
||||
q5CustomMulAdd.setBoost(boost);
|
||||
log(q5CustomMulAdd);
|
||||
|
||||
// do al the searches
|
||||
TopDocs td1 = s.search(q1,null,1000);
|
||||
TopDocs td2CustomNeutral = s.search(q2CustomNeutral,null,1000);
|
||||
TopDocs td3CustomMul = s.search(q3CustomMul,null,1000);
|
||||
TopDocs td4CustomAdd = s.search(q4CustomAdd,null,1000);
|
||||
TopDocs td5CustomMulAdd = s.search(q5CustomMulAdd,null,1000);
|
||||
|
||||
// put results in map so we can verify the scores although they have changed
|
||||
HashMap h1 = topDocsToMap(td1);
|
||||
HashMap h2CustomNeutral = topDocsToMap(td2CustomNeutral);
|
||||
HashMap h3CustomMul = topDocsToMap(td3CustomMul);
|
||||
HashMap h4CustomAdd = topDocsToMap(td4CustomAdd);
|
||||
HashMap h5CustomMulAdd = topDocsToMap(td5CustomMulAdd);
|
||||
|
||||
verifyResults(boost, s,
|
||||
h1, h2CustomNeutral, h3CustomMul, h4CustomAdd, h5CustomMulAdd,
|
||||
q1, q2CustomNeutral, q3CustomMul, q4CustomAdd, q5CustomMulAdd);
|
||||
}
|
||||
|
||||
// verify results are as expected.
|
||||
private void verifyResults(float boost, IndexSearcher s,
|
||||
HashMap h1, HashMap h2customNeutral, HashMap h3CustomMul, HashMap h4CustomAdd, HashMap h5CustomMulAdd,
|
||||
Query q1, Query q2, Query q3, Query q4, Query q5) throws Exception {
|
||||
|
||||
// verify numbers of matches
|
||||
log("#hits = "+h1.size());
|
||||
assertEquals("queries should have same #hits",h1.size(),h2customNeutral.size());
|
||||
assertEquals("queries should have same #hits",h1.size(),h3CustomMul.size());
|
||||
assertEquals("queries should have same #hits",h1.size(),h4CustomAdd.size());
|
||||
assertEquals("queries should have same #hits",h1.size(),h5CustomMulAdd.size());
|
||||
|
||||
// verify scores ratios
|
||||
for (Iterator it = h1.keySet().iterator(); it.hasNext();) {
|
||||
Integer x = (Integer) it.next();
|
||||
|
||||
int doc = x.intValue();
|
||||
log("doc = "+doc);
|
||||
|
||||
float fieldScore = expectedFieldScore(s.getIndexReader().document(doc).get(ID_FIELD));
|
||||
log("fieldScore = "+fieldScore);
|
||||
assertTrue("fieldScore should not be 0",fieldScore>0);
|
||||
|
||||
float score1 = ((Float)h1.get(x)).floatValue();
|
||||
logResult("score1=", s, q1, doc, score1);
|
||||
|
||||
float score2 = ((Float)h2customNeutral.get(x)).floatValue();
|
||||
logResult("score2=", s, q2, doc, score2);
|
||||
assertEquals("same score (just boosted) for neutral", boost * score1, score2, TEST_SCORE_TOLERANCE_DELTA);
|
||||
|
||||
float score3 = ((Float)h3CustomMul.get(x)).floatValue();
|
||||
logResult("score3=", s, q3, doc, score3);
|
||||
assertEquals("new score for custom mul", boost * fieldScore * score1, score3, TEST_SCORE_TOLERANCE_DELTA);
|
||||
|
||||
float score4 = ((Float)h4CustomAdd.get(x)).floatValue();
|
||||
logResult("score4=", s, q4, doc, score4);
|
||||
assertEquals("new score for custom add", boost * (fieldScore + score1), score4, TEST_SCORE_TOLERANCE_DELTA);
|
||||
|
||||
float score5 = ((Float)h5CustomMulAdd.get(x)).floatValue();
|
||||
logResult("score5=", s, q5, doc, score5);
|
||||
assertEquals("new score for custom mul add", boost * fieldScore * (score1 + 1), score5, TEST_SCORE_TOLERANCE_DELTA);
|
||||
}
|
||||
}
|
||||
|
||||
private void logResult(String msg, IndexSearcher s, Query q, int doc, float score1) throws IOException {
|
||||
QueryUtils.check(q,s);
|
||||
log(msg+" "+score1);
|
||||
log("Explain by: "+q);
|
||||
log(s.explain(q,doc));
|
||||
}
|
||||
|
||||
// since custom scoring modifies the order of docs, map results
|
||||
// by doc ids so that we can later compare/verify them
|
||||
private HashMap topDocsToMap(TopDocs td) {
|
||||
HashMap h = new HashMap();
|
||||
for (int i=0; i<td.totalHits; i++) {
|
||||
h.put(new Integer(td.scoreDocs[i].doc), new Float(td.scoreDocs[i].score));
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
||||
/**
|
||||
* Test FieldScoreQuery search.
|
||||
* <p>
|
||||
* Tests here create an index with a few documents, each having
|
||||
* an int value indexed field and a float value indexed field.
|
||||
* The values of these fields are later used for scoring.
|
||||
* <p>
|
||||
* The rank tests use Hits to verify that docs are ordered (by score) as expected.
|
||||
* <p>
|
||||
* The exact score tests use TopDocs to verify the exact score.
|
||||
*/
|
||||
public class TestFieldScoreQuery extends FunctionTestSetup {
|
||||
|
||||
/* @override constructor */
|
||||
public TestFieldScoreQuery(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void setUp() throws Exception {
|
||||
// prepare a small index with just a few documents.
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns docs in expected order. */
|
||||
public void testRankByte () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns docs in expected order. */
|
||||
public void testRankShort () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns docs in expected order. */
|
||||
public void testRankInt () throws CorruptIndexException, Exception {
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns docs in expected order. */
|
||||
public void testRankFloat () throws CorruptIndexException, Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestRank(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestRank(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs in expected order.
|
||||
private void doTestRank (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
Query q = new FieldScoreQuery(field,tp);
|
||||
log("test: "+q);
|
||||
QueryUtils.check(q,s);
|
||||
Hits h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
String prevID = "ID"+(N_DOCS+1); // greater than all ids of docs in this test
|
||||
for (int i=0; i<h.length(); i++) {
|
||||
String resID = h.doc(i).get(ID_FIELD);
|
||||
log(i+". score="+h.score(i)+" - "+resID);
|
||||
log(s.explain(q,h.id(i)));
|
||||
assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
|
||||
prevID = resID;
|
||||
}
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE returns the expected scores. */
|
||||
public void testExactScoreByte () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT returns the expected scores. */
|
||||
public void testExactScoreShort () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT returns the expected scores. */
|
||||
public void testExactScoreInt () throws CorruptIndexException, Exception {
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT returns the expected scores. */
|
||||
public void testExactScoreFloat () throws CorruptIndexException, Exception {
|
||||
// INT field can be parsed as float
|
||||
doTestExactScore(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestExactScore(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that FieldScoreQuery returns docs with expected score.
|
||||
private void doTestExactScore (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
Query q = new FieldScoreQuery(field,tp);
|
||||
TopDocs td = s.search(q,null,1000);
|
||||
assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
for (int i=0; i<sd.length; i++) {
|
||||
float score = sd[i].score;
|
||||
log(s.explain(q,sd[i].doc));
|
||||
String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD);
|
||||
float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
|
||||
assertEquals("score of "+id+" shuould be "+expectedScore+" != "+score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
|
||||
}
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.BYTE caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
public void testCachingByte () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as byte
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.BYTE);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.SHORT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
public void testCachingShort () throws CorruptIndexException, Exception {
|
||||
// INT field values are small enough to be parsed as short
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.SHORT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.INT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
public void testCachingInt () throws CorruptIndexException, Exception {
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.INT);
|
||||
}
|
||||
|
||||
/** Test that FieldScoreQuery of Type.FLOAT caches/reuses loaded values and consumes the proper RAM resources. */
|
||||
public void testCachingFloat () throws CorruptIndexException, Exception {
|
||||
// INT field values can be parsed as float
|
||||
doTestCaching(INT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
// same values, but in flot format
|
||||
doTestCaching(FLOAT_FIELD,FieldScoreQuery.Type.FLOAT);
|
||||
}
|
||||
|
||||
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
|
||||
private void doTestCaching (String field, FieldScoreQuery.Type tp) throws CorruptIndexException, Exception {
|
||||
// prepare expected array types for comparison
|
||||
HashMap expectedArrayTypes = new HashMap();
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.BYTE, new byte[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.SHORT, new short[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.INT, new int[0]);
|
||||
expectedArrayTypes.put(FieldScoreQuery.Type.FLOAT, new float[0]);
|
||||
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
Object innerArray = null;
|
||||
|
||||
for (int i=0; i<10; i++) {
|
||||
FieldScoreQuery q = new FieldScoreQuery(field,tp);
|
||||
Hits h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
if (i==0) {
|
||||
innerArray = q.valSrc.getValues(s.getIndexReader()).getInnerArray();
|
||||
log(i+". compare: "+innerArray.getClass()+" to "+expectedArrayTypes.get(tp).getClass());
|
||||
assertEquals("field values should be cached in the correct array type!", innerArray.getClass(),expectedArrayTypes.get(tp).getClass());
|
||||
} else {
|
||||
log(i+". compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
}
|
||||
}
|
||||
|
||||
// verify new values are reloaded (not reused) for a new reader
|
||||
s = new IndexSearcher(dir);
|
||||
FieldScoreQuery q = new FieldScoreQuery(field,tp);
|
||||
Hits h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
log("compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
assertNotSame("cached field values should not be reused if reader as changed!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,202 @@
|
|||
package org.apache.lucene.search.function;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.search.Hits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryUtils;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
||||
/**
|
||||
* Test search based on OrdFieldSource and ReverseOrdFieldSource.
|
||||
* <p>
|
||||
* Tests here create an index with a few documents, each having
|
||||
* an indexed "id" field.
|
||||
* The ord values of this field are later used for scoring.
|
||||
* <p>
|
||||
* The order tests use Hits to verify that docs are ordered as expected.
|
||||
* <p>
|
||||
* The exact score tests use TopDocs to verify the exact score.
|
||||
*/
|
||||
public class TestOrdValues extends FunctionTestSetup {
|
||||
|
||||
/* @override constructor */
|
||||
public TestOrdValues(String name) {
|
||||
super(name);
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void tearDown() throws Exception {
|
||||
super.tearDown();
|
||||
}
|
||||
|
||||
/* @override */
|
||||
protected void setUp() throws Exception {
|
||||
// prepare a small index with just a few documents.
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
/** Test OrdFieldSource */
|
||||
public void testOrdFieldRank () throws CorruptIndexException, Exception {
|
||||
doTestRank(ID_FIELD,true);
|
||||
}
|
||||
|
||||
/** Test ReverseOrdFieldSource */
|
||||
public void testReverseOrdFieldRank () throws CorruptIndexException, Exception {
|
||||
doTestRank(ID_FIELD,false);
|
||||
}
|
||||
|
||||
// Test that queries based on reverse/ordFieldScore scores correctly
|
||||
private void doTestRank (String field, boolean inOrder) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
ValueSource vs;
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
|
||||
Query q = new ValueSourceQuery(vs);
|
||||
log("test: "+q);
|
||||
QueryUtils.check(q,s);
|
||||
Hits h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
String prevID = inOrder
|
||||
? "IE" // greater than all ids of docs in this test ("ID0001", etc.)
|
||||
: "IC"; // smaller than all ids of docs in this test ("ID0001", etc.)
|
||||
|
||||
for (int i=0; i<h.length(); i++) {
|
||||
String resID = h.doc(i).get(ID_FIELD);
|
||||
log(i+". score="+h.score(i)+" - "+resID);
|
||||
log(s.explain(q,h.id(i)));
|
||||
if (inOrder) {
|
||||
assertTrue("res id "+resID+" should be < prev res id "+prevID, resID.compareTo(prevID)<0);
|
||||
} else {
|
||||
assertTrue("res id "+resID+" should be > prev res id "+prevID, resID.compareTo(prevID)>0);
|
||||
}
|
||||
prevID = resID;
|
||||
}
|
||||
}
|
||||
|
||||
/** Test exact score for OrdFieldSource */
|
||||
public void testOrdFieldExactScore () throws CorruptIndexException, Exception {
|
||||
doTestExactScore(ID_FIELD,true);
|
||||
}
|
||||
|
||||
/** Test exact score for ReverseOrdFieldSource */
|
||||
public void testReverseOrdFieldExactScore () throws CorruptIndexException, Exception {
|
||||
doTestExactScore(ID_FIELD,false);
|
||||
}
|
||||
|
||||
|
||||
// Test that queries based on reverse/ordFieldScore returns docs with expected score.
|
||||
private void doTestExactScore (String field, boolean inOrder) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
ValueSource vs;
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
Query q = new ValueSourceQuery(vs);
|
||||
TopDocs td = s.search(q,null,1000);
|
||||
assertEquals("All docs should be matched!",N_DOCS,td.totalHits);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
for (int i=0; i<sd.length; i++) {
|
||||
float score = sd[i].score;
|
||||
String id = s.getIndexReader().document(sd[i].doc).get(ID_FIELD);
|
||||
log("-------- "+i+". Explain doc "+id);
|
||||
log(s.explain(q,sd[i].doc));
|
||||
float expectedScore = N_DOCS-i;
|
||||
assertEquals("score of result "+i+" shuould be "+expectedScore+" != "+score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
|
||||
String expectedId = inOrder
|
||||
? id2String(N_DOCS-i) // in-order ==> larger values first
|
||||
: id2String(i+1); // reverse ==> smaller values first
|
||||
assertTrue("id of result "+i+" shuould be "+expectedId+" != "+score, expectedId.equals(id));
|
||||
}
|
||||
}
|
||||
|
||||
/** Test caching OrdFieldSource */
|
||||
public void testCachingOrd () throws CorruptIndexException, Exception {
|
||||
doTestCaching(ID_FIELD,true);
|
||||
}
|
||||
|
||||
/** Test caching for ReverseOrdFieldSource */
|
||||
public void tesCachingReverseOrd () throws CorruptIndexException, Exception {
|
||||
doTestCaching(ID_FIELD,false);
|
||||
}
|
||||
|
||||
// Test that values loaded for FieldScoreQuery are cached properly and consumes the proper RAM resources.
|
||||
private void doTestCaching (String field, boolean inOrder) throws CorruptIndexException, Exception {
|
||||
IndexSearcher s = new IndexSearcher(dir);
|
||||
Object innerArray = null;
|
||||
|
||||
for (int i=0; i<10; i++) {
|
||||
ValueSource vs;
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
ValueSourceQuery q = new ValueSourceQuery(vs);
|
||||
Hits h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
if (i==0) {
|
||||
innerArray = q.valSrc.getValues(s.getIndexReader()).getInnerArray();
|
||||
} else {
|
||||
log(i+". compare: "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
assertSame("field values should be cached and reused!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
}
|
||||
}
|
||||
|
||||
ValueSource vs;
|
||||
ValueSourceQuery q;
|
||||
Hits h;
|
||||
|
||||
// verify that different values are loaded for a different field
|
||||
String field2 = INT_FIELD;
|
||||
assertFalse(field.equals(field2)); // otherwise this test is meaningless.
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field2);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field2);
|
||||
}
|
||||
q = new ValueSourceQuery(vs);
|
||||
h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
log("compare (should differ): "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
assertNotSame("different values shuold be loaded for a different field!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
|
||||
// verify new values are reloaded (not reused) for a new reader
|
||||
s = new IndexSearcher(dir);
|
||||
if (inOrder) {
|
||||
vs = new OrdFieldSource(field);
|
||||
} else {
|
||||
vs = new ReverseOrdFieldSource(field);
|
||||
}
|
||||
q = new ValueSourceQuery(vs);
|
||||
h = s.search(q);
|
||||
assertEquals("All docs should be matched!",N_DOCS,h.length());
|
||||
log("compare (should differ): "+innerArray+" to "+q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
assertNotSame("cached field values should not be reused if reader as changed!", innerArray, q.valSrc.getValues(s.getIndexReader()).getInnerArray());
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue