ConstantScoreRangeQuery addition: LUCENE-383

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@344312 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2005-11-15 05:28:52 +00:00
parent bc428b86a8
commit fd41195ef3
5 changed files with 719 additions and 0 deletions

View File

@ -169,6 +169,16 @@ New features
These two new queries are not currently supported via QueryParser.
(Erik Hatcher)
24. Added ConstantScoreQuery which wraps a filter and produces a score
equal to the query boost for every matching document.
(Yonik Seeley, LUCENE-383)
25. Added ConstantScoreRangeQuery which produces a constant score for
every document in the range. One advantage over a normal RangeQuery
is that it doesn't expand to a BooleanQuery and thus doesn't have a maximum
number of terms the range can cover. Both endpoints may also be open.
(Yonik Seeley, LUCENE-383)
API Changes
1. Several methods and fields have been deprecated. The API documentation

View File

@ -0,0 +1,158 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.util.BitSet;
/**
* A query that wraps a filter and simply returns a constant score equal to the
* query boost for every document in the filter.
*
* @author yonik
* @version $Id$
*/
public class ConstantScoreQuery extends Query {
protected final Filter filter;
public ConstantScoreQuery(Filter filter) {
this.filter=filter;
}
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
protected class ConstantWeight implements Weight {
private Searcher searcher;
private float queryNorm;
private float queryWeight;
public ConstantWeight(Searcher searcher) {
this.searcher = searcher;
}
public Query getQuery() {
return ConstantScoreQuery.this;
}
public float getValue() {
return queryWeight;
}
public float sumOfSquaredWeights() throws IOException {
queryWeight = getBoost();
return queryWeight * queryWeight;
}
public void normalize(float norm) {
this.queryNorm = norm;
queryWeight *= this.queryNorm;
}
public Scorer scorer(IndexReader reader) throws IOException {
return new ConstantScorer(getSimilarity(searcher), reader, this);
}
public Explanation explain(IndexReader reader, int doc) throws IOException {
ConstantScorer cs = (ConstantScorer)scorer(reader);
boolean exists = cs.bits.get(doc);
Explanation result = new Explanation();
if (exists) {
result.setDescription("ConstantScoreQuery(" + filter
+ "), product of:");
result.setValue(queryWeight);
result.addDetail(new Explanation(getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm,"queryNorm"));
} else {
result.setDescription("ConstantScoreQuery(" + filter
+ ") doesn't match id " + doc);
result.setValue(0);
}
return result;
}
}
protected class ConstantScorer extends Scorer {
final BitSet bits;
final float theScore;
int doc=-1;
public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
super(similarity);
theScore = w.getValue();
bits = filter.bits(reader);
}
public boolean next() throws IOException {
doc = bits.nextSetBit(doc+1);
return doc >= 0;
}
public int doc() {
return doc;
}
public float score() throws IOException {
return theScore;
}
public boolean skipTo(int target) throws IOException {
doc = bits.nextSetBit(target); // requires JDK 1.4
return doc >= 0;
}
public Explanation explain(int doc) throws IOException {
throw new UnsupportedOperationException();
}
}
protected Weight createWeight(Searcher searcher) {
return new ConstantScoreQuery.ConstantWeight(searcher);
}
/** Prints a user-readable version of this query. */
public String toString(String field)
{
return "ConstantScore(" + filter.toString()
+ (getBoost()==1.0 ? ")" : "^" + getBoost());
}
/** Returns true if <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ConstantScoreQuery)) return false;
ConstantScoreQuery other = (ConstantScoreQuery)o;
return this.getBoost()==other.getBoost() && filter.equals(other.filter);
}
/** Returns a hash code value for this object. */
public int hashCode() {
// Simple add is OK since no existing filter hashcode has a float component.
return filter.hashCode() + Float.floatToIntBits(getBoost());
}
}

View File

@ -0,0 +1,138 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
/**
* A range query that returns a constant score equal to it's boost for
* all documents in the range.
* <p>
* It does not have an upper bound on the number of clauses covered in the range.
* <p>
* If an endpoint is null, it is said to be "open".
* Either or both endpoints may be open. Open endpoints may not be exclusive
* (you can't select all but the first or last term without explicitly specifying the term to exclude.)
*
* @author yonik
* @version $Id$
*/
public class ConstantScoreRangeQuery extends Query
{
private final String fieldName;
private final String lowerVal;
private final String upperVal;
private final boolean includeLower;
private final boolean includeUpper;
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
{
// do a little bit of normalization...
// open ended range queries should always be inclusive.
if (lowerVal==null) {
includeLower=true;
} else if (includeLower && lowerVal.equals("")) {
lowerVal=null;
}
if (upperVal==null) {
includeUpper=true;
}
this.fieldName = fieldName.intern(); // intern it, just like terms...
this.lowerVal = lowerVal;
this.upperVal = upperVal;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
}
/** Returns the field name for this query */
public String getField() { return fieldName; }
/** Returns the value of the lower endpoint of this range query, null if open ended */
public String getLowerVal() { return lowerVal; }
/** Returns the value of the upper endpoint of this range query, null if open ended */
public String getUpperVal() { return upperVal; }
/** Returns <code>true</code> if the lower endpoint is inclusive */
public boolean includesLower() { return includeLower; }
/** Returns <code>true</code> if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
public Query rewrite(IndexReader reader) throws IOException {
// Map to RangeFilter semantics which are slightly different...
RangeFilter rangeFilt = new RangeFilter(fieldName,
lowerVal!=null?lowerVal:"",
upperVal, lowerVal==""?false:includeLower, upperVal==null?false:includeUpper);
Query q = new ConstantScoreQuery(rangeFilt);
q.setBoost(getBoost());
return q;
}
/** Prints a user-readable version of this query. */
public String toString(String field)
{
StringBuffer buffer = new StringBuffer();
if (!getField().equals(field))
{
buffer.append(getField());
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
buffer.append(lowerVal != null ? lowerVal : "*");
buffer.append(" TO ");
buffer.append(upperVal != null ? upperVal : "*");
buffer.append(includeUpper ? ']' : '}');
if (getBoost() != 1.0f)
{
buffer.append("^");
buffer.append(Float.toString(getBoost()));
}
return buffer.toString();
}
/** Returns true if <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ConstantScoreRangeQuery)) return false;
ConstantScoreRangeQuery other = (ConstantScoreRangeQuery) o;
if (this.fieldName != other.fieldName // interned comparison
|| this.includeLower != other.includeLower
|| this.includeUpper != other.includeUpper
) { return false; }
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
return this.getBoost() == other.getBoost();
}
/** Returns a hash code value for this object.*/
public int hashCode() {
int h = Float.floatToIntBits(getBoost()) ^ fieldName.hashCode();
// hashCode of "" is 0, so don't use that for null...
h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a;
// don't just XOR upperVal with out mixing either it or h, as it will cancel
// out lowerVal if they are equal.
h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix
h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
h ^= (includeLower ? 0x665599aa : 0)
^ (includeUpper ? 0x99aa5566 : 0);
return h;
}
}

View File

@ -0,0 +1,66 @@
package org.apache.lucene.search;
import junit.framework.TestCase;
/**
* Copyright 2005 Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @author yonik
*/
public class QueryUtils {
/** Check the types of things query objects should be able to do. */
public static void check(Query q) {
checkHashEquals(q);
}
/** check very basic hashCode and equals */
public static void checkHashEquals(Query q) {
Query q2 = (Query)q.clone();
checkEqual(q,q2);
Query q3 = (Query)q.clone();
q3.setBoost(7.21792348f);
checkUnequal(q,q3);
// test that a class check is done so that no exception is thrown
// in the implementation of equals()
Query whacky = new Query() {
public String toString(String field) {
return "My Whacky Query";
}
};
whacky.setBoost(q.getBoost());
checkUnequal(q, whacky);
}
public static void checkEqual(Query q1, Query q2) {
TestCase.assertEquals(q1, q2);
TestCase.assertEquals(q1.hashCode(), q2.hashCode());
}
public static void checkUnequal(Query q1, Query q2) {
TestCase.assertTrue(!q1.equals(q2));
TestCase.assertTrue(!q2.equals(q1));
// possible this test can fail on a hash collision... if that
// happens, please change test to use a different example.
TestCase.assertTrue(q1.hashCode() != q2.hashCode());
}
}

View File

@ -0,0 +1,347 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 1e-6f;
public TestConstantScoreRangeQuery(String name) {
super(name);
}
public TestConstantScoreRangeQuery() {
super();
}
Directory small;
void assertEquals(String m, float e, float a) {
assertEquals(m, e, a, SCORE_COMP_THRESH);
}
public void setUp() throws Exception {
super.setUp();
String[] data = new String [] {
"A 1 2 3 4 5 6",
"Z 4 5 6",
null,
"B 2 4 5 6",
"Y 3 5 6",
null,
"C 3 6",
"X 4 5 6"
};
small = new RAMDirectory();
IndexWriter writer = new IndexWriter(small,
new WhitespaceAnalyzer(),
true);
for (int i = 0; i < data.length; i++) {
Document doc = new Document();
doc.add(Field.Keyword("id",String.valueOf(i)));
doc.add(Field.Keyword("all","all"));
if (null != data[i]) {
doc.add(Field.Text("data",data[i]));
}
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}
/** macro for readability */
public static Query csrq(String f, String l, String h,
boolean il, boolean ih) {
return new ConstantScoreRangeQuery(f,l,h,il,ih);
}
public void testBasics() throws IOException {
QueryUtils.check(csrq("data","1","6",T,T));
QueryUtils.check(csrq("data","A","Z",T,T));
QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T));
}
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
Hits result;
// some hits match more terms then others, score should be the same
result = search.search(csrq("data","1","6",T,T));
int numHits = result.length();
assertEquals("wrong number of results", 6, numHits);
float score = result.score(0);
for (int i = 1; i < numHits; i++) {
assertEquals("score for " + i +" was not the same",
score, result.score(i));
}
}
public void testBoost() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
// test for correct application of query normalization
// must use a non score normalizing method for this.
Query q = csrq("data","1","6",T,T);
q.setBoost(100);
search.search(q,null, new HitCollector() {
public void collect(int doc, float score) {
assertEquals("score for doc " + doc +" was not correct",
1.0f, score);
}
});
//
// Ensure that boosting works to score one clause of a query higher
// than another.
//
Query q1 = csrq("data","A","A",T,T); // matches document #0
q1.setBoost(.1f);
Query q2 = csrq("data","Z","Z",T,T); // matches document #1
BooleanQuery bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
Hits hits = search.search(bq);
assertEquals(1, hits.id(0));
assertEquals(0, hits.id(1));
assertTrue(hits.score(0) > hits.score(1));
q1 = csrq("data","A","A",T,T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data","Z","Z",T,T); // matches document #1
bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
hits = search.search(bq);
assertEquals(0, hits.id(0));
assertEquals(1, hits.id(1));
assertTrue(hits.score(0) > hits.score(1));
}
public void testBooleanOrderUnAffected() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
// first do a regular RangeQuery which uses term expansion so
// docs with more terms in range get higher scores
Query rq = new RangeQuery(new Term("data","1"),new Term("data","4"),T);
Hits expected = search.search(rq);
int numHits = expected.length();
// now do a boolean where which also contains a
// ConstantScoreRangeQuery and make sure hte order is the same
BooleanQuery q = new BooleanQuery();
q.add(rq, T, F);
q.add(csrq("data","1","6", T, T), T, F);
Hits actual = search.search(q);
assertEquals("wrong numebr of hits", numHits, actual.length());
for (int i = 0; i < numHits; i++) {
assertEquals("mismatch in docid for hit#"+i,
expected.id(i), actual.id(i));
}
}
public void testRangeQueryId() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
Hits result;
// test id, bounded on both ends
result = search.search(csrq("id",minIP,maxIP,T,T));
assertEquals("find all", numDocs, result.length());
result = search.search(csrq("id",minIP,maxIP,T,F));
assertEquals("all but last", numDocs-1, result.length());
result = search.search(csrq("id",minIP,maxIP,F,T));
assertEquals("all but first", numDocs-1, result.length());
result = search.search(csrq("id",minIP,maxIP,F,F));
assertEquals("all but ends", numDocs-2, result.length());
result = search.search(csrq("id",medIP,maxIP,T,T));
assertEquals("med and up", 1+ maxId-medId, result.length());
result = search.search(csrq("id",minIP,medIP,T,T));
assertEquals("up to med", 1+ medId-minId, result.length());
// unbounded id
result = search.search(csrq("id",minIP,null,T,F));
assertEquals("min and up", numDocs, result.length());
result = search.search(csrq("id",null,maxIP,F,T));
assertEquals("max and down", numDocs, result.length());
result = search.search(csrq("id",minIP,null,F,F));
assertEquals("not min, but up", numDocs-1, result.length());
result = search.search(csrq("id",null,maxIP,F,F));
assertEquals("not max, but down", numDocs-1, result.length());
result = search.search(csrq("id",medIP,maxIP,T,F));
assertEquals("med and up, not max", maxId-medId, result.length());
result = search.search(csrq("id",minIP,medIP,F,T));
assertEquals("not min, up to med", medId-minId, result.length());
// very small sets
result = search.search(csrq("id",minIP,minIP,F,F));
assertEquals("min,min,F,F", 0, result.length());
result = search.search(csrq("id",medIP,medIP,F,F));
assertEquals("med,med,F,F", 0, result.length());
result = search.search(csrq("id",maxIP,maxIP,F,F));
assertEquals("max,max,F,F", 0, result.length());
result = search.search(csrq("id",minIP,minIP,T,T));
assertEquals("min,min,T,T", 1, result.length());
result = search.search(csrq("id",null,minIP,F,T));
assertEquals("nul,min,F,T", 1, result.length());
result = search.search(csrq("id",maxIP,maxIP,T,T));
assertEquals("max,max,T,T", 1, result.length());
result = search.search(csrq("id",maxIP,null,T,F));
assertEquals("max,nul,T,T", 1, result.length());
result = search.search(csrq("id",medIP,medIP,T,T));
assertEquals("med,med,T,T", 1, result.length());
}
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(minR);
String maxRP = pad(maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
Hits result;
Query q = new TermQuery(new Term("body","body"));
// test extremes, bounded on both ends
result = search.search(csrq("rand",minRP,maxRP,T,T));
assertEquals("find all", numDocs, result.length());
result = search.search(csrq("rand",minRP,maxRP,T,F));
assertEquals("all but biggest", numDocs-1, result.length());
result = search.search(csrq("rand",minRP,maxRP,F,T));
assertEquals("all but smallest", numDocs-1, result.length());
result = search.search(csrq("rand",minRP,maxRP,F,F));
assertEquals("all but extremes", numDocs-2, result.length());
// unbounded
result = search.search(csrq("rand",minRP,null,T,F));
assertEquals("smallest and up", numDocs, result.length());
result = search.search(csrq("rand",null,maxRP,F,T));
assertEquals("biggest and down", numDocs, result.length());
result = search.search(csrq("rand",minRP,null,F,F));
assertEquals("not smallest, but up", numDocs-1, result.length());
result = search.search(csrq("rand",null,maxRP,F,F));
assertEquals("not biggest, but down", numDocs-1, result.length());
// very small sets
result = search.search(csrq("rand",minRP,minRP,F,F));
assertEquals("min,min,F,F", 0, result.length());
result = search.search(csrq("rand",maxRP,maxRP,F,F));
assertEquals("max,max,F,F", 0, result.length());
result = search.search(csrq("rand",minRP,minRP,T,T));
assertEquals("min,min,T,T", 1, result.length());
result = search.search(csrq("rand",null,minRP,F,T));
assertEquals("nul,min,F,T", 1, result.length());
result = search.search(csrq("rand",maxRP,maxRP,T,T));
assertEquals("max,max,T,T", 1, result.length());
result = search.search(csrq("rand",maxRP,null,T,F));
assertEquals("max,nul,T,T", 1, result.length());
}
}