From fd41195ef316b9ff2f1721d259c2bbeedf1a1ff0 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Tue, 15 Nov 2005 05:28:52 +0000 Subject: [PATCH] ConstantScoreRangeQuery addition: LUCENE-383 git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@344312 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 10 + .../lucene/search/ConstantScoreQuery.java | 158 ++++++++ .../search/ConstantScoreRangeQuery.java | 138 +++++++ .../org/apache/lucene/search/QueryUtils.java | 66 ++++ .../search/TestConstantScoreRangeQuery.java | 347 ++++++++++++++++++ 5 files changed, 719 insertions(+) create mode 100644 src/java/org/apache/lucene/search/ConstantScoreQuery.java create mode 100644 src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java create mode 100644 src/test/org/apache/lucene/search/QueryUtils.java create mode 100644 src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java diff --git a/CHANGES.txt b/CHANGES.txt index 031bff9836f..2a27452ad16 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -169,6 +169,16 @@ New features These two new queries are not currently supported via QueryParser. (Erik Hatcher) +24. Added ConstantScoreQuery which wraps a filter and produces a score + equal to the query boost for every matching document. + (Yonik Seeley, LUCENE-383) + +25. Added ConstantScoreRangeQuery which produces a constant score for + every document in the range. One advantage over a normal RangeQuery + is that it doesn't expand to a BooleanQuery and thus doesn't have a maximum + number of terms the range can cover. Both endpoints may also be open. + (Yonik Seeley, LUCENE-383) + API Changes 1. Several methods and fields have been deprecated. The API documentation diff --git a/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/src/java/org/apache/lucene/search/ConstantScoreQuery.java new file mode 100644 index 00000000000..b0187d207ca --- /dev/null +++ b/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -0,0 +1,158 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; +import java.util.BitSet; + +/** + * A query that wraps a filter and simply returns a constant score equal to the + * query boost for every document in the filter. + * + * @author yonik + * @version $Id$ + */ +public class ConstantScoreQuery extends Query { + protected final Filter filter; + + public ConstantScoreQuery(Filter filter) { + this.filter=filter; + } + + public Query rewrite(IndexReader reader) throws IOException { + return this; + } + + protected class ConstantWeight implements Weight { + private Searcher searcher; + private float queryNorm; + private float queryWeight; + + public ConstantWeight(Searcher searcher) { + this.searcher = searcher; + } + + public Query getQuery() { + return ConstantScoreQuery.this; + } + + public float getValue() { + return queryWeight; + } + + public float sumOfSquaredWeights() throws IOException { + queryWeight = getBoost(); + return queryWeight * queryWeight; + } + + public void normalize(float norm) { + this.queryNorm = norm; + queryWeight *= this.queryNorm; + } + + public Scorer scorer(IndexReader reader) throws IOException { + return new ConstantScorer(getSimilarity(searcher), reader, this); + } + + public Explanation explain(IndexReader reader, int doc) throws IOException { + + ConstantScorer cs = (ConstantScorer)scorer(reader); + boolean exists = cs.bits.get(doc); + + Explanation result = new Explanation(); + + if (exists) { + result.setDescription("ConstantScoreQuery(" + filter + + "), product of:"); + result.setValue(queryWeight); + result.addDetail(new Explanation(getBoost(), "boost")); + result.addDetail(new Explanation(queryNorm,"queryNorm")); + } else { + result.setDescription("ConstantScoreQuery(" + filter + + ") doesn't match id " + doc); + result.setValue(0); + } + return result; + } + } + + protected class ConstantScorer extends Scorer { + final BitSet bits; + final float theScore; + int doc=-1; + + public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException { + super(similarity); + theScore = w.getValue(); + bits = filter.bits(reader); + } + + public boolean next() throws IOException { + doc = bits.nextSetBit(doc+1); + return doc >= 0; + } + + public int doc() { + return doc; + } + + public float score() throws IOException { + return theScore; + } + + public boolean skipTo(int target) throws IOException { + doc = bits.nextSetBit(target); // requires JDK 1.4 + return doc >= 0; + } + + public Explanation explain(int doc) throws IOException { + throw new UnsupportedOperationException(); + } + } + + + protected Weight createWeight(Searcher searcher) { + return new ConstantScoreQuery.ConstantWeight(searcher); + } + + + /** Prints a user-readable version of this query. */ + public String toString(String field) + { + return "ConstantScore(" + filter.toString() + + (getBoost()==1.0 ? ")" : "^" + getBoost()); + } + + /** Returns true if o is equal to this. */ + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ConstantScoreQuery)) return false; + ConstantScoreQuery other = (ConstantScoreQuery)o; + return this.getBoost()==other.getBoost() && filter.equals(other.filter); + } + + /** Returns a hash code value for this object. */ + public int hashCode() { + // Simple add is OK since no existing filter hashcode has a float component. + return filter.hashCode() + Float.floatToIntBits(getBoost()); + } + +} + + diff --git a/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java b/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java new file mode 100644 index 00000000000..9b7689b7630 --- /dev/null +++ b/src/java/org/apache/lucene/search/ConstantScoreRangeQuery.java @@ -0,0 +1,138 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; + +/** + * A range query that returns a constant score equal to it's boost for + * all documents in the range. + *

+ * It does not have an upper bound on the number of clauses covered in the range. + *

+ * If an endpoint is null, it is said to be "open". + * Either or both endpoints may be open. Open endpoints may not be exclusive + * (you can't select all but the first or last term without explicitly specifying the term to exclude.) + * + * @author yonik + * @version $Id$ + */ + +public class ConstantScoreRangeQuery extends Query +{ + private final String fieldName; + private final String lowerVal; + private final String upperVal; + private final boolean includeLower; + private final boolean includeUpper; + + + public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper) + { + // do a little bit of normalization... + // open ended range queries should always be inclusive. + if (lowerVal==null) { + includeLower=true; + } else if (includeLower && lowerVal.equals("")) { + lowerVal=null; + } + if (upperVal==null) { + includeUpper=true; + } + + + this.fieldName = fieldName.intern(); // intern it, just like terms... + this.lowerVal = lowerVal; + this.upperVal = upperVal; + this.includeLower = includeLower; + this.includeUpper = includeUpper; + } + + /** Returns the field name for this query */ + public String getField() { return fieldName; } + /** Returns the value of the lower endpoint of this range query, null if open ended */ + public String getLowerVal() { return lowerVal; } + /** Returns the value of the upper endpoint of this range query, null if open ended */ + public String getUpperVal() { return upperVal; } + /** Returns true if the lower endpoint is inclusive */ + public boolean includesLower() { return includeLower; } + /** Returns true if the upper endpoint is inclusive */ + public boolean includesUpper() { return includeUpper; } + + public Query rewrite(IndexReader reader) throws IOException { + // Map to RangeFilter semantics which are slightly different... + RangeFilter rangeFilt = new RangeFilter(fieldName, + lowerVal!=null?lowerVal:"", + upperVal, lowerVal==""?false:includeLower, upperVal==null?false:includeUpper); + Query q = new ConstantScoreQuery(rangeFilt); + q.setBoost(getBoost()); + return q; + } + + /** Prints a user-readable version of this query. */ + public String toString(String field) + { + StringBuffer buffer = new StringBuffer(); + if (!getField().equals(field)) + { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(includeLower ? '[' : '{'); + buffer.append(lowerVal != null ? lowerVal : "*"); + buffer.append(" TO "); + buffer.append(upperVal != null ? upperVal : "*"); + buffer.append(includeUpper ? ']' : '}'); + if (getBoost() != 1.0f) + { + buffer.append("^"); + buffer.append(Float.toString(getBoost())); + } + return buffer.toString(); + } + + /** Returns true if o is equal to this. */ + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof ConstantScoreRangeQuery)) return false; + ConstantScoreRangeQuery other = (ConstantScoreRangeQuery) o; + + if (this.fieldName != other.fieldName // interned comparison + || this.includeLower != other.includeLower + || this.includeUpper != other.includeUpper + ) { return false; } + if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false; + if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false; + return this.getBoost() == other.getBoost(); + } + + /** Returns a hash code value for this object.*/ + public int hashCode() { + int h = Float.floatToIntBits(getBoost()) ^ fieldName.hashCode(); + // hashCode of "" is 0, so don't use that for null... + h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a; + // don't just XOR upperVal with out mixing either it or h, as it will cancel + // out lowerVal if they are equal. + h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix + h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69); + h ^= (includeLower ? 0x665599aa : 0) + ^ (includeUpper ? 0x99aa5566 : 0); + return h; + } +} diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java new file mode 100644 index 00000000000..b685d73a994 --- /dev/null +++ b/src/test/org/apache/lucene/search/QueryUtils.java @@ -0,0 +1,66 @@ +package org.apache.lucene.search; + +import junit.framework.TestCase; + +/** + * Copyright 2005 Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * @author yonik + */ +public class QueryUtils { + + /** Check the types of things query objects should be able to do. */ + public static void check(Query q) { + checkHashEquals(q); + } + + /** check very basic hashCode and equals */ + public static void checkHashEquals(Query q) { + Query q2 = (Query)q.clone(); + checkEqual(q,q2); + + Query q3 = (Query)q.clone(); + q3.setBoost(7.21792348f); + checkUnequal(q,q3); + + // test that a class check is done so that no exception is thrown + // in the implementation of equals() + Query whacky = new Query() { + public String toString(String field) { + return "My Whacky Query"; + } + }; + whacky.setBoost(q.getBoost()); + checkUnequal(q, whacky); + } + + public static void checkEqual(Query q1, Query q2) { + TestCase.assertEquals(q1, q2); + TestCase.assertEquals(q1.hashCode(), q2.hashCode()); + } + + public static void checkUnequal(Query q1, Query q2) { + TestCase.assertTrue(!q1.equals(q2)); + TestCase.assertTrue(!q2.equals(q1)); + + // possible this test can fail on a hash collision... if that + // happens, please change test to use a different example. + TestCase.assertTrue(q1.hashCode() != q2.hashCode()); + } + +} diff --git a/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java b/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java new file mode 100644 index 00000000000..bd1a3ebadca --- /dev/null +++ b/src/test/org/apache/lucene/search/TestConstantScoreRangeQuery.java @@ -0,0 +1,347 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.store.Directory; + +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; + +public class TestConstantScoreRangeQuery extends BaseTestRangeFilter { + + /** threshold for comparing floats */ + public static final float SCORE_COMP_THRESH = 1e-6f; + + public TestConstantScoreRangeQuery(String name) { + super(name); + } + public TestConstantScoreRangeQuery() { + super(); + } + + Directory small; + + void assertEquals(String m, float e, float a) { + assertEquals(m, e, a, SCORE_COMP_THRESH); + } + + public void setUp() throws Exception { + super.setUp(); + + String[] data = new String [] { + "A 1 2 3 4 5 6", + "Z 4 5 6", + null, + "B 2 4 5 6", + "Y 3 5 6", + null, + "C 3 6", + "X 4 5 6" + }; + + small = new RAMDirectory(); + IndexWriter writer = new IndexWriter(small, + new WhitespaceAnalyzer(), + true); + + for (int i = 0; i < data.length; i++) { + Document doc = new Document(); + doc.add(Field.Keyword("id",String.valueOf(i))); + doc.add(Field.Keyword("all","all")); + if (null != data[i]) { + doc.add(Field.Text("data",data[i])); + } + writer.addDocument(doc); + } + + writer.optimize(); + writer.close(); + } + + + + /** macro for readability */ + public static Query csrq(String f, String l, String h, + boolean il, boolean ih) { + return new ConstantScoreRangeQuery(f,l,h,il,ih); + } + + public void testBasics() throws IOException { + QueryUtils.check(csrq("data","1","6",T,T)); + QueryUtils.check(csrq("data","A","Z",T,T)); + QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T)); + } + + public void testEqualScores() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + Hits result; + + // some hits match more terms then others, score should be the same + + result = search.search(csrq("data","1","6",T,T)); + int numHits = result.length(); + assertEquals("wrong number of results", 6, numHits); + float score = result.score(0); + for (int i = 1; i < numHits; i++) { + assertEquals("score for " + i +" was not the same", + score, result.score(i)); + } + + } + + public void testBoost() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + // test for correct application of query normalization + // must use a non score normalizing method for this. + Query q = csrq("data","1","6",T,T); + q.setBoost(100); + search.search(q,null, new HitCollector() { + public void collect(int doc, float score) { + assertEquals("score for doc " + doc +" was not correct", + 1.0f, score); + } + }); + + + // + // Ensure that boosting works to score one clause of a query higher + // than another. + // + Query q1 = csrq("data","A","A",T,T); // matches document #0 + q1.setBoost(.1f); + Query q2 = csrq("data","Z","Z",T,T); // matches document #1 + BooleanQuery bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + Hits hits = search.search(bq); + assertEquals(1, hits.id(0)); + assertEquals(0, hits.id(1)); + assertTrue(hits.score(0) > hits.score(1)); + + q1 = csrq("data","A","A",T,T); // matches document #0 + q1.setBoost(10f); + q2 = csrq("data","Z","Z",T,T); // matches document #1 + bq = new BooleanQuery(true); + bq.add(q1, BooleanClause.Occur.SHOULD); + bq.add(q2, BooleanClause.Occur.SHOULD); + + hits = search.search(bq); + assertEquals(0, hits.id(0)); + assertEquals(1, hits.id(1)); + assertTrue(hits.score(0) > hits.score(1)); + } + + + public void testBooleanOrderUnAffected() throws IOException { + // NOTE: uses index build in *this* setUp + + IndexReader reader = IndexReader.open(small); + IndexSearcher search = new IndexSearcher(reader); + + // first do a regular RangeQuery which uses term expansion so + // docs with more terms in range get higher scores + + Query rq = new RangeQuery(new Term("data","1"),new Term("data","4"),T); + + Hits expected = search.search(rq); + int numHits = expected.length(); + + // now do a boolean where which also contains a + // ConstantScoreRangeQuery and make sure hte order is the same + + BooleanQuery q = new BooleanQuery(); + q.add(rq, T, F); + q.add(csrq("data","1","6", T, T), T, F); + + Hits actual = search.search(q); + + assertEquals("wrong numebr of hits", numHits, actual.length()); + for (int i = 0; i < numHits; i++) { + assertEquals("mismatch in docid for hit#"+i, + expected.id(i), actual.id(i)); + } + + } + + + + + + public void testRangeQueryId() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(index); + IndexSearcher search = new IndexSearcher(reader); + + int medId = ((maxId - minId) / 2); + + String minIP = pad(minId); + String maxIP = pad(maxId); + String medIP = pad(medId); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + Hits result; + + // test id, bounded on both ends + + result = search.search(csrq("id",minIP,maxIP,T,T)); + assertEquals("find all", numDocs, result.length()); + + result = search.search(csrq("id",minIP,maxIP,T,F)); + assertEquals("all but last", numDocs-1, result.length()); + + result = search.search(csrq("id",minIP,maxIP,F,T)); + assertEquals("all but first", numDocs-1, result.length()); + + result = search.search(csrq("id",minIP,maxIP,F,F)); + assertEquals("all but ends", numDocs-2, result.length()); + + result = search.search(csrq("id",medIP,maxIP,T,T)); + assertEquals("med and up", 1+ maxId-medId, result.length()); + + result = search.search(csrq("id",minIP,medIP,T,T)); + assertEquals("up to med", 1+ medId-minId, result.length()); + + // unbounded id + + result = search.search(csrq("id",minIP,null,T,F)); + assertEquals("min and up", numDocs, result.length()); + + result = search.search(csrq("id",null,maxIP,F,T)); + assertEquals("max and down", numDocs, result.length()); + + result = search.search(csrq("id",minIP,null,F,F)); + assertEquals("not min, but up", numDocs-1, result.length()); + + result = search.search(csrq("id",null,maxIP,F,F)); + assertEquals("not max, but down", numDocs-1, result.length()); + + result = search.search(csrq("id",medIP,maxIP,T,F)); + assertEquals("med and up, not max", maxId-medId, result.length()); + + result = search.search(csrq("id",minIP,medIP,F,T)); + assertEquals("not min, up to med", medId-minId, result.length()); + + // very small sets + + result = search.search(csrq("id",minIP,minIP,F,F)); + assertEquals("min,min,F,F", 0, result.length()); + result = search.search(csrq("id",medIP,medIP,F,F)); + assertEquals("med,med,F,F", 0, result.length()); + result = search.search(csrq("id",maxIP,maxIP,F,F)); + assertEquals("max,max,F,F", 0, result.length()); + + result = search.search(csrq("id",minIP,minIP,T,T)); + assertEquals("min,min,T,T", 1, result.length()); + result = search.search(csrq("id",null,minIP,F,T)); + assertEquals("nul,min,F,T", 1, result.length()); + + result = search.search(csrq("id",maxIP,maxIP,T,T)); + assertEquals("max,max,T,T", 1, result.length()); + result = search.search(csrq("id",maxIP,null,T,F)); + assertEquals("max,nul,T,T", 1, result.length()); + + result = search.search(csrq("id",medIP,medIP,T,T)); + assertEquals("med,med,T,T", 1, result.length()); + + } + + public void testRangeQueryRand() throws IOException { + // NOTE: uses index build in *super* setUp + + IndexReader reader = IndexReader.open(index); + IndexSearcher search = new IndexSearcher(reader); + + String minRP = pad(minR); + String maxRP = pad(maxR); + + int numDocs = reader.numDocs(); + + assertEquals("num of docs", numDocs, 1+ maxId - minId); + + Hits result; + Query q = new TermQuery(new Term("body","body")); + + // test extremes, bounded on both ends + + result = search.search(csrq("rand",minRP,maxRP,T,T)); + assertEquals("find all", numDocs, result.length()); + + result = search.search(csrq("rand",minRP,maxRP,T,F)); + assertEquals("all but biggest", numDocs-1, result.length()); + + result = search.search(csrq("rand",minRP,maxRP,F,T)); + assertEquals("all but smallest", numDocs-1, result.length()); + + result = search.search(csrq("rand",minRP,maxRP,F,F)); + assertEquals("all but extremes", numDocs-2, result.length()); + + // unbounded + + result = search.search(csrq("rand",minRP,null,T,F)); + assertEquals("smallest and up", numDocs, result.length()); + + result = search.search(csrq("rand",null,maxRP,F,T)); + assertEquals("biggest and down", numDocs, result.length()); + + result = search.search(csrq("rand",minRP,null,F,F)); + assertEquals("not smallest, but up", numDocs-1, result.length()); + + result = search.search(csrq("rand",null,maxRP,F,F)); + assertEquals("not biggest, but down", numDocs-1, result.length()); + + // very small sets + + result = search.search(csrq("rand",minRP,minRP,F,F)); + assertEquals("min,min,F,F", 0, result.length()); + result = search.search(csrq("rand",maxRP,maxRP,F,F)); + assertEquals("max,max,F,F", 0, result.length()); + + result = search.search(csrq("rand",minRP,minRP,T,T)); + assertEquals("min,min,T,T", 1, result.length()); + result = search.search(csrq("rand",null,minRP,F,T)); + assertEquals("nul,min,F,T", 1, result.length()); + + result = search.search(csrq("rand",maxRP,maxRP,T,T)); + assertEquals("max,max,T,T", 1, result.length()); + result = search.search(csrq("rand",maxRP,null,T,F)); + assertEquals("max,nul,T,T", 1, result.length()); + + } + +}