LUCENE-1424: enable MultiTermQuery to do constant scoring; refactor Range, Wildcard, Prefix to expose this

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@712890 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-11-11 00:40:00 +00:00
parent 66c16c196d
commit abe6ea0828
24 changed files with 1274 additions and 1309 deletions

View File

@ -5,6 +5,12 @@ $Id$
Changes in runtime behavior
1. LUCENE-1424: QueryParser now by default uses constant score query
rewriting when it generates a WildcardQuery and PrefixQuery (it
already does so for RangeQuery, as well). Call
setConstantScoreRewrite(false) to revert to BooleanQuery rewriting
method. (Mark Miller via Mike McCandless)
API Changes
1. LUCENE-1419: Add expert API to set custom indexing chain. This API is
@ -50,6 +56,13 @@ New features
5. Added web-based demo of functionality in contrib's XML Query Parser
packaged as War file (Mark Harwood)
6. LUCENE-1424: Moved constant score query rewrite capability into
MultiTermQuery, allowing RangeQuery, PrefixQuery and WildcardQuery
to switch between constant-score rewriting or BooleanQuery
expansion rewriting via a new setConstantScoreRewrite method.
Deprecated ConstantScoreRangeQuery (Mark Miller via Mike
McCandless)
Optimizations
1. LUCENE-1427: Fixed QueryWrapperFilter to not waste time computing

View File

@ -44,7 +44,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@ -52,6 +51,7 @@ import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
@ -462,14 +462,8 @@ public class HighlighterTest extends TestCase implements Formatter {
public void testGetConstantScoreRangeFragments() throws Exception {
numHighlights = 0;
String queryString = FIELD_NAME + ":[kannedy TO kznnedy]";
// Need to explicitly set the QueryParser property to use RangeQuery
// rather
// than RangeFilters
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
// parser.setUseOldRangeQuery(true);
query = parser.parse(queryString);
query = new ConstantScoreRangeQuery(FIELD_NAME, "kannedy", "kznnedy", true, true);
searcher = new IndexSearcher(ramDir);
// can't rewrite ConstantScoreRangeQuery if you want to highlight it -
@ -1043,6 +1037,7 @@ public class HighlighterTest extends TestCase implements Formatter {
searchers[1] = new IndexSearcher(ramDir2);
MultiSearcher multiSearcher = new MultiSearcher(searchers);
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
parser.setConstantScoreRewrite(false);
query = parser.parse("multi*");
System.out.println("Searching for: " + query.toString(FIELD_NAME));
// at this point the multisearcher calls combine(query[])
@ -1343,6 +1338,7 @@ public class HighlighterTest extends TestCase implements Formatter {
public void doSearching(String queryString) throws Exception {
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer());
parser.setConstantScoreRewrite(false);
query = parser.parse(queryString);
doSearching(query);
}

View File

@ -109,4 +109,4 @@ public interface CharStream {
void Done();
}
/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
/* JavaCC - OriginalChecksum=a83909a2403f969f94d18375f9f143e4 (do not edit this line) */

View File

@ -195,4 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
/* JavaCC - OriginalChecksum=c63b396885c4ff44d7aa48d3feae60cd (do not edit this line) */

View File

@ -21,7 +21,6 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@ -94,7 +93,6 @@ import org.apache.lucene.util.Parameter;
* </p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
*/
public class QueryParser implements QueryParserConstants {
@ -117,7 +115,7 @@ public class QueryParser implements QueryParserConstants {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
boolean useOldRangeQuery= false;
boolean constantScoreRewrite= true;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@ -134,7 +132,7 @@ public class QueryParser implements QueryParserConstants {
Map fieldToDateResolution = null;
// The collator to use when determining range inclusion,
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
// for use when constructing RangeQuerys.
Collator rangeCollator = null;
/** The default operator for parsing queries.
@ -324,24 +322,40 @@ public class QueryParser implements QueryParserConstants {
}
/**
* By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery
* for range queries. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of range terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the old-fashioned RangeQuery and the above
* points are not required then set this option to <code>true</code>
* Default is <code>false</code>.
* @deprecated Please use {@link #setConstantScoreRewrite} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
this.useOldRangeQuery = useOldRangeQuery;
constantScoreRewrite = !useOldRangeQuery;
}
/**
* @see #setUseOldRangeQuery(boolean)
* @deprecated Please use {@link #getConstantScoreRewrite} instead.
*/
public boolean getUseOldRangeQuery() {
return useOldRangeQuery;
return !constantScoreRewrite;
}
/**
* By default QueryParser uses constant-score rewriting
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
* points are not relevant then set this option to <code>true</code>
* Default is <code>false</code>.
*/
public void setConstantScoreRewrite(boolean v) {
constantScoreRewrite = v;
}
/**
* @see #setConstantScoreRewrite(boolean)
*/
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
}
/**
@ -415,9 +429,7 @@ public class QueryParser implements QueryParserConstants {
/**
* Sets the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* for RangeQuerys.
* <p/>
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
@ -426,7 +438,6 @@ public class QueryParser implements QueryParserConstants {
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
* and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
@ -434,9 +445,7 @@ public class QueryParser implements QueryParserConstants {
/**
* @return the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* for RangeQuerys.
*/
public Collator getRangeCollator() {
return rangeCollator;
@ -718,7 +727,9 @@ public class QueryParser implements QueryParserConstants {
* @return new PrefixQuery instance
*/
protected Query newPrefixQuery(Term prefix){
return new PrefixQuery(prefix);
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -729,6 +740,7 @@ public class QueryParser implements QueryParserConstants {
* @return new FuzzyQuery Instance
*/
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
// FuzzyQuery doesn't yet allow constant score rewrite
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
}
@ -741,17 +753,9 @@ public class QueryParser implements QueryParserConstants {
* @return new RangeQuery instance
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
if(useOldRangeQuery)
{
return new RangeQuery(new Term(field, part1),
new Term(field, part2),
inclusive, rangeCollator);
}
else
{
return new ConstantScoreRangeQuery
(field, part1, part2, inclusive, inclusive, rangeCollator);
}
RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -768,7 +772,9 @@ public class QueryParser implements QueryParserConstants {
* @return new WildcardQuery instance
*/
protected Query newWildcardQuery(Term t) {
return new WildcardQuery(t);
WildcardQuery query = new WildcardQuery(t);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -1245,7 +1251,6 @@ public class QueryParser implements QueryParserConstants {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean rangein = false;
Query q;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case STAR:
@ -1490,12 +1495,6 @@ public class QueryParser implements QueryParserConstants {
finally { jj_save(0, xla); }
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -1512,6 +1511,12 @@ public class QueryParser implements QueryParserConstants {
return false;
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -45,7 +45,6 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
@ -118,10 +117,6 @@ import org.apache.lucene.util.Parameter;
* </p>
*
* <p>Note that QueryParser is <em>not</em> thread-safe.</p>
*
* @author Brian Goetz
* @author Peter Halacsy
* @author Tatu Saloranta
*/
public class QueryParser {
@ -144,7 +139,7 @@ public class QueryParser {
private Operator operator = OR_OPERATOR;
boolean lowercaseExpandedTerms = true;
boolean useOldRangeQuery= false;
boolean constantScoreRewrite= true;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
@ -161,7 +156,7 @@ public class QueryParser {
Map fieldToDateResolution = null;
// The collator to use when determining range inclusion,
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
// for use when constructing RangeQuerys.
Collator rangeCollator = null;
/** The default operator for parsing queries.
@ -351,24 +346,40 @@ public class QueryParser {
}
/**
* By default QueryParser uses new ConstantScoreRangeQuery in preference to RangeQuery
* for range queries. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of range terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the old-fashioned RangeQuery and the above
* points are not required then set this option to <code>true</code>
* Default is <code>false</code>.
* @deprecated Please use {@link #setConstantScoreRewrite} instead.
*/
public void setUseOldRangeQuery(boolean useOldRangeQuery) {
this.useOldRangeQuery = useOldRangeQuery;
constantScoreRewrite = !useOldRangeQuery;
}
/**
* @see #setUseOldRangeQuery(boolean)
* @deprecated Please use {@link #getConstantScoreRewrite} instead.
*/
public boolean getUseOldRangeQuery() {
return useOldRangeQuery;
return !constantScoreRewrite;
}
/**
* By default QueryParser uses constant-score rewriting
* when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
* a) Runs faster b) Does not have the scarcity of terms unduly influence score
* c) avoids any "TooManyBooleanClauses" exception.
* However, if your application really needs to use the
* old-fashioned BooleanQuery expansion rewriting and the above
* points are not relevant then set this option to <code>true</code>
* Default is <code>false</code>.
*/
public void setConstantScoreRewrite(boolean v) {
constantScoreRewrite = v;
}
/**
* @see #setConstantScoreRewrite(boolean)
*/
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
}
/**
@ -442,9 +453,7 @@ public class QueryParser {
/**
* Sets the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* for RangeQuerys.
* <p/>
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
@ -453,7 +462,6 @@ public class QueryParser {
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
* and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
@ -461,9 +469,7 @@ public class QueryParser {
/**
* @return the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* for RangeQuerys.
*/
public Collator getRangeCollator() {
return rangeCollator;
@ -745,7 +751,9 @@ public class QueryParser {
* @return new PrefixQuery instance
*/
protected Query newPrefixQuery(Term prefix){
return new PrefixQuery(prefix);
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -756,6 +764,7 @@ public class QueryParser {
* @return new FuzzyQuery Instance
*/
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
// FuzzyQuery doesn't yet allow constant score rewrite
return new FuzzyQuery(term,minimumSimilarity,prefixLength);
}
@ -768,17 +777,9 @@ public class QueryParser {
* @return new RangeQuery instance
*/
protected Query newRangeQuery(String field, String part1, String part2, boolean inclusive) {
if(useOldRangeQuery)
{
return new RangeQuery(new Term(field, part1),
new Term(field, part2),
inclusive, rangeCollator);
}
else
{
return new ConstantScoreRangeQuery
(field, part1, part2, inclusive, inclusive, rangeCollator);
}
RangeQuery query = new RangeQuery(field, part1, part2, inclusive, inclusive, rangeCollator);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -795,7 +796,9 @@ public class QueryParser {
* @return new WildcardQuery instance
*/
protected Query newWildcardQuery(Term t) {
return new WildcardQuery(t);
WildcardQuery query = new WildcardQuery(t);
query.setConstantScoreRewrite(constantScoreRewrite);
return query;
}
/**
@ -1247,7 +1250,6 @@ Query Term(String field) : {
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean rangein = false;
Query q;
}
{

View File

@ -19,7 +19,6 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;

View File

@ -121,4 +121,4 @@ public class Token {
}
}
/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
/* JavaCC - OriginalChecksum=37b1923f964a5a434f5ea3d6952ff200 (do not edit this line) */

View File

@ -137,4 +137,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */
/* JavaCC - OriginalChecksum=55cddb2336a66b376c0bb59d916b326d (do not edit this line) */

View File

@ -17,9 +17,6 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.text.Collator;
/**
@ -32,121 +29,31 @@ import java.text.Collator;
* Either or both endpoints may be open. Open endpoints may not be exclusive
* (you can't select all but the first or last term without explicitly specifying the term to exclude.)
*
*
* @deprecated Please use {@link RangeQuery}, and call
* {@link RangeQuery#setConstantScoreRewrite}, instead.
* @version $Id$
*/
public class ConstantScoreRangeQuery extends Query
public class ConstantScoreRangeQuery extends RangeQuery
{
private final String fieldName;
private final String lowerVal;
private final String upperVal;
private final boolean includeLower;
private final boolean includeUpper;
private Collator collator;
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
{
// do a little bit of normalization...
// open ended range queries should always be inclusive.
if (lowerVal==null) {
includeLower=true;
} else if (includeLower && lowerVal.equals("")) {
lowerVal=null;
}
if (upperVal==null) {
includeUpper=true;
}
this.fieldName = fieldName.intern(); // intern it, just like terms...
this.lowerVal = lowerVal;
this.upperVal = upperVal;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
super(fieldName, lowerVal, upperVal, includeLower, includeUpper);
this.constantScoreRewrite = true;
}
public ConstantScoreRangeQuery(String fieldName, String lowerVal,
String upperVal, boolean includeLower,
boolean includeUpper, Collator collator)
{
this(fieldName, lowerVal, upperVal, includeLower, includeUpper);
this.collator = collator;
boolean includeUpper, Collator collator) {
super(fieldName, lowerVal, upperVal, includeLower, includeUpper, collator);
this.constantScoreRewrite = true;
}
/** Returns the field name for this query */
public String getField() { return fieldName; }
/** Returns the value of the lower endpoint of this range query, null if open ended */
public String getLowerVal() { return lowerVal; }
/** Returns the value of the upper endpoint of this range query, null if open ended */
public String getUpperVal() { return upperVal; }
/** Returns <code>true</code> if the lower endpoint is inclusive */
public boolean includesLower() { return includeLower; }
/** Returns <code>true</code> if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }
public Query rewrite(IndexReader reader) throws IOException {
// Map to RangeFilter semantics which are slightly different...
RangeFilter rangeFilt = new RangeFilter
(fieldName, lowerVal != null?lowerVal:"", upperVal,
lowerVal==""?false:includeLower, upperVal==null?false:includeUpper,
collator);
Query q = new ConstantScoreQuery(rangeFilt);
q.setBoost(getBoost());
return q;
public String getLowerVal() {
return getLowerTermText();
}
/** Prints a user-readable version of this query. */
public String toString(String field)
{
StringBuffer buffer = new StringBuffer();
if (!getField().equals(field))
{
buffer.append(getField());
buffer.append(":");
}
buffer.append(includeLower ? '[' : '{');
buffer.append(lowerVal != null ? lowerVal : "*");
buffer.append(" TO ");
buffer.append(upperVal != null ? upperVal : "*");
buffer.append(includeUpper ? ']' : '}');
if (getBoost() != 1.0f)
{
buffer.append("^");
buffer.append(Float.toString(getBoost()));
}
return buffer.toString();
}
/** Returns true if <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof ConstantScoreRangeQuery)) return false;
ConstantScoreRangeQuery other = (ConstantScoreRangeQuery) o;
if (this.fieldName != other.fieldName // interned comparison
|| this.includeLower != other.includeLower
|| this.includeUpper != other.includeUpper
|| (this.collator != null && ! this.collator.equals(other.collator))
) { return false; }
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
return this.getBoost() == other.getBoost();
}
/** Returns a hash code value for this object.*/
public int hashCode() {
int h = Float.floatToIntBits(getBoost()) ^ fieldName.hashCode();
// hashCode of "" is 0, so don't use that for null...
h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a;
// don't just XOR upperVal with out mixing either it or h, as it will cancel
// out lowerVal if they are equal.
h ^= (h << 17) | (h >>> 16); // a reversible (one to one) 32 bit mapping mix
h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
h ^= (includeLower ? 0x665599aa : 0)
^ (includeUpper ? 0x99aa5566 : 0);
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
public String getUpperVal() {
return getUpperTermText();
}
}

View File

@ -99,6 +99,10 @@ public class FuzzyQuery extends MultiTermQuery {
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
}
public void setConstantScoreRewrite(boolean constantScoreRewrite) {
throw new UnsupportedOperationException("FuzzyQuery cannot rewrite to a constant score query");
}
public Query rewrite(IndexReader reader) throws IOException {
FilteredTermEnum enumerator = getEnum(reader);

View File

@ -18,81 +18,197 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils;
/**
* A {@link Query} that matches documents containing a subset of terms provided
* by a {@link FilteredTermEnum} enumeration.
* <P>
* <code>MultiTermQuery</code> is not designed to be used by itself.
* <BR>
* <code>MultiTermQuery</code> is not designed to be used by itself. <BR>
* The reason being that it is not intialized with a {@link FilteredTermEnum}
* enumeration. A {@link FilteredTermEnum} enumeration needs to be provided.
* <P>
* For example, {@link WildcardQuery} and {@link FuzzyQuery} extend
* <code>MultiTermQuery</code> to provide {@link WildcardTermEnum} and
* {@link FuzzyTermEnum}, respectively.
*
* The pattern Term may be null. A query that uses a null pattern Term should
* override equals and hashcode.
*/
public abstract class MultiTermQuery extends Query {
private Term term;
protected Term term;
protected boolean constantScoreRewrite = false;
/** Constructs a query for terms matching <code>term</code>. */
public MultiTermQuery(Term term) {
this.term = term;
}
/** Constructs a query for terms matching <code>term</code>. */
public MultiTermQuery(Term term) {
this.term = term;
}
/** Returns the pattern term. */
public Term getTerm() { return term; }
/**
* Constructs a query matching terms that cannot be represented with a single
* Term.
*/
public MultiTermQuery() {
}
/** Construct the enumeration to be used, expanding the pattern term. */
protected abstract FilteredTermEnum getEnum(IndexReader reader)
/** Returns the pattern term. */
public Term getTerm() {
return term;
}
/** Construct the enumeration to be used, expanding the pattern term. */
protected abstract FilteredTermEnum getEnum(IndexReader reader)
throws IOException;
public Query rewrite(IndexReader reader) throws IOException {
protected Filter getFilter() {
return new MultiTermFilter(this);
}
public Query rewrite(IndexReader reader) throws IOException {
if (!constantScoreRewrite) {
FilteredTermEnum enumerator = getEnum(reader);
BooleanQuery query = new BooleanQuery(true);
try {
do {
Term t = enumerator.term();
if (t != null) {
TermQuery tq = new TermQuery(t); // found a match
TermQuery tq = new TermQuery(t); // found a match
tq.setBoost(getBoost() * enumerator.difference()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
} while (enumerator.next());
} finally {
enumerator.close();
}
return query;
} else {
Query query = new ConstantScoreQuery(getFilter());
query.setBoost(getBoost());
return query;
}
}
/** Prints a user-readable version of this query. */
public String toString(String field) {
StringBuffer buffer = new StringBuffer();
if (term != null) {
if (!term.field().equals(field)) {
buffer.append(term.field());
buffer.append(":");
}
buffer.append(term.text());
} else {
buffer.append("termPattern:unknown");
}
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
public boolean getConstantScoreRewrite() {
return constantScoreRewrite;
}
public void setConstantScoreRewrite(boolean constantScoreRewrite) {
this.constantScoreRewrite = constantScoreRewrite;
}
public boolean equals(Object o) {
if (o == null || term == null) {
throw new UnsupportedOperationException(
"MultiTermQuerys that do not use a pattern term need to override equals/hashcode");
}
/** Prints a user-readable version of this query. */
public String toString(String field) {
StringBuffer buffer = new StringBuffer();
if (!term.field().equals(field)) {
buffer.append(term.field());
buffer.append(":");
if (this == o)
return true;
if (!(o instanceof MultiTermQuery))
return false;
final MultiTermQuery multiTermQuery = (MultiTermQuery) o;
if (!term.equals(multiTermQuery.term))
return false;
return getBoost() == multiTermQuery.getBoost();
}
public int hashCode() {
if (term == null) {
throw new UnsupportedOperationException(
"MultiTermQuerys that do not use a pattern term need to override equals/hashcode");
}
return term.hashCode() + Float.floatToRawIntBits(getBoost());
}
static class MultiTermFilter extends Filter {
MultiTermQuery mtq;
abstract class TermGenerator {
public void generate(IndexReader reader) throws IOException {
TermEnum enumerator = mtq.getEnum(reader);
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term == null)
break;
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
}
} while (enumerator.next());
} finally {
termDocs.close();
enumerator.close();
}
buffer.append(term.text());
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
abstract public void handleDoc(int doc);
}
public MultiTermFilter(MultiTermQuery mtq) {
this.mtq = mtq;
}
public BitSet bits(IndexReader reader) throws IOException {
final BitSet bitSet = new BitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader);
return bitSet;
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader);
return bitSet;
}
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MultiTermQuery)) return false;
final MultiTermQuery multiTermQuery = (MultiTermQuery) o;
if (this == o)
return true;
if (!(o instanceof MultiTermFilter))
return false;
if (!term.equals(multiTermQuery.term)) return false;
return getBoost() == multiTermQuery.getBoost();
final MultiTermFilter filter = (MultiTermFilter) o;
return mtq.equals(filter.mtq);
}
public int hashCode() {
return term.hashCode() + Float.floatToRawIntBits(getBoost());
return mtq.hashCode();
}
}
}

View File

@ -17,25 +17,28 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.TermDocs;
import java.util.BitSet;
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
*
* @version $Id$
* A Filter that restricts search results to values that have a matching prefix in a given
* field.
*
* <p>
* This code borrows heavily from {@link PrefixQuery}, but is implemented as a Filter
*
* </p>
*/
public class PrefixFilter extends Filter {
protected final Term prefix;
private PrefixQuery prefixQuery;
public PrefixFilter(Term prefix) {
this.prefix = prefix;
this.prefixQuery = new PrefixQuery(prefix);
}
public Term getPrefix() { return prefix; }
@ -44,23 +47,11 @@ public class PrefixFilter extends Filter {
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
final BitSet bitSet = new BitSet(reader.maxDoc());
new PrefixGenerator(prefix) {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader);
return bitSet;
return prefixQuery.getFilter().bits(reader);
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
new PrefixGenerator(prefix) {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader);
return bitSet;
return prefixQuery.getFilter().getDocIdSet(reader);
}
/** Prints a user-readable version of this query. */
@ -71,51 +62,7 @@ public class PrefixFilter extends Filter {
buffer.append(")");
return buffer.toString();
}
}
// keep this protected until I decide if it's a good way
// to separate id generation from collection (or should
// I just reuse hitcollector???)
interface IdGenerator {
public void generate(IndexReader reader) throws IOException;
public void handleDoc(int doc);
}
abstract class PrefixGenerator implements IdGenerator {
protected final Term prefix;
PrefixGenerator(Term prefix) {
this.prefix = prefix;
}
public void generate(IndexReader reader) throws IOException {
TermEnum enumerator = reader.terms(prefix);
TermDocs termDocs = reader.termDocs();
try {
String prefixText = prefix.text();
String prefixField = prefix.field();
do {
Term term = enumerator.term();
if (term != null &&
term.text().startsWith(prefixText) &&
term.field() == prefixField) // interned comparison
{
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
}
} else {
break;
}
} while (enumerator.next());
} finally {
termDocs.close();
enumerator.close();
}
}
}

View File

@ -19,48 +19,33 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.ToStringUtils;
/** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
* is built by QueryParser for input like <code>app*</code>. */
public class PrefixQuery extends Query {
public class PrefixQuery extends MultiTermQuery {
private Term prefix;
/** Constructs a query for terms starting with <code>prefix</code>. */
public PrefixQuery(Term prefix) {
  // Pass the term up to MultiTermQuery; a local copy is also kept so
  // getPrefix() can return it directly.
  super(prefix);
  this.prefix = prefix;
}
/** Returns the prefix of this query. */
public Term getPrefix() { return prefix; }

/**
 * Returns the FilteredTermEnum that MultiTermQuery expands or filters
 * against: all index terms starting with this query's term.
 */
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
  return new PrefixTermEnum(reader, getTerm());
}
public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(prefix);
try {
String prefixText = prefix.text();
String prefixField = prefix.field();
do {
Term term = enumerator.term();
if (term != null &&
term.text().startsWith(prefixText) &&
term.field() == prefixField) // interned comparison
{
TermQuery tq = new TermQuery(term); // found a match
tq.setBoost(getBoost()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
//System.out.println("added " + term);
} else {
break;
}
} while (enumerator.next());
} finally {
enumerator.close();
}
return query;
/**
 * Two queries are equal when the other object is also a PrefixQuery and
 * the inherited MultiTermQuery state matches.
 */
public boolean equals(Object o) {
  return (o instanceof PrefixQuery) && super.equals(o);
}
/** Prints a user-readable version of this query. */
@ -75,18 +60,4 @@ public class PrefixQuery extends Query {
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
/** Returns true iff <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (!(o instanceof PrefixQuery))
return false;
PrefixQuery other = (PrefixQuery)o;
return (this.getBoost() == other.getBoost())
&& this.prefix.equals(other.prefix);
}
/** Returns a hash code value for this object.*/
public int hashCode() {
return Float.floatToIntBits(getBoost()) ^ prefix.hashCode() ^ 0x6634D93C;
}
}

View File

@ -0,0 +1,42 @@
package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
* Subclass of FilteredTermEnum for enumerating all terms that match the
* specified prefix filter term.
* <p>
* Term enumerations are always ordered by Term.compareTo(). Each term in
* the enumeration is greater than all that precede it.
*
*/
public class PrefixTermEnum extends FilteredTermEnum {

  // The prefix all enumerated terms must share.
  private final Term prefix;
  // Set once a term falls outside the prefix; terms are sorted, so no
  // later term can match again.
  private boolean endEnum = false;

  public PrefixTermEnum(IndexReader reader, Term prefix) throws IOException {
    this.prefix = prefix;
    // Position the underlying enumeration at the first candidate term.
    final Term start = new Term(prefix.field(), prefix.text());
    setEnum(reader.terms(start));
  }

  /** All matching terms are considered equally close to the prefix. */
  public float difference() {
    return 1.0f;
  }

  protected boolean endEnum() {
    return endEnum;
  }

  protected boolean termCompare(Term term) {
    final boolean sameField = term.field() == prefix.field(); // fields are interned
    if (sameField && term.text().startsWith(prefix.text())) {
      return true;
    }
    endEnum = true; // first miss terminates the (sorted) enumeration
    return false;
  }
}

View File

@ -18,10 +18,6 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
@ -44,6 +40,7 @@ public class RangeFilter extends Filter {
private boolean includeLower;
private boolean includeUpper;
private Collator collator;
private RangeQuery rangeQuery;
/**
* @param fieldName The field this range applies to
@ -75,6 +72,7 @@ public class RangeFilter extends Filter {
throw new IllegalArgumentException
("The upper bound must be non-null to be inclusive");
}
initRangeQuery();
}
/**
@ -99,6 +97,11 @@ public class RangeFilter extends Filter {
Collator collator) {
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
this.collator = collator;
initRangeQuery();
}
private void initRangeQuery() {
rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator);
}
/**
@ -124,81 +127,7 @@ public class RangeFilter extends Filter {
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
/**
 * Returns a BitSet with true for documents which should be permitted in
 * search results, and false for those that should not.
 * <p>
 * Delegates to the filter of the equivalent {@code rangeQuery} built in
 * {@code initRangeQuery()}.  The previous inline term-walking
 * implementation was left fused above a second return statement, making
 * it unreachable dead code; it is removed here.
 * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
 */
public BitSet bits(IndexReader reader) throws IOException {
  return rangeQuery.getFilter().bits(reader);
}
/**
@ -206,84 +135,7 @@ public class RangeFilter extends Filter {
* permitted in search results.
*/
/**
 * Returns a DocIdSet with documents that should be permitted in search
 * results.
 * <p>
 * Delegates to the filter of the equivalent {@code rangeQuery} built in
 * {@code initRangeQuery()}.  The previous inline OpenBitSet
 * implementation was left fused above a second return statement, making
 * it unreachable dead code; it is removed here.
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  return rangeQuery.getFilter().getDocIdSet(reader);
}
public String toString() {

View File

@ -21,234 +21,213 @@ import java.io.IOException;
import java.text.Collator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
/**
* A Query that matches documents within an exclusive range. A RangeQuery
* is built by QueryParser for input like <code>[010 TO 120]</code> but only if the QueryParser has
* the useOldRangeQuery property set to true. The QueryParser default behaviour is to use
* the newer ConstantScoreRangeQuery class. This is generally preferable because:
* the newer ConstantScore mode. This is generally preferable because:
* <ul>
* <li>It is faster than RangeQuery</li>
* <li>Unlike RangeQuery, it does not cause a BooleanQuery.TooManyClauses exception if the range of values is large</li>
* <li>Unlike RangeQuery it does not influence scoring based on the scarcity of individual terms that may match</li>
* <li>It is faster than the standard RangeQuery mode</li>
* <li>Unlike the RangeQuery mode, it does not cause a BooleanQuery.TooManyClauses exception if the range of values is large</li>
* <li>Unlike the RangeQuery mode, it does not influence scoring based on the scarcity of individual terms that may match</li>
* </ul>
*
*
* @see ConstantScoreRangeQuery
*
*
* @version $Id$
*/
public class RangeQuery extends Query
{
private Term lowerTerm;
private Term upperTerm;
private boolean inclusive;
private Collator collator;
/** Constructs a query selecting all terms greater than
* <code>lowerTerm</code> but less than <code>upperTerm</code>.
* There must be at least one term and either term may be null,
* in which case there is no bound on that side, but if there are
* two terms, both terms <b>must</b> be for the same field.
*
* @param lowerTerm The Term at the lower end of the range
* @param upperTerm The Term at the upper end of the range
* @param inclusive If true, both <code>lowerTerm</code> and
* <code>upperTerm</code> will themselves be included in the range.
*/
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive)
{
if (lowerTerm == null && upperTerm == null)
{
throw new IllegalArgumentException("At least one term must be non-null");
}
if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field())
{
throw new IllegalArgumentException("Both terms must be for the same field");
}
// if we have a lowerTerm, start there. otherwise, start at beginning
if (lowerTerm != null) {
this.lowerTerm = lowerTerm;
}
else {
this.lowerTerm = new Term(upperTerm.field());
}
this.upperTerm = upperTerm;
this.inclusive = inclusive;
}
/** Constructs a query selecting all terms greater than
* <code>lowerTerm</code> but less than <code>upperTerm</code>.
* There must be at least one term and either term may be null,
* in which case there is no bound on that side, but if there are
* two terms, both terms <b>must</b> be for the same field.
* <p>
* If <code>collator</code> is not null, it will be used to decide whether
* index terms are within the given range, rather than using the Unicode code
* point order in which index terms are stored.
* <p>
* <strong>WARNING:</strong> Using this constructor and supplying a non-null
* value in the <code>collator</code> parameter will cause every single
* index Term in the Field referenced by lowerTerm and/or upperTerm to be
* examined. Depending on the number of index Terms in this Field, the
* operation could be very slow.
*
* @param lowerTerm The Term at the lower end of the range
* @param upperTerm The Term at the upper end of the range
* @param inclusive If true, both <code>lowerTerm</code> and
* <code>upperTerm</code> will themselves be included in the range.
* @param collator The collator to use to collate index Terms, to determine
* their membership in the range bounded by <code>lowerTerm</code> and
* <code>upperTerm</code>.
*/
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
Collator collator)
{
this(lowerTerm, upperTerm, inclusive);
this.collator = collator;
}
public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery query = new BooleanQuery(true);
String testField = getField();
if (collator != null) {
TermEnum enumerator = reader.terms(new Term(testField, ""));
String lowerTermText = lowerTerm != null ? lowerTerm.text() : null;
String upperTermText = upperTerm != null ? upperTerm.text() : null;
try {
do {
Term term = enumerator.term();
if (term != null && term.field() == testField) { // interned comparison
if ((lowerTermText == null
|| (inclusive ? collator.compare(term.text(), lowerTermText) >= 0
: collator.compare(term.text(), lowerTermText) > 0))
&& (upperTermText == null
|| (inclusive ? collator.compare(term.text(), upperTermText) <= 0
: collator.compare(term.text(), upperTermText) < 0))) {
addTermToQuery(term, query);
}
}
}
while (enumerator.next());
}
finally {
enumerator.close();
}
}
else { // collator is null
TermEnum enumerator = reader.terms(lowerTerm);
try {
boolean checkLower = false;
if (!inclusive) // make adjustments to set to exclusive
checkLower = true;
do {
Term term = enumerator.term();
if (term != null && term.field() == testField) { // interned comparison
if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.text().compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) || (!inclusive && compare == 0))
break;
}
addTermToQuery(term, query); // Found a match
}
}
else {
break;
}
}
while (enumerator.next());
}
finally {
enumerator.close();
}
}
return query;
}
private void addTermToQuery(Term term, BooleanQuery query) {
TermQuery tq = new TermQuery(term);
tq.setBoost(getBoost()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
/** Returns the field name for this query */
public String getField() {
return (lowerTerm != null ? lowerTerm.field() : upperTerm.field());
}
/** Returns the lower term of this range query */
public Term getLowerTerm() { return lowerTerm; }
/** Returns the upper term of this range query */
public Term getUpperTerm() { return upperTerm; }
/** Returns <code>true</code> if the range query is inclusive */
public boolean isInclusive() { return inclusive; }
/** Returns the collator used to determine range inclusion, if any. */
public Collator getCollator() { return collator; }
public class RangeQuery extends MultiTermQuery {
private Term lowerTerm;
private Term upperTerm;
private Collator collator;
private String field;
private boolean includeLower;
private boolean includeUpper;
/** Prints a user-readable version of this query. */
public String toString(String field)
{
StringBuffer buffer = new StringBuffer();
if (!getField().equals(field))
{
buffer.append(getField());
buffer.append(":");
}
buffer.append(inclusive ? "[" : "{");
buffer.append(lowerTerm != null ? lowerTerm.text() : "null");
buffer.append(" TO ");
buffer.append(upperTerm != null ? upperTerm.text() : "null");
buffer.append(inclusive ? "]" : "}");
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
}
/**
* Constructs a query selecting all terms greater/equal than <code>lowerTerm</code>
* but less/equal than <code>upperTerm</code>.
*
* <p>
* If an endpoint is null, it is said
* to be "open". Either or both endpoints may be open. Open endpoints may not
* be exclusive (you can't select all but the first or last term without
* explicitly specifying the term to exclude.)
*
* @param field The field that holds both lower and upper terms.
* @param lowerTerm
* The term text at the lower end of the range
* @param upperTerm
* The term text at the upper end of the range
* @param includeLower
* If true, the <code>lowerTerm</code> is
* included in the range.
* @param includeUpper
* If true, the <code>upperTerm</code> is
* included in the range.
*/
public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) {
  // No collator: range membership uses Unicode code point ordering.
  init(new Term(field, lowerTerm), new Term(field, upperTerm), includeLower, includeUpper, null);
}
/** Returns true iff <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof RangeQuery)) return false;
/** Constructs a query selecting all terms greater/equal than
* <code>lowerTerm</code> but less/equal than <code>upperTerm</code>.
* <p>
* If an endpoint is null, it is said
* to be "open". Either or both endpoints may be open. Open endpoints may not
* be exclusive (you can't select all but the first or last term without
* explicitly specifying the term to exclude.)
* <p>
* If <code>collator</code> is not null, it will be used to decide whether
* index terms are within the given range, rather than using the Unicode code
* point order in which index terms are stored.
* <p>
* <strong>WARNING:</strong> Using this constructor and supplying a non-null
* value in the <code>collator</code> parameter will cause every single
* index Term in the Field referenced by lowerTerm and/or upperTerm to be
* examined. Depending on the number of index Terms in this Field, the
* operation could be very slow.
*
* @param lowerTerm The Term text at the lower end of the range
* @param upperTerm The Term text at the upper end of the range
* @param includeLower
* If true, the <code>lowerTerm</code> is
* included in the range.
* @param includeUpper
* If true, the <code>upperTerm</code> is
* included in the range.
* @param collator The collator to use to collate index Terms, to determine
* their membership in the range bounded by <code>lowerTerm</code> and
* <code>upperTerm</code>.
*/
public RangeQuery(String field, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper,
                  Collator collator) {
  // Collator-based membership: every term in the field is examined (slow).
  init(new Term(field, lowerTerm), new Term(field,upperTerm), includeLower, includeUpper, collator);
}
final RangeQuery other = (RangeQuery) o;
if (this.getBoost() != other.getBoost()) return false;
if (this.inclusive != other.inclusive) return false;
if (this.collator != null && ! this.collator.equals(other.collator))
return false;
/** @deprecated Please use {@link #RangeQuery(String,
 *  String, String, boolean, boolean, Collator)} instead */
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
                  Collator collator) {
  // Legacy single "inclusive" flag applies to both endpoints.
  init(lowerTerm, upperTerm, inclusive, inclusive, collator);
}

/** @deprecated Please use {@link #RangeQuery(String,
 *  String, String, boolean, boolean)} instead */
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive) {
  // Legacy single "inclusive" flag applies to both endpoints.
  init(lowerTerm, upperTerm, inclusive, inclusive, null);
}
// one of lowerTerm and upperTerm can be null
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
return true;
}
private void init(Term lowerTerm, Term upperTerm, boolean includeLower, boolean includeUpper, Collator collator) {
if (lowerTerm == null && upperTerm == null)
throw new IllegalArgumentException("At least one term must be non-null");
if (lowerTerm != null && upperTerm != null && lowerTerm.field() != upperTerm.field())
throw new IllegalArgumentException("Both terms must be for the same field");
/** Returns a hash code value for this object.*/
public int hashCode() {
int h = Float.floatToIntBits(getBoost());
h ^= lowerTerm != null ? lowerTerm.hashCode() : 0;
// reversible mix to make lower and upper position dependent and
// to prevent them from cancelling out.
h ^= (h << 25) | (h >>> 8);
h ^= upperTerm != null ? upperTerm.hashCode() : 0;
h ^= this.inclusive ? 0x2742E74A : 0;
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
if (lowerTerm == null)
this.field = upperTerm.field();
else
this.field = lowerTerm.field();
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
this.collator = collator;
}
/** Returns the field name for this query */
public String getField() {
  return field;
}

/** Returns the lower term of this range query; null for an open lower end.
 * @deprecated Use {@link #getLowerTermText} instead. */
public Term getLowerTerm() { return lowerTerm; }

/** Returns the upper term of this range query; null for an open upper end.
 * @deprecated Use {@link #getUpperTermText} instead. */
public Term getUpperTerm() { return upperTerm; }

/** Returns the lower value of this range query, or null if the lower end is open */
public String getLowerTermText() { return lowerTerm == null ? null : lowerTerm.text(); }

/** Returns the upper value of this range query, or null if the upper end is open */
public String getUpperTermText() { return upperTerm == null ? null : upperTerm.text(); }

/** Returns <code>true</code> if the lower endpoint is inclusive */
public boolean includesLower() { return includeLower; }

/** Returns <code>true</code> if the upper endpoint is inclusive */
public boolean includesUpper() { return includeUpper; }

/** Returns <code>true</code> if the range query is inclusive at both ends
 * @deprecated Use {@link #includesLower}, {@link #includesUpper} instead.
 */
public boolean isInclusive() { return includeUpper && includeLower; }

/** Returns the collator used to determine range inclusion, if any. */
public Collator getCollator() { return collator; }
/**
 * Returns the FilteredTermEnum enumerating all index terms inside this
 * range.
 * <p>
 * Either endpoint may be null (open-ended range — {@code init} only
 * requires one endpoint), so calling {@code .text()} unconditionally
 * would throw NullPointerException; pass null through instead, which
 * {@code RangeTermEnum} explicitly normalizes.
 */
protected FilteredTermEnum getEnum(IndexReader reader) throws IOException {
  return new RangeTermEnum(reader, collator, getField(),
      lowerTerm == null ? null : lowerTerm.text(),
      upperTerm == null ? null : upperTerm.text(),
      includeLower, includeUpper);
}
/** Prints a user-readable version of this query. */
public String toString(String field) {
  final StringBuffer result = new StringBuffer();
  // Qualify with the field name only when it differs from the caller's.
  if (!getField().equals(field)) {
    result.append(getField()).append(":");
  }
  result.append(includeLower ? '[' : '{')
        .append(lowerTerm == null ? "*" : lowerTerm.text())
        .append(" TO ")
        .append(upperTerm == null ? "*" : upperTerm.text())
        .append(includeUpper ? ']' : '}');
  if (getBoost() != 1.0f) {
    result.append("^").append(Float.toString(getBoost()));
  }
  return result.toString();
}
/** Returns true iff <code>o</code> is equal to this. */
public boolean equals(Object o) {
  if (this == o) return true;
  if (!(o instanceof RangeQuery)) return false;
  RangeQuery other = (RangeQuery) o;

  // Field names are interned by Term, so identity comparison suffices.
  // NOTE(review): if this.collator is null but other.collator is not,
  // the collators are never compared, making equals asymmetric in that
  // case — confirm this is intended.
  if (this.field != other.field // interned comparison
      || this.includeLower != other.includeLower
      || this.includeUpper != other.includeUpper
      || (this.collator != null && ! this.collator.equals(other.collator))
     ) { return false; }

  // Endpoints compare by text; either side may be null (open range).
  String lowerVal = this.lowerTerm == null ? null : lowerTerm.text();
  String upperVal = this.upperTerm == null ? null : upperTerm.text();
  String olowerText = other.lowerTerm == null ? null : other.lowerTerm.text();
  String oupperText = other.upperTerm == null ? null : other.upperTerm.text();
  if (lowerVal != null ? !lowerVal.equals(olowerText) : olowerText != null) return false;
  if (upperVal != null ? !upperVal.equals(oupperText) : oupperText != null) return false;

  return this.getBoost() == other.getBoost();
}
/** Returns a hash code value for this object, consistent with {@link #equals}. */
public int hashCode() {
  int h = Float.floatToIntBits(getBoost()) ^ field.hashCode();
  String lowerVal = this.lowerTerm == null ? null : lowerTerm.text();
  String upperVal = this.upperTerm == null ? null : upperTerm.text();
  // hashCode of "" is 0, so don't use that for null...
  h ^= lowerVal != null ? lowerVal.hashCode() : 0x965a965a;
  // don't just XOR upperVal with out mixing either it or h, as it will cancel
  // out lowerVal if they are equal.
  h ^= (h << 17) | (h >>> 16);  // a reversible (one to one) 32 bit mapping mix
  h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
  // Distinct constants per flag so [a TO b}, {a TO b], etc. hash differently.
  h ^= (includeLower ? 0x665599aa : 0)
     ^ (includeUpper ? 0x99aa5566 : 0);
  h ^= collator != null ? collator.hashCode() : 0;
  return h;
}
}

View File

@ -0,0 +1,131 @@
package org.apache.lucene.search;
import java.io.IOException;
import java.text.Collator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
* Subclass of FilteredTermEnum for enumerating all terms that match the
* specified range parameters.
* <p>
* Term enumerations are always ordered by Term.compareTo(). Each term in
* the enumeration is greater than all that precede it.
*/
public class RangeTermEnum extends FilteredTermEnum {

  // Optional collator; when null, Unicode code point ordering is used.
  private Collator collator = null;
  // Set once a term falls past the upper bound or leaves the field.
  private boolean endEnum = false;
  // Interned field name; compared by identity in termCompare.
  private String field;
  private String upperTermText;
  private String lowerTermText;
  private boolean includeLower;
  private boolean includeUpper;

  /**
   * Enumerates all terms greater/equal than <code>lowerTerm</code>
   * but less/equal than <code>upperTerm</code>.
   *
   * If an endpoint is null, it is said to be "open". Either or both
   * endpoints may be open.  Open endpoints may not be exclusive
   * (you can't select all but the first or last term without
   * explicitly specifying the term to exclude.)
   *
   * @param reader
   * @param collator
   *          The collator to use to collate index Terms, to determine their
   *          membership in the range bounded by <code>lowerTerm</code> and
   *          <code>upperTerm</code>.
   * @param field
   *          An interned field that holds both lower and upper terms.
   * @param lowerTermText
   *          The term text at the lower end of the range
   * @param upperTermText
   *          The term text at the upper end of the range
   * @param includeLower
   *          If true, the <code>lowerTerm</code> is included in the range.
   * @param includeUpper
   *          If true, the <code>upperTerm</code> is included in the range.
   *
   * @throws IOException
   */
  public RangeTermEnum(IndexReader reader, Collator collator, String field,
      String lowerTermText, String upperTermText, boolean includeLower, boolean includeUpper) throws IOException {
    this.collator = collator;
    this.upperTermText = upperTermText;
    this.lowerTermText = lowerTermText;
    this.includeLower = includeLower;
    this.includeUpper = includeUpper;
    this.field = field;

    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (this.lowerTermText == null) {
      this.lowerTermText = "";
      this.includeLower = true;
    }

    if (this.upperTermText == null) {
      this.includeUpper = true;
    }

    // Seek to the first candidate: the term at (or after) the lower bound.
    setEnum(reader.terms(new Term(this.field, this.lowerTermText)));
  }

  /** All terms inside the range are considered equally close. */
  public float difference() {
    return 1.0f;
  }

  protected boolean endEnum() {
    return endEnum;
  }

  protected boolean termCompare(Term term) {
    if (collator == null) {
      // Use Unicode code point ordering
      boolean checkLower = false;
      if (!includeLower) // make adjustments to set to exclusive
        checkLower = true;
      if (term != null && term.field() == field) { // interned comparison
        if (!checkLower || null==lowerTermText || term.text().compareTo(lowerTermText) > 0) {
          checkLower = false;
          if (upperTermText != null) {
            int compare = upperTermText.compareTo(term.text());
            /*
             * if beyond the upper term, or is exclusive and this is equal to
             * the upper term, break out
             */
            if ((compare < 0) ||
                (!includeUpper && compare==0)) {
              endEnum = true;
              return false;
            }
          }
          return true;
        }
      } else {
        // break: ran off the end of this field's (sorted) terms.
        endEnum = true;
        return false;
      }
      // Term equals the exclusive lower bound: skip it but keep enumerating.
      return false;
    } else {
      // Collator mode: ordering is not the index order, so every term in
      // the field must be tested individually; only leaving the field
      // ends the enumeration.
      if (term != null && term.field() == field) { // interned comparison
        if ((lowerTermText == null
            || (includeLower
                ? collator.compare(term.text(), lowerTermText) >= 0
                : collator.compare(term.text(), lowerTermText) > 0))
            && (upperTermText == null
                || (includeUpper
                    ? collator.compare(term.text(), upperTermText) <= 0
                    : collator.compare(term.text(), upperTermText) < 0))) {
          return true;
        }
        return false;
      }
      endEnum = true;
      return false;
    }
  }
}

View File

@ -48,12 +48,11 @@ public class WildcardQuery extends MultiTermQuery {
return false;
}
/**
 * Rewrites to a plain TermQuery when the pattern contains no wildcard
 * characters; otherwise defers to MultiTermQuery's rewrite.
 * <p>
 * The original span carried two fused conditionals testing the same
 * flag (a leading {@code if (this.termContainsWildcard) return
 * super.rewrite(reader);} made the trailing else-branch dead); the
 * redundancy is collapsed here without changing behavior.
 */
public Query rewrite(IndexReader reader) throws IOException {
  if (!termContainsWildcard)
    return new TermQuery(getTerm());
  else
    return super.rewrite(reader);
}
}

View File

@ -40,9 +40,7 @@ public class WildcardTermEnum extends FilteredTermEnum {
boolean endEnum = false;
/**
* Creates a new <code>WildcardTermEnum</code>. Passing in a
* {@link org.apache.lucene.index.Term Term} that does not contain a
* <code>WILDCARD_CHAR</code> will cause an exception to be thrown.
* Creates a new <code>WildcardTermEnum</code>.
* <p>
* After calling the constructor the enumeration is already pointing to the first
* valid term if such a term exists.
@ -62,8 +60,12 @@ public class WildcardTermEnum extends FilteredTermEnum {
else if (cidx >= 0) {
idx = Math.min(idx, cidx);
}
if (idx != -1) {
pre = searchTerm.text().substring(0,idx);
} else {
pre = "";
}
pre = searchTerm.text().substring(0,idx);
preLen = pre.length();
text = text.substring(preLen);
setEnum(reader.terms(new Term(searchTerm.field(), pre)));

View File

@ -43,7 +43,6 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -415,11 +414,11 @@ public class TestQueryParser extends LuceneTestCase {
public void testRange() throws Exception {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
assertTrue(getQuery("[ a TO z]", null) instanceof ConstantScoreRangeQuery);
assertTrue(((RangeQuery)getQuery("[ a TO z]", null)).getConstantScoreRewrite());
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
qp.setUseOldRangeQuery(true);
assertTrue(qp.parse("[ a TO z]") instanceof RangeQuery);
qp.setConstantScoreRewrite(false);
assertFalse(((RangeQuery)qp.parse("[ a TO z]")).getConstantScoreRewrite());
assertQueryEquals("[ a TO z ]", null, "[a TO z]");
assertQueryEquals("{ a TO z}", null, "{a TO z}");
@ -458,7 +457,7 @@ public class TestQueryParser extends LuceneTestCase {
// supported).
// Test ConstantScoreRangeQuery
qp.setUseOldRangeQuery(false);
qp.setConstantScoreRewrite(true);
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
@ -466,7 +465,7 @@ public class TestQueryParser extends LuceneTestCase {
assertEquals("The index Term should be included.", 1, result.length);
// Test RangeQuery
qp.setUseOldRangeQuery(true);
qp.setConstantScoreRewrite(false);
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
@ -775,7 +774,7 @@ public class TestQueryParser extends LuceneTestCase {
public void assertParseException(String queryString) throws Exception {
try {
Query q = getQuery(queryString, null);
getQuery(queryString, null);
} catch (ParseException expected) {
return;
}

View File

@ -1,550 +0,0 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.text.Collator;
import java.util.Locale;
import junit.framework.Assert;
public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 1e-6f;
public TestConstantScoreRangeQuery(String name) {
  super(name);
}

public TestConstantScoreRangeQuery() {
  super();
}

// Index built by setUp(); searched by the tests below.
Directory small;

/** Float comparison within {@link #SCORE_COMP_THRESH} tolerance. */
void assertEquals(String m, float e, float a) {
  assertEquals(m, e, a, SCORE_COMP_THRESH);
}

// Delegates int comparison straight to JUnit's Assert — presumably so
// int arguments are not widened into the tolerant float overload above;
// TODO confirm.
static public void assertEquals(String m, int e, int a) {
  Assert.assertEquals(m, e, a);
}
public void setUp() throws Exception {
super.setUp();
String[] data = new String [] {
"A 1 2 3 4 5 6",
"Z 4 5 6",
null,
"B 2 4 5 6",
"Y 3 5 6",
null,
"C 3 6",
"X 4 5 6"
};
small = new RAMDirectory();
IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true,
IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < data.length; i++) {
Document doc = new Document();
doc.add(new Field("id", String.valueOf(i), Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("id",String.valueOf(i)));
doc.add(new Field("all", "all", Field.Store.YES, Field.Index.NOT_ANALYZED));//Field.Keyword("all","all"));
if (null != data[i]) {
doc.add(new Field("data", data[i], Field.Store.YES, Field.Index.ANALYZED));//Field.Text("data",data[i]));
}
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}
/** macro for readability */
public static Query csrq(String f, String l, String h,
boolean il, boolean ih) {
return new ConstantScoreRangeQuery(f,l,h,il,ih);
}
/** macro for readability */
public static Query csrq(String f, String l, String h,
boolean il, boolean ih, Collator c) {
return new ConstantScoreRangeQuery(f,l,h,il,ih,c);
}
public void testBasics() throws IOException {
QueryUtils.check(csrq("data","1","6",T,T));
QueryUtils.check(csrq("data","A","Z",T,T));
QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T));
}
public void testBasicsCollating() throws IOException {
Collator c = Collator.getInstance(Locale.ENGLISH);
QueryUtils.check(csrq("data","1","6",T,T,c));
QueryUtils.check(csrq("data","A","Z",T,T,c));
QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c));
}
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
ScoreDoc[] result;
// some hits match more terms then others, score should be the same
result = search.search(csrq("data","1","6",T,T), null, 1000).scoreDocs;
int numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
float score = result[0].score;
for (int i = 1; i < numHits; i++) {
assertEquals("score for " + i +" was not the same",
score, result[i].score);
}
}
public void testBoost() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
// test for correct application of query normalization
// must use a non score normalizing method for this.
Query q = csrq("data","1","6",T,T);
q.setBoost(100);
search.search(q,null, new HitCollector() {
public void collect(int doc, float score) {
assertEquals("score for doc " + doc +" was not correct",
1.0f, score);
}
});
//
// Ensure that boosting works to score one clause of a query higher
// than another.
//
Query q1 = csrq("data","A","A",T,T); // matches document #0
q1.setBoost(.1f);
Query q2 = csrq("data","Z","Z",T,T); // matches document #1
BooleanQuery bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs;
assertEquals(1, hits[0].doc);
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
q1 = csrq("data","A","A",T,T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data","Z","Z",T,T); // matches document #1
bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
hits = search.search(bq, null, 1000).scoreDocs;
assertEquals(0, hits[0].doc);
assertEquals(1, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
}
/**
 * Verifies that adding a constant-score range clause to a BooleanQuery
 * alongside a regular (term-expanding) RangeQuery does not change the
 * relative ordering of the hits the RangeQuery produces on its own.
 */
public void testBooleanOrderUnAffected() throws IOException {
  // NOTE: uses index build in *this* setUp
  IndexReader reader = IndexReader.open(small);
  IndexSearcher search = new IndexSearcher(reader);

  // first do a regular RangeQuery which uses term expansion so
  // docs with more terms in range get higher scores
  Query rq = new RangeQuery(new Term("data","1"),new Term("data","4"),T);

  ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs;
  int numHits = expected.length;

  // now do a boolean query which also contains a
  // ConstantScoreRangeQuery and make sure the order is the same
  BooleanQuery q = new BooleanQuery();
  q.add(rq, BooleanClause.Occur.MUST);
  q.add(csrq("data","1","6", T, T), BooleanClause.Occur.MUST);

  ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs;

  // fixed typo in the failure message ("numebr" -> "number")
  assertEquals("wrong number of hits", numHits, actual.length);
  for (int i = 0; i < numHits; i++) {
    assertEquals("mismatch in docid for hit#"+i,
                 expected[i].doc, actual[i].doc);
  }
}
/** Exercises bounded, half-open, unbounded and degenerate ranges over the "id" field. */
public void testRangeQueryId() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
// test id, bounded on both ends
result = search.search(csrq("id",minIP,maxIP,T,T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id",minIP,maxIP,T,F), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,T), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,F), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(csrq("id",medIP,maxIP,T,T), null, numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,T,T), null, numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(csrq("id",minIP,null,T,F), null, numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(csrq("id",null,maxIP,F,T), null, numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(csrq("id",minIP,null,F,F), null, numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(csrq("id",null,maxIP,F,F), null, numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(csrq("id",medIP,maxIP,T,F), null, numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,F,T), null, numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(csrq("id",minIP,minIP,F,F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("id",medIP,medIP,F,F), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(csrq("id",maxIP,maxIP,F,F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("id",minIP,minIP,T,T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("id",null,minIP,F,T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("id",maxIP,maxIP,T,T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("id",maxIP,null,T,F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(csrq("id",medIP,medIP,T,T), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
}
/** Same range coverage as testRangeQueryId, but with an English Collator supplied. */
public void testRangeQueryIdCollating() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
Collator c = Collator.getInstance(Locale.ENGLISH);
// test id, bounded on both ends
result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("min,min,F,F,c", 0, result.length);
result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("med,med,F,F,c", 0, result.length);
result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("max,max,F,F,c", 0, result.length);
result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("min,min,T,T,c", 1, result.length);
result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T,c", 1, result.length);
result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("max,max,T,T,c", 1, result.length);
result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T,c", 1, result.length);
result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("med,med,T,T,c", 1, result.length);
}
/** Exercises ranges over the random "rand" field of the signed index. */
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(signedIndex.minR);
String maxRP = pad(signedIndex.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
// test extremes, bounded on both ends
result = search.search(csrq("rand",minRP,maxRP,T,T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("rand",minRP,maxRP,T,F), null, numDocs).scoreDocs;
assertEquals("all but biggest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,T), null, numDocs).scoreDocs;
assertEquals("all but smallest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,F), null, numDocs).scoreDocs;
assertEquals("all but extremes", numDocs-2, result.length);
// unbounded
result = search.search(csrq("rand",minRP,null,T,F), null, numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(csrq("rand",null,maxRP,F,T), null, numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(csrq("rand",minRP,null,F,F), null, numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs-1, result.length);
result = search.search(csrq("rand",null,maxRP,F,F), null, numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs-1, result.length);
// very small sets
result = search.search(csrq("rand",minRP,minRP,F,F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("rand",maxRP,maxRP,F,F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("rand",minRP,minRP,T,T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("rand",null,minRP,F,T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("rand",maxRP,maxRP,T,T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("rand",maxRP,null,T,F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
}
/** Same "rand" field coverage, with an English Collator; uses the unsigned index. */
public void testRangeQueryRandCollating() throws IOException {
// NOTE: uses index build in *super* setUp
// using the unsigned index because collation seems to ignore hyphens
IndexReader reader = IndexReader.open(unsignedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(unsignedIndex.minR);
String maxRP = pad(unsignedIndex.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
Collator c = Collator.getInstance(Locale.ENGLISH);
// test extremes, bounded on both ends
result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs;
assertEquals("all but biggest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("all but smallest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("all but extremes", numDocs-2, result.length);
// unbounded
result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs-1, result.length);
result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs-1, result.length);
// very small sets
result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("min,min,F,F,c", 0, result.length);
result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("max,max,F,F,c", 0, result.length);
result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("min,min,T,T,c", 1, result.length);
result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T,c", 1, result.length);
result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("max,max,T,T,c", 1, result.length);
result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T,c", 1, result.length);
}
/** Collation-aware ranges: Farsi ordering differs from Unicode code-point order. */
public void testFarsi() throws Exception {
/* build an index */
RAMDirectory farsiIndex = new RAMDirectory();
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("content","\u0633\u0627\u0628",
Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("body", "body",
Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(farsiIndex);
IndexSearcher search = new IndexSearcher(reader);
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator c = Collator.getInstance(new Locale("ar"));
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a ConstantScoreRangeQuery
// with a Farsi Collator (or an Arabic one for the case when Farsi is
// not supported).
ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
search.close();
}
}

View File

@@ -0,0 +1,567 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.text.Collator;
import java.util.Locale;
import junit.framework.Assert;
public class TestMultiTermConstantScore extends BaseTestRangeFilter {
/** threshold for comparing floats */
public static final float SCORE_COMP_THRESH = 1e-6f;
public TestMultiTermConstantScore(String name) {
super(name);
}
public TestMultiTermConstantScore() {
super();
}
// small index populated in setUp(); shared by the scoring tests below
Directory small;
/** Float comparison that applies {@link #SCORE_COMP_THRESH} as the allowed delta. */
void assertEquals(String m, float e, float a) {
assertEquals(m, e, a, SCORE_COMP_THRESH);
}
/** Exact int comparison; keeps int asserts from resolving to the float overload above. */
static public void assertEquals(String m, int e, int a) {
Assert.assertEquals(m, e, a);
}
/**
 * Builds a small RAMDirectory index from the data strings below.
 * Null entries produce documents that have no "data" field at all.
 */
public void setUp() throws Exception {
super.setUp();
String[] data = new String[] { "A 1 2 3 4 5 6", "Z 4 5 6", null,
"B 2 4 5 6", "Y 3 5 6", null, "C 3 6",
"X 4 5 6" };
small = new RAMDirectory();
IndexWriter writer = new IndexWriter(small, new WhitespaceAnalyzer(), true,
IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < data.length; i++) {
Document doc = new Document();
doc.add(new Field("id", String.valueOf(i), Field.Store.YES,
Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i)));
doc
.add(new Field("all", "all", Field.Store.YES,
Field.Index.NOT_ANALYZED));// Field.Keyword("all","all"));
if (null != data[i]) {
doc.add(new Field("data", data[i], Field.Store.YES,
Field.Index.ANALYZED));// Field.Text("data",data[i]));
}
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}
/** macro for readability: constant-score RangeQuery */
public static Query csrq(String f, String l, String h, boolean il, boolean ih) {
RangeQuery query = new RangeQuery(f, l, h, il, ih);
query.setConstantScoreRewrite(true);
return query;
}
/** macro for readability: constant-score RangeQuery with a Collator */
public static Query csrq(String f, String l, String h, boolean il,
boolean ih, Collator c) {
RangeQuery query = new RangeQuery(f, l, h, il, ih, c);
query.setConstantScoreRewrite(true);
return query;
}
/** macro for readability: constant-score PrefixQuery */
public static Query cspq(Term prefix) {
PrefixQuery query = new PrefixQuery(prefix);
query.setConstantScoreRewrite(true);
return query;
}
/** macro for readability: constant-score WildcardQuery */
public static Query cswcq(Term wild) {
WildcardQuery query = new WildcardQuery(wild);
query.setConstantScoreRewrite(true);
return query;
}
// sanity checks on equals/hashCode for range, prefix and wildcard variants
public void testBasics() throws IOException {
QueryUtils.check(csrq("data", "1", "6", T, T));
QueryUtils.check(csrq("data", "A", "Z", T, T));
QueryUtils.checkUnequal(csrq("data", "1", "6", T, T), csrq("data", "A",
"Z", T, T));
QueryUtils.check(cspq(new Term("data", "p*u?")));
QueryUtils.checkUnequal(cspq(new Term("data", "pre*")), cspq(new Term(
"data", "pres*")));
QueryUtils.check(cswcq(new Term("data", "p")));
QueryUtils.checkUnequal(cswcq(new Term("data", "pre*n?t")), cswcq(new Term(
"data", "pr*t?j")));
}
// same equals/hashCode checks for the range variant, with a Collator
public void testBasicsRngCollating() throws IOException {
Collator c = Collator.getInstance(Locale.ENGLISH);
QueryUtils.check(csrq("data", "1", "6", T, T, c));
QueryUtils.check(csrq("data", "A", "Z", T, T, c));
QueryUtils.checkUnequal(csrq("data", "1", "6", T, T, c), csrq("data", "A",
"Z", T, T, c));
}
// every hit of a constant-score query must carry the same score
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
ScoreDoc[] result;
// some hits match more terms than others, score should be the same
result = search.search(csrq("data", "1", "6", T, T), null, 1000).scoreDocs;
int numHits = result.length;
assertEquals("wrong number of results", 6, numHits);
float score = result[0].score;
for (int i = 1; i < numHits; i++) {
assertEquals("score for " + i + " was not the same", score,
result[i].score);
}
}
// boosts must be honored both for normalization and for clause ordering
public void testBoost() throws IOException {
// NOTE: uses index build in *this* setUp
IndexReader reader = IndexReader.open(small);
IndexSearcher search = new IndexSearcher(reader);
// test for correct application of query normalization
// must use a non score normalizing method for this.
Query q = csrq("data", "1", "6", T, T);
q.setBoost(100);
search.search(q, null, new HitCollector() {
public void collect(int doc, float score) {
assertEquals("score for doc " + doc + " was not correct", 1.0f, score);
}
});
//
// Ensure that boosting works to score one clause of a query higher
// than another.
//
Query q1 = csrq("data", "A", "A", T, T); // matches document #0
q1.setBoost(.1f);
Query q2 = csrq("data", "Z", "Z", T, T); // matches document #1
BooleanQuery bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
ScoreDoc[] hits = search.search(bq, null, 1000).scoreDocs;
assertEquals(1, hits[0].doc);
assertEquals(0, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
q1 = csrq("data", "A", "A", T, T); // matches document #0
q1.setBoost(10f);
q2 = csrq("data", "Z", "Z", T, T); // matches document #1
bq = new BooleanQuery(true);
bq.add(q1, BooleanClause.Occur.SHOULD);
bq.add(q2, BooleanClause.Occur.SHOULD);
hits = search.search(bq, null, 1000).scoreDocs;
assertEquals(0, hits[0].doc);
assertEquals(1, hits[1].doc);
assertTrue(hits[0].score > hits[1].score);
}
/**
 * Verifies that adding a constant-score range clause to a BooleanQuery
 * alongside a regular (term-expanding) RangeQuery does not change the
 * relative ordering of the hits the RangeQuery produces on its own.
 */
public void testBooleanOrderUnAffected() throws IOException {
  // NOTE: uses index build in *this* setUp
  IndexReader reader = IndexReader.open(small);
  IndexSearcher search = new IndexSearcher(reader);

  // first do a regular RangeQuery which uses term expansion so
  // docs with more terms in range get higher scores
  Query rq = new RangeQuery(new Term("data", "1"), new Term("data", "4"), T);

  ScoreDoc[] expected = search.search(rq, null, 1000).scoreDocs;
  int numHits = expected.length;

  // now do a boolean query which also contains a
  // ConstantScoreRangeQuery and make sure the order is the same
  BooleanQuery q = new BooleanQuery();
  q.add(rq, BooleanClause.Occur.MUST);
  q.add(csrq("data", "1", "6", T, T), BooleanClause.Occur.MUST);

  ScoreDoc[] actual = search.search(q, null, 1000).scoreDocs;

  // fixed typo in the failure message ("numebr" -> "number")
  assertEquals("wrong number of hits", numHits, actual.length);
  for (int i = 0; i < numHits; i++) {
    assertEquals("mismatch in docid for hit#" + i, expected[i].doc,
        actual[i].doc);
  }
}
/** Exercises bounded, half-open, unbounded and degenerate ranges over the "id" field. */
public void testRangeQueryId() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1 + maxId - minId);
ScoreDoc[] result;
// test id, bounded on both ends
result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
// unbounded id
result = search.search(csrq("id", minIP, null, T, F), null, numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(csrq("id", null, maxIP, F, T), null, numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(csrq("id", minIP, null, F, F), null, numDocs).scoreDocs;
assertEquals("not min, but up", numDocs - 1, result.length);
result = search.search(csrq("id", null, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("not max, but down", numDocs - 1, result.length);
result = search.search(csrq("id", medIP, maxIP, T, F), null, numDocs).scoreDocs;
assertEquals("med and up, not max", maxId - medId, result.length);
result = search.search(csrq("id", minIP, medIP, F, T), null, numDocs).scoreDocs;
assertEquals("not min, up to med", medId - minId, result.length);
// very small sets
result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs;
assertEquals("med,med,F,F", 0, result.length);
result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs;
assertEquals("med,med,T,T", 1, result.length);
}
/** Same range coverage as testRangeQueryId, but with an English Collator supplied. */
public void testRangeQueryIdCollating() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1 + maxId - minId);
ScoreDoc[] result;
Collator c = Collator.getInstance(Locale.ENGLISH);
// test id, bounded on both ends
result = search.search(csrq("id", minIP, maxIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id", minIP, maxIP, T, F, c), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, T, c), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs - 1, result.length);
result = search.search(csrq("id", minIP, maxIP, F, F, c), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs - 2, result.length);
result = search.search(csrq("id", medIP, maxIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("med and up", 1 + maxId - medId, result.length);
result = search.search(csrq("id", minIP, medIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("up to med", 1 + medId - minId, result.length);
// unbounded id
result = search.search(csrq("id", minIP, null, T, F, c), null, numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(csrq("id", null, maxIP, F, T, c), null, numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(csrq("id", minIP, null, F, F, c), null, numDocs).scoreDocs;
assertEquals("not min, but up", numDocs - 1, result.length);
result = search.search(csrq("id", null, maxIP, F, F, c), null, numDocs).scoreDocs;
assertEquals("not max, but down", numDocs - 1, result.length);
result = search.search(csrq("id", medIP, maxIP, T, F, c), null, numDocs).scoreDocs;
assertEquals("med and up, not max", maxId - medId, result.length);
result = search.search(csrq("id", minIP, medIP, F, T, c), null, numDocs).scoreDocs;
assertEquals("not min, up to med", medId - minId, result.length);
// very small sets
result = search.search(csrq("id", minIP, minIP, F, F, c), null, numDocs).scoreDocs;
assertEquals("min,min,F,F,c", 0, result.length);
result = search.search(csrq("id", medIP, medIP, F, F, c), null, numDocs).scoreDocs;
assertEquals("med,med,F,F,c", 0, result.length);
result = search.search(csrq("id", maxIP, maxIP, F, F, c), null, numDocs).scoreDocs;
assertEquals("max,max,F,F,c", 0, result.length);
result = search.search(csrq("id", minIP, minIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("min,min,T,T,c", 1, result.length);
result = search.search(csrq("id", null, minIP, F, T, c), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T,c", 1, result.length);
result = search.search(csrq("id", maxIP, maxIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("max,max,T,T,c", 1, result.length);
result = search.search(csrq("id", maxIP, null, T, F, c), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T,c", 1, result.length);
result = search.search(csrq("id", medIP, medIP, T, T, c), null, numDocs).scoreDocs;
assertEquals("med,med,T,T,c", 1, result.length);
}
/** Exercises ranges over the random "rand" field of the signed index. */
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(signedIndex.minR);
String maxRP = pad(signedIndex.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1 + maxId - minId);
ScoreDoc[] result;
// test extremes, bounded on both ends
result = search.search(csrq("rand", minRP, maxRP, T, T), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("rand", minRP, maxRP, T, F), null, numDocs).scoreDocs;
assertEquals("all but biggest", numDocs - 1, result.length);
result = search.search(csrq("rand", minRP, maxRP, F, T), null, numDocs).scoreDocs;
assertEquals("all but smallest", numDocs - 1, result.length);
result = search.search(csrq("rand", minRP, maxRP, F, F), null, numDocs).scoreDocs;
assertEquals("all but extremes", numDocs - 2, result.length);
// unbounded
result = search.search(csrq("rand", minRP, null, T, F), null, numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(csrq("rand", null, maxRP, F, T), null, numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(csrq("rand", minRP, null, F, F), null, numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs - 1, result.length);
result = search.search(csrq("rand", null, maxRP, F, F), null, numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs - 1, result.length);
// very small sets
result = search.search(csrq("rand", minRP, minRP, F, F), null, numDocs).scoreDocs;
assertEquals("min,min,F,F", 0, result.length);
result = search.search(csrq("rand", maxRP, maxRP, F, F), null, numDocs).scoreDocs;
assertEquals("max,max,F,F", 0, result.length);
result = search.search(csrq("rand", minRP, minRP, T, T), null, numDocs).scoreDocs;
assertEquals("min,min,T,T", 1, result.length);
result = search.search(csrq("rand", null, minRP, F, T), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T", 1, result.length);
result = search.search(csrq("rand", maxRP, maxRP, T, T), null, numDocs).scoreDocs;
assertEquals("max,max,T,T", 1, result.length);
result = search.search(csrq("rand", maxRP, null, T, F), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T", 1, result.length);
}
public void testRangeQueryRandCollating() throws IOException {
  // NOTE: uses the unsigned index built in the *super* setUp,
  // because collation seems to ignore hyphens.
  // Same range/endpoint matrix as testRangeQueryRand, but every csrq is
  // given an English Collator so term ordering is collation-based.
  IndexReader reader = IndexReader.open(unsignedIndex.index);
  IndexSearcher search = new IndexSearcher(reader);

  String minRP = pad(unsignedIndex.minR);
  String maxRP = pad(unsignedIndex.maxR);

  int numDocs = reader.numDocs();

  assertEquals("num of docs", numDocs, 1 + maxId - minId);

  ScoreDoc[] result;

  Collator c = Collator.getInstance(Locale.ENGLISH);

  // test extremes, bounded on both ends

  result = search.search(csrq("rand", minRP, maxRP, T, T, c), null, numDocs).scoreDocs;
  assertEquals("find all", numDocs, result.length);

  result = search.search(csrq("rand", minRP, maxRP, T, F, c), null, numDocs).scoreDocs;
  assertEquals("all but biggest", numDocs - 1, result.length);

  result = search.search(csrq("rand", minRP, maxRP, F, T, c), null, numDocs).scoreDocs;
  assertEquals("all but smallest", numDocs - 1, result.length);

  result = search.search(csrq("rand", minRP, maxRP, F, F, c), null, numDocs).scoreDocs;
  assertEquals("all but extremes", numDocs - 2, result.length);

  // unbounded (null endpoint means open-ended on that side)

  result = search.search(csrq("rand", minRP, null, T, F, c), null, numDocs).scoreDocs;
  assertEquals("smallest and up", numDocs, result.length);

  result = search.search(csrq("rand", null, maxRP, F, T, c), null, numDocs).scoreDocs;
  assertEquals("biggest and down", numDocs, result.length);

  result = search.search(csrq("rand", minRP, null, F, F, c), null, numDocs).scoreDocs;
  assertEquals("not smallest, but up", numDocs - 1, result.length);

  result = search.search(csrq("rand", null, maxRP, F, F, c), null, numDocs).scoreDocs;
  assertEquals("not biggest, but down", numDocs - 1, result.length);

  // very small sets

  result = search.search(csrq("rand", minRP, minRP, F, F, c), null, numDocs).scoreDocs;
  assertEquals("min,min,F,F,c", 0, result.length);
  result = search.search(csrq("rand", maxRP, maxRP, F, F, c), null, numDocs).scoreDocs;
  assertEquals("max,max,F,F,c", 0, result.length);

  result = search.search(csrq("rand", minRP, minRP, T, T, c), null, numDocs).scoreDocs;
  assertEquals("min,min,T,T,c", 1, result.length);
  result = search.search(csrq("rand", null, minRP, F, T, c), null, numDocs).scoreDocs;
  assertEquals("nul,min,F,T,c", 1, result.length);

  result = search.search(csrq("rand", maxRP, maxRP, T, T, c), null, numDocs).scoreDocs;
  assertEquals("max,max,T,T,c", 1, result.length);
  result = search.search(csrq("rand", maxRP, null, T, F, c), null, numDocs).scoreDocs;
  // fixed message: query endpoints are (T, F) and sibling messages in this
  // method carry a ",c" suffix; message previously said "max,nul,T,T"
  assertEquals("max,nul,T,F,c", 1, result.length);

  // release resources opened by this test (searcher first, then the reader
  // this test opened itself)
  search.close();
  reader.close();
}
public void testFarsi() throws Exception {
  // Verifies that a collating constant-score range query uses the supplied
  // Collator (Arabic, standing in for Farsi) instead of raw Unicode order
  // when deciding whether an indexed term falls inside the range.

  /* build an index */
  RAMDirectory farsiIndex = new RAMDirectory();
  IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
                                       IndexWriter.MaxFieldLength.LIMITED);
  Document doc = new Document();
  doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
  doc.add(new Field("body", "body", Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
  writer.addDocument(doc);
  writer.optimize();
  writer.close();

  IndexReader reader = IndexReader.open(farsiIndex);
  IndexSearcher search = new IndexSearcher(reader);

  // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
  // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
  // characters properly.
  Collator c = Collator.getInstance(new Locale("ar"));

  // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
  // orders the U+0698 character before the U+0633 character, so the single
  // index Term below should NOT be returned by a ConstantScoreRangeQuery
  // with a Farsi Collator (or an Arabic one for the case when Farsi is
  // not supported).
  ScoreDoc[] result = search.search(csrq("content", "\u062F", "\u0698", T, T,
                                         c), null, 1000).scoreDocs;
  assertEquals("The index Term should not be included.", 0, result.length);

  result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null,
                         1000).scoreDocs;
  assertEquals("The index Term should be included.", 1, result.length);

  search.close();
  // fix: the reader opened above was never closed; closing the searcher
  // alone does not close a reader that was passed in explicitly
  reader.close();
}
}

View File

@ -21,7 +21,6 @@ import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
@ -41,9 +40,7 @@ public class TestRangeQuery extends LuceneTestCase {
}
public void testExclusive() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
false);
Query query = new RangeQuery("content", "A", "C", false, false);
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@ -64,9 +61,7 @@ public class TestRangeQuery extends LuceneTestCase {
}
public void testInclusive() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
true);
Query query = new RangeQuery("content", "A", "C", true, true);
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
@ -88,13 +83,10 @@ public class TestRangeQuery extends LuceneTestCase {
}
public void testEqualsHashcode() {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
true);
Query query = new RangeQuery("content", "A", "C", true, true);
query.setBoost(1.0f);
Query other = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
true);
Query other = new RangeQuery("content", "A", "C", true, true);
other.setBoost(1.0f);
assertEquals("query equals itself is true", query, query);
@ -104,38 +96,36 @@ public class TestRangeQuery extends LuceneTestCase {
other.setBoost(2.0f);
assertFalse("Different boost queries are not equal", query.equals(other));
other = new RangeQuery(new Term("notcontent", "A"), new Term("notcontent", "C"), true);
other = new RangeQuery("notcontent", "A", "C", true, true);
assertFalse("Different fields are not equal", query.equals(other));
other = new RangeQuery(new Term("content", "X"), new Term("content", "C"), true);
other = new RangeQuery("content", "X", "C", true, true);
assertFalse("Different lower terms are not equal", query.equals(other));
other = new RangeQuery(new Term("content", "A"), new Term("content", "Z"), true);
other = new RangeQuery("content", "A", "Z", true, true);
assertFalse("Different upper terms are not equal", query.equals(other));
query = new RangeQuery(null, new Term("content", "C"), true);
other = new RangeQuery(null, new Term("content", "C"), true);
query = new RangeQuery("content", null, "C", true, true);
other = new RangeQuery("content", null, "C", true, true);
assertEquals("equivalent queries with null lowerterms are equal()", query, other);
assertEquals("hashcode must return same value when equals is true", query.hashCode(), other.hashCode());
query = new RangeQuery(new Term("content", "C"), null, true);
other = new RangeQuery(new Term("content", "C"), null, true);
query = new RangeQuery("content", "C", null, true, true);
other = new RangeQuery("content", "C", null, true, true);
assertEquals("equivalent queries with null upperterms are equal()", query, other);
assertEquals("hashcode returns same value", query.hashCode(), other.hashCode());
query = new RangeQuery(null, new Term("content", "C"), true);
other = new RangeQuery(new Term("content", "C"), null, true);
query = new RangeQuery("content", null, "C", true, true);
other = new RangeQuery("content", "C", null, true, true);
assertFalse("queries with different upper and lower terms are not equal", query.equals(other));
query = new RangeQuery(new Term("content", "A"), new Term("content", "C"), false);
other = new RangeQuery(new Term("content", "A"), new Term("content", "C"), true);
query = new RangeQuery("content", "A", "C", false, false);
other = new RangeQuery("content", "A", "C", true, true);
assertFalse("queries with different inclusive are not equal", query.equals(other));
}
public void testExclusiveCollating() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
false, Collator.getInstance(Locale.ENGLISH));
Query query = new RangeQuery("content", "A", "C", false, false, Collator.getInstance(Locale.ENGLISH));
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@ -156,9 +146,7 @@ public class TestRangeQuery extends LuceneTestCase {
}
public void testInclusiveCollating() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
true, Collator.getInstance(Locale.ENGLISH));
Query query = new RangeQuery("content", "A", "C",true, true, Collator.getInstance(Locale.ENGLISH));
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
@ -184,9 +172,7 @@ public class TestRangeQuery extends LuceneTestCase {
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator collator = Collator.getInstance(new Locale("ar"));
Query query = new RangeQuery(new Term("content", "\u062F"),
new Term("content", "\u0698"),
true, collator);
Query query = new RangeQuery("content", "\u062F", "\u0698", true, true, collator);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a RangeQuery with a Farsi
@ -196,9 +182,7 @@ public class TestRangeQuery extends LuceneTestCase {
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, hits.length);
query = new RangeQuery(new Term("content", "\u0633"),
new Term("content", "\u0638"),
true, collator);
query = new RangeQuery("content", "\u0633", "\u0638",true, true, collator);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, hits.length);
searcher.close();