LUCENE-1279: Add support for Collator to RangeFilter, etc.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@696056 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-09-16 21:03:21 +00:00
parent 31811e9f45
commit a8c0a8a810
17 changed files with 1177 additions and 285 deletions

View File

@ -283,6 +283,8 @@ New features
19. LUCENE-1354: Provide programmatic access to CheckIndex (Grant Ingersoll, Mike McCandless)
20. LUCENE-1279: Add support for Collators to RangeFilter/Query and Query Parser. (Steve Rowe via Grant Ingersoll)
Optimizations
1. LUCENE-705: When building a compound file, use

View File

@ -1,4 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.0 */
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.queryParser;
/**
@ -27,14 +28,14 @@ public interface CharStream {
/**
* Returns the column position of the character last read.
* @deprecated
* @deprecated
* @see #getEndColumn
*/
int getColumn();
/**
* Returns the line number of the character last read.
* @deprecated
* @deprecated
* @see #getEndLine
*/
int getLine();
@ -79,7 +80,7 @@ public interface CharStream {
char BeginToken() throws java.io.IOException;
/**
* Returns a string made up of characters from the marked token beginning
* Returns a string made up of characters from the marked token beginning
* to the current buffer position. Implementations have the choice of returning
* anything that they want to. For example, for efficiency, one might decide
* to just return null, which is a valid implementation.
@ -108,3 +109,4 @@ public interface CharStream {
void Done();
}
/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */

View File

@ -1,4 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
/* JavaCCOptions:KEEP_LINE_COL=null */
package org.apache.lucene.queryParser;
/**
@ -51,6 +52,7 @@ public class ParseException extends Exception {
specialConstructor = false;
}
/** Constructor with message. */
public ParseException(String message) {
super(message);
specialConstructor = false;
@ -105,7 +107,7 @@ public class ParseException extends Exception {
maxSize = expectedTokenSequences[i].length;
}
for (int j = 0; j < expectedTokenSequences[i].length; j++) {
expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" ");
expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
}
if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
expected.append("...");
@ -120,8 +122,11 @@ public class ParseException extends Exception {
retval += tokenImage[0];
break;
}
retval += " " + tokenImage[tok.kind];
retval += " \"";
retval += add_escapes(tok.image);
tok = tok.next;
retval += " \"";
tok = tok.next;
}
retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
retval += "." + eol;
@ -138,7 +143,7 @@ public class ParseException extends Exception {
* The end of line string for this machine.
*/
protected String eol = System.getProperty("line.separator", "\n");
/**
* Used to convert raw characters to their escaped version
 * when these raw versions cannot be used as part of an ASCII
@ -190,3 +195,4 @@ public class ParseException extends Exception {
}
}
/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */

View File

@ -4,6 +4,7 @@ package org.apache.lucene.queryParser;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
@ -132,6 +133,10 @@ public class QueryParser implements QueryParserConstants {
// maps field names to date resolutions
Map fieldToDateResolution = null;
// The collator to use when determining range inclusion,
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
Collator rangeCollator = null;
/** The default operator for parsing queries.
* Use {@link QueryParser#setDefaultOperator} to change it.
*/
@ -408,6 +413,35 @@ public class QueryParser implements QueryParserConstants {
return resolution;
}
/**
* Sets the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* <p/>
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
* Field referenced by lowerTerm and/or upperTerm to be examined.
* Depending on the number of index Terms in this Field, the operation could
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
* and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
}
/**
* @return the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
*/
public Collator getRangeCollator() {
return rangeCollator;
}
/**
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
*/
@ -711,11 +745,12 @@ public class QueryParser implements QueryParserConstants {
{
return new RangeQuery(new Term(field, part1),
new Term(field, part2),
inclusive);
inclusive, rangeCollator);
}
else
{
return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
return new ConstantScoreRangeQuery
(field, part1, part2, inclusive, inclusive, rangeCollator);
}
}
@ -1448,26 +1483,26 @@ public class QueryParser implements QueryParserConstants {
throw new Error("Missing return statement in function");
}
final private boolean jj_2_1(int xla) {
private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(0, xla); }
}
final private boolean jj_3R_3() {
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
final private boolean jj_3R_2() {
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
final private boolean jj_3_1() {
private boolean jj_3_1() {
Token xsp;
xsp = jj_scanpos;
if (jj_3R_2()) {
@ -1477,31 +1512,34 @@ public class QueryParser implements QueryParserConstants {
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
public Token token, jj_nt;
/** Current token. */
public Token token;
/** Next token. */
public Token jj_nt;
private int jj_ntk;
private Token jj_scanpos, jj_lastpos;
private int jj_la;
public boolean lookingAhead = false;
private boolean jj_semLA;
private int jj_gen;
final private int[] jj_la1 = new int[23];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
jj_la1_0();
jj_la1_1();
jj_la1_init_0();
jj_la1_init_1();
}
private static void jj_la1_0() {
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,};
}
private static void jj_la1_1() {
private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
private boolean jj_rescan = false;
private int jj_gc = 0;
/** Constructor with user supplied CharStream. */
public QueryParser(CharStream stream) {
token_source = new QueryParserTokenManager(stream);
token = new Token();
@ -1511,6 +1549,7 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(CharStream stream) {
token_source.ReInit(stream);
token = new Token();
@ -1520,6 +1559,7 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Constructor with generated Token Manager. */
public QueryParser(QueryParserTokenManager tm) {
token_source = tm;
token = new Token();
@ -1529,6 +1569,7 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(QueryParserTokenManager tm) {
token_source = tm;
token = new Token();
@ -1538,7 +1579,7 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
final private Token jj_consume_token(int kind) throws ParseException {
private Token jj_consume_token(int kind) throws ParseException {
Token oldToken;
if ((oldToken = token).next != null) token = token.next;
else token = token.next = token_source.getNextToken();
@ -1564,7 +1605,7 @@ public class QueryParser implements QueryParserConstants {
static private final class LookaheadSuccess extends java.lang.Error { }
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
final private boolean jj_scan_token(int kind) {
private boolean jj_scan_token(int kind) {
if (jj_scanpos == jj_lastpos) {
jj_la--;
if (jj_scanpos.next == null) {
@ -1585,6 +1626,8 @@ public class QueryParser implements QueryParserConstants {
return false;
}
/** Get the next Token. */
final public Token getNextToken() {
if (token.next != null) token = token.next;
else token = token.next = token_source.getNextToken();
@ -1593,8 +1636,9 @@ public class QueryParser implements QueryParserConstants {
return token;
}
/** Get the specific Token. */
final public Token getToken(int index) {
Token t = lookingAhead ? jj_scanpos : token;
Token t = token;
for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken();
@ -1602,14 +1646,14 @@ public class QueryParser implements QueryParserConstants {
return t;
}
final private int jj_ntk() {
private int jj_ntk() {
if ((jj_nt=token.next) == null)
return (jj_ntk = (token.next=token_source.getNextToken()).kind);
else
return (jj_ntk = jj_nt.kind);
}
private java.util.Vector jj_expentries = new java.util.Vector();
private java.util.List jj_expentries = new java.util.ArrayList();
private int[] jj_expentry;
private int jj_kind = -1;
private int[] jj_lasttokens = new int[100];
@ -1624,31 +1668,26 @@ public class QueryParser implements QueryParserConstants {
for (int i = 0; i < jj_endpos; i++) {
jj_expentry[i] = jj_lasttokens[i];
}
boolean exists = false;
for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) {
int[] oldentry = (int[])(e.nextElement());
jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) {
int[] oldentry = (int[])(it.next());
if (oldentry.length == jj_expentry.length) {
exists = true;
for (int i = 0; i < jj_expentry.length; i++) {
if (oldentry[i] != jj_expentry[i]) {
exists = false;
break;
continue jj_entries_loop;
}
}
if (exists) break;
jj_expentries.add(jj_expentry);
break jj_entries_loop;
}
}
if (!exists) jj_expentries.addElement(jj_expentry);
if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
}
}
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.removeAllElements();
jj_expentries.clear();
boolean[] la1tokens = new boolean[34];
for (int i = 0; i < 34; i++) {
la1tokens[i] = false;
}
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
@ -1669,7 +1708,7 @@ public class QueryParser implements QueryParserConstants {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
jj_expentries.addElement(jj_expentry);
jj_expentries.add(jj_expentry);
}
}
jj_endpos = 0;
@ -1677,18 +1716,20 @@ public class QueryParser implements QueryParserConstants {
jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) {
exptokseq[i] = (int[])jj_expentries.elementAt(i);
exptokseq[i] = (int[])jj_expentries.get(i);
}
return new ParseException(token, exptokseq, tokenImage);
}
/** Enable tracing. */
final public void enable_tracing() {
}
/** Disable tracing. */
final public void disable_tracing() {
}
final private void jj_rescan_token() {
private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 1; i++) {
try {
@ -1707,7 +1748,7 @@ public class QueryParser implements QueryParserConstants {
jj_rescan = false;
}
final private void jj_save(int index, int xla) {
private void jj_save(int index, int xla) {
JJCalls p = jj_2_rtns[index];
while (p.gen > jj_gen) {
if (p.next == null) { p = p.next = new JJCalls(); break; }

View File

@ -28,6 +28,7 @@ package org.apache.lucene.queryParser;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
@ -159,6 +160,10 @@ public class QueryParser {
// maps field names to date resolutions
Map fieldToDateResolution = null;
// The collator to use when determining range inclusion,
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
Collator rangeCollator = null;
/** The default operator for parsing queries.
* Use {@link QueryParser#setDefaultOperator} to change it.
*/
@ -434,6 +439,35 @@ public class QueryParser {
return resolution;
}
/**
* Sets the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
* <p/>
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
* collator using this method will cause every single index Term in the
* Field referenced by lowerTerm and/or upperTerm to be examined.
* Depending on the number of index Terms in this Field, the operation could
* be very slow.
*
* @param rc the collator to use when constructing RangeQuerys
* and ConstantScoreRangeQuerys
*/
public void setRangeCollator(Collator rc) {
rangeCollator = rc;
}
/**
* @return the collator used to determine index term inclusion in ranges
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
* value.)
*/
public Collator getRangeCollator() {
return rangeCollator;
}
/**
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
@ -738,11 +772,12 @@ public class QueryParser {
{
return new RangeQuery(new Term(field, part1),
new Term(field, part2),
inclusive);
inclusive, rangeCollator);
}
else
{
return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
return new ConstantScoreRangeQuery
(field, part1, part2, inclusive, inclusive, rangeCollator);
}
}

View File

@ -1,47 +1,90 @@
/* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */
package org.apache.lucene.queryParser;
/**
* Token literal values and constants.
* Generated by org.javacc.parser.OtherFilesGen#start()
*/
public interface QueryParserConstants {
/** End of File. */
int EOF = 0;
/** RegularExpression Id. */
int _NUM_CHAR = 1;
/** RegularExpression Id. */
int _ESCAPED_CHAR = 2;
/** RegularExpression Id. */
int _TERM_START_CHAR = 3;
/** RegularExpression Id. */
int _TERM_CHAR = 4;
/** RegularExpression Id. */
int _WHITESPACE = 5;
/** RegularExpression Id. */
int _QUOTED_CHAR = 6;
/** RegularExpression Id. */
int AND = 8;
/** RegularExpression Id. */
int OR = 9;
/** RegularExpression Id. */
int NOT = 10;
/** RegularExpression Id. */
int PLUS = 11;
/** RegularExpression Id. */
int MINUS = 12;
/** RegularExpression Id. */
int LPAREN = 13;
/** RegularExpression Id. */
int RPAREN = 14;
/** RegularExpression Id. */
int COLON = 15;
/** RegularExpression Id. */
int STAR = 16;
/** RegularExpression Id. */
int CARAT = 17;
/** RegularExpression Id. */
int QUOTED = 18;
/** RegularExpression Id. */
int TERM = 19;
/** RegularExpression Id. */
int FUZZY_SLOP = 20;
/** RegularExpression Id. */
int PREFIXTERM = 21;
/** RegularExpression Id. */
int WILDTERM = 22;
/** RegularExpression Id. */
int RANGEIN_START = 23;
/** RegularExpression Id. */
int RANGEEX_START = 24;
/** RegularExpression Id. */
int NUMBER = 25;
/** RegularExpression Id. */
int RANGEIN_TO = 26;
/** RegularExpression Id. */
int RANGEIN_END = 27;
/** RegularExpression Id. */
int RANGEIN_QUOTED = 28;
/** RegularExpression Id. */
int RANGEIN_GOOP = 29;
/** RegularExpression Id. */
int RANGEEX_TO = 30;
/** RegularExpression Id. */
int RANGEEX_END = 31;
/** RegularExpression Id. */
int RANGEEX_QUOTED = 32;
/** RegularExpression Id. */
int RANGEEX_GOOP = 33;
/** Lexical state. */
int Boost = 0;
/** Lexical state. */
int RangeEx = 1;
/** Lexical state. */
int RangeIn = 2;
/** Lexical state. */
int DEFAULT = 3;
/** Literal token values. */
String[] tokenImage = {
"<EOF>",
"<_NUM_CHAR>",

View File

@ -3,6 +3,7 @@ package org.apache.lucene.queryParser;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
@ -30,9 +31,13 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
/** Token Manager. */
public class QueryParserTokenManager implements QueryParserConstants
{
/** Debug output. */
public java.io.PrintStream debugStream = System.out;
/** Set debug output. */
public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
private final int jjStopStringLiteralDfa_3(int pos, long active0)
{
@ -46,21 +51,13 @@ private final int jjStartNfa_3(int pos, long active0)
{
return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1);
}
private final int jjStopAtPos(int pos, int kind)
private int jjStopAtPos(int pos, int kind)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
return pos + 1;
}
private final int jjStartNfaWithStates_3(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_3(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_3()
private int jjMoveStringLiteralDfa0_3()
{
switch(curChar)
{
@ -86,35 +83,13 @@ private final int jjMoveStringLiteralDfa0_3()
return jjMoveNfa_3(0, 0);
}
}
private final void jjCheckNAdd(int state)
private int jjStartNfaWithStates_3(int pos, int kind, int state)
{
if (jjrounds[state] != jjround)
{
jjstateSet[jjnewStateCnt++] = state;
jjrounds[state] = jjround;
}
}
private final void jjAddStates(int start, int end)
{
do {
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
} while (start++ != end);
}
private final void jjCheckNAddTwoStates(int state1, int state2)
{
jjCheckNAdd(state1);
jjCheckNAdd(state2);
}
private final void jjCheckNAddStates(int start, int end)
{
do {
jjCheckNAdd(jjnextStates[start]);
} while (start++ != end);
}
private final void jjCheckNAddStates(int start)
{
jjCheckNAdd(jjnextStates[start]);
jjCheckNAdd(jjnextStates[start + 1]);
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_3(state, pos + 1);
}
static final long[] jjbitVec0 = {
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
@ -122,14 +97,13 @@ static final long[] jjbitVec0 = {
static final long[] jjbitVec2 = {
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
};
private final int jjMoveNfa_3(int startState, int curPos)
private int jjMoveNfa_3(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 36;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -137,7 +111,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -276,7 +250,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -450,7 +424,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -545,15 +519,7 @@ private final int jjStartNfa_1(int pos, long active0)
{
return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1);
}
private final int jjStartNfaWithStates_1(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_1(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_1()
private int jjMoveStringLiteralDfa0_1()
{
switch(curChar)
{
@ -565,7 +531,7 @@ private final int jjMoveStringLiteralDfa0_1()
return jjMoveNfa_1(0, 0);
}
}
private final int jjMoveStringLiteralDfa1_1(long active0)
private int jjMoveStringLiteralDfa1_1(long active0)
{
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) {
@ -583,14 +549,21 @@ private final int jjMoveStringLiteralDfa1_1(long active0)
}
return jjStartNfa_1(0, active0);
}
private final int jjMoveNfa_1(int startState, int curPos)
private int jjStartNfaWithStates_1(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_1(state, pos + 1);
}
private int jjMoveNfa_1(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 7;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -598,7 +571,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -647,7 +620,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -677,7 +650,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -710,18 +683,17 @@ private final int jjMoveNfa_1(int startState, int curPos)
catch(java.io.IOException e) { return curPos; }
}
}
private final int jjMoveStringLiteralDfa0_0()
private int jjMoveStringLiteralDfa0_0()
{
return jjMoveNfa_0(0, 0);
}
private final int jjMoveNfa_0(int startState, int curPos)
private int jjMoveNfa_0(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 3;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -729,7 +701,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -758,7 +730,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -773,7 +745,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -813,15 +785,7 @@ private final int jjStartNfa_2(int pos, long active0)
{
return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1);
}
private final int jjStartNfaWithStates_2(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_2(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_2()
private int jjMoveStringLiteralDfa0_2()
{
switch(curChar)
{
@ -833,7 +797,7 @@ private final int jjMoveStringLiteralDfa0_2()
return jjMoveNfa_2(0, 0);
}
}
private final int jjMoveStringLiteralDfa1_2(long active0)
private int jjMoveStringLiteralDfa1_2(long active0)
{
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) {
@ -851,14 +815,21 @@ private final int jjMoveStringLiteralDfa1_2(long active0)
}
return jjStartNfa_2(0, active0);
}
private final int jjMoveNfa_2(int startState, int curPos)
private int jjStartNfaWithStates_2(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_2(state, pos + 1);
}
private int jjMoveNfa_2(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 7;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -866,7 +837,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -915,7 +886,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -945,7 +916,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -988,22 +959,28 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
{
case 0:
return ((jjbitVec2[i2] & l2) != 0L);
default :
default :
if ((jjbitVec0[i1] & l1) != 0L)
return true;
return false;
}
}
/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173",
null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
/** Lexer state names. */
public static final String[] lexStateNames = {
"Boost",
"RangeEx",
"RangeIn",
"DEFAULT",
"Boost",
"RangeEx",
"RangeIn",
"DEFAULT",
};
/** Lex State array. */
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1,
3, -1, 3, -1, -1, -1, 3, -1, -1,
@ -1018,13 +995,18 @@ protected CharStream input_stream;
private final int[] jjrounds = new int[36];
private final int[] jjstateSet = new int[72];
protected char curChar;
/** Constructor. */
public QueryParserTokenManager(CharStream stream){
input_stream = stream;
}
/** Constructor. */
public QueryParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}
/** Reinitialise parser. */
public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
@ -1032,18 +1014,22 @@ public void ReInit(CharStream stream)
input_stream = stream;
ReInitRounds();
}
private final void ReInitRounds()
private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 36; i-- > 0;)
jjrounds[i] = 0x80000000;
}
/** Reinitialise parser. */
public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);
}
/** Switch to specified lex state. */
public void SwitchTo(int lexState)
{
if (lexState >= 4 || lexState < 0)
@ -1054,14 +1040,25 @@ public void SwitchTo(int lexState)
protected Token jjFillToken()
{
Token t = Token.newToken(jjmatchedKind);
t.kind = jjmatchedKind;
final Token t;
final String curTokenImage;
final int beginLine;
final int endLine;
final int beginColumn;
final int endColumn;
String im = jjstrLiteralImages[jjmatchedKind];
t.image = (im == null) ? input_stream.GetImage() : im;
t.beginLine = input_stream.getBeginLine();
t.beginColumn = input_stream.getBeginColumn();
t.endLine = input_stream.getEndLine();
t.endColumn = input_stream.getEndColumn();
curTokenImage = (im == null) ? input_stream.GetImage() : im;
beginLine = input_stream.getBeginLine();
beginColumn = input_stream.getBeginColumn();
endLine = input_stream.getEndLine();
endColumn = input_stream.getEndColumn();
t = Token.newToken(jjmatchedKind, curTokenImage);
t.beginLine = beginLine;
t.endLine = endLine;
t.beginColumn = beginColumn;
t.endColumn = endColumn;
return t;
}
@ -1072,22 +1069,21 @@ int jjround;
int jjmatchedPos;
int jjmatchedKind;
/** Get the next Token. */
public Token getNextToken()
{
int kind;
Token specialToken = null;
Token matchedToken;
int curPos = 0;
EOFLoop :
for (;;)
{
try
{
{
try
{
curChar = input_stream.BeginToken();
}
}
catch(java.io.IOException e)
{
{
jjmatchedKind = 0;
matchedToken = jjFillToken();
return matchedToken;
@ -1157,4 +1153,31 @@ public Token getNextToken()
}
}
private void jjCheckNAdd(int state)
{
if (jjrounds[state] != jjround)
{
jjstateSet[jjnewStateCnt++] = state;
jjrounds[state] = jjround;
}
}
private void jjAddStates(int start, int end)
{
do {
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
} while (start++ != end);
}
private void jjCheckNAddTwoStates(int state1, int state2)
{
jjCheckNAdd(state1);
jjCheckNAdd(state2);
}
private void jjCheckNAddStates(int start, int end)
{
do {
jjCheckNAdd(jjnextStates[start]);
} while (start++ != end);
}
}

View File

@ -1,4 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
package org.apache.lucene.queryParser;
/**
@ -14,12 +15,14 @@ public class Token {
*/
public int kind;
/**
* beginLine and beginColumn describe the position of the first character
* of this token; endLine and endColumn describe the position of the
* last character of this token.
*/
public int beginLine, beginColumn, endLine, endColumn;
/** The line number of the first character of this Token. */
public int beginLine;
/** The column number of the first character of this Token. */
public int beginColumn;
/** The line number of the last character of this Token. */
public int endLine;
/** The column number of the last character of this Token. */
public int endColumn;
/**
* The string image of the token.
@ -50,6 +53,40 @@ public class Token {
*/
public Token specialToken;
/**
* An optional attribute value of the Token.
* Tokens which are not used as syntactic sugar will often contain
* meaningful values that will be used later on by the compiler or
* interpreter. This attribute value is often different from the image.
* Any subclass of Token that actually wants to return a non-null value can
* override this method as appropriate.
*/
public Object getValue() {
return null;
}
/**
* No-argument constructor
*/
public Token() {}
/**
* Constructs a new token for the specified Image.
*/
public Token(int kind)
{
this(kind, null);
}
/**
* Constructs a new token for the specified Image and Kind.
*/
public Token(int kind, String image)
{
this.kind = kind;
this.image = image;
}
/**
* Returns the image.
*/
@ -63,19 +100,25 @@ public class Token {
* can create and return subclass objects based on the value of ofKind.
* Simply add the cases to the switch for all those special cases.
* For example, if you have a subclass of Token called IDToken that
* you want to create if ofKind is ID, simlpy add something like :
* you want to create if ofKind is ID, simply add something like :
*
* case MyParserConstants.ID : return new IDToken();
* case MyParserConstants.ID : return new IDToken(ofKind, image);
*
* to the following switch statement. Then you can cast matchedToken
* variable to the appropriate type and use it in your lexical actions.
 * variable to the appropriate type and use it in your lexical actions.
*/
public static final Token newToken(int ofKind)
public static Token newToken(int ofKind, String image)
{
switch(ofKind)
{
default : return new Token();
default : return new Token(ofKind, image);
}
}
public static Token newToken(int ofKind)
{
return newToken(ofKind, null);
}
}
/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */

View File

@ -1,19 +1,22 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
/* JavaCCOptions: */
package org.apache.lucene.queryParser;
/** Token Manager Error. */
public class TokenMgrError extends Error
{
/*
* Ordinals for various reasons why an Error of this type can be thrown.
*/
/**
* Lexical error occured.
* Lexical error occurred.
*/
static final int LEXICAL_ERROR = 0;
/**
* An attempt wass made to create a second instance of a static token manager.
* An attempt was made to create a second instance of a static token manager.
*/
static final int STATIC_LEXER_ERROR = 1;
@ -34,7 +37,7 @@ public class TokenMgrError extends Error
int errorCode;
/**
* Replaces unprintable characters by their espaced (or unicode escaped)
* Replaces unprintable characters by their escaped (or unicode escaped)
* equivalents in the given string
*/
protected static final String addEscapes(String str) {
@ -85,12 +88,12 @@ public class TokenMgrError extends Error
/**
* Returns a detailed message for the Error when it is thrown by the
* token manager to indicate a lexical error.
* Parameters :
* EOFSeen : indicates if EOF caused the lexicl error
* curLexState : lexical state in which this error occured
* errorLine : line number when the error occured
* errorColumn : column number when the error occured
* errorAfter : prefix that was seen before this error occured
* Parameters :
* EOFSeen : indicates if EOF caused the lexical error
* curLexState : lexical state in which this error occurred
* errorLine : line number when the error occurred
* errorColumn : column number when the error occurred
* errorAfter : prefix that was seen before this error occurred
* curchar : the offending character
* Note: You can customize the lexical error message by modifying this method.
*/
@ -105,7 +108,7 @@ public class TokenMgrError extends Error
/**
* You can also modify the body of this method to customize your error messages.
* For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
* of end-users concern, so you can return something like :
* of end-users concern, so you can return something like :
*
* "Internal Error : Please file a bug report .... "
*
@ -119,15 +122,19 @@ public class TokenMgrError extends Error
* Constructors of various flavors follow.
*/
/** No arg constructor. */
public TokenMgrError() {
}
/**
* Constructor with message and reason.
*
* @param message detail message for the Error
* @param reason one of the error-code ordinals above (e.g. LEXICAL_ERROR),
*        recorded in errorCode
*/
public TokenMgrError(String message, int reason) {
super(message);
errorCode = reason;
}
/** Full Constructor.  Builds the detail message via LexicalError(...). */
public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */

View File

@ -20,6 +20,7 @@ package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.text.Collator;
/**
* A range query that returns a constant score equal to its boost for
@ -42,6 +43,7 @@ public class ConstantScoreRangeQuery extends Query
private final String upperVal;
private final boolean includeLower;
private final boolean includeUpper;
private Collator collator;
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
@ -65,6 +67,14 @@ public class ConstantScoreRangeQuery extends Query
this.includeUpper = includeUpper;
}
/**
* Constructs a constant-score range query over <code>fieldName</code>,
* using <code>collator</code> (when non-null) to determine range inclusion
* instead of Unicode code point ordering.
*
* @param fieldName the field this range applies to
* @param lowerVal the lower bound, or null for an open lower end
* @param upperVal the upper bound, or null for an open upper end
* @param includeLower does this range include the lower bound?
* @param includeUpper does this range include the upper bound?
* @param collator the collator to use when determining range inclusion;
*        set to null to use Unicode code point ordering instead of collation
*/
public ConstantScoreRangeQuery(String fieldName, String lowerVal,
String upperVal, boolean includeLower,
boolean includeUpper, Collator collator)
{
this(fieldName, lowerVal, upperVal, includeLower, includeUpper);
this.collator = collator;
}
/** Returns the field name for this query */
public String getField() { return fieldName; }
/** Returns the value of the lower endpoint of this range query, null if open ended */
@ -78,9 +88,10 @@ public class ConstantScoreRangeQuery extends Query
public Query rewrite(IndexReader reader) throws IOException {
// Map to RangeFilter semantics which are slightly different...
RangeFilter rangeFilt = new RangeFilter(fieldName,
lowerVal!=null?lowerVal:"",
upperVal, lowerVal==""?false:includeLower, upperVal==null?false:includeUpper);
RangeFilter rangeFilt = new RangeFilter
(fieldName, lowerVal != null?lowerVal:"", upperVal,
lowerVal==""?false:includeLower, upperVal==null?false:includeUpper,
collator);
Query q = new ConstantScoreQuery(rangeFilt);
q.setBoost(getBoost());
return q;
@ -117,6 +128,7 @@ public class ConstantScoreRangeQuery extends Query
if (this.fieldName != other.fieldName // interned comparison
|| this.includeLower != other.includeLower
|| this.includeUpper != other.includeUpper
|| (this.collator != null && ! this.collator.equals(other.collator))
) { return false; }
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
@ -134,6 +146,7 @@ public class ConstantScoreRangeQuery extends Query
h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
h ^= (includeLower ? 0x665599aa : 0)
^ (includeUpper ? 0x99aa5566 : 0);
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
import java.text.Collator;
/**
* A Filter that restricts search results to a range of values in a given
@ -42,8 +43,9 @@ public class RangeFilter extends Filter {
private String upperTerm;
private boolean includeLower;
private boolean includeUpper;
private Collator collator;
/**
/**
* @param fieldName The field this range applies to
* @param lowerTerm The lower bound on this range
* @param upperTerm The upper bound on this range
@ -74,7 +76,31 @@ public class RangeFilter extends Filter {
("The upper bound must be non-null to be inclusive");
}
}
/**
* <strong>WARNING:</strong> Using this constructor and supplying a non-null
* value in the <code>collator</code> parameter will cause every single
* index Term in the Field referenced by lowerTerm and/or upperTerm to be
* examined. Depending on the number of index Terms in this Field, the
* operation could be very slow.
*
* @param fieldName The field this range applies to
* @param lowerTerm The lower bound on this range
* @param upperTerm The upper bound on this range
* @param includeLower Does this range include the lower bound?
* @param includeUpper Does this range include the upper bound?
* @param collator The collator to use when determining range inclusion; set
* to null to use Unicode code point ordering instead of collation.
* @throws IllegalArgumentException if both terms are null or if
* lowerTerm is null and includeLower is true (similar for upperTerm
* and includeUpper)
*/
public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
boolean includeLower, boolean includeUpper,
Collator collator) {
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
this.collator = collator;
}
/**
* Constructs a filter for field <code>fieldName</code> matching
* less than or equal to <code>upperTerm</code>.
@ -100,7 +126,7 @@ public class RangeFilter extends Filter {
public BitSet bits(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc());
TermEnum enumerator =
(null != lowerTerm
(null != lowerTerm && collator == null
? reader.terms(new Term(fieldName, lowerTerm))
: reader.terms(new Term(fieldName)));
@ -110,40 +136,61 @@ public class RangeFilter extends Filter {
return bits;
}
boolean checkLower = false;
if (!includeLower) // make adjustments to set to exclusive
checkLower = true;
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) ||
(!includeUpper && compare==0)) {
break;
if (collator != null) {
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if ((lowerTerm == null
|| (includeLower
? collator.compare(term.text(), lowerTerm) >= 0
: collator.compare(term.text(), lowerTerm) > 0))
&& (upperTerm == null
|| (includeUpper
? collator.compare(term.text(), upperTerm) <= 0
: collator.compare(term.text(), upperTerm) < 0))) {
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
} else {
break;
}
while (enumerator.next());
} else { // collator is null - use Unicode code point ordering
boolean checkLower = false;
if (!includeLower) // make adjustments to set to exclusive
checkLower = true;
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) ||
(!includeUpper && compare==0)) {
break;
}
}
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
} else {
break;
}
}
while (enumerator.next());
}
while (enumerator.next());
} finally {
termDocs.close();
}
@ -162,7 +209,7 @@ public class RangeFilter extends Filter {
OpenBitSet bits = new OpenBitSet(reader.maxDoc());
TermEnum enumerator =
(null != lowerTerm
(null != lowerTerm && collator == null
? reader.terms(new Term(fieldName, lowerTerm))
: reader.terms(new Term(fieldName)));
@ -171,40 +218,63 @@ public class RangeFilter extends Filter {
if (enumerator.term() == null) {
return bits;
}
boolean checkLower = false;
if (!includeLower) // make adjustments to set to exclusive
checkLower = true;
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) ||
(!includeUpper && compare==0)) {
break;
if (collator != null) {
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if ((lowerTerm == null
|| (includeLower
? collator.compare(term.text(), lowerTerm) >= 0
: collator.compare(term.text(), lowerTerm) > 0))
&& (upperTerm == null
|| (includeUpper
? collator.compare(term.text(), upperTerm) <= 0
: collator.compare(term.text(), upperTerm) < 0))) {
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
} else {
break;
}
while (enumerator.next());
} else { // collator is null - use Unicode code point ordering
boolean checkLower = false;
if (!includeLower) // make adjustments to set to exclusive
checkLower = true;
do {
Term term = enumerator.term();
if (term != null && term.field().equals(fieldName)) {
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) ||
(!includeUpper && compare==0)) {
break;
}
}
/* we have a good term, find the docs */
termDocs.seek(enumerator.term());
while (termDocs.next()) {
bits.set(termDocs.doc());
}
}
} else {
break;
}
}
while (enumerator.next());
}
while (enumerator.next());
} finally {
termDocs.close();
@ -241,6 +311,7 @@ public class RangeFilter extends Filter {
if (!this.fieldName.equals(other.fieldName)
|| this.includeLower != other.includeLower
|| this.includeUpper != other.includeUpper
|| (this.collator != null && ! this.collator.equals(other.collator))
) { return false; }
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
@ -255,6 +326,7 @@ public class RangeFilter extends Filter {
h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2);
h ^= (includeLower ? 0xD484B933 : 0)
^ (includeUpper ? 0x6AE423AC : 0);
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.text.Collator;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
@ -46,12 +47,18 @@ public class RangeQuery extends Query
private Term lowerTerm;
private Term upperTerm;
private boolean inclusive;
private Collator collator;
/** Constructs a query selecting all terms greater than
* <code>lowerTerm</code> but less than <code>upperTerm</code>.
* There must be at least one term and either term may be null,
* in which case there is no bound on that side, but if there are
* two terms, both terms <b>must</b> be for the same field.
*
* @param lowerTerm The Term at the lower end of the range
* @param upperTerm The Term at the upper end of the range
* @param inclusive If true, both <code>lowerTerm</code> and
* <code>upperTerm</code> will themselves be included in the range.
*/
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive)
{
@ -76,48 +83,109 @@ public class RangeQuery extends Query
this.inclusive = inclusive;
}
/** Constructs a query selecting all terms greater than
* <code>lowerTerm</code> but less than <code>upperTerm</code>.
* There must be at least one term and either term may be null,
* in which case there is no bound on that side, but if there are
* two terms, both terms <b>must</b> be for the same field.
* <p>
* If <code>collator</code> is not null, it will be used to decide whether
* index terms are within the given range, rather than using the Unicode code
* point order in which index terms are stored.
* <p>
* <strong>WARNING:</strong> Using this constructor and supplying a non-null
* value in the <code>collator</code> parameter will cause every single
* index Term in the Field referenced by lowerTerm and/or upperTerm to be
* examined. Depending on the number of index Terms in this Field, the
* operation could be very slow.
*
* @param lowerTerm The Term at the lower end of the range
* @param upperTerm The Term at the upper end of the range
* @param inclusive If true, both <code>lowerTerm</code> and
* <code>upperTerm</code> will themselves be included in the range.
* @param collator The collator to use to collate index Terms, to determine
* their membership in the range bounded by <code>lowerTerm</code> and
* <code>upperTerm</code>.
*/
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive,
Collator collator)
{
// Delegates to the non-collating constructor, then records the collator
// used by rewrite() to test index terms for range membership.
this(lowerTerm, upperTerm, inclusive);
this.collator = collator;
}
public Query rewrite(IndexReader reader) throws IOException {
BooleanQuery query = new BooleanQuery(true);
TermEnum enumerator = reader.terms(lowerTerm);
String testField = getField();
if (collator != null) {
TermEnum enumerator = reader.terms(new Term(testField, ""));
String lowerTermText = lowerTerm != null ? lowerTerm.text() : null;
String upperTermText = upperTerm != null ? upperTerm.text() : null;
try {
boolean checkLower = false;
if (!inclusive) // make adjustments to set to exclusive
checkLower = true;
String testField = getField();
do {
Term term = enumerator.term();
if (term != null && term.field() == testField) { // interned comparison
if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.text().compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) || (!inclusive && compare == 0))
break;
try {
do {
Term term = enumerator.term();
if (term != null && term.field() == testField) { // interned comparison
if ((lowerTermText == null
|| (inclusive ? collator.compare(term.text(), lowerTermText) >= 0
: collator.compare(term.text(), lowerTermText) > 0))
&& (upperTermText == null
|| (inclusive ? collator.compare(term.text(), upperTermText) <= 0
: collator.compare(term.text(), upperTermText) < 0))) {
addTermToQuery(term, query);
}
TermQuery tq = new TermQuery(term); // found a match
tq.setBoost(getBoost()); // set the boost
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
}
}
else {
break;
}
while (enumerator.next());
}
finally {
enumerator.close();
}
while (enumerator.next());
}
finally {
enumerator.close();
else { // collator is null
TermEnum enumerator = reader.terms(lowerTerm);
try {
boolean checkLower = false;
if (!inclusive) // make adjustments to set to exclusive
checkLower = true;
do {
Term term = enumerator.term();
if (term != null && term.field() == testField) { // interned comparison
if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
checkLower = false;
if (upperTerm != null) {
int compare = upperTerm.text().compareTo(term.text());
/* if beyond the upper term, or is exclusive and
* this is equal to the upper term, break out */
if ((compare < 0) || (!inclusive && compare == 0))
break;
}
addTermToQuery(term, query); // Found a match
}
}
else {
break;
}
}
while (enumerator.next());
}
finally {
enumerator.close();
}
}
return query;
}
/** Adds {@code term} to {@code query} as an optional (SHOULD) clause, carrying this query's boost. */
private void addTermToQuery(Term term, BooleanQuery query) {
  TermQuery termQuery = new TermQuery(term);
  termQuery.setBoost(getBoost());
  query.add(termQuery, BooleanClause.Occur.SHOULD);
}
/** Returns the field name for this query */
public String getField() {
return (lowerTerm != null ? lowerTerm.field() : upperTerm.field());
@ -132,6 +200,9 @@ public class RangeQuery extends Query
/** Returns <code>true</code> if the range query is inclusive */
public boolean isInclusive() { return inclusive; }
/** Returns the collator used to determine range inclusion, if any. */
public Collator getCollator() { return collator; }
/** Prints a user-readable version of this query. */
public String toString(String field)
@ -159,6 +230,9 @@ public class RangeQuery extends Query
final RangeQuery other = (RangeQuery) o;
if (this.getBoost() != other.getBoost()) return false;
if (this.inclusive != other.inclusive) return false;
if (this.collator != null && ! this.collator.equals(other.collator))
return false;
// one of lowerTerm and upperTerm can be null
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
@ -174,6 +248,7 @@ public class RangeQuery extends Query
h ^= (h << 25) | (h >>> 8);
h ^= upperTerm != null ? upperTerm.hashCode() : 0;
h ^= this.inclusive ? 0x2742E74A : 0;
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
}

View File

@ -20,6 +20,7 @@ package org.apache.lucene.queryParser;
import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.text.Collator;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
@ -429,6 +430,51 @@ public class TestQueryParser extends LuceneTestCase {
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
}
/**
* Verifies that QueryParser.setRangeCollator makes parsed range queries
* honor collation order (Arabic locale, as a stand-in for Farsi) rather
* than Unicode code point order, for both ConstantScoreRangeQuery and the
* old RangeQuery (toggled via setUseOldRangeQuery).
*/
public void testFarsiRangeCollating() throws Exception {
RAMDirectory ramDir = new RAMDirectory();
IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("content","\u0633\u0627\u0628",
Field.Store.YES, Field.Index.UN_TOKENIZED));
iw.addDocument(doc);
iw.close();
IndexSearcher is = new IndexSearcher(ramDir);
QueryParser qp = new QueryParser("content", new WhitespaceAnalyzer());
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator c = Collator.getInstance(new Locale("ar"));
qp.setRangeCollator(c);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a ConstantScoreRangeQuery
// with a Farsi Collator (or an Arabic one for the case when Farsi is not
// supported).
// Test ConstantScoreRangeQuery
qp.setUseOldRangeQuery(false);
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
// Test RangeQuery
qp.setUseOldRangeQuery(true);
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
is.close();
}
/** for testing legacy DateField support */
private String getLegacyDate(String s) throws Exception {

View File

@ -32,12 +32,30 @@ public class BaseTestRangeFilter extends LuceneTestCase {
public static final boolean F = false;
public static final boolean T = true;
RAMDirectory index = new RAMDirectory();
Random rand = new Random(101); // use a set seed to test is deterministic
int maxR = Integer.MIN_VALUE;
int minR = Integer.MAX_VALUE;
/**
* Collation interacts badly with hyphens -- collation produces different
* ordering than Unicode code-point ordering -- so two indexes are created:
* one which can't have negative random integers, for testing collated
* ranges, and the other which can have negative random integers, for all
* other tests.
*/
class TestIndex {
// Largest "rand" value actually written to this index (seeded by the
// constructor with a sentinel that the first generated value replaces).
int maxR;
// Smallest "rand" value actually written to this index.
int minR;
// When false, random values are drawn from [0, Integer.MAX_VALUE) only.
boolean allowNegativeRandomInts;
// In-memory index populated by build(TestIndex).
RAMDirectory index = new RAMDirectory();
TestIndex(int minR, int maxR, boolean allowNegativeRandomInts) {
this.minR = minR;
this.maxR = maxR;
this.allowNegativeRandomInts = allowNegativeRandomInts;
}
}
// Index whose random values span the full signed int range.
TestIndex signedIndex = new TestIndex(Integer.MAX_VALUE, Integer.MIN_VALUE, true);
// Index restricted to non-negative random values, for the collating tests
// (collation interacts badly with hyphens; see the class comment above).
TestIndex unsignedIndex = new TestIndex(Integer.MAX_VALUE, 0, false);
int minId = 0;
int maxId = 10000;
@ -65,28 +83,31 @@ public class BaseTestRangeFilter extends LuceneTestCase {
/** Named-test constructor: builds both the signed and unsigned test indexes. */
public BaseTestRangeFilter(String name) {
    super(name);
    // Stale no-arg build() calls removed: build now requires a TestIndex.
    build(signedIndex);
    build(unsignedIndex);
}

/** No-arg constructor: builds both the signed and unsigned test indexes. */
public BaseTestRangeFilter() {
    build(signedIndex);
    build(unsignedIndex);
}
private void build() {
private void build(TestIndex index) {
try {
/* build an index */
IndexWriter writer = new IndexWriter(index, new SimpleAnalyzer(), T,
IndexWriter writer = new IndexWriter(index.index, new SimpleAnalyzer(), T,
IndexWriter.MaxFieldLength.LIMITED);
for (int d = minId; d <= maxId; d++) {
Document doc = new Document();
doc.add(new Field("id",pad(d), Field.Store.YES, Field.Index.NOT_ANALYZED));
int r= rand.nextInt();
if (maxR < r) {
maxR = r;
int r= index.allowNegativeRandomInts
? rand.nextInt() : rand.nextInt(Integer.MAX_VALUE);
if (index.maxR < r) {
index.maxR = r;
}
if (r < minR) {
minR = r;
if (r < index.minR) {
index.minR = r;
}
doc.add(new Field("rand",pad(r), Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("body","body", Field.Store.YES, Field.Index.NOT_ANALYZED));

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -27,6 +28,8 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import java.io.IOException;
import java.text.Collator;
import java.util.Locale;
import junit.framework.Assert;
@ -92,12 +95,25 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
return new ConstantScoreRangeQuery(f,l,h,il,ih);
}
/**
* Macro for readability: builds a collating ConstantScoreRangeQuery.
*
* @param c the collator used to determine range inclusion
*/
public static Query csrq(String f, String l, String h,
boolean il, boolean ih, Collator c) {
return new ConstantScoreRangeQuery(f,l,h,il,ih,c);
}
/** Sanity-checks csrq construction and equality/inequality via QueryUtils. */
public void testBasics() throws IOException {
QueryUtils.check(csrq("data","1","6",T,T));
QueryUtils.check(csrq("data","A","Z",T,T));
QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T));
}
/** Same sanity checks as testBasics, but with a collating csrq. */
public void testBasicsCollating() throws IOException {
Collator c = Collator.getInstance(Locale.ENGLISH);
QueryUtils.check(csrq("data","1","6",T,T,c));
QueryUtils.check(csrq("data","A","Z",T,T,c));
QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c));
}
public void testEqualScores() throws IOException {
// NOTE: uses index build in *this* setUp
@ -205,7 +221,7 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
public void testRangeQueryId() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(index);
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
@ -284,21 +300,105 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
}
/**
* Collating counterpart of testRangeQueryId: exercises bounded, unbounded
* and degenerate (single-value / empty) ranges over the padded "id" field
* using an English collator, asserting the expected hit counts.
*/
public void testRangeQueryIdCollating() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
Collator c = Collator.getInstance(Locale.ENGLISH);
// test id, bounded on both ends
result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs;
assertEquals("all but last", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("all but first", numDocs-1, result.length);
result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("all but ends", numDocs-2, result.length);
result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("med and up", 1+ maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("up to med", 1+ medId-minId, result.length);
// unbounded id
result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("min and up", numDocs, result.length);
result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("max and down", numDocs, result.length);
result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs;
assertEquals("not min, but up", numDocs-1, result.length);
result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("not max, but down", numDocs-1, result.length);
result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs;
assertEquals("med and up, not max", maxId-medId, result.length);
result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("not min, up to med", medId-minId, result.length);
// very small sets
result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("min,min,F,F,c", 0, result.length);
result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("med,med,F,F,c", 0, result.length);
result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs;
assertEquals("max,max,F,F,c", 0, result.length);
result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("min,min,T,T,c", 1, result.length);
result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T,c", 1, result.length);
result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("max,max,T,T,c", 1, result.length);
result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T,c", 1, result.length);
result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs;
assertEquals("med,med,T,T,c", 1, result.length);
}
public void testRangeQueryRand() throws IOException {
// NOTE: uses index build in *super* setUp
IndexReader reader = IndexReader.open(index);
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(minR);
String maxRP = pad(maxR);
String minRP = pad(signedIndex.minR);
String maxRP = pad(signedIndex.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
Query q = new TermQuery(new Term("body","body"));
// test extremes, bounded on both ends
@ -347,4 +447,104 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
}
/**
* Collating counterpart of testRangeQueryRand: exercises bounded,
* unbounded and degenerate ranges over the padded "rand" field with an
* English collator. Uses the unsigned index because collation of the
* hyphen/minus sign breaks code-point-order assumptions.
*/
public void testRangeQueryRandCollating() throws IOException {
// NOTE: uses index build in *super* setUp
// using the unsigned index because collation seems to ignore hyphens
IndexReader reader = IndexReader.open(unsignedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(unsignedIndex.minR);
String maxRP = pad(unsignedIndex.maxR);
int numDocs = reader.numDocs();
assertEquals("num of docs", numDocs, 1+ maxId - minId);
ScoreDoc[] result;
Collator c = Collator.getInstance(Locale.ENGLISH);
// test extremes, bounded on both ends
result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("find all", numDocs, result.length);
result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs;
assertEquals("all but biggest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("all but smallest", numDocs-1, result.length);
result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("all but extremes", numDocs-2, result.length);
// unbounded
result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("smallest and up", numDocs, result.length);
result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("biggest and down", numDocs, result.length);
result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs;
assertEquals("not smallest, but up", numDocs-1, result.length);
result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("not biggest, but down", numDocs-1, result.length);
// very small sets
result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("min,min,F,F,c", 0, result.length);
result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs;
assertEquals("max,max,F,F,c", 0, result.length);
result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("min,min,T,T,c", 1, result.length);
result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs;
assertEquals("nul,min,F,T,c", 1, result.length);
result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs;
assertEquals("max,max,T,T,c", 1, result.length);
result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs;
assertEquals("max,nul,T,T,c", 1, result.length);
}
/**
 * Verifies that a collated range query uses Collator order rather than raw
 * Unicode code-point order: a single Farsi term is indexed, and a range that
 * would contain it under Unicode ordering must exclude it under the
 * (Arabic-locale) Collator, while a range that collates around it must
 * include it.
 */
public void testFarsi() throws Exception {
/* build an index */
// Small throwaway RAM index with one document holding the Farsi term.
RAMDirectory farsiIndex = new RAMDirectory();
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("content","\u0633\u0627\u0628",
Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(new Field("body", "body",
Field.Store.YES, Field.Index.NOT_ANALYZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(farsiIndex);
IndexSearcher search = new IndexSearcher(reader);
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator c = Collator.getInstance(new Locale("ar"));
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a ConstantScoreRangeQuery
// with a Farsi Collator (or an Arabic one for the case when Farsi is
// not supported).
ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, result.length);
// This range collates around U+0633, so the term must match.
result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, result.length);
search.close();
// NOTE(review): reader is not closed explicitly; whether search.close()
// closes an externally supplied reader should be confirmed against the
// IndexSearcher javadoc for this Lucene version.
}
}

View File

@ -18,9 +18,16 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.text.Collator;
import java.util.Locale;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.RAMDirectory;
/**
* A basic 'positive' Unit test class for the RangeFilter class.
@ -42,7 +49,7 @@ public class TestRangeFilter extends BaseTestRangeFilter {
public void testRangeFilterId() throws IOException {
IndexReader reader = IndexReader.open(index);
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
int medId = ((maxId - minId) / 2);
@ -122,13 +129,96 @@ public class TestRangeFilter extends BaseTestRangeFilter {
}
/**
 * RangeFilter over the sequential "id" field using the Collator-taking
 * constructor with an English Collator. Filters a constant body-term query
 * and checks hit counts for bounded, half-open and degenerate id ranges
 * against the known id span [minId, maxId] of the signed test index.
 */
public void testRangeFilterIdCollating() throws IOException {
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
Collator c = Collator.getInstance(Locale.ENGLISH);
// medId: midpoint of the id range, used for partial-range checks.
int medId = ((maxId - minId) / 2);
String minIP = pad(minId);
String maxIP = pad(maxId);
String medIP = pad(medId);
int numDocs = reader.numDocs();
// Sanity check: one doc per id in [minId, maxId].
assertEquals("num of docs", numDocs, 1+ maxId - minId);
Hits result;
// Every document contains body:body, so the filter alone determines the
// hit count.
Query q = new TermQuery(new Term("body","body"));
// test id, bounded on both ends
// (T/F are the include-lower/include-upper flags; presumably boolean
// constants inherited from BaseTestRangeFilter -- TODO confirm)
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
assertEquals("find all", numDocs, result.length());
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
assertEquals("all but last", numDocs-1, result.length());
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
assertEquals("all but first", numDocs-1, result.length());
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
assertEquals("all but ends", numDocs-2, result.length());
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
assertEquals("med and up", 1+ maxId-medId, result.length());
result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
assertEquals("up to med", 1+ medId-minId, result.length());
// unbounded id
// A null endpoint leaves that side of the range open.
result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
assertEquals("min and up", numDocs, result.length());
result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
assertEquals("max and down", numDocs, result.length());
result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
assertEquals("not min, but up", numDocs-1, result.length());
result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
assertEquals("not max, but down", numDocs-1, result.length());
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
assertEquals("med and up, not max", maxId-medId, result.length());
result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
assertEquals("not min, up to med", medId-minId, result.length());
// very small sets
// Equal endpoints: exclusive matches nothing, inclusive matches exactly
// the one boundary term.
result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
assertEquals("min,min,F,F", 0, result.length());
result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
assertEquals("med,med,F,F", 0, result.length());
result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
assertEquals("max,max,F,F", 0, result.length());
result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
assertEquals("min,min,T,T", 1, result.length());
result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
assertEquals("nul,min,F,T", 1, result.length());
result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
assertEquals("max,max,T,T", 1, result.length());
result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
assertEquals("max,nul,T,T", 1, result.length());
result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
assertEquals("med,med,T,T", 1, result.length());
}
public void testRangeFilterRand() throws IOException {
IndexReader reader = IndexReader.open(index);
IndexReader reader = IndexReader.open(signedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
String minRP = pad(minR);
String maxRP = pad(maxR);
String minRP = pad(signedIndex.minR);
String maxRP = pad(signedIndex.maxR);
int numDocs = reader.numDocs();
@ -184,4 +274,106 @@ public class TestRangeFilter extends BaseTestRangeFilter {
}
/**
 * RangeFilter over the random-valued "rand" field using the Collator-taking
 * constructor with an English Collator. Mirrors the collated
 * ConstantScoreRangeQuery test: bounded, half-open and degenerate ranges,
 * with hit counts checked against the unsigned test index.
 */
public void testRangeFilterRandCollating() throws IOException {
// using the unsigned index because collation seems to ignore hyphens
IndexReader reader = IndexReader.open(unsignedIndex.index);
IndexSearcher search = new IndexSearcher(reader);
Collator c = Collator.getInstance(Locale.ENGLISH);
// pad(...) normalizes the min/max random terms to a fixed width.
String minRP = pad(unsignedIndex.minR);
String maxRP = pad(unsignedIndex.maxR);
int numDocs = reader.numDocs();
// Sanity check: one doc per id in [minId, maxId].
assertEquals("num of docs", numDocs, 1+ maxId - minId);
Hits result;
// Every document contains body:body, so the filter alone determines the
// hit count.
Query q = new TermQuery(new Term("body","body"));
// test extremes, bounded on both ends
// (T/F are the include-lower/include-upper flags; presumably boolean
// constants inherited from BaseTestRangeFilter -- TODO confirm)
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
assertEquals("find all", numDocs, result.length());
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
assertEquals("all but biggest", numDocs-1, result.length());
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
assertEquals("all but smallest", numDocs-1, result.length());
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
assertEquals("all but extremes", numDocs-2, result.length());
// unbounded
// A null endpoint leaves that side of the range open.
result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
assertEquals("smallest and up", numDocs, result.length());
result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
assertEquals("biggest and down", numDocs, result.length());
result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
assertEquals("not smallest, but up", numDocs-1, result.length());
result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
assertEquals("not biggest, but down", numDocs-1, result.length());
// very small sets
// Equal endpoints: exclusive matches nothing, inclusive matches exactly
// the one boundary term.
result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
assertEquals("min,min,F,F", 0, result.length());
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
assertEquals("max,max,F,F", 0, result.length());
result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
assertEquals("min,min,T,T", 1, result.length());
result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
assertEquals("nul,min,F,T", 1, result.length());
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
assertEquals("max,max,T,T", 1, result.length());
result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
assertEquals("max,nul,T,T", 1, result.length());
}
/**
 * Verifies that a collated RangeFilter uses Collator order rather than raw
 * Unicode code-point order: a single indexed Farsi term must be excluded
 * from a range that contains it only under Unicode ordering, and included
 * in a range that collates around it.
 */
public void testFarsi() throws Exception {
/* build an index */
// Small throwaway RAM index with one document holding the Farsi term.
RAMDirectory farsiIndex = new RAMDirectory();
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
doc.add(new Field("content","\u0633\u0627\u0628",
Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("body", "body",
Field.Store.YES, Field.Index.UN_TOKENIZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
IndexReader reader = IndexReader.open(farsiIndex);
IndexSearcher search = new IndexSearcher(reader);
// Constant query matching the single doc; the filter decides inclusion.
Query q = new TermQuery(new Term("body","body"));
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator collator = Collator.getInstance(new Locale("ar"));
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a RangeFilter with a Farsi
// Collator (or an Arabic one for the case when Farsi is not supported).
Hits result = search.search
(q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
assertEquals("The index Term should not be included.", 0, result.length());
// This range collates around U+0633, so the term must match.
result = search.search
(q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
assertEquals("The index Term should be included.", 1, result.length());
search.close();
// NOTE(review): reader is not closed explicitly; confirm whether
// search.close() releases an externally supplied reader in this version.
}
}

View File

@ -26,6 +26,8 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
import java.util.Locale;
import java.text.Collator;
public class TestRangeQuery extends LuceneTestCase {
@ -130,6 +132,78 @@ public class TestRangeQuery extends LuceneTestCase {
assertFalse("queries with different inclusive are not equal", query.equals(other));
}
/**
 * Exclusive-endpoint RangeQuery built with an English Collator: terms equal
 * to the bounds ("A", "C") must be excluded, so only "B" matches. Also
 * re-runs the query after the index changes (missing "C", then "C" re-added)
 * to confirm the exclusive bound is honored in each case.
 */
public void testExclusiveCollating() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
false, Collator.getInstance(Locale.ENGLISH));
// Bounds exist in the index; only "B" lies strictly inside (A, C).
initializeIndex(new String[] {"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D, only B in range", 1, hits.length);
searcher.close();
// Upper bound absent from the index: result must be unchanged.
initializeIndex(new String[] {"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D, only B in range", 1, hits.length);
searcher.close();
// Re-adding the exclusive upper bound must not add a hit.
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added, still only B in range", 1, hits.length);
searcher.close();
}
/**
 * Inclusive-endpoint RangeQuery built with an English Collator: terms equal
 * to the bounds ("A", "C") are included. Re-runs the query as the index
 * changes (missing "C", then "C" re-added) to confirm the inclusive bound
 * contributes a hit exactly when it is present.
 */
public void testInclusiveCollating() throws Exception {
Query query = new RangeQuery(new Term("content", "A"),
new Term("content", "C"),
true, Collator.getInstance(Locale.ENGLISH));
// Both bounds present: A, B and C all fall in [A, C].
initializeIndex(new String[]{"A", "B", "C", "D"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
searcher.close();
// Upper bound absent: only A and B remain in range.
initializeIndex(new String[]{"A", "B", "D"});
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("A,B,D - A and B in range", 2, hits.length);
searcher.close();
// Re-adding the inclusive upper bound restores the third hit.
addDoc("C");
searcher = new IndexSearcher(dir);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("C added - A, B, C in range", 3, hits.length);
searcher.close();
}
/**
 * Verifies that a collated RangeQuery uses Collator order rather than raw
 * Unicode code-point order: the single indexed Farsi term must be excluded
 * from a range that contains it only under Unicode ordering, and included
 * in a range that collates around it.
 */
public void testFarsi() throws Exception {
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
// characters properly.
Collator collator = Collator.getInstance(new Locale("ar"));
Query query = new RangeQuery(new Term("content", "\u062F"),
new Term("content", "\u0698"),
true, collator);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
// orders the U+0698 character before the U+0633 character, so the single
// index Term below should NOT be returned by a RangeQuery with a Farsi
// Collator (or an Arabic one for the case when Farsi is not supported).
initializeIndex(new String[]{ "\u0633\u0627\u0628"});
IndexSearcher searcher = new IndexSearcher(dir);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should not be included.", 0, hits.length);
// This range collates around U+0633, so the term must match.
query = new RangeQuery(new Term("content", "\u0633"),
new Term("content", "\u0638"),
true, collator);
hits = searcher.search(query, null, 1000).scoreDocs;
assertEquals("The index Term should be included.", 1, hits.length);
searcher.close();
}
private void initializeIndex(String[] values) throws IOException {
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < values.length; i++) {
@ -154,6 +228,3 @@ public class TestRangeQuery extends LuceneTestCase {
docCount++;
}
}