mirror of https://github.com/apache/lucene.git
LUCENE-1279: Add support for Collator to RangeFilter, etc.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@696056 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
31811e9f45
commit
a8c0a8a810
|
@ -283,6 +283,8 @@ New features
|
|||
|
||||
19. LUCENE-1354: Provide programmatic access to CheckIndex (Grant Ingersoll, Mike McCandless)
|
||||
|
||||
20. LUCENE-1279: Add support for Collators to RangeFilter/Query and Query Parser. (Steve Rowe via Grant Ingersoll)
|
||||
|
||||
Optimizations
|
||||
|
||||
1. LUCENE-705: When building a compound file, use
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.0 */
|
||||
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
|
||||
/* JavaCCOptions:STATIC=false */
|
||||
package org.apache.lucene.queryParser;
|
||||
|
||||
/**
|
||||
|
@ -27,14 +28,14 @@ public interface CharStream {
|
|||
|
||||
/**
|
||||
* Returns the column position of the character last read.
|
||||
* @deprecated
|
||||
* @deprecated
|
||||
* @see #getEndColumn
|
||||
*/
|
||||
int getColumn();
|
||||
|
||||
/**
|
||||
* Returns the line number of the character last read.
|
||||
* @deprecated
|
||||
* @deprecated
|
||||
* @see #getEndLine
|
||||
*/
|
||||
int getLine();
|
||||
|
@ -79,7 +80,7 @@ public interface CharStream {
|
|||
char BeginToken() throws java.io.IOException;
|
||||
|
||||
/**
|
||||
* Returns a string made up of characters from the marked token beginning
|
||||
* Returns a string made up of characters from the marked token beginning
|
||||
* to the current buffer position. Implementations have the choice of returning
|
||||
* anything that they want to. For example, for efficiency, one might decide
|
||||
* to just return null, which is a valid implementation.
|
||||
|
@ -108,3 +109,4 @@ public interface CharStream {
|
|||
void Done();
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=32a89423891f765dde472f7ef0e3ef7b (do not edit this line) */
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
|
||||
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
|
||||
/* JavaCCOptions:KEEP_LINE_COL=null */
|
||||
package org.apache.lucene.queryParser;
|
||||
|
||||
/**
|
||||
|
@ -51,6 +52,7 @@ public class ParseException extends Exception {
|
|||
specialConstructor = false;
|
||||
}
|
||||
|
||||
/** Constructor with message. */
|
||||
public ParseException(String message) {
|
||||
super(message);
|
||||
specialConstructor = false;
|
||||
|
@ -105,7 +107,7 @@ public class ParseException extends Exception {
|
|||
maxSize = expectedTokenSequences[i].length;
|
||||
}
|
||||
for (int j = 0; j < expectedTokenSequences[i].length; j++) {
|
||||
expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" ");
|
||||
expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
|
||||
}
|
||||
if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
|
||||
expected.append("...");
|
||||
|
@ -120,8 +122,11 @@ public class ParseException extends Exception {
|
|||
retval += tokenImage[0];
|
||||
break;
|
||||
}
|
||||
retval += " " + tokenImage[tok.kind];
|
||||
retval += " \"";
|
||||
retval += add_escapes(tok.image);
|
||||
tok = tok.next;
|
||||
retval += " \"";
|
||||
tok = tok.next;
|
||||
}
|
||||
retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
|
||||
retval += "." + eol;
|
||||
|
@ -138,7 +143,7 @@ public class ParseException extends Exception {
|
|||
* The end of line string for this machine.
|
||||
*/
|
||||
protected String eol = System.getProperty("line.separator", "\n");
|
||||
|
||||
|
||||
/**
|
||||
* Used to convert raw characters to their escaped version
|
||||
* when these raw version cannot be used as part of an ASCII
|
||||
|
@ -190,3 +195,4 @@ public class ParseException extends Exception {
|
|||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=c7631a240f7446940695eac31d9483ca (do not edit this line) */
|
||||
|
|
|
@ -4,6 +4,7 @@ package org.apache.lucene.queryParser;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
@ -132,6 +133,10 @@ public class QueryParser implements QueryParserConstants {
|
|||
// maps field names to date resolutions
|
||||
Map fieldToDateResolution = null;
|
||||
|
||||
// The collator to use when determining range inclusion,
|
||||
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
|
||||
Collator rangeCollator = null;
|
||||
|
||||
/** The default operator for parsing queries.
|
||||
* Use {@link QueryParser#setDefaultOperator} to change it.
|
||||
*/
|
||||
|
@ -408,6 +413,35 @@ public class QueryParser implements QueryParserConstants {
|
|||
return resolution;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the collator used to determine index term inclusion in ranges
|
||||
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
|
||||
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
|
||||
* value.)
|
||||
* <p/>
|
||||
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
|
||||
* collator using this method will cause every single index Term in the
|
||||
* Field referenced by lowerTerm and/or upperTerm to be examined.
|
||||
* Depending on the number of index Terms in this Field, the operation could
|
||||
* be very slow.
|
||||
*
|
||||
* @param rc the collator to use when constructing RangeQuerys
|
||||
* and ConstantScoreRangeQuerys
|
||||
*/
|
||||
public void setRangeCollator(Collator rc) {
|
||||
rangeCollator = rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the collator used to determine index term inclusion in ranges
|
||||
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
|
||||
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
|
||||
* value.)
|
||||
*/
|
||||
public Collator getRangeCollator() {
|
||||
return rangeCollator;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
|
||||
*/
|
||||
|
@ -711,11 +745,12 @@ public class QueryParser implements QueryParserConstants {
|
|||
{
|
||||
return new RangeQuery(new Term(field, part1),
|
||||
new Term(field, part2),
|
||||
inclusive);
|
||||
inclusive, rangeCollator);
|
||||
}
|
||||
else
|
||||
{
|
||||
return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
|
||||
return new ConstantScoreRangeQuery
|
||||
(field, part1, part2, inclusive, inclusive, rangeCollator);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1448,26 +1483,26 @@ public class QueryParser implements QueryParserConstants {
|
|||
throw new Error("Missing return statement in function");
|
||||
}
|
||||
|
||||
final private boolean jj_2_1(int xla) {
|
||||
private boolean jj_2_1(int xla) {
|
||||
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
||||
try { return !jj_3_1(); }
|
||||
catch(LookaheadSuccess ls) { return true; }
|
||||
finally { jj_save(0, xla); }
|
||||
}
|
||||
|
||||
final private boolean jj_3R_3() {
|
||||
private boolean jj_3R_3() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
final private boolean jj_3R_2() {
|
||||
private boolean jj_3R_2() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
final private boolean jj_3_1() {
|
||||
private boolean jj_3_1() {
|
||||
Token xsp;
|
||||
xsp = jj_scanpos;
|
||||
if (jj_3R_2()) {
|
||||
|
@ -1477,31 +1512,34 @@ public class QueryParser implements QueryParserConstants {
|
|||
return false;
|
||||
}
|
||||
|
||||
/** Generated Token Manager. */
|
||||
public QueryParserTokenManager token_source;
|
||||
public Token token, jj_nt;
|
||||
/** Current token. */
|
||||
public Token token;
|
||||
/** Next token. */
|
||||
public Token jj_nt;
|
||||
private int jj_ntk;
|
||||
private Token jj_scanpos, jj_lastpos;
|
||||
private int jj_la;
|
||||
public boolean lookingAhead = false;
|
||||
private boolean jj_semLA;
|
||||
private int jj_gen;
|
||||
final private int[] jj_la1 = new int[23];
|
||||
static private int[] jj_la1_0;
|
||||
static private int[] jj_la1_1;
|
||||
static {
|
||||
jj_la1_0();
|
||||
jj_la1_1();
|
||||
jj_la1_init_0();
|
||||
jj_la1_init_1();
|
||||
}
|
||||
private static void jj_la1_0() {
|
||||
private static void jj_la1_init_0() {
|
||||
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0x3ed3f00,0x90000,0x20000,0x3ed2000,0x2690000,0x100000,0x100000,0x20000,0x30000000,0x4000000,0x30000000,0x20000,0x0,0x40000000,0x0,0x20000,0x100000,0x20000,0x3ed0000,};
|
||||
}
|
||||
private static void jj_la1_1() {
|
||||
private static void jj_la1_init_1() {
|
||||
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x0,0x3,0x0,0x0,0x0,0x0,};
|
||||
}
|
||||
final private JJCalls[] jj_2_rtns = new JJCalls[1];
|
||||
private boolean jj_rescan = false;
|
||||
private int jj_gc = 0;
|
||||
|
||||
/** Constructor with user supplied CharStream. */
|
||||
public QueryParser(CharStream stream) {
|
||||
token_source = new QueryParserTokenManager(stream);
|
||||
token = new Token();
|
||||
|
@ -1511,6 +1549,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(CharStream stream) {
|
||||
token_source.ReInit(stream);
|
||||
token = new Token();
|
||||
|
@ -1520,6 +1559,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
/** Constructor with generated Token Manager. */
|
||||
public QueryParser(QueryParserTokenManager tm) {
|
||||
token_source = tm;
|
||||
token = new Token();
|
||||
|
@ -1529,6 +1569,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(QueryParserTokenManager tm) {
|
||||
token_source = tm;
|
||||
token = new Token();
|
||||
|
@ -1538,7 +1579,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
final private Token jj_consume_token(int kind) throws ParseException {
|
||||
private Token jj_consume_token(int kind) throws ParseException {
|
||||
Token oldToken;
|
||||
if ((oldToken = token).next != null) token = token.next;
|
||||
else token = token.next = token_source.getNextToken();
|
||||
|
@ -1564,7 +1605,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
|
||||
static private final class LookaheadSuccess extends java.lang.Error { }
|
||||
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
|
||||
final private boolean jj_scan_token(int kind) {
|
||||
private boolean jj_scan_token(int kind) {
|
||||
if (jj_scanpos == jj_lastpos) {
|
||||
jj_la--;
|
||||
if (jj_scanpos.next == null) {
|
||||
|
@ -1585,6 +1626,8 @@ public class QueryParser implements QueryParserConstants {
|
|||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Get the next Token. */
|
||||
final public Token getNextToken() {
|
||||
if (token.next != null) token = token.next;
|
||||
else token = token.next = token_source.getNextToken();
|
||||
|
@ -1593,8 +1636,9 @@ public class QueryParser implements QueryParserConstants {
|
|||
return token;
|
||||
}
|
||||
|
||||
/** Get the specific Token. */
|
||||
final public Token getToken(int index) {
|
||||
Token t = lookingAhead ? jj_scanpos : token;
|
||||
Token t = token;
|
||||
for (int i = 0; i < index; i++) {
|
||||
if (t.next != null) t = t.next;
|
||||
else t = t.next = token_source.getNextToken();
|
||||
|
@ -1602,14 +1646,14 @@ public class QueryParser implements QueryParserConstants {
|
|||
return t;
|
||||
}
|
||||
|
||||
final private int jj_ntk() {
|
||||
private int jj_ntk() {
|
||||
if ((jj_nt=token.next) == null)
|
||||
return (jj_ntk = (token.next=token_source.getNextToken()).kind);
|
||||
else
|
||||
return (jj_ntk = jj_nt.kind);
|
||||
}
|
||||
|
||||
private java.util.Vector jj_expentries = new java.util.Vector();
|
||||
private java.util.List jj_expentries = new java.util.ArrayList();
|
||||
private int[] jj_expentry;
|
||||
private int jj_kind = -1;
|
||||
private int[] jj_lasttokens = new int[100];
|
||||
|
@ -1624,31 +1668,26 @@ public class QueryParser implements QueryParserConstants {
|
|||
for (int i = 0; i < jj_endpos; i++) {
|
||||
jj_expentry[i] = jj_lasttokens[i];
|
||||
}
|
||||
boolean exists = false;
|
||||
for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) {
|
||||
int[] oldentry = (int[])(e.nextElement());
|
||||
jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) {
|
||||
int[] oldentry = (int[])(it.next());
|
||||
if (oldentry.length == jj_expentry.length) {
|
||||
exists = true;
|
||||
for (int i = 0; i < jj_expentry.length; i++) {
|
||||
if (oldentry[i] != jj_expentry[i]) {
|
||||
exists = false;
|
||||
break;
|
||||
continue jj_entries_loop;
|
||||
}
|
||||
}
|
||||
if (exists) break;
|
||||
jj_expentries.add(jj_expentry);
|
||||
break jj_entries_loop;
|
||||
}
|
||||
}
|
||||
if (!exists) jj_expentries.addElement(jj_expentry);
|
||||
if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
|
||||
}
|
||||
}
|
||||
|
||||
/** Generate ParseException. */
|
||||
public ParseException generateParseException() {
|
||||
jj_expentries.removeAllElements();
|
||||
jj_expentries.clear();
|
||||
boolean[] la1tokens = new boolean[34];
|
||||
for (int i = 0; i < 34; i++) {
|
||||
la1tokens[i] = false;
|
||||
}
|
||||
if (jj_kind >= 0) {
|
||||
la1tokens[jj_kind] = true;
|
||||
jj_kind = -1;
|
||||
|
@ -1669,7 +1708,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
if (la1tokens[i]) {
|
||||
jj_expentry = new int[1];
|
||||
jj_expentry[0] = i;
|
||||
jj_expentries.addElement(jj_expentry);
|
||||
jj_expentries.add(jj_expentry);
|
||||
}
|
||||
}
|
||||
jj_endpos = 0;
|
||||
|
@ -1677,18 +1716,20 @@ public class QueryParser implements QueryParserConstants {
|
|||
jj_add_error_token(0, 0);
|
||||
int[][] exptokseq = new int[jj_expentries.size()][];
|
||||
for (int i = 0; i < jj_expentries.size(); i++) {
|
||||
exptokseq[i] = (int[])jj_expentries.elementAt(i);
|
||||
exptokseq[i] = (int[])jj_expentries.get(i);
|
||||
}
|
||||
return new ParseException(token, exptokseq, tokenImage);
|
||||
}
|
||||
|
||||
/** Enable tracing. */
|
||||
final public void enable_tracing() {
|
||||
}
|
||||
|
||||
/** Disable tracing. */
|
||||
final public void disable_tracing() {
|
||||
}
|
||||
|
||||
final private void jj_rescan_token() {
|
||||
private void jj_rescan_token() {
|
||||
jj_rescan = true;
|
||||
for (int i = 0; i < 1; i++) {
|
||||
try {
|
||||
|
@ -1707,7 +1748,7 @@ public class QueryParser implements QueryParserConstants {
|
|||
jj_rescan = false;
|
||||
}
|
||||
|
||||
final private void jj_save(int index, int xla) {
|
||||
private void jj_save(int index, int xla) {
|
||||
JJCalls p = jj_2_rtns[index];
|
||||
while (p.gen > jj_gen) {
|
||||
if (p.next == null) { p = p.next = new JJCalls(); break; }
|
||||
|
|
|
@ -28,6 +28,7 @@ package org.apache.lucene.queryParser;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
@ -159,6 +160,10 @@ public class QueryParser {
|
|||
// maps field names to date resolutions
|
||||
Map fieldToDateResolution = null;
|
||||
|
||||
// The collator to use when determining range inclusion,
|
||||
// for use when constructing RangeQuerys and ConstantScoreRangeQuerys.
|
||||
Collator rangeCollator = null;
|
||||
|
||||
/** The default operator for parsing queries.
|
||||
* Use {@link QueryParser#setDefaultOperator} to change it.
|
||||
*/
|
||||
|
@ -434,6 +439,35 @@ public class QueryParser {
|
|||
|
||||
return resolution;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the collator used to determine index term inclusion in ranges
|
||||
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
|
||||
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
|
||||
* value.)
|
||||
* <p/>
|
||||
* <strong>WARNING:</strong> Setting the rangeCollator to a non-null
|
||||
* collator using this method will cause every single index Term in the
|
||||
* Field referenced by lowerTerm and/or upperTerm to be examined.
|
||||
* Depending on the number of index Terms in this Field, the operation could
|
||||
* be very slow.
|
||||
*
|
||||
* @param rc the collator to use when constructing RangeQuerys
|
||||
* and ConstantScoreRangeQuerys
|
||||
*/
|
||||
public void setRangeCollator(Collator rc) {
|
||||
rangeCollator = rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the collator used to determine index term inclusion in ranges
|
||||
* specified either for ConstantScoreRangeQuerys or RangeQuerys (if
|
||||
* {@link #setUseOldRangeQuery(boolean)} is called with a <code>true</code>
|
||||
* value.)
|
||||
*/
|
||||
public Collator getRangeCollator() {
|
||||
return rangeCollator;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
|
||||
|
@ -738,11 +772,12 @@ public class QueryParser {
|
|||
{
|
||||
return new RangeQuery(new Term(field, part1),
|
||||
new Term(field, part2),
|
||||
inclusive);
|
||||
inclusive, rangeCollator);
|
||||
}
|
||||
else
|
||||
{
|
||||
return new ConstantScoreRangeQuery(field,part1,part2,inclusive,inclusive);
|
||||
return new ConstantScoreRangeQuery
|
||||
(field, part1, part2, inclusive, inclusive, rangeCollator);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,47 +1,90 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */
|
||||
package org.apache.lucene.queryParser;
|
||||
|
||||
|
||||
/**
|
||||
* Token literal values and constants.
|
||||
* Generated by org.javacc.parser.OtherFilesGen#start()
|
||||
*/
|
||||
public interface QueryParserConstants {
|
||||
|
||||
/** End of File. */
|
||||
int EOF = 0;
|
||||
/** RegularExpression Id. */
|
||||
int _NUM_CHAR = 1;
|
||||
/** RegularExpression Id. */
|
||||
int _ESCAPED_CHAR = 2;
|
||||
/** RegularExpression Id. */
|
||||
int _TERM_START_CHAR = 3;
|
||||
/** RegularExpression Id. */
|
||||
int _TERM_CHAR = 4;
|
||||
/** RegularExpression Id. */
|
||||
int _WHITESPACE = 5;
|
||||
/** RegularExpression Id. */
|
||||
int _QUOTED_CHAR = 6;
|
||||
/** RegularExpression Id. */
|
||||
int AND = 8;
|
||||
/** RegularExpression Id. */
|
||||
int OR = 9;
|
||||
/** RegularExpression Id. */
|
||||
int NOT = 10;
|
||||
/** RegularExpression Id. */
|
||||
int PLUS = 11;
|
||||
/** RegularExpression Id. */
|
||||
int MINUS = 12;
|
||||
/** RegularExpression Id. */
|
||||
int LPAREN = 13;
|
||||
/** RegularExpression Id. */
|
||||
int RPAREN = 14;
|
||||
/** RegularExpression Id. */
|
||||
int COLON = 15;
|
||||
/** RegularExpression Id. */
|
||||
int STAR = 16;
|
||||
/** RegularExpression Id. */
|
||||
int CARAT = 17;
|
||||
/** RegularExpression Id. */
|
||||
int QUOTED = 18;
|
||||
/** RegularExpression Id. */
|
||||
int TERM = 19;
|
||||
/** RegularExpression Id. */
|
||||
int FUZZY_SLOP = 20;
|
||||
/** RegularExpression Id. */
|
||||
int PREFIXTERM = 21;
|
||||
/** RegularExpression Id. */
|
||||
int WILDTERM = 22;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_START = 23;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_START = 24;
|
||||
/** RegularExpression Id. */
|
||||
int NUMBER = 25;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_TO = 26;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_END = 27;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_QUOTED = 28;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEIN_GOOP = 29;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_TO = 30;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_END = 31;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_QUOTED = 32;
|
||||
/** RegularExpression Id. */
|
||||
int RANGEEX_GOOP = 33;
|
||||
|
||||
/** Lexical state. */
|
||||
int Boost = 0;
|
||||
/** Lexical state. */
|
||||
int RangeEx = 1;
|
||||
/** Lexical state. */
|
||||
int RangeIn = 2;
|
||||
/** Lexical state. */
|
||||
int DEFAULT = 3;
|
||||
|
||||
/** Literal token values. */
|
||||
String[] tokenImage = {
|
||||
"<EOF>",
|
||||
"<_NUM_CHAR>",
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.queryParser;
|
|||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.Collator;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
|
@ -30,9 +31,13 @@ import org.apache.lucene.search.TermQuery;
|
|||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.Parameter;
|
||||
|
||||
/** Token Manager. */
|
||||
public class QueryParserTokenManager implements QueryParserConstants
|
||||
{
|
||||
|
||||
/** Debug output. */
|
||||
public java.io.PrintStream debugStream = System.out;
|
||||
/** Set debug output. */
|
||||
public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
|
||||
private final int jjStopStringLiteralDfa_3(int pos, long active0)
|
||||
{
|
||||
|
@ -46,21 +51,13 @@ private final int jjStartNfa_3(int pos, long active0)
|
|||
{
|
||||
return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1);
|
||||
}
|
||||
private final int jjStopAtPos(int pos, int kind)
|
||||
private int jjStopAtPos(int pos, int kind)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
return pos + 1;
|
||||
}
|
||||
private final int jjStartNfaWithStates_3(int pos, int kind, int state)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_3(state, pos + 1);
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa0_3()
|
||||
private int jjMoveStringLiteralDfa0_3()
|
||||
{
|
||||
switch(curChar)
|
||||
{
|
||||
|
@ -86,35 +83,13 @@ private final int jjMoveStringLiteralDfa0_3()
|
|||
return jjMoveNfa_3(0, 0);
|
||||
}
|
||||
}
|
||||
private final void jjCheckNAdd(int state)
|
||||
private int jjStartNfaWithStates_3(int pos, int kind, int state)
|
||||
{
|
||||
if (jjrounds[state] != jjround)
|
||||
{
|
||||
jjstateSet[jjnewStateCnt++] = state;
|
||||
jjrounds[state] = jjround;
|
||||
}
|
||||
}
|
||||
private final void jjAddStates(int start, int end)
|
||||
{
|
||||
do {
|
||||
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
|
||||
} while (start++ != end);
|
||||
}
|
||||
private final void jjCheckNAddTwoStates(int state1, int state2)
|
||||
{
|
||||
jjCheckNAdd(state1);
|
||||
jjCheckNAdd(state2);
|
||||
}
|
||||
private final void jjCheckNAddStates(int start, int end)
|
||||
{
|
||||
do {
|
||||
jjCheckNAdd(jjnextStates[start]);
|
||||
} while (start++ != end);
|
||||
}
|
||||
private final void jjCheckNAddStates(int start)
|
||||
{
|
||||
jjCheckNAdd(jjnextStates[start]);
|
||||
jjCheckNAdd(jjnextStates[start + 1]);
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_3(state, pos + 1);
|
||||
}
|
||||
static final long[] jjbitVec0 = {
|
||||
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
|
||||
|
@ -122,14 +97,13 @@ static final long[] jjbitVec0 = {
|
|||
static final long[] jjbitVec2 = {
|
||||
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
|
||||
};
|
||||
private final int jjMoveNfa_3(int startState, int curPos)
|
||||
private int jjMoveNfa_3(int startState, int curPos)
|
||||
{
|
||||
int[] nextStates;
|
||||
int startsAt = 0;
|
||||
jjnewStateCnt = 36;
|
||||
int i = 1;
|
||||
jjstateSet[0] = startState;
|
||||
int j, kind = 0x7fffffff;
|
||||
int kind = 0x7fffffff;
|
||||
for (;;)
|
||||
{
|
||||
if (++jjround == 0x7fffffff)
|
||||
|
@ -137,7 +111,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
if (curChar < 64)
|
||||
{
|
||||
long l = 1L << curChar;
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -276,7 +250,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
else if (curChar < 128)
|
||||
{
|
||||
long l = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -450,7 +424,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
|
|||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
long l2 = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -545,15 +519,7 @@ private final int jjStartNfa_1(int pos, long active0)
|
|||
{
|
||||
return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1);
|
||||
}
|
||||
private final int jjStartNfaWithStates_1(int pos, int kind, int state)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_1(state, pos + 1);
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa0_1()
|
||||
private int jjMoveStringLiteralDfa0_1()
|
||||
{
|
||||
switch(curChar)
|
||||
{
|
||||
|
@ -565,7 +531,7 @@ private final int jjMoveStringLiteralDfa0_1()
|
|||
return jjMoveNfa_1(0, 0);
|
||||
}
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa1_1(long active0)
|
||||
private int jjMoveStringLiteralDfa1_1(long active0)
|
||||
{
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) {
|
||||
|
@ -583,14 +549,21 @@ private final int jjMoveStringLiteralDfa1_1(long active0)
|
|||
}
|
||||
return jjStartNfa_1(0, active0);
|
||||
}
|
||||
private final int jjMoveNfa_1(int startState, int curPos)
|
||||
private int jjStartNfaWithStates_1(int pos, int kind, int state)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_1(state, pos + 1);
|
||||
}
|
||||
private int jjMoveNfa_1(int startState, int curPos)
|
||||
{
|
||||
int[] nextStates;
|
||||
int startsAt = 0;
|
||||
jjnewStateCnt = 7;
|
||||
int i = 1;
|
||||
jjstateSet[0] = startState;
|
||||
int j, kind = 0x7fffffff;
|
||||
int kind = 0x7fffffff;
|
||||
for (;;)
|
||||
{
|
||||
if (++jjround == 0x7fffffff)
|
||||
|
@ -598,7 +571,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
if (curChar < 64)
|
||||
{
|
||||
long l = 1L << curChar;
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -647,7 +620,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
else if (curChar < 128)
|
||||
{
|
||||
long l = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -677,7 +650,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
long l2 = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -710,18 +683,17 @@ private final int jjMoveNfa_1(int startState, int curPos)
|
|||
catch(java.io.IOException e) { return curPos; }
|
||||
}
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa0_0()
|
||||
private int jjMoveStringLiteralDfa0_0()
|
||||
{
|
||||
return jjMoveNfa_0(0, 0);
|
||||
}
|
||||
private final int jjMoveNfa_0(int startState, int curPos)
|
||||
private int jjMoveNfa_0(int startState, int curPos)
|
||||
{
|
||||
int[] nextStates;
|
||||
int startsAt = 0;
|
||||
jjnewStateCnt = 3;
|
||||
int i = 1;
|
||||
jjstateSet[0] = startState;
|
||||
int j, kind = 0x7fffffff;
|
||||
int kind = 0x7fffffff;
|
||||
for (;;)
|
||||
{
|
||||
if (++jjround == 0x7fffffff)
|
||||
|
@ -729,7 +701,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
|
|||
if (curChar < 64)
|
||||
{
|
||||
long l = 1L << curChar;
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -758,7 +730,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
|
|||
else if (curChar < 128)
|
||||
{
|
||||
long l = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -773,7 +745,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
|
|||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
long l2 = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -813,15 +785,7 @@ private final int jjStartNfa_2(int pos, long active0)
|
|||
{
|
||||
return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1);
|
||||
}
|
||||
private final int jjStartNfaWithStates_2(int pos, int kind, int state)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_2(state, pos + 1);
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa0_2()
|
||||
private int jjMoveStringLiteralDfa0_2()
|
||||
{
|
||||
switch(curChar)
|
||||
{
|
||||
|
@ -833,7 +797,7 @@ private final int jjMoveStringLiteralDfa0_2()
|
|||
return jjMoveNfa_2(0, 0);
|
||||
}
|
||||
}
|
||||
private final int jjMoveStringLiteralDfa1_2(long active0)
|
||||
private int jjMoveStringLiteralDfa1_2(long active0)
|
||||
{
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) {
|
||||
|
@ -851,14 +815,21 @@ private final int jjMoveStringLiteralDfa1_2(long active0)
|
|||
}
|
||||
return jjStartNfa_2(0, active0);
|
||||
}
|
||||
private final int jjMoveNfa_2(int startState, int curPos)
|
||||
private int jjStartNfaWithStates_2(int pos, int kind, int state)
|
||||
{
|
||||
jjmatchedKind = kind;
|
||||
jjmatchedPos = pos;
|
||||
try { curChar = input_stream.readChar(); }
|
||||
catch(java.io.IOException e) { return pos + 1; }
|
||||
return jjMoveNfa_2(state, pos + 1);
|
||||
}
|
||||
private int jjMoveNfa_2(int startState, int curPos)
|
||||
{
|
||||
int[] nextStates;
|
||||
int startsAt = 0;
|
||||
jjnewStateCnt = 7;
|
||||
int i = 1;
|
||||
jjstateSet[0] = startState;
|
||||
int j, kind = 0x7fffffff;
|
||||
int kind = 0x7fffffff;
|
||||
for (;;)
|
||||
{
|
||||
if (++jjround == 0x7fffffff)
|
||||
|
@ -866,7 +837,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
if (curChar < 64)
|
||||
{
|
||||
long l = 1L << curChar;
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -915,7 +886,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
else if (curChar < 128)
|
||||
{
|
||||
long l = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -945,7 +916,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
|
|||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
long l2 = 1L << (curChar & 077);
|
||||
MatchLoop: do
|
||||
do
|
||||
{
|
||||
switch(jjstateSet[--i])
|
||||
{
|
||||
|
@ -988,22 +959,28 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
|
|||
{
|
||||
case 0:
|
||||
return ((jjbitVec2[i2] & l2) != 0L);
|
||||
default :
|
||||
default :
|
||||
if ((jjbitVec0[i1] & l1) != 0L)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Token literal values. */
|
||||
public static final String[] jjstrLiteralImages = {
|
||||
"", null, null, null, null, null, null, null, null, null, null, "\53", "\55",
|
||||
"\50", "\51", "\72", "\52", "\136", null, null, null, null, null, "\133", "\173",
|
||||
null, "\124\117", "\135", null, null, "\124\117", "\175", null, null, };
|
||||
|
||||
/** Lexer state names. */
|
||||
public static final String[] lexStateNames = {
|
||||
"Boost",
|
||||
"RangeEx",
|
||||
"RangeIn",
|
||||
"DEFAULT",
|
||||
"Boost",
|
||||
"RangeEx",
|
||||
"RangeIn",
|
||||
"DEFAULT",
|
||||
};
|
||||
|
||||
/** Lex State array. */
|
||||
public static final int[] jjnewLexState = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1,
|
||||
3, -1, 3, -1, -1, -1, 3, -1, -1,
|
||||
|
@ -1018,13 +995,18 @@ protected CharStream input_stream;
|
|||
private final int[] jjrounds = new int[36];
|
||||
private final int[] jjstateSet = new int[72];
|
||||
protected char curChar;
|
||||
/** Creates a token manager that takes its input from {@code stream}. */
public QueryParserTokenManager(CharStream stream)
{
   this.input_stream = stream;
}
|
||||
|
||||
/**
 * Creates a token manager over {@code stream} and then switches it to the
 * lexical state {@code lexState} by calling {@link #SwitchTo(int)}.
 */
public QueryParserTokenManager(CharStream stream, int lexState)
{
   this(stream);
   this.SwitchTo(lexState);
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(CharStream stream)
|
||||
{
|
||||
jjmatchedPos = jjnewStateCnt = 0;
|
||||
|
@ -1032,18 +1014,22 @@ public void ReInit(CharStream stream)
|
|||
input_stream = stream;
|
||||
ReInitRounds();
|
||||
}
|
||||
private final void ReInitRounds()
|
||||
private void ReInitRounds()
|
||||
{
|
||||
int i;
|
||||
jjround = 0x80000001;
|
||||
for (i = 36; i-- > 0;)
|
||||
jjrounds[i] = 0x80000000;
|
||||
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(CharStream stream, int lexState)
|
||||
{
|
||||
ReInit(stream);
|
||||
SwitchTo(lexState);
|
||||
}
|
||||
|
||||
/** Switch to specified lex state. */
|
||||
public void SwitchTo(int lexState)
|
||||
{
|
||||
if (lexState >= 4 || lexState < 0)
|
||||
|
@ -1054,14 +1040,25 @@ public void SwitchTo(int lexState)
|
|||
|
||||
protected Token jjFillToken()
|
||||
{
|
||||
Token t = Token.newToken(jjmatchedKind);
|
||||
t.kind = jjmatchedKind;
|
||||
final Token t;
|
||||
final String curTokenImage;
|
||||
final int beginLine;
|
||||
final int endLine;
|
||||
final int beginColumn;
|
||||
final int endColumn;
|
||||
String im = jjstrLiteralImages[jjmatchedKind];
|
||||
t.image = (im == null) ? input_stream.GetImage() : im;
|
||||
t.beginLine = input_stream.getBeginLine();
|
||||
t.beginColumn = input_stream.getBeginColumn();
|
||||
t.endLine = input_stream.getEndLine();
|
||||
t.endColumn = input_stream.getEndColumn();
|
||||
curTokenImage = (im == null) ? input_stream.GetImage() : im;
|
||||
beginLine = input_stream.getBeginLine();
|
||||
beginColumn = input_stream.getBeginColumn();
|
||||
endLine = input_stream.getEndLine();
|
||||
endColumn = input_stream.getEndColumn();
|
||||
t = Token.newToken(jjmatchedKind, curTokenImage);
|
||||
|
||||
t.beginLine = beginLine;
|
||||
t.endLine = endLine;
|
||||
t.beginColumn = beginColumn;
|
||||
t.endColumn = endColumn;
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
|
@ -1072,22 +1069,21 @@ int jjround;
|
|||
int jjmatchedPos;
|
||||
int jjmatchedKind;
|
||||
|
||||
/** Get the next Token. */
|
||||
public Token getNextToken()
|
||||
{
|
||||
int kind;
|
||||
Token specialToken = null;
|
||||
Token matchedToken;
|
||||
int curPos = 0;
|
||||
|
||||
EOFLoop :
|
||||
for (;;)
|
||||
{
|
||||
try
|
||||
{
|
||||
{
|
||||
try
|
||||
{
|
||||
curChar = input_stream.BeginToken();
|
||||
}
|
||||
}
|
||||
catch(java.io.IOException e)
|
||||
{
|
||||
{
|
||||
jjmatchedKind = 0;
|
||||
matchedToken = jjFillToken();
|
||||
return matchedToken;
|
||||
|
@ -1157,4 +1153,31 @@ public Token getNextToken()
|
|||
}
|
||||
}
|
||||
|
||||
private void jjCheckNAdd(int state)
|
||||
{
|
||||
if (jjrounds[state] != jjround)
|
||||
{
|
||||
jjstateSet[jjnewStateCnt++] = state;
|
||||
jjrounds[state] = jjround;
|
||||
}
|
||||
}
|
||||
private void jjAddStates(int start, int end)
|
||||
{
|
||||
do {
|
||||
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
|
||||
} while (start++ != end);
|
||||
}
|
||||
private void jjCheckNAddTwoStates(int state1, int state2)
|
||||
{
|
||||
jjCheckNAdd(state1);
|
||||
jjCheckNAdd(state2);
|
||||
}
|
||||
|
||||
private void jjCheckNAddStates(int start, int end)
|
||||
{
|
||||
do {
|
||||
jjCheckNAdd(jjnextStates[start]);
|
||||
} while (start++ != end);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
|
||||
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
|
||||
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
|
||||
package org.apache.lucene.queryParser;
|
||||
|
||||
/**
|
||||
|
@ -14,12 +15,14 @@ public class Token {
|
|||
*/
|
||||
public int kind;
|
||||
|
||||
/**
|
||||
* beginLine and beginColumn describe the position of the first character
|
||||
* of this token; endLine and endColumn describe the position of the
|
||||
* last character of this token.
|
||||
*/
|
||||
public int beginLine, beginColumn, endLine, endColumn;
|
||||
/** The line number of the first character of this Token. */
|
||||
public int beginLine;
|
||||
/** The column number of the first character of this Token. */
|
||||
public int beginColumn;
|
||||
/** The line number of the last character of this Token. */
|
||||
public int endLine;
|
||||
/** The column number of the last character of this Token. */
|
||||
public int endColumn;
|
||||
|
||||
/**
|
||||
* The string image of the token.
|
||||
|
@ -50,6 +53,40 @@ public class Token {
|
|||
*/
|
||||
public Token specialToken;
|
||||
|
||||
/**
|
||||
* An optional attribute value of the Token.
|
||||
* Tokens which are not used as syntactic sugar will often contain
|
||||
* meaningful values that will be used later on by the compiler or
|
||||
* interpreter. This attribute value is often different from the image.
|
||||
* Any subclass of Token that actually wants to return a non-null value can
|
||||
* override this method as appropriate.
|
||||
*/
|
||||
public Object getValue() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * No-argument constructor; leaves every field at its default value.
 */
public Token()
{
  super();
}
|
||||
|
||||
/**
 * Constructs a new token of the specified kind, delegating to
 * {@link #Token(int, String)} with a {@code null} image.
 */
public Token(int kind)
{
  this(kind, (String) null);
}
|
||||
|
||||
/**
 * Constructs a new token with the specified kind and image.
 */
public Token(int kind, String image)
{
  this.image = image;
  this.kind = kind;
}
|
||||
|
||||
/**
|
||||
* Returns the image.
|
||||
*/
|
||||
|
@ -63,19 +100,25 @@ public class Token {
|
|||
* can create and return subclass objects based on the value of ofKind.
|
||||
* Simply add the cases to the switch for all those special cases.
|
||||
* For example, if you have a subclass of Token called IDToken that
|
||||
* you want to create if ofKind is ID, simlpy add something like :
|
||||
* you want to create if ofKind is ID, simply add something like :
|
||||
*
|
||||
* case MyParserConstants.ID : return new IDToken();
|
||||
* case MyParserConstants.ID : return new IDToken(ofKind, image);
|
||||
*
|
||||
* to the following switch statement. Then you can cast matchedToken
|
||||
* variable to the appropriate type and use it in your lexical actions.
|
||||
* variable to the appropriate type and use it in your lexical actions.
|
||||
*/
|
||||
public static final Token newToken(int ofKind)
|
||||
public static Token newToken(int ofKind, String image)
|
||||
{
|
||||
switch(ofKind)
|
||||
{
|
||||
default : return new Token();
|
||||
default : return new Token(ofKind, image);
|
||||
}
|
||||
}
|
||||
|
||||
public static Token newToken(int ofKind)
|
||||
{
|
||||
return newToken(ofKind, null);
|
||||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=c147cc166a7cf8812c7c39bc8c5eb868 (do not edit this line) */
|
||||
|
|
|
@ -1,19 +1,22 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
|
||||
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
|
||||
/* JavaCCOptions: */
|
||||
package org.apache.lucene.queryParser;
|
||||
|
||||
/** Token Manager Error. */
|
||||
public class TokenMgrError extends Error
|
||||
{
|
||||
|
||||
/*
|
||||
* Ordinals for various reasons why an Error of this type can be thrown.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Lexical error occured.
|
||||
* Lexical error occurred.
|
||||
*/
|
||||
static final int LEXICAL_ERROR = 0;
|
||||
|
||||
/**
|
||||
* An attempt wass made to create a second instance of a static token manager.
|
||||
* An attempt was made to create a second instance of a static token manager.
|
||||
*/
|
||||
static final int STATIC_LEXER_ERROR = 1;
|
||||
|
||||
|
@ -34,7 +37,7 @@ public class TokenMgrError extends Error
|
|||
int errorCode;
|
||||
|
||||
/**
|
||||
* Replaces unprintable characters by their espaced (or unicode escaped)
|
||||
* Replaces unprintable characters by their escaped (or unicode escaped)
|
||||
* equivalents in the given string
|
||||
*/
|
||||
protected static final String addEscapes(String str) {
|
||||
|
@ -85,12 +88,12 @@ public class TokenMgrError extends Error
|
|||
/**
|
||||
* Returns a detailed message for the Error when it is thrown by the
|
||||
* token manager to indicate a lexical error.
|
||||
* Parameters :
|
||||
* EOFSeen : indicates if EOF caused the lexicl error
|
||||
* curLexState : lexical state in which this error occured
|
||||
* errorLine : line number when the error occured
|
||||
* errorColumn : column number when the error occured
|
||||
* errorAfter : prefix that was seen before this error occured
|
||||
* Parameters :
|
||||
* EOFSeen : indicates if EOF caused the lexical error
|
||||
* curLexState : lexical state in which this error occurred
|
||||
* errorLine : line number when the error occurred
|
||||
* errorColumn : column number when the error occurred
|
||||
* errorAfter : prefix that was seen before this error occurred
|
||||
* curchar : the offending character
|
||||
* Note: You can customize the lexical error message by modifying this method.
|
||||
*/
|
||||
|
@ -105,7 +108,7 @@ public class TokenMgrError extends Error
|
|||
/**
|
||||
* You can also modify the body of this method to customize your error messages.
|
||||
* For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
|
||||
* of end-users concern, so you can return something like :
|
||||
* of end-users concern, so you can return something like :
|
||||
*
|
||||
* "Internal Error : Please file a bug report .... "
|
||||
*
|
||||
|
@ -119,15 +122,19 @@ public class TokenMgrError extends Error
|
|||
* Constructors of various flavors follow.
|
||||
*/
|
||||
|
||||
/** No-argument constructor; sets neither a message nor an error code. */
public TokenMgrError()
{
   super();
}
|
||||
|
||||
/**
 * Creates an error with the given detail message and stores
 * {@code reason} as its {@code errorCode}.
 */
public TokenMgrError(String message, int reason)
{
   super(message);
   this.errorCode = reason;
}
|
||||
|
||||
/**
 * Full constructor: builds the detail message with {@code LexicalError}
 * and delegates to {@link #TokenMgrError(String, int)}.
 *
 * @param EOFSeen     indicates if EOF caused the lexical error
 * @param lexState    lexical state in which this error occurred
 * @param errorLine   line number when the error occurred
 * @param errorColumn column number when the error occurred
 * @param errorAfter  prefix that was seen before this error occurred
 * @param curChar     the offending character
 * @param reason      error code stored on this error
 */
public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn,
                     String errorAfter, char curChar, int reason)
{
   this(
      LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar),
      reason);
}
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=186d5bcc64733844c7daab5ad5a6e349 (do not edit this line) */
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.search;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
|
||||
/**
|
||||
* A range query that returns a constant score equal to its boost for
|
||||
|
@ -42,6 +43,7 @@ public class ConstantScoreRangeQuery extends Query
|
|||
private final String upperVal;
|
||||
private final boolean includeLower;
|
||||
private final boolean includeUpper;
|
||||
private Collator collator;
|
||||
|
||||
|
||||
public ConstantScoreRangeQuery(String fieldName, String lowerVal, String upperVal, boolean includeLower, boolean includeUpper)
|
||||
|
@ -65,6 +67,14 @@ public class ConstantScoreRangeQuery extends Query
|
|||
this.includeUpper = includeUpper;
|
||||
}
|
||||
|
||||
/**
 * Same as the five-argument constructor, but additionally remembers a
 * {@link Collator}. The collator is handed to the {@link RangeFilter} that
 * {@link #rewrite} builds, so a non-null value makes range membership depend
 * on collation order; see the corresponding <code>RangeFilter</code>
 * constructor for the performance warning that applies to collated ranges.
 *
 * @param collator The collator to pass on to the range filter; may be null
 */
public ConstantScoreRangeQuery(String fieldName,
                               String lowerVal,
                               String upperVal,
                               boolean includeLower,
                               boolean includeUpper,
                               Collator collator)
{
  this(fieldName, lowerVal, upperVal, includeLower, includeUpper);
  this.collator = collator;
}
|
||||
|
||||
/** Returns the field name for this query */
|
||||
public String getField() { return fieldName; }
|
||||
/** Returns the value of the lower endpoint of this range query, null if open ended */
|
||||
|
@ -78,9 +88,10 @@ public class ConstantScoreRangeQuery extends Query
|
|||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
// Map to RangeFilter semantics which are slightly different...
|
||||
RangeFilter rangeFilt = new RangeFilter(fieldName,
|
||||
lowerVal!=null?lowerVal:"",
|
||||
upperVal, lowerVal==""?false:includeLower, upperVal==null?false:includeUpper);
|
||||
RangeFilter rangeFilt = new RangeFilter
|
||||
(fieldName, lowerVal != null?lowerVal:"", upperVal,
|
||||
lowerVal==""?false:includeLower, upperVal==null?false:includeUpper,
|
||||
collator);
|
||||
Query q = new ConstantScoreQuery(rangeFilt);
|
||||
q.setBoost(getBoost());
|
||||
return q;
|
||||
|
@ -117,6 +128,7 @@ public class ConstantScoreRangeQuery extends Query
|
|||
if (this.fieldName != other.fieldName // interned comparison
|
||||
|| this.includeLower != other.includeLower
|
||||
|| this.includeUpper != other.includeUpper
|
||||
|| (this.collator != null && ! this.collator.equals(other.collator))
|
||||
) { return false; }
|
||||
if (this.lowerVal != null ? !this.lowerVal.equals(other.lowerVal) : other.lowerVal != null) return false;
|
||||
if (this.upperVal != null ? !this.upperVal.equals(other.upperVal) : other.upperVal != null) return false;
|
||||
|
@ -134,6 +146,7 @@ public class ConstantScoreRangeQuery extends Query
|
|||
h ^= (upperVal != null ? (upperVal.hashCode()) : 0x5a695a69);
|
||||
h ^= (includeLower ? 0x665599aa : 0)
|
||||
^ (includeUpper ? 0x99aa5566 : 0);
|
||||
h ^= collator != null ? collator.hashCode() : 0;
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import org.apache.lucene.util.OpenBitSet;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
import java.text.Collator;
|
||||
|
||||
/**
|
||||
* A Filter that restricts search results to a range of values in a given
|
||||
|
@ -42,8 +43,9 @@ public class RangeFilter extends Filter {
|
|||
private String upperTerm;
|
||||
private boolean includeLower;
|
||||
private boolean includeUpper;
|
||||
private Collator collator;
|
||||
|
||||
/**
|
||||
/**
|
||||
* @param fieldName The field this range applies to
|
||||
* @param lowerTerm The lower bound on this range
|
||||
* @param upperTerm The upper bound on this range
|
||||
|
@ -74,7 +76,31 @@ public class RangeFilter extends Filter {
|
|||
("The upper bound must be non-null to be inclusive");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * Creates a range filter that can decide range membership with a
 * {@link Collator}.
 * <p>
 * <strong>WARNING:</strong> Supplying a non-null <code>collator</code>
 * causes every single index Term in the Field referenced by lowerTerm
 * and/or upperTerm to be examined. Depending on the number of index Terms
 * in this Field, the operation could be very slow.
 *
 * @param fieldName The field this range applies to
 * @param lowerTerm The lower bound on this range
 * @param upperTerm The upper bound on this range
 * @param includeLower Does this range include the lower bound?
 * @param includeUpper Does this range include the upper bound?
 * @param collator The collator to use when determining range inclusion; set
 *  to null to use Unicode code point ordering instead of collation.
 * @throws IllegalArgumentException if both terms are null or if
 *  lowerTerm is null and includeLower is true (similar for upperTerm
 *  and includeUpper)
 */
public RangeFilter(String fieldName,
                   String lowerTerm,
                   String upperTerm,
                   boolean includeLower,
                   boolean includeUpper,
                   Collator collator) {
  // argument validation is done by the five-argument constructor
  this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
  this.collator = collator;
}
|
||||
|
||||
/**
|
||||
* Constructs a filter for field <code>fieldName</code> matching
|
||||
* less than or equal to <code>upperTerm</code>.
|
||||
|
@ -100,7 +126,7 @@ public class RangeFilter extends Filter {
|
|||
public BitSet bits(IndexReader reader) throws IOException {
|
||||
BitSet bits = new BitSet(reader.maxDoc());
|
||||
TermEnum enumerator =
|
||||
(null != lowerTerm
|
||||
(null != lowerTerm && collator == null
|
||||
? reader.terms(new Term(fieldName, lowerTerm))
|
||||
: reader.terms(new Term(fieldName)));
|
||||
|
||||
|
@ -110,40 +136,61 @@ public class RangeFilter extends Filter {
|
|||
return bits;
|
||||
}
|
||||
|
||||
boolean checkLower = false;
|
||||
if (!includeLower) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) ||
|
||||
(!includeUpper && compare==0)) {
|
||||
break;
|
||||
if (collator != null) {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if ((lowerTerm == null
|
||||
|| (includeLower
|
||||
? collator.compare(term.text(), lowerTerm) >= 0
|
||||
: collator.compare(term.text(), lowerTerm) > 0))
|
||||
&& (upperTerm == null
|
||||
|| (includeUpper
|
||||
? collator.compare(term.text(), upperTerm) <= 0
|
||||
: collator.compare(term.text(), upperTerm) < 0))) {
|
||||
/* we have a good term, find the docs */
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
/* we have a good term, find the docs */
|
||||
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
while (enumerator.next());
|
||||
} else { // collator is null - use Unicode code point ordering
|
||||
boolean checkLower = false;
|
||||
if (!includeLower) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) ||
|
||||
(!includeUpper && compare==0)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* we have a good term, find the docs */
|
||||
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (enumerator.next());
|
||||
}
|
||||
while (enumerator.next());
|
||||
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
|
@ -162,7 +209,7 @@ public class RangeFilter extends Filter {
|
|||
OpenBitSet bits = new OpenBitSet(reader.maxDoc());
|
||||
|
||||
TermEnum enumerator =
|
||||
(null != lowerTerm
|
||||
(null != lowerTerm && collator == null
|
||||
? reader.terms(new Term(fieldName, lowerTerm))
|
||||
: reader.terms(new Term(fieldName)));
|
||||
|
||||
|
@ -171,40 +218,63 @@ public class RangeFilter extends Filter {
|
|||
if (enumerator.term() == null) {
|
||||
return bits;
|
||||
}
|
||||
|
||||
boolean checkLower = false;
|
||||
if (!includeLower) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
|
||||
try {
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) ||
|
||||
(!includeUpper && compare==0)) {
|
||||
break;
|
||||
if (collator != null) {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if ((lowerTerm == null
|
||||
|| (includeLower
|
||||
? collator.compare(term.text(), lowerTerm) >= 0
|
||||
: collator.compare(term.text(), lowerTerm) > 0))
|
||||
&& (upperTerm == null
|
||||
|| (includeUpper
|
||||
? collator.compare(term.text(), upperTerm) <= 0
|
||||
: collator.compare(term.text(), upperTerm) < 0))) {
|
||||
/* we have a good term, find the docs */
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
/* we have a good term, find the docs */
|
||||
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
while (enumerator.next());
|
||||
} else { // collator is null - use Unicode code point ordering
|
||||
boolean checkLower = false;
|
||||
if (!includeLower) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field().equals(fieldName)) {
|
||||
if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) ||
|
||||
(!includeUpper && compare==0)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
/* we have a good term, find the docs */
|
||||
|
||||
termDocs.seek(enumerator.term());
|
||||
while (termDocs.next()) {
|
||||
bits.set(termDocs.doc());
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (enumerator.next());
|
||||
}
|
||||
while (enumerator.next());
|
||||
|
||||
} finally {
|
||||
termDocs.close();
|
||||
|
@ -241,6 +311,7 @@ public class RangeFilter extends Filter {
|
|||
if (!this.fieldName.equals(other.fieldName)
|
||||
|| this.includeLower != other.includeLower
|
||||
|| this.includeUpper != other.includeUpper
|
||||
|| (this.collator != null && ! this.collator.equals(other.collator))
|
||||
) { return false; }
|
||||
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
|
||||
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
|
||||
|
@ -255,6 +326,7 @@ public class RangeFilter extends Filter {
|
|||
h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2);
|
||||
h ^= (includeLower ? 0xD484B933 : 0)
|
||||
^ (includeUpper ? 0x6AE423AC : 0);
|
||||
h ^= collator != null ? collator.hashCode() : 0;
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
|
@ -46,12 +47,18 @@ public class RangeQuery extends Query
|
|||
private Term lowerTerm;
|
||||
private Term upperTerm;
|
||||
private boolean inclusive;
|
||||
private Collator collator;
|
||||
|
||||
/** Constructs a query selecting all terms greater than
|
||||
* <code>lowerTerm</code> but less than <code>upperTerm</code>.
|
||||
* There must be at least one term and either term may be null,
|
||||
* in which case there is no bound on that side, but if there are
|
||||
* two terms, both terms <b>must</b> be for the same field.
|
||||
*
|
||||
* @param lowerTerm The Term at the lower end of the range
|
||||
* @param upperTerm The Term at the upper end of the range
|
||||
* @param inclusive If true, both <code>lowerTerm</code> and
|
||||
* <code>upperTerm</code> will themselves be included in the range.
|
||||
*/
|
||||
public RangeQuery(Term lowerTerm, Term upperTerm, boolean inclusive)
|
||||
{
|
||||
|
@ -76,48 +83,109 @@ public class RangeQuery extends Query
|
|||
this.inclusive = inclusive;
|
||||
}
|
||||
|
||||
/** Constructs a query selecting all terms greater than
 * <code>lowerTerm</code> but less than <code>upperTerm</code>, optionally
 * using a {@link Collator} to decide range membership.
 * At least one of the two terms must be given; a null term means there is
 * no bound on that side. If both terms are given, they <b>must</b> be for
 * the same field.
 * <p>
 * When <code>collator</code> is not null, it decides whether index terms
 * fall within the range, instead of the Unicode code point order in which
 * index terms are stored.
 * <p>
 * <strong>WARNING:</strong> Supplying a non-null <code>collator</code>
 * causes every single index Term in the Field referenced by lowerTerm
 * and/or upperTerm to be examined. Depending on the number of index Terms
 * in this Field, the operation could be very slow.
 *
 * @param lowerTerm The Term at the lower end of the range
 * @param upperTerm The Term at the upper end of the range
 * @param inclusive If true, both <code>lowerTerm</code> and
 *  <code>upperTerm</code> will themselves be included in the range.
 * @param collator The collator to use to collate index Terms, to determine
 *  their membership in the range bounded by <code>lowerTerm</code> and
 *  <code>upperTerm</code>.
 */
public RangeQuery(Term lowerTerm,
                  Term upperTerm,
                  boolean inclusive,
                  Collator collator)
{
  this(lowerTerm, upperTerm, inclusive);
  this.collator = collator;
}
|
||||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
|
||||
BooleanQuery query = new BooleanQuery(true);
|
||||
TermEnum enumerator = reader.terms(lowerTerm);
|
||||
String testField = getField();
|
||||
if (collator != null) {
|
||||
TermEnum enumerator = reader.terms(new Term(testField, ""));
|
||||
String lowerTermText = lowerTerm != null ? lowerTerm.text() : null;
|
||||
String upperTermText = upperTerm != null ? upperTerm.text() : null;
|
||||
|
||||
try {
|
||||
|
||||
boolean checkLower = false;
|
||||
if (!inclusive) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
String testField = getField();
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field() == testField) { // interned comparison
|
||||
if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.text().compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) || (!inclusive && compare == 0))
|
||||
break;
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field() == testField) { // interned comparison
|
||||
if ((lowerTermText == null
|
||||
|| (inclusive ? collator.compare(term.text(), lowerTermText) >= 0
|
||||
: collator.compare(term.text(), lowerTermText) > 0))
|
||||
&& (upperTermText == null
|
||||
|| (inclusive ? collator.compare(term.text(), upperTermText) <= 0
|
||||
: collator.compare(term.text(), upperTermText) < 0))) {
|
||||
addTermToQuery(term, query);
|
||||
}
|
||||
TermQuery tq = new TermQuery(term); // found a match
|
||||
tq.setBoost(getBoost()); // set the boost
|
||||
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
while (enumerator.next());
|
||||
}
|
||||
finally {
|
||||
enumerator.close();
|
||||
}
|
||||
while (enumerator.next());
|
||||
}
|
||||
finally {
|
||||
enumerator.close();
|
||||
else { // collator is null
|
||||
TermEnum enumerator = reader.terms(lowerTerm);
|
||||
|
||||
try {
|
||||
|
||||
boolean checkLower = false;
|
||||
if (!inclusive) // make adjustments to set to exclusive
|
||||
checkLower = true;
|
||||
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term != null && term.field() == testField) { // interned comparison
|
||||
if (!checkLower || term.text().compareTo(lowerTerm.text()) > 0) {
|
||||
checkLower = false;
|
||||
if (upperTerm != null) {
|
||||
int compare = upperTerm.text().compareTo(term.text());
|
||||
/* if beyond the upper term, or is exclusive and
|
||||
* this is equal to the upper term, break out */
|
||||
if ((compare < 0) || (!inclusive && compare == 0))
|
||||
break;
|
||||
}
|
||||
addTermToQuery(term, query); // Found a match
|
||||
}
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (enumerator.next());
|
||||
}
|
||||
finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
return query;
|
||||
}
|
||||
|
||||
private void addTermToQuery(Term term, BooleanQuery query) {
|
||||
TermQuery tq = new TermQuery(term);
|
||||
tq.setBoost(getBoost()); // set the boost
|
||||
query.add(tq, BooleanClause.Occur.SHOULD); // add to query
|
||||
}
|
||||
|
||||
/** Returns the field name for this query */
|
||||
public String getField() {
|
||||
return (lowerTerm != null ? lowerTerm.field() : upperTerm.field());
|
||||
|
@ -132,6 +200,9 @@ public class RangeQuery extends Query
|
|||
/** Returns <code>true</code> if the range query is inclusive */
|
||||
public boolean isInclusive() { return inclusive; }
|
||||
|
||||
/** Returns the collator used to determine range inclusion, if any. */
|
||||
public Collator getCollator() { return collator; }
|
||||
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
public String toString(String field)
|
||||
|
@ -159,6 +230,9 @@ public class RangeQuery extends Query
|
|||
final RangeQuery other = (RangeQuery) o;
|
||||
if (this.getBoost() != other.getBoost()) return false;
|
||||
if (this.inclusive != other.inclusive) return false;
|
||||
if (this.collator != null && ! this.collator.equals(other.collator))
|
||||
return false;
|
||||
|
||||
// one of lowerTerm and upperTerm can be null
|
||||
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
|
||||
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
|
||||
|
@ -174,6 +248,7 @@ public class RangeQuery extends Query
|
|||
h ^= (h << 25) | (h >>> 8);
|
||||
h ^= upperTerm != null ? upperTerm.hashCode() : 0;
|
||||
h ^= this.inclusive ? 0x2742E74A : 0;
|
||||
h ^= collator != null ? collator.hashCode() : 0;
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ package org.apache.lucene.queryParser;
|
|||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.text.DateFormat;
|
||||
import java.text.Collator;
|
||||
import java.util.Calendar;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
|
@ -429,6 +430,51 @@ public class TestQueryParser extends LuceneTestCase {
|
|||
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
|
||||
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
|
||||
}
|
||||
|
||||
public void testFarsiRangeCollating() throws Exception {
|
||||
|
||||
RAMDirectory ramDir = new RAMDirectory();
|
||||
IndexWriter iw = new IndexWriter(ramDir, new WhitespaceAnalyzer(), true,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("content","\u0633\u0627\u0628",
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
iw.addDocument(doc);
|
||||
iw.close();
|
||||
IndexSearcher is = new IndexSearcher(ramDir);
|
||||
|
||||
QueryParser qp = new QueryParser("content", new WhitespaceAnalyzer());
|
||||
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
Collator c = Collator.getInstance(new Locale("ar"));
|
||||
qp.setRangeCollator(c);
|
||||
|
||||
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
|
||||
// orders the U+0698 character before the U+0633 character, so the single
|
||||
// index Term below should NOT be returned by a ConstantScoreRangeQuery
|
||||
// with a Farsi Collator (or an Arabic one for the case when Farsi is not
|
||||
// supported).
|
||||
|
||||
// Test ConstantScoreRangeQuery
|
||||
qp.setUseOldRangeQuery(false);
|
||||
ScoreDoc[] result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, result.length);
|
||||
|
||||
result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, result.length);
|
||||
|
||||
// Test RangeQuery
|
||||
qp.setUseOldRangeQuery(true);
|
||||
result = is.search(qp.parse("[ \u062F TO \u0698 ]"), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, result.length);
|
||||
|
||||
result = is.search(qp.parse("[ \u0633 TO \u0638 ]"), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, result.length);
|
||||
|
||||
is.close();
|
||||
}
|
||||
|
||||
/** for testing legacy DateField support */
|
||||
private String getLegacyDate(String s) throws Exception {
|
||||
|
|
|
@ -32,12 +32,30 @@ public class BaseTestRangeFilter extends LuceneTestCase {
|
|||
public static final boolean F = false;
|
||||
public static final boolean T = true;
|
||||
|
||||
RAMDirectory index = new RAMDirectory();
|
||||
Random rand = new Random(101); // use a set seed to test is deterministic
|
||||
|
||||
int maxR = Integer.MIN_VALUE;
|
||||
int minR = Integer.MAX_VALUE;
|
||||
|
||||
/**
|
||||
* Collation interacts badly with hyphens -- collation produces different
|
||||
* ordering than Unicode code-point ordering -- so two indexes are created:
|
||||
* one which can't have negative random integers, for testing collated
|
||||
* ranges, and the other which can have negative random integers, for all
|
||||
* other tests.
|
||||
*/
|
||||
class TestIndex {
|
||||
int maxR;
|
||||
int minR;
|
||||
boolean allowNegativeRandomInts;
|
||||
RAMDirectory index = new RAMDirectory();
|
||||
|
||||
TestIndex(int minR, int maxR, boolean allowNegativeRandomInts) {
|
||||
this.minR = minR;
|
||||
this.maxR = maxR;
|
||||
this.allowNegativeRandomInts = allowNegativeRandomInts;
|
||||
}
|
||||
}
|
||||
TestIndex signedIndex = new TestIndex(Integer.MAX_VALUE, Integer.MIN_VALUE, true);
|
||||
TestIndex unsignedIndex = new TestIndex(Integer.MAX_VALUE, 0, false);
|
||||
|
||||
int minId = 0;
|
||||
int maxId = 10000;
|
||||
|
||||
|
@ -65,28 +83,31 @@ public class BaseTestRangeFilter extends LuceneTestCase {
|
|||
|
||||
public BaseTestRangeFilter(String name) {
|
||||
super(name);
|
||||
build();
|
||||
build(signedIndex);
|
||||
build(unsignedIndex);
|
||||
}
|
||||
public BaseTestRangeFilter() {
|
||||
build();
|
||||
build(signedIndex);
|
||||
build(unsignedIndex);
|
||||
}
|
||||
|
||||
private void build() {
|
||||
private void build(TestIndex index) {
|
||||
try {
|
||||
|
||||
/* build an index */
|
||||
IndexWriter writer = new IndexWriter(index, new SimpleAnalyzer(), T,
|
||||
IndexWriter writer = new IndexWriter(index.index, new SimpleAnalyzer(), T,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
|
||||
for (int d = minId; d <= maxId; d++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("id",pad(d), Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
int r= rand.nextInt();
|
||||
if (maxR < r) {
|
||||
maxR = r;
|
||||
int r= index.allowNegativeRandomInts
|
||||
? rand.nextInt() : rand.nextInt(Integer.MAX_VALUE);
|
||||
if (index.maxR < r) {
|
||||
index.maxR = r;
|
||||
}
|
||||
if (r < minR) {
|
||||
minR = r;
|
||||
if (r < index.minR) {
|
||||
index.minR = r;
|
||||
}
|
||||
doc.add(new Field("rand",pad(r), Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
doc.add(new Field("body","body", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -27,6 +28,8 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import junit.framework.Assert;
|
||||
|
||||
|
@ -92,12 +95,25 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
|
|||
return new ConstantScoreRangeQuery(f,l,h,il,ih);
|
||||
}
|
||||
|
||||
/** macro for readability */
|
||||
public static Query csrq(String f, String l, String h,
|
||||
boolean il, boolean ih, Collator c) {
|
||||
return new ConstantScoreRangeQuery(f,l,h,il,ih,c);
|
||||
}
|
||||
|
||||
public void testBasics() throws IOException {
|
||||
QueryUtils.check(csrq("data","1","6",T,T));
|
||||
QueryUtils.check(csrq("data","A","Z",T,T));
|
||||
QueryUtils.checkUnequal(csrq("data","1","6",T,T), csrq("data","A","Z",T,T));
|
||||
}
|
||||
|
||||
public void testBasicsCollating() throws IOException {
|
||||
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||
QueryUtils.check(csrq("data","1","6",T,T,c));
|
||||
QueryUtils.check(csrq("data","A","Z",T,T,c));
|
||||
QueryUtils.checkUnequal(csrq("data","1","6",T,T,c), csrq("data","A","Z",T,T,c));
|
||||
}
|
||||
|
||||
public void testEqualScores() throws IOException {
|
||||
// NOTE: uses index build in *this* setUp
|
||||
|
||||
|
@ -205,7 +221,7 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
|
|||
public void testRangeQueryId() throws IOException {
|
||||
// NOTE: uses index build in *super* setUp
|
||||
|
||||
IndexReader reader = IndexReader.open(index);
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
int medId = ((maxId - minId) / 2);
|
||||
|
@ -284,21 +300,105 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
|
|||
|
||||
}
|
||||
|
||||
|
||||
public void testRangeQueryIdCollating() throws IOException {
|
||||
// NOTE: uses index build in *super* setUp
|
||||
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
int medId = ((maxId - minId) / 2);
|
||||
|
||||
String minIP = pad(minId);
|
||||
String maxIP = pad(maxId);
|
||||
String medIP = pad(medId);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||
|
||||
ScoreDoc[] result;
|
||||
|
||||
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||
|
||||
// test id, bounded on both ends
|
||||
|
||||
result = search.search(csrq("id",minIP,maxIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("find all", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,maxIP,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but last", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,maxIP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but first", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,maxIP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but ends", numDocs-2, result.length);
|
||||
|
||||
result = search.search(csrq("id",medIP,maxIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("med and up", 1+ maxId-medId, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,medIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("up to med", 1+ medId-minId, result.length);
|
||||
|
||||
// unbounded id
|
||||
|
||||
result = search.search(csrq("id",minIP,null,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("min and up", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("id",null,maxIP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max and down", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,null,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("not min, but up", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("id",null,maxIP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("not max, but down", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("id",medIP,maxIP,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("med and up, not max", maxId-medId, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,medIP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("not min, up to med", medId-minId, result.length);
|
||||
|
||||
// very small sets
|
||||
|
||||
result = search.search(csrq("id",minIP,minIP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("min,min,F,F,c", 0, result.length);
|
||||
result = search.search(csrq("id",medIP,medIP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("med,med,F,F,c", 0, result.length);
|
||||
result = search.search(csrq("id",maxIP,maxIP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,max,F,F,c", 0, result.length);
|
||||
|
||||
result = search.search(csrq("id",minIP,minIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("min,min,T,T,c", 1, result.length);
|
||||
result = search.search(csrq("id",null,minIP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("nul,min,F,T,c", 1, result.length);
|
||||
|
||||
result = search.search(csrq("id",maxIP,maxIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,max,T,T,c", 1, result.length);
|
||||
result = search.search(csrq("id",maxIP,null,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,nul,T,T,c", 1, result.length);
|
||||
|
||||
result = search.search(csrq("id",medIP,medIP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("med,med,T,T,c", 1, result.length);
|
||||
}
|
||||
|
||||
|
||||
public void testRangeQueryRand() throws IOException {
|
||||
// NOTE: uses index build in *super* setUp
|
||||
|
||||
IndexReader reader = IndexReader.open(index);
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
String minRP = pad(minR);
|
||||
String maxRP = pad(maxR);
|
||||
String minRP = pad(signedIndex.minR);
|
||||
String maxRP = pad(signedIndex.maxR);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||
|
||||
ScoreDoc[] result;
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
// test extremes, bounded on both ends
|
||||
|
||||
|
@ -347,4 +447,104 @@ public class TestConstantScoreRangeQuery extends BaseTestRangeFilter {
|
|||
|
||||
}
|
||||
|
||||
public void testRangeQueryRandCollating() throws IOException {
|
||||
// NOTE: uses index build in *super* setUp
|
||||
|
||||
// using the unsigned index because collation seems to ignore hyphens
|
||||
IndexReader reader = IndexReader.open(unsignedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
String minRP = pad(unsignedIndex.minR);
|
||||
String maxRP = pad(unsignedIndex.maxR);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||
|
||||
ScoreDoc[] result;
|
||||
|
||||
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||
|
||||
// test extremes, bounded on both ends
|
||||
|
||||
result = search.search(csrq("rand",minRP,maxRP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("find all", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("rand",minRP,maxRP,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but biggest", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("rand",minRP,maxRP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but smallest", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("rand",minRP,maxRP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("all but extremes", numDocs-2, result.length);
|
||||
|
||||
// unbounded
|
||||
|
||||
result = search.search(csrq("rand",minRP,null,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("smallest and up", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("rand",null,maxRP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("biggest and down", numDocs, result.length);
|
||||
|
||||
result = search.search(csrq("rand",minRP,null,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("not smallest, but up", numDocs-1, result.length);
|
||||
|
||||
result = search.search(csrq("rand",null,maxRP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("not biggest, but down", numDocs-1, result.length);
|
||||
|
||||
// very small sets
|
||||
|
||||
result = search.search(csrq("rand",minRP,minRP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("min,min,F,F,c", 0, result.length);
|
||||
result = search.search(csrq("rand",maxRP,maxRP,F,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,max,F,F,c", 0, result.length);
|
||||
|
||||
result = search.search(csrq("rand",minRP,minRP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("min,min,T,T,c", 1, result.length);
|
||||
result = search.search(csrq("rand",null,minRP,F,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("nul,min,F,T,c", 1, result.length);
|
||||
|
||||
result = search.search(csrq("rand",maxRP,maxRP,T,T,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,max,T,T,c", 1, result.length);
|
||||
result = search.search(csrq("rand",maxRP,null,T,F,c), null, numDocs).scoreDocs;
|
||||
assertEquals("max,nul,T,T,c", 1, result.length);
|
||||
}
|
||||
|
||||
public void testFarsi() throws Exception {
|
||||
|
||||
/* build an index */
|
||||
RAMDirectory farsiIndex = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("content","\u0633\u0627\u0628",
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
doc.add(new Field("body", "body",
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
writer.addDocument(doc);
|
||||
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(farsiIndex);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
Collator c = Collator.getInstance(new Locale("ar"));
|
||||
|
||||
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
|
||||
// orders the U+0698 character before the U+0633 character, so the single
|
||||
// index Term below should NOT be returned by a ConstantScoreRangeQuery
|
||||
// with a Farsi Collator (or an Arabic one for the case when Farsi is
|
||||
// not supported).
|
||||
ScoreDoc[] result = search.search(csrq("content","\u062F", "\u0698", T, T, c), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, result.length);
|
||||
|
||||
result = search.search(csrq("content", "\u0633", "\u0638", T, T, c), null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, result.length);
|
||||
search.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,9 +18,16 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.text.Collator;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
/**
|
||||
* A basic 'positive' Unit test class for the RangeFilter class.
|
||||
|
@ -42,7 +49,7 @@ public class TestRangeFilter extends BaseTestRangeFilter {
|
|||
|
||||
public void testRangeFilterId() throws IOException {
|
||||
|
||||
IndexReader reader = IndexReader.open(index);
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
int medId = ((maxId - minId) / 2);
|
||||
|
@ -122,13 +129,96 @@ public class TestRangeFilter extends BaseTestRangeFilter {
|
|||
|
||||
}
|
||||
|
||||
public void testRangeFilterIdCollating() throws IOException {
|
||||
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||
|
||||
int medId = ((maxId - minId) / 2);
|
||||
|
||||
String minIP = pad(minId);
|
||||
String maxIP = pad(maxId);
|
||||
String medIP = pad(medId);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||
|
||||
Hits result;
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
// test id, bounded on both ends
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,T,c));
|
||||
assertEquals("find all", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,maxIP,T,F,c));
|
||||
assertEquals("all but last", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,T,c));
|
||||
assertEquals("all but first", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,maxIP,F,F,c));
|
||||
assertEquals("all but ends", numDocs-2, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,T,c));
|
||||
assertEquals("med and up", 1+ maxId-medId, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,medIP,T,T,c));
|
||||
assertEquals("up to med", 1+ medId-minId, result.length());
|
||||
|
||||
// unbounded id
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,null,T,F,c));
|
||||
assertEquals("min and up", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",null,maxIP,F,T,c));
|
||||
assertEquals("max and down", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,null,F,F,c));
|
||||
assertEquals("not min, but up", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",null,maxIP,F,F,c));
|
||||
assertEquals("not max, but down", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",medIP,maxIP,T,F,c));
|
||||
assertEquals("med and up, not max", maxId-medId, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,medIP,F,T,c));
|
||||
assertEquals("not min, up to med", medId-minId, result.length());
|
||||
|
||||
// very small sets
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,minIP,F,F,c));
|
||||
assertEquals("min,min,F,F", 0, result.length());
|
||||
result = search.search(q,new RangeFilter("id",medIP,medIP,F,F,c));
|
||||
assertEquals("med,med,F,F", 0, result.length());
|
||||
result = search.search(q,new RangeFilter("id",maxIP,maxIP,F,F,c));
|
||||
assertEquals("max,max,F,F", 0, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",minIP,minIP,T,T,c));
|
||||
assertEquals("min,min,T,T", 1, result.length());
|
||||
result = search.search(q,new RangeFilter("id",null,minIP,F,T,c));
|
||||
assertEquals("nul,min,F,T", 1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",maxIP,maxIP,T,T,c));
|
||||
assertEquals("max,max,T,T", 1, result.length());
|
||||
result = search.search(q,new RangeFilter("id",maxIP,null,T,F,c));
|
||||
assertEquals("max,nul,T,T", 1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("id",medIP,medIP,T,T,c));
|
||||
assertEquals("med,med,T,T", 1, result.length());
|
||||
}
|
||||
|
||||
public void testRangeFilterRand() throws IOException {
|
||||
|
||||
IndexReader reader = IndexReader.open(index);
|
||||
IndexReader reader = IndexReader.open(signedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
String minRP = pad(minR);
|
||||
String maxRP = pad(maxR);
|
||||
String minRP = pad(signedIndex.minR);
|
||||
String maxRP = pad(signedIndex.maxR);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
|
@ -184,4 +274,106 @@ public class TestRangeFilter extends BaseTestRangeFilter {
|
|||
|
||||
}
|
||||
|
||||
public void testRangeFilterRandCollating() throws IOException {
|
||||
|
||||
// using the unsigned index because collation seems to ignore hyphens
|
||||
IndexReader reader = IndexReader.open(unsignedIndex.index);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
|
||||
Collator c = Collator.getInstance(Locale.ENGLISH);
|
||||
|
||||
String minRP = pad(unsignedIndex.minR);
|
||||
String maxRP = pad(unsignedIndex.maxR);
|
||||
|
||||
int numDocs = reader.numDocs();
|
||||
|
||||
assertEquals("num of docs", numDocs, 1+ maxId - minId);
|
||||
|
||||
Hits result;
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
// test extremes, bounded on both ends
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,T,c));
|
||||
assertEquals("find all", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,maxRP,T,F,c));
|
||||
assertEquals("all but biggest", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,T,c));
|
||||
assertEquals("all but smallest", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,maxRP,F,F,c));
|
||||
assertEquals("all but extremes", numDocs-2, result.length());
|
||||
|
||||
// unbounded
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,null,T,F,c));
|
||||
assertEquals("smallest and up", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",null,maxRP,F,T,c));
|
||||
assertEquals("biggest and down", numDocs, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,null,F,F,c));
|
||||
assertEquals("not smallest, but up", numDocs-1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",null,maxRP,F,F,c));
|
||||
assertEquals("not biggest, but down", numDocs-1, result.length());
|
||||
|
||||
// very small sets
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,minRP,F,F,c));
|
||||
assertEquals("min,min,F,F", 0, result.length());
|
||||
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,F,F,c));
|
||||
assertEquals("max,max,F,F", 0, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",minRP,minRP,T,T,c));
|
||||
assertEquals("min,min,T,T", 1, result.length());
|
||||
result = search.search(q,new RangeFilter("rand",null,minRP,F,T,c));
|
||||
assertEquals("nul,min,F,T", 1, result.length());
|
||||
|
||||
result = search.search(q,new RangeFilter("rand",maxRP,maxRP,T,T,c));
|
||||
assertEquals("max,max,T,T", 1, result.length());
|
||||
result = search.search(q,new RangeFilter("rand",maxRP,null,T,F,c));
|
||||
assertEquals("max,nul,T,T", 1, result.length());
|
||||
}
|
||||
|
||||
public void testFarsi() throws Exception {
|
||||
|
||||
/* build an index */
|
||||
RAMDirectory farsiIndex = new RAMDirectory();
|
||||
IndexWriter writer = new IndexWriter(farsiIndex, new SimpleAnalyzer(), T,
|
||||
IndexWriter.MaxFieldLength.LIMITED);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("content","\u0633\u0627\u0628",
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
doc.add(new Field("body", "body",
|
||||
Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
|
||||
IndexReader reader = IndexReader.open(farsiIndex);
|
||||
IndexSearcher search = new IndexSearcher(reader);
|
||||
Query q = new TermQuery(new Term("body","body"));
|
||||
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
Collator collator = Collator.getInstance(new Locale("ar"));
|
||||
|
||||
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
|
||||
// orders the U+0698 character before the U+0633 character, so the single
|
||||
// index Term below should NOT be returned by a RangeFilter with a Farsi
|
||||
// Collator (or an Arabic one for the case when Farsi is not supported).
|
||||
Hits result = search.search
|
||||
(q, new RangeFilter("content", "\u062F", "\u0698", T, T, collator));
|
||||
assertEquals("The index Term should not be included.", 0, result.length());
|
||||
|
||||
result = search.search
|
||||
(q, new RangeFilter("content", "\u0633", "\u0638", T, T, collator));
|
||||
assertEquals("The index Term should be included.", 1, result.length());
|
||||
search.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.lucene.store.RAMDirectory;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.text.Collator;
|
||||
|
||||
|
||||
public class TestRangeQuery extends LuceneTestCase {
|
||||
|
@ -130,6 +132,78 @@ public class TestRangeQuery extends LuceneTestCase {
|
|||
assertFalse("queries with different inclusive are not equal", query.equals(other));
|
||||
}
|
||||
|
||||
public void testExclusiveCollating() throws Exception {
|
||||
Query query = new RangeQuery(new Term("content", "A"),
|
||||
new Term("content", "C"),
|
||||
false, Collator.getInstance(Locale.ENGLISH));
|
||||
initializeIndex(new String[] {"A", "B", "C", "D"});
|
||||
IndexSearcher searcher = new IndexSearcher(dir);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("A,B,C,D, only B in range", 1, hits.length);
|
||||
searcher.close();
|
||||
|
||||
initializeIndex(new String[] {"A", "B", "D"});
|
||||
searcher = new IndexSearcher(dir);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("A,B,D, only B in range", 1, hits.length);
|
||||
searcher.close();
|
||||
|
||||
addDoc("C");
|
||||
searcher = new IndexSearcher(dir);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("C added, still only B in range", 1, hits.length);
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
public void testInclusiveCollating() throws Exception {
|
||||
Query query = new RangeQuery(new Term("content", "A"),
|
||||
new Term("content", "C"),
|
||||
true, Collator.getInstance(Locale.ENGLISH));
|
||||
|
||||
initializeIndex(new String[]{"A", "B", "C", "D"});
|
||||
IndexSearcher searcher = new IndexSearcher(dir);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("A,B,C,D - A,B,C in range", 3, hits.length);
|
||||
searcher.close();
|
||||
|
||||
initializeIndex(new String[]{"A", "B", "D"});
|
||||
searcher = new IndexSearcher(dir);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("A,B,D - A and B in range", 2, hits.length);
|
||||
searcher.close();
|
||||
|
||||
addDoc("C");
|
||||
searcher = new IndexSearcher(dir);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("C added - A, B, C in range", 3, hits.length);
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
public void testFarsi() throws Exception {
|
||||
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
|
||||
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
|
||||
// characters properly.
|
||||
Collator collator = Collator.getInstance(new Locale("ar"));
|
||||
Query query = new RangeQuery(new Term("content", "\u062F"),
|
||||
new Term("content", "\u0698"),
|
||||
true, collator);
|
||||
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
|
||||
// orders the U+0698 character before the U+0633 character, so the single
|
||||
// index Term below should NOT be returned by a RangeQuery with a Farsi
|
||||
// Collator (or an Arabic one for the case when Farsi is not supported).
|
||||
initializeIndex(new String[]{ "\u0633\u0627\u0628"});
|
||||
IndexSearcher searcher = new IndexSearcher(dir);
|
||||
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should not be included.", 0, hits.length);
|
||||
|
||||
query = new RangeQuery(new Term("content", "\u0633"),
|
||||
new Term("content", "\u0638"),
|
||||
true, collator);
|
||||
hits = searcher.search(query, null, 1000).scoreDocs;
|
||||
assertEquals("The index Term should be included.", 1, hits.length);
|
||||
searcher.close();
|
||||
}
|
||||
|
||||
private void initializeIndex(String[] values) throws IOException {
|
||||
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
|
@ -154,6 +228,3 @@ public class TestRangeQuery extends LuceneTestCase {
|
|||
docCount++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue