mirror of https://github.com/apache/lucene.git
LUCENE-1938: Precedence query parser using the contrib/queryparser framework
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1025597 13f79535-47bb-0310-9956-ffa450edef68
parent 01390fcefa
commit 405cb198a0
@@ -249,6 +249,11 @@ New features
* LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball.
  (Robert Muir)

* LUCENE-1938: PrecedenceQueryParser is now implemented with the flexible QP framework.
  This means that you can also add this functionality to your own QP pipeline by using
  BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor.
  (Adriano Crestani via Robert Muir)

Build

* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
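To make the CHANGES entry above concrete, here is a minimal sketch of using the new flexible-framework parser. It assumes (not shown in this diff) that the new org.apache.lucene.queryParser.precedence.PrecedenceQueryParser extends StandardQueryParser, so it inherits setAnalyzer() and parse(query, defaultField); the analyzer, field name, and query string are illustrative:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class PrecedenceSketch {
  public static void main(String[] args) throws Exception {
    // The precedence pipeline uses BooleanModifiersQueryNodeProcessor where the
    // standard pipeline uses GroupQueryNodeProcessor, so AND binds tighter than OR.
    PrecedenceQueryParser parser = new PrecedenceQueryParser();
    parser.setAnalyzer(new StandardAnalyzer(Version.LUCENE_40));
    Query q = parser.parse("a AND b OR c AND d", "field");  // groups as (a AND b) OR (c AND d)
    System.out.println(q.toString("field"));
  }
}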
@@ -30,7 +30,7 @@
  <!--
   NOTE: see the README.javacc for details on how to fully regenerate the parser
  -->
  <target name="javacc" depends="javacc-flexible,javacc-precedence,javacc-surround"/>
  <target name="javacc" depends="javacc-flexible,javacc-surround"/>

  <target name="javacc-flexible" depends="javacc-check">
    <delete>

@@ -95,19 +95,6 @@ import org.apache.lucene.queryParser.core.messages.*;"
            byline="true"/>
  </target>

  <property name="javacc.precedence.path" location="src/java/org/apache/lucene/queryParser/precedence"/>

  <target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 4.1)">
    <delete>
      <fileset dir="${javacc.precedence.path}" includes="*.java">
        <containsregexp expression="Generated.*By.*JavaCC"/>
      </fileset>
    </delete>
    <invoke-javacc target="${javacc.precedence.path}/PrecedenceQueryParser.jj"
                   outputDir="${javacc.precedence.path}"
    />
  </target>

  <target name="javacc-surround" depends="javacc-check" description="generate surround query parser from jj (requires javacc 4.1)">
    <invoke-javacc target="src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj"
                   outputDir="src/java/org/apache/lucene/queryParser/surround/parser"
@@ -1,112 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.queryParser.precedence;

/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * All the methods except backup can be implemented in any fashion. backup
 * needs to be implemented correctly for the correct operation of the lexer.
 * The rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */

public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface. Can throw any java.io.IOException.
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   * @deprecated
   * @see #getEndColumn
   */
  int getColumn();

  /**
   * Returns the line number of the character last read.
   * @deprecated
   * @see #getEndLine
   */
  int getLine();

  /**
   * Returns the column number of the last character for the current token (being
   * matched after the last call to BeginToken).
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for the current token (being
   * matched after the last call to BeginToken).
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for the current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for the current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows:
   *
   *   {
   *     String t = GetImage();
   *     return t.substring(t.length() - len, t.length()).toCharArray();
   *   }
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

}
/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */
@@ -1,123 +0,0 @@
// FastCharStream.java
package org.apache.lucene.queryParser.precedence;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.*;

/** An efficient implementation of JavaCC's CharStream interface.  <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
 * org.apache.lucene.analysis.Token} API. */
public final class FastCharStream implements CharStream {
  char[] buffer = null;

  int bufferLength = 0;          // end of valid chars
  int bufferPosition = 0;        // next char to read

  int tokenStart = 0;            // offset in buffer
  int bufferStart = 0;           // position in file of buffer

  Reader input;                  // source of chars

  /** Constructs from a Reader. */
  public FastCharStream(Reader r) {
    input = r;
  }

  public final char readChar() throws IOException {
    if (bufferPosition >= bufferLength)
      refill();
    return buffer[bufferPosition++];
  }

  private final void refill() throws IOException {
    int newPosition = bufferLength - tokenStart;

    if (tokenStart == 0) {        // token won't fit in buffer
      if (buffer == null) {       // first time: alloc buffer
        buffer = new char[2048];
      } else if (bufferLength == buffer.length) { // grow buffer
        char[] newBuffer = new char[buffer.length*2];
        System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
        buffer = newBuffer;
      }
    } else {                      // shift token to front
      System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
    }

    bufferLength = newPosition;   // update state
    bufferPosition = newPosition;
    bufferStart += tokenStart;
    tokenStart = 0;

    int charsRead =               // fill space in buffer
      input.read(buffer, newPosition, buffer.length-newPosition);
    if (charsRead == -1)
      throw new IOException("read past eof");
    else
      bufferLength += charsRead;
  }

  public final char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  public final void backup(int amount) {
    bufferPosition -= amount;
  }

  public final String GetImage() {
    return new String(buffer, tokenStart, bufferPosition - tokenStart);
  }

  public final char[] GetSuffix(int len) {
    char[] value = new char[len];
    System.arraycopy(buffer, bufferPosition - len, value, 0, len);
    return value;
  }

  public final void Done() {
    try {
      input.close();
    } catch (IOException e) {
      System.err.println("Caught: " + e + "; ignoring.");
    }
  }

  public final int getColumn() {
    return bufferStart + bufferPosition;
  }
  public final int getLine() {
    return 1;
  }
  public final int getEndColumn() {
    return bufferStart + bufferPosition;
  }
  public final int getEndLine() {
    return 1;
  }
  public final int getBeginColumn() {
    return bufferStart + tokenStart;
  }
  public final int getBeginLine() {
    return 1;
  }
}
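The deleted FastCharStream above implements the CharStream contract the generated token manager depends on: BeginToken() marks a token start, readChar() advances, backup(n) un-reads characters that turned out not to belong to the token, and GetImage() returns the token text. A hypothetical trace through the class exactly as shown above (the input string is illustrative):

import java.io.StringReader;

FastCharStream in = new FastCharStream(new StringReader("foo bar"));
char c = in.BeginToken();      // 'f': sets tokenStart = bufferPosition, then reads one char
in.readChar();                 // 'o'
in.readChar();                 // 'o'
in.readChar();                 // ' ': one character past the token
in.backup(1);                  // un-read the space
String image = in.GetImage();  // "foo": buffer[tokenStart .. bufferPosition)

Note how refill() shifts the in-progress token to the front of the buffer before reading more input; that is what keeps backup() and GetImage() valid across refills.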
@@ -1,198 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
/* JavaCCOptions:KEEP_LINE_COL=null */
package org.apache.lucene.queryParser.precedence;

/**
 * This exception is thrown when parse errors are encountered.
 * You can explicitly create objects of this exception type by
 * calling the method generateParseException in the generated
 * parser.
 *
 * You can modify this class to customize your error reporting
 * mechanisms so long as you retain the public fields.
 */
public class ParseException extends Exception {

  /**
   * This constructor is used by the method "generateParseException"
   * in the generated parser.  Calling this constructor generates
   * a new object of this type with the fields "currentToken",
   * "expectedTokenSequences", and "tokenImage" set.  The boolean
   * flag "specialConstructor" is also set to true to indicate that
   * this constructor was used to create this object.
   * This constructor calls its super class with the empty string
   * to force the "toString" method of parent class "Throwable" to
   * print the error message in the form:
   *     ParseException: <result of getMessage>
   */
  public ParseException(Token currentTokenVal,
                        int[][] expectedTokenSequencesVal,
                        String[] tokenImageVal
                       )
  {
    super("");
    specialConstructor = true;
    currentToken = currentTokenVal;
    expectedTokenSequences = expectedTokenSequencesVal;
    tokenImage = tokenImageVal;
  }

  /**
   * The following constructors are for use by you for whatever
   * purpose you can think of.  Constructing the exception in this
   * manner makes the exception behave in the normal way - i.e., as
   * documented in the class "Throwable".  The fields "errorToken",
   * "expectedTokenSequences", and "tokenImage" do not contain
   * relevant information.  The JavaCC generated code does not use
   * these constructors.
   */

  public ParseException() {
    super();
    specialConstructor = false;
  }

  /** Constructor with message. */
  public ParseException(String message) {
    super(message);
    specialConstructor = false;
  }

  /**
   * This variable determines which constructor was used to create
   * this object and thereby affects the semantics of the
   * "getMessage" method (see below).
   */
  protected boolean specialConstructor;

  /**
   * This is the last token that has been consumed successfully.  If
   * this object has been created due to a parse error, the token
   * following this token will (therefore) be the first error token.
   */
  public Token currentToken;

  /**
   * Each entry in this array is an array of integers.  Each array
   * of integers represents a sequence of tokens (by their ordinal
   * values) that is expected at this point of the parse.
   */
  public int[][] expectedTokenSequences;

  /**
   * This is a reference to the "tokenImage" array of the generated
   * parser within which the parse error occurred.  This array is
   * defined in the generated ...Constants interface.
   */
  public String[] tokenImage;

  /**
   * This method has the standard behavior when this object has been
   * created using the standard constructors.  Otherwise, it uses
   * "currentToken" and "expectedTokenSequences" to generate a parse
   * error message and returns it.  If this object has been created
   * due to a parse error, and you do not catch it (it gets thrown
   * from the parser), then this method is called during the printing
   * of the final stack trace, and hence the correct error message
   * gets displayed.
   */
  public String getMessage() {
    if (!specialConstructor) {
      return super.getMessage();
    }
    StringBuffer expected = new StringBuffer();
    int maxSize = 0;
    for (int i = 0; i < expectedTokenSequences.length; i++) {
      if (maxSize < expectedTokenSequences[i].length) {
        maxSize = expectedTokenSequences[i].length;
      }
      for (int j = 0; j < expectedTokenSequences[i].length; j++) {
        expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
      }
      if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
        expected.append("...");
      }
      expected.append(eol).append("    ");
    }
    String retval = "Encountered \"";
    Token tok = currentToken.next;
    for (int i = 0; i < maxSize; i++) {
      if (i != 0) retval += " ";
      if (tok.kind == 0) {
        retval += tokenImage[0];
        break;
      }
      retval += " " + tokenImage[tok.kind];
      retval += " \"";
      retval += add_escapes(tok.image);
      retval += " \"";
      tok = tok.next;
    }
    retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
    retval += "." + eol;
    if (expectedTokenSequences.length == 1) {
      retval += "Was expecting:" + eol + "    ";
    } else {
      retval += "Was expecting one of:" + eol + "    ";
    }
    retval += expected.toString();
    return retval;
  }

  /**
   * The end of line string for this machine.
   */
  protected String eol = System.getProperty("line.separator", "\n");

  /**
   * Used to convert raw characters to their escaped version
   * when these raw versions cannot be used as part of an ASCII
   * string literal.
   */
  protected String add_escapes(String str) {
    StringBuffer retval = new StringBuffer();
    char ch;
    for (int i = 0; i < str.length(); i++) {
      switch (str.charAt(i))
      {
        case 0 :
          continue;
        case '\b':
          retval.append("\\b");
          continue;
        case '\t':
          retval.append("\\t");
          continue;
        case '\n':
          retval.append("\\n");
          continue;
        case '\f':
          retval.append("\\f");
          continue;
        case '\r':
          retval.append("\\r");
          continue;
        case '\"':
          retval.append("\\\"");
          continue;
        case '\'':
          retval.append("\\\'");
          continue;
        case '\\':
          retval.append("\\\\");
          continue;
        default:
          if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
            String s = "0000" + Integer.toString(ch, 16);
            retval.append("\\u" + s.substring(s.length() - 4, s.length()));
          } else {
            retval.append(ch);
          }
          continue;
      }
    }
    return retval.toString();
  }

}
/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */
File diff suppressed because it is too large
@@ -1,982 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(PrecedenceQueryParser)

package org.apache.lucene.queryParser.precedence;

import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;

/**
 * Experimental query parser variant designed to handle operator precedence
 * in a more sensible fashion than QueryParser.  There are still some
 * open issues with this parser. The following tests are currently failing
 * in TestPrecedenceQueryParser and are disabled to make this test pass:
 * <ul>
 * <li> testSimple
 * <li> testWildcard
 * <li> testPrecedence
 * </ul>
 *
 * This class is generated by JavaCC.  The only method that clients should need
 * to call is {@link #parse(String)}.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 * </pre>
 *
 * <p>
 * Examples of appropriately formatted queries can be found in the <a
 * href="../../../../../../../queryparsersyntax.html">query syntax
 * documentation</a>.
 * </p>
 */
public class PrecedenceQueryParser {

  private static final int CONJ_NONE   = 0;
  private static final int CONJ_AND    = 1;
  private static final int CONJ_OR     = 2;

  private static final int MOD_NONE    = 0;
  private static final int MOD_NOT     = 10;
  private static final int MOD_REQ     = 11;

  // make it possible to call setDefaultOperator() without accessing
  // the nested class:
  public static final Operator AND_OPERATOR = Operator.AND;
  public static final Operator OR_OPERATOR = Operator.OR;

  /** The actual operator that parser uses to combine query terms */
  private Operator operator = OR_OPERATOR;

  boolean lowercaseExpandedTerms = true;
  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;

  Analyzer analyzer;
  String field;
  int phraseSlop = 0;
  float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
  Locale locale = Locale.getDefault();

  static enum Operator { OR, AND }

  /** Constructs a query parser.
   *  @param f  the default field for query terms.
   *  @param a  used to find terms in the query text.
   */
  public PrecedenceQueryParser(String f, Analyzer a) {
    this(new FastCharStream(new StringReader("")));
    analyzer = a;
    field = f;
  }

  /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
   *  @param expression  the query string to be parsed.
   *  @throws ParseException if the parsing fails
   */
  public Query parse(String expression) throws ParseException {
    // optimize empty query to be empty BooleanQuery
    if (expression == null || expression.trim().length() == 0) {
      return new BooleanQuery();
    }

    ReInit(new FastCharStream(new StringReader(expression)));
    try {
      Query query = Query(field);
      return (query != null) ? query : new BooleanQuery();
    }
    catch (TokenMgrError tme) {
      throw new ParseException(tme.getMessage());
    }
    catch (BooleanQuery.TooManyClauses tmc) {
      throw new ParseException("Too many boolean clauses");
    }
  }

  /**
   * @return Returns the analyzer.
   */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /**
   * @return Returns the field.
   */
  public String getField() {
    return field;
  }

  /**
   * Get the minimal similarity for fuzzy queries.
   */
  public float getFuzzyMinSim() {
    return fuzzyMinSim;
  }

  /**
   * Set the minimum similarity for fuzzy queries.
   * Default is 2f.
   */
  public void setFuzzyMinSim(float fuzzyMinSim) {
    this.fuzzyMinSim = fuzzyMinSim;
  }

  /**
   * Get the prefix length for fuzzy queries.
   * @return Returns the fuzzyPrefixLength.
   */
  public int getFuzzyPrefixLength() {
    return fuzzyPrefixLength;
  }

  /**
   * Set the prefix length for fuzzy queries. Default is 0.
   * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
   */
  public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
    this.fuzzyPrefixLength = fuzzyPrefixLength;
  }

  /**
   * Sets the default slop for phrases.  If zero, then exact phrase matches
   * are required.  Default value is zero.
   */
  public void setPhraseSlop(int phraseSlop) {
    this.phraseSlop = phraseSlop;
  }

  /**
   * Gets the default slop for phrases.
   */
  public int getPhraseSlop() {
    return phraseSlop;
  }

  /**
   * Sets the boolean operator of the QueryParser.
   * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
   * are considered optional: for example <code>capital of Hungary</code> is equal to
   * <code>capital OR of OR Hungary</code>.<br/>
   * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
   * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
   */
  public void setDefaultOperator(Operator op) {
    this.operator = op;
  }

  /**
   * Gets implicit operator setting, which will be either AND_OPERATOR
   * or OR_OPERATOR.
   */
  public Operator getDefaultOperator() {
    return operator;
  }

  /**
   * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
   * lower-cased or not.  Default is <code>true</code>.
   */
  public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
    this.lowercaseExpandedTerms = lowercaseExpandedTerms;
  }

  /**
   * @see #setLowercaseExpandedTerms(boolean)
   */
  public boolean getLowercaseExpandedTerms() {
    return lowercaseExpandedTerms;
  }

  /**
   * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
   * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
   * a) Runs faster b) Does not have the scarcity of terms unduly influence score
   * c) avoids any "TooManyBooleanClauses" exception.
   * However, if your application really needs to use the
   * old-fashioned BooleanQuery expansion rewriting and the above
   * points are not relevant then use this to change
   * the rewrite method.
   */
  public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
    multiTermRewriteMethod = method;
  }

  /**
   * @see #setMultiTermRewriteMethod
   */
  public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
    return multiTermRewriteMethod;
  }

  /**
   * Set locale used by date range parsing.
   */
  public void setLocale(Locale locale) {
    this.locale = locale;
  }

  /**
   * Returns current locale, allowing access by subclasses.
   */
  public Locale getLocale() {
    return locale;
  }

  protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
    boolean required, prohibited;

    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited
    if (clauses.size() > 0 && conj == CONJ_AND) {
      BooleanClause c = clauses.get(clauses.size()-1);
      if (!c.isProhibited())
        c.setOccur(BooleanClause.Occur.MUST);
    }

    if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
      // If this term is introduced by OR, make the preceding term optional,
      // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
      // notice if the input is a OR b, first term is parsed as required; without
      // this modification a OR b would be parsed as +a OR b
      BooleanClause c = clauses.get(clauses.size()-1);
      if (!c.isProhibited())
        c.setOccur(BooleanClause.Occur.SHOULD);
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null)
      return;

    if (operator == OR_OPERATOR) {
      // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
      // introduced by NOT or -; make sure not to set both.
      prohibited = (modifier == MOD_NOT);
      required = (modifier == MOD_REQ);
      if (conj == CONJ_AND && !prohibited) {
        required = true;
      }
    } else {
      // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
      // if not PROHIBITED and not introduced by OR
      prohibited = (modifier == MOD_NOT);
      required = (!prohibited && conj != CONJ_OR);
    }
    if (required && !prohibited)
      clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST));
    else if (!required && !prohibited)
      clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
    else if (!required && prohibited)
      clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
    else
      throw new RuntimeException("Clause cannot be both required and prohibited");
  }

  /**
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;
    TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);

    try {
      while (source.incrementToken()) {
        list.add(source.captureState());
        if (posincrAtt.getPositionIncrement() == 1)
          positionCount++;
        else
          severalTokensAtSamePosition = true;
      }
      source.end();
      source.close();
    } catch (IOException e) {
      // ignore, should never happen for StringReaders
    }

    if (list.size() == 0)
      return null;
    else if (list.size() == 1) {
      source.restoreState(list.get(0));
      BytesRef term = new BytesRef();
      termAtt.toBytesRef(term);
      return new TermQuery(new Term(field, term));
    } else {
      if (severalTokensAtSamePosition || !quoted) {
        if (positionCount == 1 || !quoted) {
          // no phrase query:
          BooleanQuery q = new BooleanQuery(positionCount == 1);

          BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
            BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;

          for (int i = 0; i < list.size(); i++) {
            BytesRef term = new BytesRef();
            source.restoreState(list.get(i));
            termAtt.toBytesRef(term);
            TermQuery currentQuery = new TermQuery(
                new Term(field, term));
            q.add(currentQuery, occur);
          }
          return q;
        }
        else {
          // phrase query:
          MultiPhraseQuery mpq = new MultiPhraseQuery();
          List<Term> multiTerms = new ArrayList<Term>();
          for (int i = 0; i < list.size(); i++) {
            BytesRef term = new BytesRef();
            source.restoreState(list.get(i));
            if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
              mpq.add(multiTerms.toArray(new Term[0]));
              multiTerms.clear();
            }
            termAtt.toBytesRef(term);
            multiTerms.add(new Term(field, term));
          }
          mpq.add(multiTerms.toArray(new Term[0]));
          return mpq;
        }
      }
      else {
        PhraseQuery q = new PhraseQuery();
        q.setSlop(phraseSlop);
        for (int i = 0; i < list.size(); i++) {
          BytesRef term = new BytesRef();
          source.restoreState(list.get(i));
          termAtt.toBytesRef(term);
          q.add(new Term(field, term));
        }
        return q;
      }
    }
  }

  /**
   * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}.
   * This method may be overridden, for example, to return
   * a SpanNearQuery instead of a PhraseQuery.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText, int slop)
        throws ParseException {
    Query query = getFieldQuery(field, queryText, true);

    if (query instanceof PhraseQuery) {
      ((PhraseQuery) query).setSlop(slop);
    }
    if (query instanceof MultiPhraseQuery) {
      ((MultiPhraseQuery) query).setSlop(slop);
    }

    return query;
  }

  /**
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getRangeQuery(String field,
                                String part1,
                                String part2,
                                boolean inclusive) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      part1 = part1.toLowerCase();
      part2 = part2.toLowerCase();
    }
    try {
      DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
      df.setLenient(true);
      Date d1 = df.parse(part1);
      Date d2 = df.parse(part2);
      part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY);
      part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY);
    }
    catch (Exception e) { }

    final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive);
    query.setRewriteMethod(multiTermRewriteMethod);
    return query;
  }

  /**
   * Factory method for generating query, given a set of clauses.
   * By default creates a boolean query composed of clauses passed in.
   *
   * Can be overridden by extending classes, to modify query being
   * returned.
   *
   * @param clauses List that contains {@link BooleanClause} instances
   *    to join.
   *
   * @return Resulting {@link Query} object.
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
  {
    return getBooleanQuery(clauses, false);
  }

  /**
   * Factory method for generating query, given a set of clauses.
   * By default creates a boolean query composed of clauses passed in.
   *
   * Can be overridden by extending classes, to modify query being
   * returned.
   *
   * @param clauses List that contains {@link BooleanClause} instances
   *    to join.
   * @param disableCoord true if coord scoring should be disabled.
   *
   * @return Resulting {@link Query} object.
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
    throws ParseException {
    if (clauses == null || clauses.size() == 0)
      return null;

    BooleanQuery query = new BooleanQuery(disableCoord);
    for (int i = 0; i < clauses.size(); i++) {
      query.add(clauses.get(i));
    }
    return query;
  }

  /**
   * Factory method for generating a query. Called when parser
   * parses an input term token that contains one or more wildcard
   * characters (? and *), but is not a prefix term token (one
   * that has just a single * character at the end)
   *<p>
   * Depending on settings, prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wildcard queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token that contains one or more wild card
   *   characters (? or *), but is not simple prefix term
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getWildcardQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    final WildcardQuery query = new WildcardQuery(t);
    query.setRewriteMethod(multiTermRewriteMethod);
    return query;
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses an input term
   * token that uses prefix notation; that is, contains a single '*' wildcard
   * character as its last character. Since this is a special case
   * of generic wildcard term, and such a query can be optimized easily,
   * this usually results in a different query object.
   *<p>
   * Depending on settings, a prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wild card queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *    (<b>without</b> trailing '*' character!)
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getPrefixQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    final PrefixQuery query = new PrefixQuery(t);
    query.setRewriteMethod(multiTermRewriteMethod);
    return query;
  }

  /**
   * Factory method for generating a query. Called when parser
   * parses an input term token that contains a regular expression
   * query.
   *<p>
   * Depending on settings, pattern term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with regular expression templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * regular expression queries, which may be necessary due to missing analyzer
   * calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token that contains a regular expression
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getRegexpQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    final Term regexp = new Term(field, termStr);
    final RegexpQuery query = new RegexpQuery(regexp);
    query.setRewriteMethod(multiTermRewriteMethod);
    return query;
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses
   * an input term token that has the fuzzy suffix (~) appended.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
  }

  /**
   * Returns a String where the escape char has been
   * removed, or kept only once if there was a double escape.
   */
  private String discardEscapeChar(String input) {
    char[] caSource = input.toCharArray();
    char[] caDest = new char[caSource.length];
    int j = 0;
    for (int i = 0; i < caSource.length; i++) {
      if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
        caDest[j++]=caSource[i];
      }
    }
    return new String(caDest, 0, j);
  }

  /**
   * Returns a String where those characters that QueryParser
   * expects to be escaped are escaped by a preceding <code>\</code>.
   */
  public static String escape(String s) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      // NOTE: keep this in sync with _ESCAPED_CHAR below!
      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
        || c == '*' || c == '?') {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb.toString();
  }

  /**
   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
   * Usage:<br>
   * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
   */
//  public static void main(String[] args) throws Exception {
//    if (args.length == 0) {
//      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
//      System.exit(0);
//    }
//    PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
//                           new org.apache.lucene.analysis.SimpleAnalyzer());
//    Query q = qp.parse(args[0]);
//    System.out.println(q.toString("field"));
//  }
}

PARSER_END(PrecedenceQueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  < <_WHITESPACE>>
}

// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
// | <WILDTERM:  <_TERM_START_CHAR>
//              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM:  (<_TERM_CHAR> | ( [ "*", "?" ] ))* >

<DEFAULT> TOKEN : {
  <AND:        ("AND" | "&&") >
| <OR:         ("OR" | "||") >
| <NOT:        ("NOT" | "!") >
| <PLUS:       "+" >
| <MINUS:      "-" >
| <LPAREN:     "(" >
| <RPAREN:     ")" >
| <COLON:      ":" >
| <CARAT:      "^" > : Boost
| <QUOTED:     "\"" (~["\""])+ "\"">
| <TERM:       <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM:   <_TERM_START_CHAR>
               (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}

<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR> { ret = CONJ_OR; }
  ]
  { return ret; }
}

int Modifier() : {
  int ret = MOD_NONE;
}
{
  [
    <PLUS> { ret = MOD_REQ; }
    | <MINUS> { ret = MOD_NOT; }
    | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}

Query Query(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  boolean orPresent = false;
  int modifier;
}
{
  modifier=Modifier() q=andExpression(field)
  {
    addClause(clauses, CONJ_NONE, modifier, q);
    if (modifier == MOD_NONE)
      firstQuery = q;
  }
  (
    [<OR> { orPresent=true; }] modifier=Modifier() q=andExpression(field)
    { addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null)
      return firstQuery;
    else {
      return getBooleanQuery(clauses);
    }
  }
}

Query andExpression(String field) :
{
  List<BooleanClause> clauses = new ArrayList<BooleanClause>();
  Query q, firstQuery=null;
  int modifier;
}
{
  q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, MOD_NONE, q);
    firstQuery = q;
  }
  (
    <AND> modifier=Modifier() q=Clause(field)
    { addClause(clauses, CONJ_AND, modifier, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null)
      return firstQuery;
    else {
      return getBooleanQuery(clauses);
    }
  }
}

Query Clause(String field) : {
  Query q;
  Token fieldToken=null, boost=null;
}
{
  [
    LOOKAHEAD(2)
    fieldToken=<TERM> <COLON> {
      field=discardEscapeChar(fieldToken.image);
    }
  ]

  (
    q=Term(field)
    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
  )
  {
    if (boost != null) {
      float f = (float)1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
        q.setBoost(f);
      } catch (Exception ignored) { }
    }
    return q;
  }
}

Query Term(String field) : {
  Token term, boost=null, fuzzySlop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  boolean regexp = false;

  Query q;
}
{
  (
    (
      term=<TERM>
      | term=<PREFIXTERM> { prefix=true; }
      | term=<WILDTERM> { wildcard=true; }
      | term=<REGEXPTERM> { regexp=true; }
      | term=<NUMBER>
    )
    [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
    {
      String termImage=discardEscapeChar(term.image);
      if (wildcard) {
        q = getWildcardQuery(field, termImage);
      } else if (prefix) {
        q = getPrefixQuery(field,
          discardEscapeChar(term.image.substring
          (0, term.image.length()-1)));
      } else if (regexp) {
        q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
      } else if (fuzzy) {
        float fms = fuzzyMinSim;
        try {
          fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        } catch (Exception ignored) { }
        if(fms < 0.0f){
          throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        } else if (fms >= 1.0f && fms != (int) fms) {
          throw new ParseException("Fractional edit distances are not allowed!");
        }
        q = getFuzzyQuery(field, termImage, fms);
      } else {
        q = getFieldQuery(field, termImage, false);
      }
    }
    | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
        [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
        <RANGEIN_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEIN_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        } else {
          goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEIN_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        } else {
          goop2.image = discardEscapeChar(goop2.image);
        }
        q = getRangeQuery(field, goop1.image, goop2.image, true);
      }
    | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
        [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
        <RANGEEX_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEEX_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        } else {
          goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEEX_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        } else {
          goop2.image = discardEscapeChar(goop2.image);
        }

        q = getRangeQuery(field, goop1.image, goop2.image, false);
      }
    | term=<QUOTED>
      [ fuzzySlop=<FUZZY_SLOP> ]
      [ <CARAT> boost=<NUMBER> ]
      {
        int s = phraseSlop;

        if (fuzzySlop != null) {
          try {
            s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
          }
          catch (Exception ignored) { }
        }
        q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
      }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) {
        /* Should this be handled somehow? (defaults to "no boost", if
         * boost number is invalid)
         */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}
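One detail worth calling out from the grammar above: the static escape(String) helper backslash-escapes exactly the characters listed in the _ESCAPED_CHAR token, and the NOTE comments remind maintainers to keep the two lists in sync. A short trace through escape() as shown, using the now-deleted class (input chosen for illustration):

String escaped = PrecedenceQueryParser.escape("(1+1):2");
// '(' '+' ')' and ':' each gain a leading backslash:
// escaped holds the text \(1\+1\)\:2  (as a Java literal: "\\(1\\+1\\)\\:2")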
@@ -1,122 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */
package org.apache.lucene.queryParser.precedence;

/**
 * Token literal values and constants.
 * Generated by org.javacc.parser.OtherFilesGen#start()
 */
public interface PrecedenceQueryParserConstants {

  /** End of File. */
  int EOF = 0;
  /** RegularExpression Id. */
  int _NUM_CHAR = 1;
  /** RegularExpression Id. */
  int _ESCAPED_CHAR = 2;
  /** RegularExpression Id. */
  int _TERM_START_CHAR = 3;
  /** RegularExpression Id. */
  int _TERM_CHAR = 4;
  /** RegularExpression Id. */
  int _WHITESPACE = 5;
  /** RegularExpression Id. */
  int AND = 7;
  /** RegularExpression Id. */
  int OR = 8;
  /** RegularExpression Id. */
  int NOT = 9;
  /** RegularExpression Id. */
  int PLUS = 10;
  /** RegularExpression Id. */
  int MINUS = 11;
  /** RegularExpression Id. */
  int LPAREN = 12;
  /** RegularExpression Id. */
  int RPAREN = 13;
  /** RegularExpression Id. */
  int COLON = 14;
  /** RegularExpression Id. */
  int CARAT = 15;
  /** RegularExpression Id. */
  int QUOTED = 16;
  /** RegularExpression Id. */
  int TERM = 17;
  /** RegularExpression Id. */
  int FUZZY_SLOP = 18;
  /** RegularExpression Id. */
  int PREFIXTERM = 19;
  /** RegularExpression Id. */
  int WILDTERM = 20;
  /** RegularExpression Id. */
  int REGEXPTERM = 21;
  /** RegularExpression Id. */
  int RANGEIN_START = 22;
  /** RegularExpression Id. */
  int RANGEEX_START = 23;
  /** RegularExpression Id. */
  int NUMBER = 24;
  /** RegularExpression Id. */
  int RANGEIN_TO = 25;
  /** RegularExpression Id. */
  int RANGEIN_END = 26;
  /** RegularExpression Id. */
  int RANGEIN_QUOTED = 27;
  /** RegularExpression Id. */
  int RANGEIN_GOOP = 28;
  /** RegularExpression Id. */
  int RANGEEX_TO = 29;
  /** RegularExpression Id. */
  int RANGEEX_END = 30;
  /** RegularExpression Id. */
  int RANGEEX_QUOTED = 31;
  /** RegularExpression Id. */
  int RANGEEX_GOOP = 32;

  /** Lexical state. */
  int Boost = 0;
  /** Lexical state. */
  int RangeEx = 1;
  /** Lexical state. */
  int RangeIn = 2;
  /** Lexical state. */
  int DEFAULT = 3;

  /** Literal token values. */
  String[] tokenImage = {
    "<EOF>",
    "<_NUM_CHAR>",
    "<_ESCAPED_CHAR>",
    "<_TERM_START_CHAR>",
    "<_TERM_CHAR>",
    "<_WHITESPACE>",
    "<token of kind 6>",
    "<AND>",
    "<OR>",
    "<NOT>",
    "\"+\"",
    "\"-\"",
    "\"(\"",
    "\")\"",
    "\":\"",
    "\"^\"",
    "<QUOTED>",
    "<TERM>",
    "<FUZZY_SLOP>",
    "<PREFIXTERM>",
    "<WILDTERM>",
    "<REGEXPTERM>",
    "\"[\"",
    "\"{\"",
    "<NUMBER>",
    "\"TO\"",
    "\"]\"",
    "<RANGEIN_QUOTED>",
    "<RANGEIN_GOOP>",
    "\"TO\"",
    "\"}\"",
    "<RANGEEX_QUOTED>",
    "<RANGEEX_GOOP>",
  };

}
File diff suppressed because it is too large
@ -1,124 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
package org.apache.lucene.queryParser.precedence;

/**
 * Describes the input token stream.
 */

public class Token {

  /**
   * An integer that describes the kind of this token. This numbering
   * system is determined by JavaCCParser, and a table of these numbers is
   * stored in the file ...Constants.java.
   */
  public int kind;

  /** The line number of the first character of this Token. */
  public int beginLine;
  /** The column number of the first character of this Token. */
  public int beginColumn;
  /** The line number of the last character of this Token. */
  public int endLine;
  /** The column number of the last character of this Token. */
  public int endColumn;

  /**
   * The string image of the token.
   */
  public String image;

  /**
   * A reference to the next regular (non-special) token from the input
   * stream. If this is the last token from the input stream, or if the
   * token manager has not read tokens beyond this one, this field is
   * set to null. This is true only if this token is also a regular
   * token. Otherwise, see below for a description of the contents of
   * this field.
   */
  public Token next;

  /**
   * This field is used to access special tokens that occur prior to this
   * token, but after the immediately preceding regular (non-special) token.
   * If there are no such special tokens, this field is set to null.
   * When there are more than one such special token, this field refers
   * to the last of these special tokens, which in turn refers to the next
   * previous special token through its specialToken field, and so on
   * until the first special token (whose specialToken field is null).
   * The next fields of special tokens refer to other special tokens that
   * immediately follow it (without an intervening regular token). If there
   * is no such token, this field is null.
   */
  public Token specialToken;

  /**
   * An optional attribute value of the Token.
   * Tokens which are not used as syntactic sugar will often contain
   * meaningful values that will be used later on by the compiler or
   * interpreter. This attribute value is often different from the image.
   * Any subclass of Token that actually wants to return a non-null value can
   * override this method as appropriate.
   */
  public Object getValue() {
    return null;
  }

  /**
   * No-argument constructor
   */
  public Token() {}

  /**
   * Constructs a new token for the specified Image.
   */
  public Token(int kind)
  {
    this(kind, null);
  }

  /**
   * Constructs a new token for the specified Image and Kind.
   */
  public Token(int kind, String image)
  {
    this.kind = kind;
    this.image = image;
  }

  /**
   * Returns the image.
   */
  public String toString()
  {
    return image;
  }

  /**
   * Returns a new Token object, by default. However, if you want, you
   * can create and return subclass objects based on the value of ofKind.
   * Simply add the cases to the switch for all those special cases.
   * For example, if you have a subclass of Token called IDToken that
   * you want to create if ofKind is ID, simply add something like :
   *
   *    case MyParserConstants.ID : return new IDToken(ofKind, image);
   *
   * to the following switch statement. Then you can cast matchedToken
   * variable to the appropriate type and use it in your lexical actions.
   */
  public static Token newToken(int ofKind, String image)
  {
    switch(ofKind)
    {
      default : return new Token(ofKind, image);
    }
  }

  public static Token newToken(int ofKind)
  {
    return newToken(ofKind, null);
  }

}
/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */
@ -1,141 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
/* JavaCCOptions: */
package org.apache.lucene.queryParser.precedence;

/** Token Manager Error. */
@SuppressWarnings("serial")
public class TokenMgrError extends Error
{

  /*
   * Ordinals for various reasons why an Error of this type can be thrown.
   */

  /**
   * Lexical error occurred.
   */
  static final int LEXICAL_ERROR = 0;

  /**
   * An attempt was made to create a second instance of a static token manager.
   */
  static final int STATIC_LEXER_ERROR = 1;

  /**
   * Tried to change to an invalid lexical state.
   */
  static final int INVALID_LEXICAL_STATE = 2;

  /**
   * Detected (and bailed out of) an infinite loop in the token manager.
   */
  static final int LOOP_DETECTED = 3;

  /**
   * Indicates the reason why the exception is thrown. It will have
   * one of the above 4 values.
   */
  int errorCode;

  /**
   * Replaces unprintable characters by their escaped (or unicode escaped)
   * equivalents in the given string
   */
  protected static final String addEscapes(String str) {
    StringBuffer retval = new StringBuffer();
    char ch;
    for (int i = 0; i < str.length(); i++) {
      switch (str.charAt(i))
      {
        case 0 :
          continue;
        case '\b':
          retval.append("\\b");
          continue;
        case '\t':
          retval.append("\\t");
          continue;
        case '\n':
          retval.append("\\n");
          continue;
        case '\f':
          retval.append("\\f");
          continue;
        case '\r':
          retval.append("\\r");
          continue;
        case '\"':
          retval.append("\\\"");
          continue;
        case '\'':
          retval.append("\\\'");
          continue;
        case '\\':
          retval.append("\\\\");
          continue;
        default:
          if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
            String s = "0000" + Integer.toString(ch, 16);
            retval.append("\\u" + s.substring(s.length() - 4, s.length()));
          } else {
            retval.append(ch);
          }
          continue;
      }
    }
    return retval.toString();
  }

  /**
   * Returns a detailed message for the Error when it is thrown by the
   * token manager to indicate a lexical error.
   * Parameters :
   *    EOFSeen     : indicates if EOF caused the lexical error
   *    curLexState : lexical state in which this error occurred
   *    errorLine   : line number when the error occurred
   *    errorColumn : column number when the error occurred
   *    errorAfter  : prefix that was seen before this error occurred
   *    curchar     : the offending character
   * Note: You can customize the lexical error message by modifying this method.
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    return("Lexical error at line " +
          errorLine + ", column " +
          errorColumn + ". Encountered: " +
          (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") +
          "after : \"" + addEscapes(errorAfter) + "\"");
  }

  /**
   * You can also modify the body of this method to customize your error messages.
   * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
   * of end-users concern, so you can return something like :
   *
   *     "Internal Error : Please file a bug report .... "
   *
   * from this method for such cases in the release version of your parser.
   */
  public String getMessage() {
    return super.getMessage();
  }

  /*
   * Constructors of various flavors follow.
   */

  /** No arg constructor. */
  public TokenMgrError() {
  }

  /** Constructor with message and reason. */
  public TokenMgrError(String message, int reason) {
    super(message);
    errorCode = reason;
  }

  /** Full Constructor. */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}
/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */
@ -16,7 +16,24 @@
   limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
QueryParser designed to handle operator precedence in a more sensible fashion than the default QueryParser.

This package contains the Precedence Query Parser implementation.

<h2>Lucene Precedence Query Parser</h2>

<p>
The Precedence Query Parser extends the Standard Query Parser and enables
boolean precedence. So the query <a AND b OR c AND d> is parsed to
<(+a +b) (+c +d)> instead of <+a +b +c +d>.
</p>
<p>
See {@link org.apache.lucene.queryParser.standard.StandardQueryParser} for more details about the
supported syntax and query parser functionality.
</p>

</body>
</html>
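To make the precedence behavior concrete, here is a hedged usage sketch. It is not part of this commit; it only combines calls that the new test code further below exercises (the no-argument constructor, setAnalyzer, setDefaultOperator, and the two-argument parse(query, defaultField)), with MockAnalyzer from Lucene's test framework standing in for a real Analyzer:

// Hedged sketch, not part of this commit: boolean precedence through the
// flexible-framework PrecedenceQueryParser. MockAnalyzer is Lucene's test
// analyzer; any Analyzer would do in production code.
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
import org.apache.lucene.search.Query;

public class PrecedenceExample {
  public static void main(String[] args) throws Exception {
    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.setAnalyzer(new MockAnalyzer());
    qp.setDefaultOperator(Operator.OR);
    // AND binds tighter than OR, so this prints "(+a +b) (+c +d)"
    // rather than the flat "+a +b +c +d" of the classic QueryParser.
    Query q = qp.parse("a AND b OR c AND d", "field");
    System.out.println(q.toString("field"));
  }
}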
@ -0,0 +1,138 @@
package org.apache.lucene.queryParser.precedence.processors;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode.Modifier;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;

/**
 * <p>
 * This processor is used to apply the correct {@link ModifierQueryNode} to the
 * children of {@link BooleanQueryNode}s.
 * </p>
 * <p>
 * It walks through the query node tree looking for {@link BooleanQueryNode}s.
 * When an {@link AndQueryNode} is found, every child that is not a
 * {@link ModifierQueryNode}, or whose modifier is {@link Modifier#MOD_NONE},
 * is wrapped in a {@link Modifier#MOD_REQ} modifier. For any other
 * {@link BooleanQueryNode} that is not an {@link OrQueryNode}, the same
 * operation is applied when the default operator is {@link Operator#AND}.
 * </p>
 *
 * @see DefaultOperatorAttribute
 * @see PrecedenceQueryParser#setDefaultOperator
 */
public class BooleanModifiersQueryNodeProcessor extends QueryNodeProcessorImpl {

  private ArrayList<QueryNode> childrenBuffer = new ArrayList<QueryNode>();

  private boolean usingAnd = false;

  public BooleanModifiersQueryNodeProcessor() {
    // empty constructor
  }

  @Override
  public QueryNode process(QueryNode queryTree) throws QueryNodeException {

    if (!getQueryConfigHandler().hasAttribute(DefaultOperatorAttribute.class)) {
      throw new IllegalArgumentException(
          "DefaultOperatorAttribute should be set on the QueryConfigHandler");
    }

    this.usingAnd = Operator.AND == getQueryConfigHandler().getAttribute(
        DefaultOperatorAttribute.class).getOperator();

    return super.process(queryTree);

  }

  @Override
  protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {

    if (node instanceof AndQueryNode) {
      this.childrenBuffer.clear();
      List<QueryNode> children = node.getChildren();

      for (QueryNode child : children) {
        this.childrenBuffer.add(applyModifier(child, Modifier.MOD_REQ));
      }

      node.set(this.childrenBuffer);

    } else if (this.usingAnd && node instanceof BooleanQueryNode
        && !(node instanceof OrQueryNode)) {

      this.childrenBuffer.clear();
      List<QueryNode> children = node.getChildren();

      for (QueryNode child : children) {
        this.childrenBuffer.add(applyModifier(child, Modifier.MOD_REQ));
      }

      node.set(this.childrenBuffer);

    }

    return node;

  }

  private QueryNode applyModifier(QueryNode node, Modifier mod) {

    // check if modifier is not already defined and is default
    if (!(node instanceof ModifierQueryNode)) {
      return new ModifierQueryNode(node, mod);

    } else {
      ModifierQueryNode modNode = (ModifierQueryNode) node;

      if (modNode.getModifier() == Modifier.MOD_NONE) {
        return new ModifierQueryNode(modNode.getChild(), mod);
      }

    }

    return node;

  }

  @Override
  protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
    return node;
  }

  @Override
  protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
      throws QueryNodeException {

    return children;

  }

}
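The observable effect of this processor, sketched as hedged JUnit-style assertions. This fragment is not part of the commit; the expected strings mirror the test file at the end of this diff, and assertEquals is JUnit's:

// Hedged sketch of what BooleanModifiersQueryNodeProcessor does to parsed
// queries, seen through PrecedenceQueryParser (expectations match the tests
// at the end of this commit):
PrecedenceQueryParser qp = new PrecedenceQueryParser();
qp.setAnalyzer(new MockAnalyzer());
qp.setDefaultOperator(Operator.OR);
// children of an AND node are wrapped in MOD_REQ:
assertEquals("+a +b", qp.parse("a AND b", "field").toString("field"));
// an explicit modifier other than MOD_NONE is preserved:
assertEquals("+a -b", qp.parse("a AND -b", "field").toString("field"));
// with default operator AND, plain boolean nodes get the same treatment:
qp.setDefaultOperator(Operator.AND);
assertEquals("+a +b", qp.parse("a b", "field").toString("field"));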
@ -0,0 +1,59 @@
package org.apache.lucene.queryParser.precedence.processors;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.standard.processors.GroupQueryNodeProcessor;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;

/**
 * <p>
 * This processor pipeline extends {@link StandardQueryNodeProcessorPipeline} and enables
 * boolean precedence on it.
 * </p>
 * <p>
 * EXPERT: the precedence is enabled by removing {@link GroupQueryNodeProcessor} from the
 * {@link StandardQueryNodeProcessorPipeline} and appending {@link BooleanModifiersQueryNodeProcessor}
 * to the pipeline.
 * </p>
 *
 * @see PrecedenceQueryParser
 * @see StandardQueryNodeProcessorPipeline
 */
public class PrecedenceQueryNodeProcessorPipeline extends StandardQueryNodeProcessorPipeline {

  /**
   * @see StandardQueryNodeProcessorPipeline#StandardQueryNodeProcessorPipeline(QueryConfigHandler)
   */
  public PrecedenceQueryNodeProcessorPipeline(QueryConfigHandler queryConfig) {
    super(queryConfig);

    for (int i = 0; i < size(); i++) {
      if (get(i).getClass().equals(GroupQueryNodeProcessor.class)) {
        remove(i--);
      }
    }

    add(new BooleanModifiersQueryNodeProcessor());
  }

}
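A hedged sketch of driving this pipeline directly, per the EXPERT note above. StandardQueryConfigHandler is assumed here as the config handler that supplies the DefaultOperatorAttribute the processors require; PrecedenceQueryParser normally wires this up itself:

// Hedged sketch (assumes StandardQueryConfigHandler from the standard
// parser's config package, which registers the DefaultOperatorAttribute
// that BooleanModifiersQueryNodeProcessor.process() checks for):
QueryConfigHandler config = new StandardQueryConfigHandler();
PrecedenceQueryNodeProcessorPipeline pipeline =
    new PrecedenceQueryNodeProcessorPipeline(config);
// pipeline.process(queryNodeTree) now runs the standard processors with
// GroupQueryNodeProcessor swapped out for BooleanModifiersQueryNodeProcessor.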
@ -0,0 +1,47 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements. See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License. You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>

This package contains the processors used by the Precedence Query Parser.

<h2>Lucene Precedence Query Parser Processors</h2>

<p>
This package contains the two {@link org.apache.lucene.queryParser.core.processors.QueryNodeProcessor}s used by
{@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}.
</p>
<p>
{@link org.apache.lucene.queryParser.precedence.processors.BooleanModifiersQueryNodeProcessor}: this processor
is used to apply {@link org.apache.lucene.queryParser.core.nodes.ModifierQueryNode}s to
{@link org.apache.lucene.queryParser.core.nodes.BooleanQueryNode} children according to the boolean type
or the default operator.
</p>
<p>
{@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}: this
processor pipeline is used by {@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}. It extends
{@link org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline} and rearranges
the pipeline so that boolean precedence is processed correctly. See {@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}
for more details.
</p>

</body>
</html>
@ -17,67 +17,82 @@ package org.apache.lucene.queryParser.precedence;
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.queryParser.TestQueryParser;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.QueryNodeParseException;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
import org.apache.lucene.queryParser.standard.parser.ParseException;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;

import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;

/**
 * <p>
 * This test case tests {@link PrecedenceQueryParser}.
 * </p>
 * <p>
 * It contains all tests from {@link TestQueryParser} with some adjusted to
 * fit the precedence requirement, plus some precedence test cases.
 * </p>
 *
 * @see TestQueryParser
 */
public class TestPrecedenceQueryParser extends LuceneTestCase {

  public static Analyzer qpAnalyzer = new QPTestAnalyzer();

  public static final class QPTestFilter extends TokenFilter {
    /**
     * Filter which discards the token 'stop' and which expands the
     * token 'phrase' into 'phrase1 phrase2'
     * Filter which discards the token 'stop' and which expands the token
     * 'phrase' into 'phrase1 phrase2'
     */
    public QPTestFilter(TokenStream in) {
      super(in);
    }

    boolean inPhrase = false;

    int savedStart = 0, savedEnd = 0;

    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    @Override
    public boolean incrementToken() throws IOException {
      clearAttributes();
      if (inPhrase) {
        inPhrase = false;
        termAtt.setEmpty().append("phrase2");
        offsetAtt.setOffset(savedStart, savedEnd);
        return true;
      } else
        while(input.incrementToken())
        while (input.incrementToken())
          if (termAtt.toString().equals("phrase")) {
            inPhrase = true;
            savedStart = offsetAtt.startOffset();

@ -94,31 +109,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
  public static final class QPTestAnalyzer extends Analyzer {

    /** Filters MockTokenizer with StopFilter. */
    @Override
    public final TokenStream tokenStream(String fieldName, Reader reader) {
      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
    }
  }

  public static class QPTestParser extends PrecedenceQueryParser {
    public QPTestParser(String f, Analyzer a) {
      super(f, a);
    }

    @Override
    protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
      throw new ParseException("Fuzzy queries not allowed");
    }

    @Override
    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
      throw new ParseException("Wildcard queries not allowed");
    }
  }

  private int originalMaxClauses;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    originalMaxClauses = BooleanQuery.getMaxClauseCount();

@ -127,40 +124,31 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
  public PrecedenceQueryParser getParser(Analyzer a) throws Exception {
    if (a == null)
      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
    qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.setAnalyzer(a);
    qp.setDefaultOperator(Operator.OR);
    return qp;
  }

  public Query getQuery(String query, Analyzer a) throws Exception {
    return getParser(a).parse(query);
    return getParser(a).parse(query, "field");
  }

  public void assertQueryEquals(String query, Analyzer a, String result)
    throws Exception {
      throws Exception {
    Query q = getQuery(query, a);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
      fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
          + "/");
    }
  }

  public void assertWildcardQueryEquals(String query, boolean lowercase, String result)
    throws Exception {
  public void assertWildcardQueryEquals(String query, boolean lowercase,
      String result) throws Exception {
    PrecedenceQueryParser qp = getParser(null);
    qp.setLowercaseExpandedTerms(lowercase);
    Query q = qp.parse(query);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("WildcardQuery /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
    }
  }

  public void assertWildcardQueryEquals(String query, String result) throws Exception {
    PrecedenceQueryParser qp = getParser(null);
    Query q = qp.parse(query);
    Query q = qp.parse(query, "field");
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /"

@ -168,36 +156,41 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    }
  }

  public Query getQueryDOA(String query, Analyzer a)
    throws Exception {
    if (a == null)
      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
    qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
    return qp.parse(query);
  }

  public void assertQueryEqualsDOA(String query, Analyzer a, String result)
    throws Exception {
    Query q = getQueryDOA(query, a);
  public void assertWildcardQueryEquals(String query, String result)
      throws Exception {
    PrecedenceQueryParser qp = getParser(null);
    Query q = qp.parse(query, "field");
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
      fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /"
          + result + "/");
    }
  }

  // failing tests disabled since PrecedenceQueryParser
  // is currently unmaintained
  public void _testSimple() throws Exception {
    assertQueryEquals("", null, "");
  public Query getQueryDOA(String query, Analyzer a) throws Exception {
    if (a == null)
      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.setAnalyzer(a);
    qp.setDefaultOperator(Operator.AND);
    return qp.parse(query, "field");
  }

  public void assertQueryEqualsDOA(String query, Analyzer a, String result)
      throws Exception {
    Query q = getQueryDOA(query, a);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
          + "/");
    }
  }

  public void testSimple() throws Exception {
    assertQueryEquals("term term term", null, "term term term");
    assertQueryEquals("türm term term", null, "türm term term");
    assertQueryEquals("ümlaut", null, "ümlaut");

    assertQueryEquals("+a", null, "+a");
    assertQueryEquals("-a", null, "-a");
    assertQueryEquals("a AND b", null, "+a +b");
    assertQueryEquals("(a AND b)", null, "+a +b");
    assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");

@ -212,9 +205,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {

    assertQueryEquals("+term -term term", null, "+term -term term");
    assertQueryEquals("foo:term AND field:anotherTerm", null,
                      "+foo:term +anotherterm");
        "+foo:term +anotherterm");
    assertQueryEquals("term AND \"phrase phrase\"", null,
                      "+term +\"phrase phrase\"");
        "+term +\"phrase phrase\"");
    assertQueryEquals("\"hello there\"", null, "\"hello there\"");
    assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
    assertTrue(getQuery("hello", null) instanceof TermQuery);

@ -229,25 +222,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");

    assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
                      "+(foo bar) +(baz boo)");
    assertQueryEquals("((a OR b) AND NOT c) OR d", null,
                      "(+(a b) -c) d");
        "+(foo bar) +(baz boo)");
    assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d");
    assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
                      "+(apple \"steve jobs\") -(foo bar baz)");
        "+(apple \"steve jobs\") -(foo bar baz)");
    assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
                      "+(title:dog title:cat) -author:\"bob dole\"");
        "+(title:dog title:cat) -author:\"bob dole\"");

    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer());
    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.setAnalyzer(new MockAnalyzer());
    // make sure OR is the default:
    assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
    qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
    assertEquals(PrecedenceQueryParser.AND_OPERATOR, qp.getDefaultOperator());
    qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
    assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
    assertEquals(Operator.OR, qp.getDefaultOperator());
    qp.setDefaultOperator(Operator.AND);
    assertEquals(Operator.AND, qp.getDefaultOperator());
    qp.setDefaultOperator(Operator.OR);
    assertEquals(Operator.OR, qp.getDefaultOperator());

    assertQueryEquals("a OR !b", null, "a (-b)");
    assertQueryEquals("a OR ! b", null, "a (-b)");
    assertQueryEquals("a OR -b", null, "a (-b)");
    assertQueryEquals("a OR !b", null, "a -b");
    assertQueryEquals("a OR ! b", null, "a -b");
    assertQueryEquals("a OR -b", null, "a -b");
  }

  public void testPunct() throws Exception {

@ -266,110 +259,24 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
  }

  public void testNumber() throws Exception {
    // The numbers go away because SimpleAnalyzer ignores them
    // The numbers go away because SimpleAnalyzer ignores them
    assertQueryEquals("3", null, "");
    assertQueryEquals("term 1.0 1 2", null, "term");
    assertQueryEquals("term term1 term2", null, "term term term");

    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true);
    Analyzer a = new MockAnalyzer();
    assertQueryEquals("3", a, "3");
    assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
    assertQueryEquals("term term1 term2", a, "term term1 term2");
  }

  //individual CJK chars as terms, like StandardAnalyzer
  private class SimpleCJKTokenizer extends Tokenizer {
    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

    public SimpleCJKTokenizer(Reader input) {
      super(input);
    }

    @Override
    public boolean incrementToken() throws IOException {
      int ch = input.read();
      if (ch < 0)
        return false;
      clearAttributes();
      termAtt.setEmpty().append((char) ch);
      return true;
    }
  }

  private class SimpleCJKAnalyzer extends Analyzer {
    @Override
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new SimpleCJKTokenizer(reader);
    }
  }

  public void testCJKTerm() throws Exception {
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

    BooleanQuery expected = new BooleanQuery();
    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);

    assertEquals(expected, getQuery("中国", analyzer));
  }

  public void testCJKBoostedTerm() throws Exception {
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

    BooleanQuery expected = new BooleanQuery();
    expected.setBoost(0.5f);
    expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD);
    expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD);

    assertEquals(expected, getQuery("中国^0.5", analyzer));
  }

  public void testCJKPhrase() throws Exception {
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

    PhraseQuery expected = new PhraseQuery();
    expected.add(new Term("field", "中"));
    expected.add(new Term("field", "国"));

    assertEquals(expected, getQuery("\"中国\"", analyzer));
  }

  public void testCJKBoostedPhrase() throws Exception {
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

    PhraseQuery expected = new PhraseQuery();
    expected.setBoost(0.5f);
    expected.add(new Term("field", "中"));
    expected.add(new Term("field", "国"));

    assertEquals(expected, getQuery("\"中国\"^0.5", analyzer));
  }

  public void testCJKSloppyPhrase() throws Exception {
    // individual CJK chars as terms
    SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

    PhraseQuery expected = new PhraseQuery();
    expected.setSlop(3);
    expected.add(new Term("field", "中"));
    expected.add(new Term("field", "国"));

    assertEquals(expected, getQuery("\"中国\"~3", analyzer));
  }

  // failing tests disabled since PrecedenceQueryParser
  // is currently unmaintained
  public void _testWildcard() throws Exception {
  public void testWildcard() throws Exception {
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "term*^2.0");
    assertQueryEquals("term~", null, "term~0.5");
    assertQueryEquals("term~", null, "term~2.0");
    assertQueryEquals("term~0.7", null, "term~0.7");
    assertQueryEquals("term~^2", null, "term^2.0~0.5");
    assertQueryEquals("term^2~", null, "term^2.0~0.5");
    assertQueryEquals("term~^3", null, "term~2.0^3.0");
    assertQueryEquals("term^3~", null, "term~2.0^3.0");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "term*germ^3.0");

@ -377,24 +284,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
    FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
    assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    fq = (FuzzyQuery)getQuery("term~", null);
    assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
    fq = (FuzzyQuery) getQuery("term~", null);
    assertEquals(2.0f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    try {
      getQuery("term~1.1", null); // value > 1, throws exception
      getQuery("term~1.1", null); // value > 1, throws exception
      fail();
    } catch(ParseException pe) {
    } catch (ParseException pe) {
      // expected exception
    }
    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /* Tests to see that wild card terms are (or are not) properly
     * lower-cased with proper parser configuration
     */
    // First prefix queries:
    /*
     * Tests to see that wild card terms are (or are not) properly lower-cased
     * with proper parser configuration
     */
    // First prefix queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Term*", true, "term*");
    // explicitly set lowercase:

@ -405,7 +313,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertWildcardQueryEquals("term*", false, "term*");
    assertWildcardQueryEquals("Term*", false, "Term*");
    assertWildcardQueryEquals("TERM*", false, "TERM*");
    // Then 'full' wildcard queries:
    // Then 'full' wildcard queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Te?m", "te?m");
    // explicitly set lowercase:

@ -418,11 +326,11 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertWildcardQueryEquals("Te?m", false, "Te?m");
    assertWildcardQueryEquals("TE?M", false, "TE?M");
    assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
    // Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~0.5");
    assertWildcardQueryEquals("Term~", true, "term~0.5");
    assertWildcardQueryEquals("Term~", false, "Term~0.5");
    // Range queries:
    // Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~2.0");
    assertWildcardQueryEquals("Term~", true, "term~2.0");
    assertWildcardQueryEquals("Term~", false, "Term~2.0");
    // Range queries:
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");

@ -434,11 +342,11 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertQueryEquals("term -stop term", qpAnalyzer, "term term");
    assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
    assertQueryEquals("term phrase term", qpAnalyzer,
                      "term (phrase1 phrase2) term");
        "term (phrase1 phrase2) term");
    // note the parens in this next assertion differ from the original
    // QueryParser behavior
    assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
                      "(+term -(phrase1 phrase2)) term");
        "(+term -(phrase1 phrase2)) term");
    assertQueryEquals("stop", qpAnalyzer, "");
    assertQueryEquals("stop OR stop AND stop", qpAnalyzer, "");
    assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);

@ -455,7 +363,8 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
    assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
    assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
    assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
    assertQueryEquals("gack ( bar blar { a TO z}) ", null,
        "gack (bar blar {a TO z})");
  }

  private String escapeDateString(String s) {

@ -471,54 +380,107 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY);
  }

  public String getLocalizedDate(int year, int month, int day) {
  private String getLocalizedDate(int year, int month, int day,
      boolean extendLastDate) {
    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
    Calendar calendar = new GregorianCalendar();
    calendar.clear();
    calendar.set(year, month, day);
    calendar.set(Calendar.HOUR_OF_DAY, 23);
    calendar.set(Calendar.MINUTE, 59);
    calendar.set(Calendar.SECOND, 59);
    calendar.set(Calendar.MILLISECOND, 999);
    if (extendLastDate) {
      calendar.set(Calendar.HOUR_OF_DAY, 23);
      calendar.set(Calendar.MINUTE, 59);
      calendar.set(Calendar.SECOND, 59);
      calendar.set(Calendar.MILLISECOND, 999);
    }
    return df.format(calendar.getTime());
  }

  public void testDateRange() throws Exception {
    String startDate = getLocalizedDate(2002, 1, 1);
    String endDate = getLocalizedDate(2002, 1, 4);
    assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null,
                      "[" + getDate(startDate) + " TO " + getDate(endDate) + "]");
    assertQueryEquals("{ " + escapeDateString(startDate) + " " + escapeDateString(endDate) + " }", null,
                      "{" + getDate(startDate) + " TO " + getDate(endDate) + "}");
    String startDate = getLocalizedDate(2002, 1, 1, false);
    String endDate = getLocalizedDate(2002, 1, 4, false);
    Calendar endDateExpected = new GregorianCalendar();
    endDateExpected.set(2002, 1, 4, 23, 59, 59);
    endDateExpected.set(Calendar.MILLISECOND, 999);
    final String defaultField = "default";
    final String monthField = "month";
    final String hourField = "hour";
    PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer());

    // Don't set any date resolution and verify if DateField is used
    assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
        endDateExpected.getTime(), null);

    Map<CharSequence, DateTools.Resolution> fieldMap = new HashMap<CharSequence,DateTools.Resolution>();
    // set a field specific date resolution
    fieldMap.put(monthField, DateTools.Resolution.MONTH);
    qp.setDateResolution(fieldMap);

    // DateField should still be used for defaultField
    assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
        endDateExpected.getTime(), null);

    // set default date resolution to MILLISECOND
    qp.setDateResolution(DateTools.Resolution.MILLISECOND);

    // set second field specific date resolution
    fieldMap.put(hourField, DateTools.Resolution.HOUR);
    qp.setDateResolution(fieldMap);

    // for this field no field specific date resolution has been set,
    // so verify if the default resolution is used
    assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
        endDateExpected.getTime(), DateTools.Resolution.MILLISECOND);

    // verify if field specific date resolutions are used for these two fields
    assertDateRangeQueryEquals(qp, monthField, startDate, endDate,
        endDateExpected.getTime(), DateTools.Resolution.MONTH);

    assertDateRangeQueryEquals(qp, hourField, startDate, endDate,
        endDateExpected.getTime(), DateTools.Resolution.HOUR);
  }

  /** for testing DateTools support */
  private String getDate(String s, DateTools.Resolution resolution)
      throws Exception {
    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
    return getDate(df.parse(s), resolution);
  }

  /** for testing DateTools support */
  private String getDate(Date d, DateTools.Resolution resolution)
      throws Exception {
    if (resolution == null) {
      return DateField.dateToString(d);
    } else {
      return DateTools.dateToString(d, resolution);
    }
  }

  public void assertQueryEquals(PrecedenceQueryParser qp, String field, String query,
      String result) throws Exception {
    Query q = qp.parse(query, field);
    String s = q.toString(field);
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
          + "/");
    }
  }

  public void assertDateRangeQueryEquals(PrecedenceQueryParser qp, String field,
      String startDate, String endDate, Date endDateInclusive,
      DateTools.Resolution resolution) throws Exception {
    assertQueryEquals(qp, field, field + ":[" + escapeDateString(startDate)
        + " TO " + escapeDateString(endDate) + "]", "["
        + getDate(startDate, resolution) + " TO "
        + getDate(endDateInclusive, resolution) + "]");
    assertQueryEquals(qp, field, field + ":{" + escapeDateString(startDate)
        + " TO " + escapeDateString(endDate) + "}", "{"
        + getDate(startDate, resolution) + " TO "
        + getDate(endDate, resolution) + "}");
  }

  public void testEscaped() throws Exception {
    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false);

    /*assertQueryEquals("\\[brackets", a, "\\[brackets");
    assertQueryEquals("\\[brackets", null, "brackets");
    assertQueryEquals("\\\\", a, "\\\\");
    assertQueryEquals("\\+blah", a, "\\+blah");
    assertQueryEquals("\\(blah", a, "\\(blah");

    assertQueryEquals("\\-blah", a, "\\-blah");
    assertQueryEquals("\\!blah", a, "\\!blah");
    assertQueryEquals("\\{blah", a, "\\{blah");
    assertQueryEquals("\\}blah", a, "\\}blah");
    assertQueryEquals("\\:blah", a, "\\:blah");
    assertQueryEquals("\\^blah", a, "\\^blah");
    assertQueryEquals("\\[blah", a, "\\[blah");
    assertQueryEquals("\\]blah", a, "\\]blah");
    assertQueryEquals("\\\"blah", a, "\\\"blah");
    assertQueryEquals("\\(blah", a, "\\(blah");
    assertQueryEquals("\\)blah", a, "\\)blah");
    assertQueryEquals("\\~blah", a, "\\~blah");
    assertQueryEquals("\\*blah", a, "\\*blah");
    assertQueryEquals("\\?blah", a, "\\?blah");
    //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
    //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
    //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

    assertQueryEquals("a\\-b:c", a, "a-b:c");
    assertQueryEquals("a\\+b:c", a, "a+b:c");
    assertQueryEquals("a\\:b:c", a, "a:b:c");

@ -551,44 +513,29 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
  }

  public void testTabNewlineCarriageReturn()
    throws Exception {
    assertQueryEqualsDOA("+weltbank +worlbank", null,
      "+weltbank +worlbank");
  public void testTabNewlineCarriageReturn() throws Exception {
    assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n +worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("+weltbank\n+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n +worlbank", null, "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\r+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r +worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("+weltbank\r+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r +worlbank", null, "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\r\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n +worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n +worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r \n +worlbank", null,
      "+weltbank +worlbank");
        "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\t+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t +worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("+weltbank\t+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t+worlbank", null, "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t +worlbank", null, "+weltbank +worlbank");
  }

  public void testSimpleDAO()
    throws Exception {
  public void testSimpleDAO() throws Exception {
    assertQueryEqualsDOA("term term term", null, "+term +term +term");
    assertQueryEqualsDOA("term +term term", null, "+term +term +term");
    assertQueryEqualsDOA("term term +term", null, "+term +term +term");

@ -596,23 +543,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    assertQueryEqualsDOA("-term term term", null, "-term +term +term");
  }

  public void testBoost()
    throws Exception {
  public void testBoost() throws Exception {
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true);
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", oneStopAnalyzer);
    Query q = qp.parse("on^1.0");

    PrecedenceQueryParser qp = new PrecedenceQueryParser();
    qp.setAnalyzer(oneStopAnalyzer);
    Query q = qp.parse("on^1.0", "field");
    assertNotNull(q);
    q = qp.parse("\"hello\"^2.0");
    q = qp.parse("\"hello\"^2.0", "field");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("hello^2.0");
    q = qp.parse("hello^2.0", "field");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("\"on\"^1.0");
    q = qp.parse("\"on\"^1.0", "field");
    assertNotNull(q);

    q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3");
    q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3",
        "field");
    assertNotNull(q);
  }

@ -620,105 +569,75 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
    try {
      assertQueryEquals("\"some phrase", null, "abc");
      fail("ParseException expected, not thrown");
    } catch (ParseException expected) {
    } catch (QueryNodeParseException expected) {
    }
  }

  public void testCustomQueryParserWildcard() {
    try {
      new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t");
    } catch (ParseException expected) {
      return;
    }
    fail("Wildcard queries should not be allowed");
  }

  public void testCustomQueryParserFuzzy() throws Exception {
    try {
      new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~");
    } catch (ParseException expected) {
      return;
    }
    fail("Fuzzy queries should not be allowed");
  }

  public void testBooleanQuery() throws Exception {
    BooleanQuery.setMaxClauseCount(2);
    try {
      getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three");
      getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three", "field");
      fail("ParseException expected due to too many boolean clauses");
    } catch (ParseException expected) {
    } catch (QueryNodeException expected) {
      // too many boolean clauses, so ParseException is expected
    }
  }

  /**
   * This test differs from the original QueryParser, showing how the
   * precedence issue has been corrected.
   * This test differs from the original QueryParser, showing how the precedence
   * issue has been corrected.
   */
  // failing tests disabled since PrecedenceQueryParser
  // is currently unmaintained
  public void _testPrecedence() throws Exception {
  public void testPrecedence() throws Exception {
    PrecedenceQueryParser parser = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
    Query query1 = parser.parse("A AND B OR C AND D");
    Query query2 = parser.parse("(A AND B) OR (C AND D)");
    Query query1 = parser.parse("A AND B OR C AND D", "field");
    Query query2 = parser.parse("(A AND B) OR (C AND D)", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A OR B C");
    query2 = parser.parse("A B C");
    query1 = parser.parse("A OR B C", "field");
    query2 = parser.parse("(A B) C", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND B C");
    query2 = parser.parse("(+A +B) C");
    query1 = parser.parse("A AND B C", "field");
    query2 = parser.parse("(+A +B) C", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND NOT B");
    query2 = parser.parse("+A -B");
    query1 = parser.parse("A AND NOT B", "field");
    query2 = parser.parse("+A -B", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A OR NOT B");
    query2 = parser.parse("A -B");
    query1 = parser.parse("A OR NOT B", "field");
    query2 = parser.parse("A -B", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A OR NOT B AND C");
    query2 = parser.parse("A (-B +C)");
    query1 = parser.parse("A OR NOT B AND C", "field");
    query2 = parser.parse("A (-B +C)", "field");
    assertEquals(query1, query2);

    parser.setDefaultOperator(Operator.AND);
    query1 = parser.parse("A AND B OR C AND D", "field");
    query2 = parser.parse("(A AND B) OR (C AND D)", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND B C", "field");
    query2 = parser.parse("(A B) C", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND B C", "field");
    query2 = parser.parse("(+A +B) C", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND NOT B", "field");
    query2 = parser.parse("+A -B", "field");
    assertEquals(query1, query2);

    query1 = parser.parse("A AND NOT B OR C", "field");
    query2 = parser.parse("(+A -B) OR C", "field");
    assertEquals(query1, query2);

  }

  public void testRegexps() throws Exception {
    PrecedenceQueryParser qp = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
    RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
    assertEquals(q, qp.parse("/[a-z][123]/"));
    qp.setLowercaseExpandedTerms(true);
    assertEquals(q, qp.parse("/[A-Z][123]/"));
    q.setBoost(0.5f);
    assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
    qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
    assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery);
    assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod());
    assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);

    Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
    assertEquals(escaped, qp.parse("/[a-z]\\/[123]/"));
    Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
    assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/"));

    BooleanQuery complex = new BooleanQuery();
    BooleanQuery inner = new BooleanQuery();
    inner.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
    inner.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
    complex.add(inner, Occur.SHOULD);
    complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
    assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
  }


  @Override
  public void tearDown() throws Exception {
  public void tearDown() {
    BooleanQuery.setMaxClauseCount(originalMaxClauses);
    super.tearDown();
  }

}