mirror of https://github.com/apache/lucene.git
Add new PrecedenceQueryParser so others can try it out and discuss any issues with it. TestPrecedenceQueryParser is a clone of TestQueryParser with two noted changes: one within testQPA and one in the testPrecedence method. testPrecedence was added to TestQueryParser to show its awkward precedence behavior (and to ensure it doesn't change). Modified the build to use Ant's <javacc> task, which has built-in dependency checking, instead of launching JavaCC directly with <java>.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@156431 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 88a88a92f3
commit 5f67460ad2

build.xml (83 changed lines)
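To make the commit's motivation concrete, here is a hedged sketch (not part of this commit) contrasting the two parsers on a query where operator precedence matters. The two static parse() entry points are the ones referenced above; the printed comparison is illustrative, not verified output.

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;

public class PrecedenceDemo {
  public static void main(String[] args) throws Exception {
    String input = "A AND B OR C AND D";

    // The existing QueryParser folds conjunctions together left to right,
    // which is the "awkward behavior" that testPrecedence pins down.
    Query flat = QueryParser.parse(input, "field", new SimpleAnalyzer());

    // The new parser binds AND more tightly than OR, grouping the input
    // roughly as (A AND B) OR (C AND D).
    Query grouped = PrecedenceQueryParser.parse(input, "field", new SimpleAnalyzer());

    System.out.println("QueryParser:           " + flat.toString("field"));
    System.out.println("PrecedenceQueryParser: " + grouped.toString("field"));
  }
}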
@@ -32,9 +32,6 @@
   <property name="demo.war.name" value="luceneweb"/>

   <property name="javacc.home" location="."/>
-  <property name="javacc.jar.dir" location="${javacc.home}/bin/lib"/>
-  <property name="javacc.jar" location="${javacc.jar.dir}/javacc.jar"/>
-  <property name="javacc.main.class" value="org.javacc.parser.Main"/>

   <property name="jakarta.site2.home" location="../jakarta-site2"/>

@@ -101,8 +98,8 @@

   <available
     property="javacc.present"
-    classname="${javacc.main.class}"
-    classpath="${javacc.jar}"
+    classname="org.javacc.parser.Main"
+    classpath="${javacc.home}/bin/lib/javacc.jar"
     />

   <available
@@ -579,15 +576,50 @@
   <!-- ================================================================== -->
   <!-- Build the JavaCC files into the source tree -->
   <!-- ================================================================== -->
-  <target name="javacc" depends="javacc-StandardAnalyzer,javacc-QueryParser,javacc-HTMLParser"/>
+  <target name="jjdoc">
+    <mkdir dir="${build.dir}/docs/grammars"/>
+    <jjdoc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
+           outputfile="${build.dir}/docs/grammars/QueryParser.html"
+           javacchome="${javacc.home}"
+    />
+    <jjdoc target="src/java/org/apache/lucene/queryParser/PrecedenceQueryParser.jj"
+           outputfile="${build.dir}/docs/grammars/PrecedenceQueryParser.html"
+           javacchome="${javacc.home}"
+    />
+    <jjdoc target="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"
+           outputfile="${build.dir}/docs/grammars/StandardTokenizer.html"
+           javacchome="${javacc.home}"
+    />
+    <jjdoc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
+           outputfile="${build.dir}/docs/grammars/HTMLParser.html"
+           javacchome="${javacc.home}"
+    />
+  </target>
+
+  <target name="javacc" depends="javacc-StandardAnalyzer,javacc-QueryParser,javacc-PrecedenceQueryParser,javacc-HTMLParser"/>
+
+  <macrodef name="invoke-javacc">
+    <attribute name="target"/>
+    <attribute name="outputDir"/>
+    <sequential>
+      <javacc
+          target="@{target}"
+          outputDirectory="@{outputDir}"
+          debugTokenManager="${javacc.debug.tokenmgr}"
+          debugParser="${javacc.debug.parser}"
+          debuglookahead="${javacc.debug.lookahead}"
+          javacchome="${javacc.home}"
+      />
+    </sequential>
+  </macrodef>

   <target name="javacc-StandardAnalyzer" depends="init,javacc-check" if="javacc.present">
     <!-- generate this in a build directory so we can exclude ParseException -->
     <mkdir dir="${build.dir}/gen/org/apache/lucene/analysis/standard"/>
-    <antcall target="invoke-javacc">
-      <param name="target" location="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"/>
-      <param name="output.dir" location="${build.dir}/gen/org/apache/lucene/analysis/standard"/>
-    </antcall>
+    <invoke-javacc target="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"
+                   outputDir="${build.dir}/gen/org/apache/lucene/analysis/standard"
+    />
     <copy todir="src/java/org/apache/lucene/analysis/standard">
       <fileset dir="${build.dir}/gen/org/apache/lucene/analysis/standard">
         <include name="*.java"/>
@@ -597,28 +629,21 @@
   </target>

   <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
-    <antcall target="invoke-javacc">
-      <param name="target" location="src/java/org/apache/lucene/queryParser/QueryParser.jj"/>
-      <param name="output.dir" location="src/java/org/apache/lucene/queryParser"/>
-    </antcall>
+    <invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
+                   outputDir="src/java/org/apache/lucene/queryParser"
+    />
   </target>

+  <target name="javacc-PrecedenceQueryParser" depends="init,javacc-check" if="javacc.present">
+    <invoke-javacc target="src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj"
+                   outputDir="src/java/org/apache/lucene/queryParser/precedence"
+    />
+  </target>
+
   <target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present">
-    <antcall target="invoke-javacc">
-      <param name="target" location="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"/>
-      <param name="output.dir" location="src/demo/org/apache/lucene/demo/html"/>
-    </antcall>
-  </target>
-
-  <target name="invoke-javacc">
-    <java classname="${javacc.main.class}" fork="true">
-      <classpath path="${javacc.jar}"/>
-      <sysproperty key="install.root" file="${javacc.home}"/>
-      <arg value="-OUTPUT_DIRECTORY:${output.dir}"/>
-      <arg value="${target}"/>
-    </java>
+    <invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
+                   outputDir="src/demo/org/apache/lucene/demo/html"
+    />
   </target>

   <target name="default" depends="jar-core"/>

@@ -0,0 +1,110 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */
package org.apache.lucene.queryParser.precedence;

/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * All the methods except backup can be implemented in any fashion. backup
 * needs to be implemented correctly for the correct operation of the lexer.
 * Rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */

public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface. Can throw any java.io.IOException.
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   * @deprecated
   * @see #getEndColumn
   */
  int getColumn();

  /**
   * Returns the line number of the character last read.
   * @deprecated
   * @see #getEndLine
   */
  int getLine();

  /**
   * Returns the column number of the last character for the current token
   * (being matched after the last call to BeginToken).
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for the current token
   * (being matched after the last call to BeginToken).
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for the current token
   * (being matched after the last call to BeginToken).
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for the current token
   * (being matched after the last call to BeginToken).
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows:
   *
   *   {
   *     String t = GetImage();
   *     return t.substring(t.length() - len, t.length()).toCharArray();
   *   }
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

}
@@ -0,0 +1,123 @@
// FastCharStream.java
package org.apache.lucene.queryParser.precedence;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.queryParser.*;

import java.io.*;

/** An efficient implementation of JavaCC's CharStream interface.  <p>Note that
 * this does not do line-number counting, but instead keeps track of the
 * character position of the token in the input, as required by Lucene's {@link
 * org.apache.lucene.analysis.Token} API. */
public final class FastCharStream implements CharStream {
  char[] buffer = null;

  int bufferLength = 0;             // end of valid chars
  int bufferPosition = 0;           // next char to read

  int tokenStart = 0;               // offset in buffer
  int bufferStart = 0;              // position in file of buffer

  Reader input;                     // source of chars

  /** Constructs from a Reader. */
  public FastCharStream(Reader r) {
    input = r;
  }

  public final char readChar() throws IOException {
    if (bufferPosition >= bufferLength)
      refill();
    return buffer[bufferPosition++];
  }

  private final void refill() throws IOException {
    int newPosition = bufferLength - tokenStart;

    if (tokenStart == 0) {                       // token won't fit in buffer
      if (buffer == null) {                      // first time: alloc buffer
        buffer = new char[2048];
      } else if (bufferLength == buffer.length) { // grow buffer
        char[] newBuffer = new char[buffer.length*2];
        System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
        buffer = newBuffer;
      }
    } else {                                     // shift token to front
      System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
    }

    bufferLength = newPosition;                  // update state
    bufferPosition = newPosition;
    bufferStart += tokenStart;
    tokenStart = 0;

    int charsRead =                              // fill space in buffer
      input.read(buffer, newPosition, buffer.length-newPosition);
    if (charsRead == -1)
      throw new IOException("read past eof");
    else
      bufferLength += charsRead;
  }

  public final char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  public final void backup(int amount) {
    bufferPosition -= amount;
  }

  public final String GetImage() {
    return new String(buffer, tokenStart, bufferPosition - tokenStart);
  }

  public final char[] GetSuffix(int len) {
    char[] value = new char[len];
    System.arraycopy(buffer, bufferPosition - len, value, 0, len);
    return value;
  }

  public final void Done() {
    try {
      input.close();
    } catch (IOException e) {
      System.err.println("Caught: " + e + "; ignoring.");
    }
  }

  public final int getColumn() {
    return bufferStart + bufferPosition;
  }
  public final int getLine() {
    return 1;
  }
  public final int getEndColumn() {
    return bufferStart + bufferPosition;
  }
  public final int getEndLine() {
    return 1;
  }
  public final int getBeginColumn() {
    return bufferStart + tokenStart;
  }
  public final int getBeginLine() {
    return 1;
  }
}
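As a usage note, the CharStream contract that FastCharStream implements can be exercised end to end with a small, hypothetical snippet (not from this commit): BeginToken() marks a token start, readChar() advances, backup() rewinds, and GetImage() returns the marked span.

import java.io.StringReader;
import org.apache.lucene.queryParser.precedence.FastCharStream;

public class FastCharStreamDemo {
  public static void main(String[] args) throws Exception {
    FastCharStream in = new FastCharStream(new StringReader("hello world"));

    in.BeginToken();                   // marks the token start and reads 'h'
    for (int i = 0; i < 4; i++) {
      in.readChar();                   // consume "ello"
    }
    System.out.println(in.GetImage()); // prints "hello"

    in.backup(1);                      // rewind one char; 'o' is read again
    System.out.println(in.readChar()); // prints "o"
  }
}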
@@ -0,0 +1,192 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
package org.apache.lucene.queryParser.precedence;

/**
 * This exception is thrown when parse errors are encountered.
 * You can explicitly create objects of this exception type by
 * calling the method generateParseException in the generated
 * parser.
 *
 * You can modify this class to customize your error reporting
 * mechanisms so long as you retain the public fields.
 */
public class ParseException extends Exception {

  /**
   * This constructor is used by the method "generateParseException"
   * in the generated parser. Calling this constructor generates
   * a new object of this type with the fields "currentToken",
   * "expectedTokenSequences", and "tokenImage" set. The boolean
   * flag "specialConstructor" is also set to true to indicate that
   * this constructor was used to create this object.
   * This constructor calls its super class with the empty string
   * to force the "toString" method of parent class "Throwable" to
   * print the error message in the form:
   *     ParseException: <result of getMessage>
   */
  public ParseException(Token currentTokenVal,
                        int[][] expectedTokenSequencesVal,
                        String[] tokenImageVal
                       )
  {
    super("");
    specialConstructor = true;
    currentToken = currentTokenVal;
    expectedTokenSequences = expectedTokenSequencesVal;
    tokenImage = tokenImageVal;
  }

  /**
   * The following constructors are for use by you for whatever
   * purpose you can think of. Constructing the exception in this
   * manner makes the exception behave in the normal way - i.e., as
   * documented in the class "Throwable". The fields "errorToken",
   * "expectedTokenSequences", and "tokenImage" do not contain
   * relevant information. The JavaCC generated code does not use
   * these constructors.
   */

  public ParseException() {
    super();
    specialConstructor = false;
  }

  public ParseException(String message) {
    super(message);
    specialConstructor = false;
  }

  /**
   * This variable determines which constructor was used to create
   * this object and thereby affects the semantics of the
   * "getMessage" method (see below).
   */
  protected boolean specialConstructor;

  /**
   * This is the last token that has been consumed successfully. If
   * this object has been created due to a parse error, the token
   * following this token will (therefore) be the first error token.
   */
  public Token currentToken;

  /**
   * Each entry in this array is an array of integers. Each array
   * of integers represents a sequence of tokens (by their ordinal
   * values) that is expected at this point of the parse.
   */
  public int[][] expectedTokenSequences;

  /**
   * This is a reference to the "tokenImage" array of the generated
   * parser within which the parse error occurred. This array is
   * defined in the generated ...Constants interface.
   */
  public String[] tokenImage;

  /**
   * This method has the standard behavior when this object has been
   * created using the standard constructors. Otherwise, it uses
   * "currentToken" and "expectedTokenSequences" to generate a parse
   * error message and returns it. If this object has been created
   * due to a parse error, and you do not catch it (it gets thrown
   * from the parser), then this method is called during the printing
   * of the final stack trace, and hence the correct error message
   * gets displayed.
   */
  public String getMessage() {
    if (!specialConstructor) {
      return super.getMessage();
    }
    String expected = "";
    int maxSize = 0;
    for (int i = 0; i < expectedTokenSequences.length; i++) {
      if (maxSize < expectedTokenSequences[i].length) {
        maxSize = expectedTokenSequences[i].length;
      }
      for (int j = 0; j < expectedTokenSequences[i].length; j++) {
        expected += tokenImage[expectedTokenSequences[i][j]] + " ";
      }
      if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
        expected += "...";
      }
      expected += eol + " ";
    }
    String retval = "Encountered \"";
    Token tok = currentToken.next;
    for (int i = 0; i < maxSize; i++) {
      if (i != 0) retval += " ";
      if (tok.kind == 0) {
        retval += tokenImage[0];
        break;
      }
      retval += add_escapes(tok.image);
      tok = tok.next;
    }
    retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
    retval += "." + eol;
    if (expectedTokenSequences.length == 1) {
      retval += "Was expecting:" + eol + " ";
    } else {
      retval += "Was expecting one of:" + eol + " ";
    }
    retval += expected;
    return retval;
  }

  /**
   * The end of line string for this machine.
   */
  protected String eol = System.getProperty("line.separator", "\n");

  /**
   * Used to convert raw characters to their escaped version
   * when these raw versions cannot be used as part of an ASCII
   * string literal.
   */
  protected String add_escapes(String str) {
    StringBuffer retval = new StringBuffer();
    char ch;
    for (int i = 0; i < str.length(); i++) {
      switch (str.charAt(i))
      {
        case 0 :
          continue;
        case '\b':
          retval.append("\\b");
          continue;
        case '\t':
          retval.append("\\t");
          continue;
        case '\n':
          retval.append("\\n");
          continue;
        case '\f':
          retval.append("\\f");
          continue;
        case '\r':
          retval.append("\\r");
          continue;
        case '\"':
          retval.append("\\\"");
          continue;
        case '\'':
          retval.append("\\\'");
          continue;
        case '\\':
          retval.append("\\\\");
          continue;
        default:
          if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
            String s = "0000" + Integer.toString(ch, 16);
            retval.append("\\u" + s.substring(s.length() - 4, s.length()));
          } else {
            retval.append(ch);
          }
          continue;
      }
    }
    return retval.toString();
  }

}
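For illustration, a hedged sketch of the error-reporting path this class supports: a query that cannot be parsed makes the generated parser throw ParseException, whose getMessage() is assembled from currentToken, expectedTokenSequences, and tokenImage as shown above. The demo class and the sample message text are assumptions, not verified output.

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.queryParser.precedence.ParseException;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;

public class ParseErrorDemo {
  public static void main(String[] args) {
    try {
      // Unbalanced parenthesis: the parser hits EOF while expecting ")".
      PrecedenceQueryParser.parse("(title:lucene", "field", new SimpleAnalyzer());
    } catch (ParseException e) {
      // Prints something like:
      //   Encountered "<EOF>" at line 1, column 13.
      //   Was expecting one of: ...
      System.err.println(e.getMessage());
    }
  }
}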
(File diff suppressed because it is too large.)
@@ -0,0 +1,910 @@
/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(PrecedenceQueryParser)

package org.apache.lucene.queryParser.precedence;

import java.util.Vector;
import java.io.*;
import java.text.*;
import java.util.*;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.*;
import org.apache.lucene.document.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Parameter;

/**
 * This class is generated by JavaCC. The only method that clients should need
 * to call is {@link #parse(String)} or {@link #parse(String, String, Analyzer)}.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses. Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 * </pre>
 *
 * <p>
 * Examples of appropriately formatted queries can be found in the <a
 * href="http://jakarta.apache.org/lucene/docs/queryparsersyntax.html">query syntax
 * documentation</a>.
 * </p>
 *
 * @author Brian Goetz
 * @author Peter Halacsy
 * @author Tatu Saloranta
 */

public class PrecedenceQueryParser {

  private static final int CONJ_NONE  = 0;
  private static final int CONJ_AND   = 1;
  private static final int CONJ_OR    = 2;

  private static final int MOD_NONE   = 0;
  private static final int MOD_NOT    = 10;
  private static final int MOD_REQ    = 11;

  // make it possible to call setDefaultOperator() without accessing
  // the nested class:
  public static final Operator AND_OPERATOR = Operator.AND;
  public static final Operator OR_OPERATOR = Operator.OR;

  /** The actual operator that parser uses to combine query terms */
  private Operator operator = OR_OPERATOR;

  boolean lowercaseExpandedTerms = true;

  Analyzer analyzer;
  String field;
  int phraseSlop = 0;
  float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
  Locale locale = Locale.getDefault();

  static final class Operator extends Parameter {
    private Operator(String name) {
      super(name);
    }
    static final Operator OR = new Operator("OR");
    static final Operator AND = new Operator("AND");
  }

  /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
   *  @param query    the query string to be parsed.
   *  @param field    the default field for query terms.
   *  @param analyzer used to find terms in the query text.
   *  @throws ParseException if the parsing fails
   */
  static public Query parse(String query, String field, Analyzer analyzer)
       throws ParseException {
    PrecedenceQueryParser parser = new PrecedenceQueryParser(field, analyzer);
    return parser.parse(query);
  }

  /** Constructs a query parser.
   *  @param f the default field for query terms.
   *  @param a used to find terms in the query text.
   */
  public PrecedenceQueryParser(String f, Analyzer a) {
    this(new FastCharStream(new StringReader("")));
    analyzer = a;
    field = f;
  }

  /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
   *  @param expression the query string to be parsed.
   *  @throws ParseException if the parsing fails
   */
  public Query parse(String expression) throws ParseException {
    ReInit(new FastCharStream(new StringReader(expression)));
    try {
      Query query = Query(field);
      return (query != null) ? query : new BooleanQuery();
    }
    catch (TokenMgrError tme) {
      throw new ParseException(tme.getMessage());
    }
    catch (BooleanQuery.TooManyClauses tmc) {
      throw new ParseException("Too many boolean clauses");
    }
  }

  /**
   * @return Returns the analyzer.
   */
  public Analyzer getAnalyzer() {
    return analyzer;
  }

  /**
   * @return Returns the field.
   */
  public String getField() {
    return field;
  }

  /**
   * Get the minimal similarity for fuzzy queries.
   */
  public float getFuzzyMinSim() {
    return fuzzyMinSim;
  }

  /**
   * Set the minimum similarity for fuzzy queries.
   * Default is 0.5f.
   */
  public void setFuzzyMinSim(float fuzzyMinSim) {
    this.fuzzyMinSim = fuzzyMinSim;
  }

  /**
   * Get the prefix length for fuzzy queries.
   * @return Returns the fuzzyPrefixLength.
   */
  public int getFuzzyPrefixLength() {
    return fuzzyPrefixLength;
  }

  /**
   * Set the prefix length for fuzzy queries. Default is 0.
   * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
   */
  public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
    this.fuzzyPrefixLength = fuzzyPrefixLength;
  }

  /**
   * Sets the default slop for phrases. If zero, then exact phrase matches
   * are required. Default value is zero.
   */
  public void setPhraseSlop(int phraseSlop) {
    this.phraseSlop = phraseSlop;
  }

  /**
   * Gets the default slop for phrases.
   */
  public int getPhraseSlop() {
    return phraseSlop;
  }

  /**
   * Sets the boolean operator of the QueryParser.
   * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
   * are considered optional: for example <code>capital of Hungary</code> is equal to
   * <code>capital OR of OR Hungary</code>.<br/>
   * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
   * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
   */
  public void setDefaultOperator(Operator op) {
    this.operator = op;
  }

  /**
   * Gets implicit operator setting, which will be either AND_OPERATOR
   * or OR_OPERATOR.
   */
  public Operator getDefaultOperator() {
    return operator;
  }

  /**
   * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
   * lower-cased or not. Default is <code>true</code>.
   */
  public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
    this.lowercaseExpandedTerms = lowercaseExpandedTerms;
  }

  /**
   * @see #setLowercaseExpandedTerms(boolean)
   */
  public boolean getLowercaseExpandedTerms() {
    return lowercaseExpandedTerms;
  }

  /**
   * Set locale used by date range parsing.
   */
  public void setLocale(Locale locale) {
    this.locale = locale;
  }

  /**
   * Returns current locale, allowing access by subclasses.
   */
  public Locale getLocale() {
    return locale;
  }

  protected void addClause(Vector clauses, int conj, int modifier, Query q) {
    boolean required, prohibited;

    // If this term is introduced by AND, make the preceding term required,
    // unless it's already prohibited
    if (clauses.size() > 0 && conj == CONJ_AND) {
      BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
      if (!c.isProhibited())
        c.setOccur(BooleanClause.Occur.MUST);
    }

    if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
      // If this term is introduced by OR, make the preceding term optional,
      // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
      // notice if the input is a OR b, first term is parsed as required; without
      // this modification a OR b would be parsed as +a OR b
      BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
      if (!c.isProhibited())
        c.setOccur(BooleanClause.Occur.SHOULD);
    }

    // We might have been passed a null query; the term might have been
    // filtered away by the analyzer.
    if (q == null)
      return;

    if (operator == OR_OPERATOR) {
      // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
      // introduced by NOT or -; make sure not to set both.
      prohibited = (modifier == MOD_NOT);
      required = (modifier == MOD_REQ);
      if (conj == CONJ_AND && !prohibited) {
        required = true;
      }
    } else {
      // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
      // if not PROHIBITED and not introduced by OR
      prohibited = (modifier == MOD_NOT);
      required = (!prohibited && conj != CONJ_OR);
    }
    if (required && !prohibited)
      clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
    else if (!required && !prohibited)
      clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
    else if (!required && prohibited)
      clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
    else
      throw new RuntimeException("Clause cannot be both required and prohibited");
  }

  /**
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count

    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
    Vector v = new Vector();
    org.apache.lucene.analysis.Token t;
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;

    while (true) {
      try {
        t = source.next();
      }
      catch (IOException e) {
        t = null;
      }
      if (t == null)
        break;
      v.addElement(t);
      if (t.getPositionIncrement() == 1)
        positionCount++;
      else
        severalTokensAtSamePosition = true;
    }
    try {
      source.close();
    }
    catch (IOException e) {
      // ignore
    }

    if (v.size() == 0)
      return null;
    else if (v.size() == 1) {
      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
      return new TermQuery(new Term(field, t.termText()));
    } else {
      if (severalTokensAtSamePosition) {
        if (positionCount == 1) {
          // no phrase query:
          BooleanQuery q = new BooleanQuery();
          for (int i = 0; i < v.size(); i++) {
            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
            TermQuery currentQuery = new TermQuery(
                new Term(field, t.termText()));
            q.add(currentQuery, BooleanClause.Occur.SHOULD);
          }
          return q;
        }
        else {
          // phrase query:
          MultiPhraseQuery mpq = new MultiPhraseQuery();
          List multiTerms = new ArrayList();
          for (int i = 0; i < v.size(); i++) {
            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
            if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
              mpq.add((Term[])multiTerms.toArray(new Term[0]));
              multiTerms.clear();
            }
            multiTerms.add(new Term(field, t.termText()));
          }
          mpq.add((Term[])multiTerms.toArray(new Term[0]));
          return mpq;
        }
      }
      else {
        PhraseQuery q = new PhraseQuery();
        q.setSlop(phraseSlop);
        for (int i = 0; i < v.size(); i++) {
          q.add(new Term(field, ((org.apache.lucene.analysis.Token)
              v.elementAt(i)).termText()));
        }
        return q;
      }
    }
  }

  /**
   * Base implementation delegates to {@link #getFieldQuery(String,String)}.
   * This method may be overridden, for example, to return
   * a SpanNearQuery instead of a PhraseQuery.
   *
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFieldQuery(String field, String queryText, int slop)
        throws ParseException {
    Query query = getFieldQuery(field, queryText);

    if (query instanceof PhraseQuery) {
      ((PhraseQuery) query).setSlop(slop);
    }
    if (query instanceof MultiPhraseQuery) {
      ((MultiPhraseQuery) query).setSlop(slop);
    }

    return query;
  }

  /**
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getRangeQuery(String field,
                                String part1,
                                String part2,
                                boolean inclusive) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      part1 = part1.toLowerCase();
      part2 = part2.toLowerCase();
    }
    try {
      DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
      df.setLenient(true);
      Date d1 = df.parse(part1);
      Date d2 = df.parse(part2);
      part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY);
      part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY);
    }
    catch (Exception e) { }

    return new RangeQuery(new Term(field, part1),
                          new Term(field, part2),
                          inclusive);
  }

  /**
   * Factory method for generating query, given a set of clauses.
   * By default creates a boolean query composed of clauses passed in.
   *
   * Can be overridden by extending classes, to modify query being
   * returned.
   *
   * @param clauses Vector that contains {@link BooleanClause} instances
   *    to join.
   *
   * @return Resulting {@link Query} object.
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getBooleanQuery(Vector clauses) throws ParseException
  {
    if (clauses == null || clauses.size() == 0)
      return null;

    BooleanQuery query = new BooleanQuery();
    for (int i = 0; i < clauses.size(); i++) {
      query.add((BooleanClause)clauses.elementAt(i));
    }
    return query;
  }

  /**
   * Factory method for generating a query. Called when parser
   * parses an input term token that contains one or more wildcard
   * characters (? and *), but is not a prefix term token (one
   * that has just a single * character at the end)
   *<p>
   * Depending on settings, prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wildcard queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token that contains one or more wild card
   *   characters (? or *), but is not simple prefix term
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getWildcardQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new WildcardQuery(t);
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses an input term
   * token that uses prefix notation; that is, contains a single '*' wildcard
   * character as its last character. Since this is a special case
   * of generic wildcard term, and such a query can be optimized easily,
   * this usually results in a different query object.
   *<p>
   * Depending on settings, a prefix term may be lower-cased
   * automatically. It will not go through the default Analyzer,
   * however, since normal Analyzers are unlikely to work properly
   * with wildcard templates.
   *<p>
   * Can be overridden by extending classes, to provide custom handling for
   * wild card queries, which may be necessary due to missing analyzer calls.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *    (<b>without</b> trailing '*' character!)
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getPrefixQuery(String field, String termStr) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new PrefixQuery(t);
  }

  /**
   * Factory method for generating a query (similar to
   * {@link #getWildcardQuery}). Called when parser parses
   * an input term token that has the fuzzy suffix (~) appended.
   *
   * @param field Name of the field query will use.
   * @param termStr Term token to use for building term for the query
   *
   * @return Resulting {@link Query} built for the term
   * @exception ParseException throw in overridden method to disallow
   */
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
  {
    if (lowercaseExpandedTerms) {
      termStr = termStr.toLowerCase();
    }
    Term t = new Term(field, termStr);
    return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
  }

  /**
   * Returns a String where the escape char has been
   * removed, or kept only once if there was a double escape.
   */
  private String discardEscapeChar(String input) {
    char[] caSource = input.toCharArray();
    char[] caDest = new char[caSource.length];
    int j = 0;
    for (int i = 0; i < caSource.length; i++) {
      if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
        caDest[j++]=caSource[i];
      }
    }
    return new String(caDest, 0, j);
  }

  /**
   * Returns a String where those characters that QueryParser
   * expects to be escaped are escaped by a preceding <code>\</code>.
   */
  public static String escape(String s) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      // NOTE: keep this in sync with _ESCAPED_CHAR below!
      if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
        || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
        || c == '*' || c == '?') {
        sb.append('\\');
      }
      sb.append(c);
    }
    return sb.toString();
  }

  /**
   * Command line tool to test PrecedenceQueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
   * Usage:<br>
   * <code>java org.apache.lucene.queryParser.precedence.PrecedenceQueryParser <input></code>
   */
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      System.out.println("Usage: java org.apache.lucene.queryParser.precedence.PrecedenceQueryParser <input>");
      System.exit(0);
    }
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
                           new org.apache.lucene.analysis.SimpleAnalyzer());
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }
}

PARSER_END(PrecedenceQueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR: ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
  <<_WHITESPACE>>
}

// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
// | <WILDTERM: <_TERM_START_CHAR>
//              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* >

<DEFAULT> TOKEN : {
  <AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (~["\""])+ "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
| <WILDTERM: <_TERM_START_CHAR>
             (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}

<Boost> TOKEN : {
  <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<RangeIn> TOKEN : {
  <RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}

<RangeEx> TOKEN : {
  <RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}

// *   Query  ::= ( Clause )*
// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )

int Conjunction() : {
  int ret = CONJ_NONE;
}
{
  [
    <AND> { ret = CONJ_AND; }
    | <OR> { ret = CONJ_OR; }
  ]
  { return ret; }
}

int Modifier() : {
  int ret = MOD_NONE;
}
{
  [
    <PLUS> { ret = MOD_REQ; }
    | <MINUS> { ret = MOD_NOT; }
    | <NOT> { ret = MOD_NOT; }
  ]
  { return ret; }
}

Query Query(String field) :
{
  Vector clauses = new Vector();
  int modifier;
  Query q, firstQuery=null;
  boolean orPresent = false;
}
{
  modifier=Modifier() q=andExpression(field)
  {
    addClause(clauses, CONJ_NONE, modifier, q);
    firstQuery=q;
  }
  (
    [<OR> { orPresent=true; }] modifier=Modifier() q=andExpression(field)
    { addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null)
      return firstQuery;
    else {
      return getBooleanQuery(clauses);
    }
  }
}

/*
Query orExpression(String field) :
{
  Vector clauses = new Vector();
  Query q, firstQuery=null;
  int modifier;
}
{
  q=andExpression(field)
  {
    addClause(clauses, CONJ_NONE, MOD_NONE, q);
    firstQuery=q;
  }
  (
    <OR> modifier=Modifier() q=andExpression(field)
    { addClause(clauses, CONJ_OR, modifier, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null)
      return firstQuery;
    else {
      return getBooleanQuery(clauses);
    }
  }
}
*/

Query andExpression(String field) :
{
  Vector clauses = new Vector();
  Query q, firstQuery=null;
  int modifier;
}
{
  q=Clause(field)
  {
    addClause(clauses, CONJ_NONE, MOD_NONE, q);
    firstQuery=q;
  }
  (
    <AND> modifier=Modifier() q=Clause(field)
    { addClause(clauses, CONJ_AND, modifier, q); }
  )*
  {
    if (clauses.size() == 1 && firstQuery != null)
      return firstQuery;
    else {
      return getBooleanQuery(clauses);
    }
  }
}

Query Clause(String field) : {
  Query q;
  Token fieldToken=null, boost=null;
}
{
  [
    LOOKAHEAD(2)
    fieldToken=<TERM> <COLON> {
      field=discardEscapeChar(fieldToken.image);
    }
  ]

  (
    q=Term(field)
    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
  )
  {
    if (boost != null) {
      float f = (float)1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
        q.setBoost(f);
      } catch (Exception ignored) { }
    }
    return q;
  }
}

Query Term(String field) : {
  Token term, boost=null, fuzzySlop=null, goop1, goop2;
  boolean prefix = false;
  boolean wildcard = false;
  boolean fuzzy = false;
  boolean rangein = false;
  Query q;
}
{
  (
    (
      term=<TERM>
      | term=<PREFIXTERM> { prefix=true; }
      | term=<WILDTERM> { wildcard=true; }
      | term=<NUMBER>
    )
    [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
    [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
    {
      String termImage=discardEscapeChar(term.image);
      if (wildcard) {
        q = getWildcardQuery(field, termImage);
      } else if (prefix) {
        q = getPrefixQuery(field,
            discardEscapeChar(term.image.substring
                              (0, term.image.length()-1)));
      } else if (fuzzy) {
        float fms = fuzzyMinSim;
        try {
          fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
        } catch (Exception ignored) { }
        if(fms < 0.0f || fms > 1.0f){
          throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
        }
        q = getFuzzyQuery(field, termImage, fms);
      } else {
        q = getFieldQuery(field, termImage);
      }
    }
    | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
        [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
        <RANGEIN_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEIN_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        } else {
          goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEIN_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        } else {
          goop2.image = discardEscapeChar(goop2.image);
        }
        q = getRangeQuery(field, goop1.image, goop2.image, true);
      }
    | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
        [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
        <RANGEEX_END> )
      [ <CARAT> boost=<NUMBER> ]
      {
        if (goop1.kind == RANGEEX_QUOTED) {
          goop1.image = goop1.image.substring(1, goop1.image.length()-1);
        } else {
          goop1.image = discardEscapeChar(goop1.image);
        }
        if (goop2.kind == RANGEEX_QUOTED) {
          goop2.image = goop2.image.substring(1, goop2.image.length()-1);
        } else {
          goop2.image = discardEscapeChar(goop2.image);
        }

        q = getRangeQuery(field, goop1.image, goop2.image, false);
      }
    | term=<QUOTED>
      [ fuzzySlop=<FUZZY_SLOP> ]
      [ <CARAT> boost=<NUMBER> ]
      {
        int s = phraseSlop;

        if (fuzzySlop != null) {
          try {
            s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
          }
          catch (Exception ignored) { }
        }
        q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
      }
  )
  {
    if (boost != null) {
      float f = (float) 1.0;
      try {
        f = Float.valueOf(boost.image).floatValue();
      }
      catch (Exception ignored) {
        /* Should this be handled somehow? (defaults to "no boost", if
         * boost number is invalid)
         */
      }

      // avoid boosting null queries, such as those caused by stop words
      if (q != null) {
        q.setBoost(f);
      }
    }
    return q;
  }
}
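The protected factory methods above are explicit extension points ("Can be overridden by extending classes ..."), and their javadoc allows an override to throw ParseException "to disallow". A minimal subclass sketch using only the API shown in this file; the class name and error text are made up for illustration:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.precedence.ParseException;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;

public class NoWildcardQueryParser extends PrecedenceQueryParser {

  public NoWildcardQueryParser(String field, Analyzer analyzer) {
    super(field, analyzer);
  }

  // Reject wildcard queries outright instead of building a WildcardQuery,
  // using the override-to-disallow escape hatch the javadoc describes.
  protected Query getWildcardQuery(String field, String termStr)
      throws ParseException {
    throw new ParseException("Wildcard queries are disabled: " + termStr);
  }
}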
@@ -0,0 +1,78 @@
/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */
package org.apache.lucene.queryParser.precedence;

public interface PrecedenceQueryParserConstants {

  int EOF = 0;
  int _NUM_CHAR = 1;
  int _ESCAPED_CHAR = 2;
  int _TERM_START_CHAR = 3;
  int _TERM_CHAR = 4;
  int _WHITESPACE = 5;
  int AND = 7;
  int OR = 8;
  int NOT = 9;
  int PLUS = 10;
  int MINUS = 11;
  int LPAREN = 12;
  int RPAREN = 13;
  int COLON = 14;
  int CARAT = 15;
  int QUOTED = 16;
  int TERM = 17;
  int FUZZY_SLOP = 18;
  int PREFIXTERM = 19;
  int WILDTERM = 20;
  int RANGEIN_START = 21;
  int RANGEEX_START = 22;
  int NUMBER = 23;
  int RANGEIN_TO = 24;
  int RANGEIN_END = 25;
  int RANGEIN_QUOTED = 26;
  int RANGEIN_GOOP = 27;
  int RANGEEX_TO = 28;
  int RANGEEX_END = 29;
  int RANGEEX_QUOTED = 30;
  int RANGEEX_GOOP = 31;

  int Boost = 0;
  int RangeEx = 1;
  int RangeIn = 2;
  int DEFAULT = 3;

  String[] tokenImage = {
    "<EOF>",
    "<_NUM_CHAR>",
    "<_ESCAPED_CHAR>",
    "<_TERM_START_CHAR>",
    "<_TERM_CHAR>",
    "<_WHITESPACE>",
    "<token of kind 6>",
    "<AND>",
    "<OR>",
    "<NOT>",
    "\"+\"",
    "\"-\"",
    "\"(\"",
    "\")\"",
    "\":\"",
    "\"^\"",
    "<QUOTED>",
    "<TERM>",
    "<FUZZY_SLOP>",
    "<PREFIXTERM>",
    "<WILDTERM>",
    "\"[\"",
    "\"{\"",
    "<NUMBER>",
    "\"TO\"",
    "\"]\"",
    "<RANGEIN_QUOTED>",
    "<RANGEIN_GOOP>",
    "\"TO\"",
    "\"}\"",
    "<RANGEEX_QUOTED>",
    "<RANGEEX_GOOP>",
  };

}
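Illustrative only: tokenImage maps a Token.kind ordinal to a printable name, which is how ParseException.getMessage() renders expected tokens. A hypothetical snippet using the constant-interface idiom:

public class TokenKindDemo implements PrecedenceQueryParserConstants {
  public static void main(String[] args) {
    System.out.println(tokenImage[AND]);    // prints <AND>
    System.out.println(tokenImage[PLUS]);   // prints "+" (with the quotes)
    System.out.println(tokenImage[QUOTED]); // prints <QUOTED>
  }
}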
(File diff suppressed because it is too large.)
@@ -0,0 +1,81 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
package org.apache.lucene.queryParser.precedence;

/**
 * Describes the input token stream.
 */

public class Token {

  /**
   * An integer that describes the kind of this token. This numbering
   * system is determined by JavaCCParser, and a table of these numbers is
   * stored in the file ...Constants.java.
   */
  public int kind;

  /**
   * beginLine and beginColumn describe the position of the first character
   * of this token; endLine and endColumn describe the position of the
   * last character of this token.
   */
  public int beginLine, beginColumn, endLine, endColumn;

  /**
   * The string image of the token.
   */
  public String image;

  /**
   * A reference to the next regular (non-special) token from the input
   * stream. If this is the last token from the input stream, or if the
   * token manager has not read tokens beyond this one, this field is
   * set to null. This is true only if this token is also a regular
   * token. Otherwise, see below for a description of the contents of
   * this field.
   */
  public Token next;

  /**
   * This field is used to access special tokens that occur prior to this
   * token, but after the immediately preceding regular (non-special) token.
   * If there are no such special tokens, this field is set to null.
   * When there are more than one such special token, this field refers
   * to the last of these special tokens, which in turn refers to the next
   * previous special token through its specialToken field, and so on
   * until the first special token (whose specialToken field is null).
   * The next fields of special tokens refer to other special tokens that
   * immediately follow it (without an intervening regular token). If there
   * is no such token, this field is null.
   */
  public Token specialToken;

  /**
   * Returns the image.
   */
  public String toString()
  {
    return image;
  }

  /**
   * Returns a new Token object, by default. However, if you want, you
   * can create and return subclass objects based on the value of ofKind.
   * Simply add the cases to the switch for all those special cases.
   * For example, if you have a subclass of Token called IDToken that
   * you want to create if ofKind is ID, simply add something like:
   *
   *    case MyParserConstants.ID : return new IDToken();
   *
   * to the following switch statement. Then you can cast matchedToken
   * variable to the appropriate type and use it in your lexical actions.
   */
  public static final Token newToken(int ofKind)
  {
    switch(ofKind)
    {
      default : return new Token();
    }
  }

}
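The newToken() javadoc above sketches how to hook in Token subclasses; a hypothetical, compilable version of that example follows. QuotedToken and unquoted() are invented names for illustration.

// A Token subtype for <QUOTED> tokens, so lexical actions can cast to it
// and strip the surrounding quotes directly.
public class QuotedToken extends Token {
  public String unquoted() {
    return image.substring(1, image.length() - 1); // drop the quotes
  }
}

// Inside Token.newToken(int ofKind), one would then add:
//   case PrecedenceQueryParserConstants.QUOTED: return new QuotedToken();
//   default: return new Token();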
@ -0,0 +1,133 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
package org.apache.lucene.queryParser.precedence;

public class TokenMgrError extends Error
{
   /*
    * Ordinals for various reasons why an Error of this type can be thrown.
    */

   /**
    * Lexical error occurred.
    */
   static final int LEXICAL_ERROR = 0;

   /**
    * An attempt was made to create a second instance of a static token manager.
    */
   static final int STATIC_LEXER_ERROR = 1;

   /**
    * Tried to change to an invalid lexical state.
    */
   static final int INVALID_LEXICAL_STATE = 2;

   /**
    * Detected (and bailed out of) an infinite loop in the token manager.
    */
   static final int LOOP_DETECTED = 3;

   /**
    * Indicates the reason why the exception is thrown. It will have
    * one of the above 4 values.
    */
   int errorCode;

   /**
    * Replaces unprintable characters by their escaped (or unicode escaped)
    * equivalents in the given string
    */
   protected static final String addEscapes(String str) {
      StringBuffer retval = new StringBuffer();
      char ch;
      for (int i = 0; i < str.length(); i++) {
        switch (str.charAt(i))
        {
           case 0 :
              continue;
           case '\b':
              retval.append("\\b");
              continue;
           case '\t':
              retval.append("\\t");
              continue;
           case '\n':
              retval.append("\\n");
              continue;
           case '\f':
              retval.append("\\f");
              continue;
           case '\r':
              retval.append("\\r");
              continue;
           case '\"':
              retval.append("\\\"");
              continue;
           case '\'':
              retval.append("\\\'");
              continue;
           case '\\':
              retval.append("\\\\");
              continue;
           default:
              if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
                 String s = "0000" + Integer.toString(ch, 16);
                 retval.append("\\u" + s.substring(s.length() - 4, s.length()));
              } else {
                 retval.append(ch);
              }
              continue;
        }
      }
      return retval.toString();
   }

   /**
    * Returns a detailed message for the Error when it is thrown by the
    * token manager to indicate a lexical error.
    * Parameters :
    *    EOFSeen     : indicates if EOF caused the lexical error
    *    curLexState : lexical state in which this error occurred
    *    errorLine   : line number where the error occurred
    *    errorColumn : column number where the error occurred
    *    errorAfter  : prefix that was seen before this error occurred
    *    curchar     : the offending character
    * Note: You can customize the lexical error message by modifying this method.
    */
   protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
      return("Lexical error at line " +
           errorLine + ", column " +
           errorColumn + ".  Encountered: " +
           (EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") +
           "after : \"" + addEscapes(errorAfter) + "\"");
   }

   /**
    * You can also modify the body of this method to customize your error messages.
    * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
    * of end-user concern, so you can return something like :
    *
    *     "Internal Error : Please file a bug report .... "
    *
    * from this method for such cases in the release version of your parser.
    */
   public String getMessage() {
      return super.getMessage();
   }

   /*
    * Constructors of various flavors follow.
    */

   public TokenMgrError() {
   }

   public TokenMgrError(String message, int reason) {
      super(message);
      errorCode = reason;
   }

   public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
      this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
   }
}
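
Note that TokenMgrError extends Error, so a catch (ParseException) block alone
will not trap lexical failures; code feeding untrusted input to a JavaCC
parser typically guards against both. A sketch only, assuming the generated
ParseException lives in the same precedence package and using the static
parse(String, String, Analyzer) entry point exercised by the tests below:

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.queryParser.precedence.ParseException;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.precedence.TokenMgrError;
import org.apache.lucene.search.Query;

public class SafeParse {
  // Returns null instead of propagating parser failures.
  public static Query parseSafely(String userInput) {
    try {
      return PrecedenceQueryParser.parse(userInput, "field", new WhitespaceAnalyzer());
    } catch (ParseException pe) {   // grammar-level failure
      return null;
    } catch (TokenMgrError tme) {   // lexical failure; an Error, not an Exception
      return null;
    }
  }
}
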
@@ -522,6 +522,16 @@ public class TestQueryParser extends TestCase {
     }
   }

   /**
    * This test differs from the one in TestPrecedenceQueryParser: QueryParser
    * ignores operator precedence, so "A AND B OR C AND D" parses the same as
    * "+A +B +C +D".
    */
   public void testPrecedence() throws Exception {
     Query query1 = QueryParser.parse("A AND B OR C AND D", "field", new WhitespaceAnalyzer());
     Query query2 = QueryParser.parse("+A +B +C +D", "field", new WhitespaceAnalyzer());
     assertEquals(query1, query2);
   }


   public void tearDown() {
     BooleanQuery.setMaxClauseCount(originalMaxClauses);
   }
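
For contrast, the new parser added by this commit honors operator precedence
on the same input. A sketch; the expected toString() forms in the comments
are inferred from the assertions in the two testPrecedence methods, not
printed output from this commit:

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;

public class PrecedenceDemo {
  public static void main(String[] args) throws Exception {
    Query flat = QueryParser.parse("A AND B OR C AND D", "field", new WhitespaceAnalyzer());
    System.out.println(flat.toString("field"));     // +A +B +C +D  (no precedence)

    Query grouped = PrecedenceQueryParser.parse("A AND B OR C AND D", "field", new WhitespaceAnalyzer());
    System.out.println(grouped.toString("field"));  // (+A +B) (+C +D)  (AND binds tighter)
  }
}
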
@@ -0,0 +1,540 @@
package org.apache.lucene.queryParser.precedence;

/**
 * Copyright 2002-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import junit.framework.TestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;

import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.Calendar;

public class TestPrecedenceQueryParser extends TestCase {

  public static Analyzer qpAnalyzer = new QPTestAnalyzer();

  public static class QPTestFilter extends TokenFilter {
    /**
     * Filter which discards the token 'stop' and which expands the
     * token 'phrase' into 'phrase1 phrase2'
     */
    public QPTestFilter(TokenStream in) {
      super(in);
    }

    boolean inPhrase = false;
    int savedStart = 0, savedEnd = 0;

    public Token next() throws IOException {
      if (inPhrase) {
        inPhrase = false;
        return new Token("phrase2", savedStart, savedEnd);
      } else
        for (Token token = input.next(); token != null; token = input.next()) {
          if (token.termText().equals("phrase")) {
            inPhrase = true;
            savedStart = token.startOffset();
            savedEnd = token.endOffset();
            return new Token("phrase1", savedStart, savedEnd);
          } else if (!token.termText().equals("stop"))
            return token;
        }
      return null;
    }
  }

  public static class QPTestAnalyzer extends Analyzer {

    /** Filters LowerCaseTokenizer with StopFilter. */
    public final TokenStream tokenStream(String fieldName, Reader reader) {
      return new QPTestFilter(new LowerCaseTokenizer(reader));
    }
  }

  public static class QPTestParser extends PrecedenceQueryParser {
    public QPTestParser(String f, Analyzer a) {
      super(f, a);
    }

    protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
      throw new ParseException("Fuzzy queries not allowed");
    }

    protected Query getWildcardQuery(String field, String termStr) throws ParseException {
      throw new ParseException("Wildcard queries not allowed");
    }
  }

  private int originalMaxClauses;

  public void setUp() {
    originalMaxClauses = BooleanQuery.getMaxClauseCount();
  }

  public PrecedenceQueryParser getParser(Analyzer a) throws Exception {
    if (a == null)
      a = new SimpleAnalyzer();
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
    qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
    return qp;
  }

  public Query getQuery(String query, Analyzer a) throws Exception {
    return getParser(a).parse(query);
  }

  public void assertQueryEquals(String query, Analyzer a, String result)
    throws Exception {
    Query q = getQuery(query, a);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
    }
  }

  public void assertWildcardQueryEquals(String query, boolean lowercase, String result)
    throws Exception {
    PrecedenceQueryParser qp = getParser(null);
    qp.setLowercaseExpandedTerms(lowercase);
    Query q = qp.parse(query);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("WildcardQuery /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
    }
  }

  public void assertWildcardQueryEquals(String query, String result) throws Exception {
    PrecedenceQueryParser qp = getParser(null);
    Query q = qp.parse(query);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /"
           + result + "/");
    }
  }

  public Query getQueryDOA(String query, Analyzer a)
    throws Exception {
    if (a == null)
      a = new SimpleAnalyzer();
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
    qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
    return qp.parse(query);
  }

  public void assertQueryEqualsDOA(String query, Analyzer a, String result)
    throws Exception {
    Query q = getQueryDOA(query, a);
    String s = q.toString("field");
    if (!s.equals(result)) {
      fail("Query /" + query + "/ yielded /" + s
           + "/, expecting /" + result + "/");
    }
  }

  public void testSimple() throws Exception {
    assertQueryEquals("term term term", null, "term term term");
    assertQueryEquals("türm term term", null, "türm term term");
    assertQueryEquals("ümlaut", null, "ümlaut");

    assertQueryEquals("a AND b", null, "+a +b");
    assertQueryEquals("(a AND b)", null, "+a +b");
    assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
    assertQueryEquals("a AND NOT b", null, "+a -b");
    assertQueryEquals("a AND -b", null, "+a -b");
    assertQueryEquals("a AND !b", null, "+a -b");
    assertQueryEquals("a && b", null, "+a +b");
    assertQueryEquals("a && ! b", null, "+a -b");

    assertQueryEquals("a OR b", null, "a b");
    assertQueryEquals("a || b", null, "a b");
    assertQueryEquals("a OR !b", null, "a -b");
    assertQueryEquals("a OR ! b", null, "a -b");
    assertQueryEquals("a OR -b", null, "a -b");

    assertQueryEquals("+term -term term", null, "+term -term term");
    assertQueryEquals("foo:term AND field:anotherTerm", null,
                      "+foo:term +anotherterm");
    assertQueryEquals("term AND \"phrase phrase\"", null,
                      "+term +\"phrase phrase\"");
    assertQueryEquals("\"hello there\"", null, "\"hello there\"");
    assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
    assertTrue(getQuery("hello", null) instanceof TermQuery);
    assertTrue(getQuery("\"hello there\"", null) instanceof PhraseQuery);

    assertQueryEquals("germ term^2.0", null, "germ term^2.0");
    assertQueryEquals("(term)^2.0", null, "term^2.0");
    assertQueryEquals("(germ term)^2.0", null, "(germ term)^2.0");
    assertQueryEquals("term^2.0", null, "term^2.0");
    assertQueryEquals("term^2", null, "term^2.0");
    assertQueryEquals("\"germ term\"^2.0", null, "\"germ term\"^2.0");
    assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");

    assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
                      "+(foo bar) +(baz boo)");
    assertQueryEquals("((a OR b) AND NOT c) OR d", null,
                      "(+(a b) -c) d");
    assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
                      "+(apple \"steve jobs\") -(foo bar baz)");
    assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
                      "+(title:dog title:cat) -author:\"bob dole\"");

    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new StandardAnalyzer());
    // make sure OR is the default:
    assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
    qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
    assertEquals(PrecedenceQueryParser.AND_OPERATOR, qp.getDefaultOperator());
    qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
    assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
  }

  public void testPunct() throws Exception {
    Analyzer a = new WhitespaceAnalyzer();
    assertQueryEquals("a&b", a, "a&b");
    assertQueryEquals("a&&b", a, "a&&b");
    assertQueryEquals(".NET", a, ".NET");
  }

  public void testSlop() throws Exception {
    assertQueryEquals("\"term germ\"~2", null, "\"term germ\"~2");
    assertQueryEquals("\"term germ\"~2 flork", null, "\"term germ\"~2 flork");
    assertQueryEquals("\"term\"~2", null, "term");
    assertQueryEquals("\" \"~2 germ", null, "germ");
    assertQueryEquals("\"term germ\"~2^2", null, "\"term germ\"~2^2.0");
  }

  public void testNumber() throws Exception {
    // The numbers go away because SimpleAnalyzer ignores them
    assertQueryEquals("3", null, "");
    assertQueryEquals("term 1.0 1 2", null, "term");
    assertQueryEquals("term term1 term2", null, "term term term");

    Analyzer a = new StandardAnalyzer();
    assertQueryEquals("3", a, "3");
    assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
    assertQueryEquals("term term1 term2", a, "term term1 term2");
  }

  public void testWildcard() throws Exception {
    assertQueryEquals("term*", null, "term*");
    assertQueryEquals("term*^2", null, "term*^2.0");
    assertQueryEquals("term~", null, "term~0.5");
    assertQueryEquals("term~0.7", null, "term~0.7");
    assertQueryEquals("term~^2", null, "term^2.0~0.5");
    assertQueryEquals("term^2~", null, "term^2.0~0.5");
    assertQueryEquals("term*germ", null, "term*germ");
    assertQueryEquals("term*germ^3", null, "term*germ^3.0");

    assertTrue(getQuery("term*", null) instanceof PrefixQuery);
    assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
    assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
    assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
    FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
    assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    fq = (FuzzyQuery)getQuery("term~", null);
    assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
    assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
    try {
      getQuery("term~1.1", null);   // value > 1, throws exception
      fail();
    } catch(ParseException pe) {
      // expected exception
    }
    assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);

    /* Tests to see that wild card terms are (or are not) properly
     * lower-cased with proper parser configuration
     */
    // First prefix queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Term*", true, "term*");
    // explicitly set lowercase:
    assertWildcardQueryEquals("term*", true, "term*");
    assertWildcardQueryEquals("Term*", true, "term*");
    assertWildcardQueryEquals("TERM*", true, "term*");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("term*", false, "term*");
    assertWildcardQueryEquals("Term*", false, "Term*");
    assertWildcardQueryEquals("TERM*", false, "TERM*");
    // Then 'full' wildcard queries:
    // by default, convert to lowercase:
    assertWildcardQueryEquals("Te?m", "te?m");
    // explicitly set lowercase:
    assertWildcardQueryEquals("te?m", true, "te?m");
    assertWildcardQueryEquals("Te?m", true, "te?m");
    assertWildcardQueryEquals("TE?M", true, "te?m");
    assertWildcardQueryEquals("Te?m*gerM", true, "te?m*germ");
    // explicitly disable lowercase conversion:
    assertWildcardQueryEquals("te?m", false, "te?m");
    assertWildcardQueryEquals("Te?m", false, "Te?m");
    assertWildcardQueryEquals("TE?M", false, "TE?M");
    assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
    // Fuzzy queries:
    assertWildcardQueryEquals("Term~", "term~0.5");
    assertWildcardQueryEquals("Term~", true, "term~0.5");
    assertWildcardQueryEquals("Term~", false, "Term~0.5");
    // Range queries:
    assertWildcardQueryEquals("[A TO C]", "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
    assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
  }

  public void testQPA() throws Exception {
    assertQueryEquals("term term term", qpAnalyzer, "term term term");
    assertQueryEquals("term +stop term", qpAnalyzer, "term term");
    assertQueryEquals("term -stop term", qpAnalyzer, "term term");
    assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
    assertQueryEquals("term phrase term", qpAnalyzer,
                      "term \"phrase1 phrase2\" term");
    // note the parens in this next assertion differ from the original
    // QueryParser behavior
    assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
                      "(+term -\"phrase1 phrase2\") term");
    assertQueryEquals("stop", qpAnalyzer, "");
    assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
    assertTrue(getQuery("term +stop", qpAnalyzer) instanceof TermQuery);
  }

  public void testRange() throws Exception {
    assertQueryEquals("[ a TO z]", null, "[a TO z]");
    assertTrue(getQuery("[ a TO z]", null) instanceof RangeQuery);
    assertQueryEquals("[ a TO z ]", null, "[a TO z]");
    assertQueryEquals("{ a TO z}", null, "{a TO z}");
    assertQueryEquals("{ a TO z }", null, "{a TO z}");
    assertQueryEquals("{ a TO z }^2.0", null, "{a TO z}^2.0");
    assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
    assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
    assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
    assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
  }

  public String getDate(String s) throws Exception {
    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
    return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY);
  }

  public String getLocalizedDate(int year, int month, int day) {
    DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
    Calendar calendar = Calendar.getInstance();
    calendar.set(year, month, day);
    return df.format(calendar.getTime());
  }

  public void testDateRange() throws Exception {
    String startDate = getLocalizedDate(2002, 1, 1);
    String endDate = getLocalizedDate(2002, 1, 4);
    assertQueryEquals("[ " + startDate + " TO " + endDate + "]", null,
                      "[" + getDate(startDate) + " TO " + getDate(endDate) + "]");
    assertQueryEquals("{ " + startDate + " " + endDate + " }", null,
                      "{" + getDate(startDate) + " TO " + getDate(endDate) + "}");
  }

  public void testEscaped() throws Exception {
    Analyzer a = new WhitespaceAnalyzer();

    /*assertQueryEquals("\\[brackets", a, "\\[brackets");
    assertQueryEquals("\\[brackets", null, "brackets");
    assertQueryEquals("\\\\", a, "\\\\");
    assertQueryEquals("\\+blah", a, "\\+blah");
    assertQueryEquals("\\(blah", a, "\\(blah");

    assertQueryEquals("\\-blah", a, "\\-blah");
    assertQueryEquals("\\!blah", a, "\\!blah");
    assertQueryEquals("\\{blah", a, "\\{blah");
    assertQueryEquals("\\}blah", a, "\\}blah");
    assertQueryEquals("\\:blah", a, "\\:blah");
    assertQueryEquals("\\^blah", a, "\\^blah");
    assertQueryEquals("\\[blah", a, "\\[blah");
    assertQueryEquals("\\]blah", a, "\\]blah");
    assertQueryEquals("\\\"blah", a, "\\\"blah");
    assertQueryEquals("\\(blah", a, "\\(blah");
    assertQueryEquals("\\)blah", a, "\\)blah");
    assertQueryEquals("\\~blah", a, "\\~blah");
    assertQueryEquals("\\*blah", a, "\\*blah");
    assertQueryEquals("\\?blah", a, "\\?blah");
    //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
    //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
    //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/

    assertQueryEquals("a\\-b:c", a, "a-b:c");
    assertQueryEquals("a\\+b:c", a, "a+b:c");
    assertQueryEquals("a\\:b:c", a, "a:b:c");
    assertQueryEquals("a\\\\b:c", a, "a\\b:c");

    assertQueryEquals("a:b\\-c", a, "a:b-c");
    assertQueryEquals("a:b\\+c", a, "a:b+c");
    assertQueryEquals("a:b\\:c", a, "a:b:c");
    assertQueryEquals("a:b\\\\c", a, "a:b\\c");

    assertQueryEquals("a:b\\-c*", a, "a:b-c*");
    assertQueryEquals("a:b\\+c*", a, "a:b+c*");
    assertQueryEquals("a:b\\:c*", a, "a:b:c*");

    assertQueryEquals("a:b\\\\c*", a, "a:b\\c*");

    assertQueryEquals("a:b\\-?c", a, "a:b-?c");
    assertQueryEquals("a:b\\+?c", a, "a:b+?c");
    assertQueryEquals("a:b\\:?c", a, "a:b:?c");

    assertQueryEquals("a:b\\\\?c", a, "a:b\\?c");

    assertQueryEquals("a:b\\-c~", a, "a:b-c~0.5");
    assertQueryEquals("a:b\\+c~", a, "a:b+c~0.5");
    assertQueryEquals("a:b\\:c~", a, "a:b:c~0.5");
    assertQueryEquals("a:b\\\\c~", a, "a:b\\c~0.5");

    assertQueryEquals("[ a\\- TO a\\+ ]", null, "[a- TO a+]");
    assertQueryEquals("[ a\\: TO a\\~ ]", null, "[a: TO a~]");
    assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
  }

  public void testTabNewlineCarriageReturn()
    throws Exception {
    assertQueryEqualsDOA("+weltbank +worlbank", null,
      "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \n +worlbank", null,
      "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\r+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r +worlbank", null,
      "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\r\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r\n +worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \r \n +worlbank", null,
      "+weltbank +worlbank");

    assertQueryEqualsDOA("+weltbank\t+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t+worlbank", null,
      "+weltbank +worlbank");
    assertQueryEqualsDOA("weltbank \t +worlbank", null,
      "+weltbank +worlbank");
  }

  public void testSimpleDAO()
    throws Exception {
    assertQueryEqualsDOA("term term term", null, "+term +term +term");
    assertQueryEqualsDOA("term +term term", null, "+term +term +term");
    assertQueryEqualsDOA("term term +term", null, "+term +term +term");
    assertQueryEqualsDOA("term +term +term", null, "+term +term +term");
    assertQueryEqualsDOA("-term term term", null, "-term +term +term");
  }

  public void testBoost()
    throws Exception {
    StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(new String[]{"on"});
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", oneStopAnalyzer);
    Query q = qp.parse("on^1.0");
    assertNotNull(q);
    q = qp.parse("\"hello\"^2.0");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("hello^2.0");
    assertNotNull(q);
    assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
    q = qp.parse("\"on\"^1.0");
    assertNotNull(q);

    q = PrecedenceQueryParser.parse("the^3", "field", new StandardAnalyzer());
    assertNotNull(q);
  }

  public void testException() throws Exception {
    try {
      assertQueryEquals("\"some phrase", null, "abc");
      fail("ParseException expected, not thrown");
    } catch (ParseException expected) {
    }
  }

  public void testCustomQueryParserWildcard() {
    try {
      new QPTestParser("contents", new WhitespaceAnalyzer()).parse("a?t");
    } catch (ParseException expected) {
      return;
    }
    fail("Wildcard queries should not be allowed");
  }

  public void testCustomQueryParserFuzzy() throws Exception {
    try {
      new QPTestParser("contents", new WhitespaceAnalyzer()).parse("xunit~");
    } catch (ParseException expected) {
      return;
    }
    fail("Fuzzy queries should not be allowed");
  }

  public void testBooleanQuery() throws Exception {
    BooleanQuery.setMaxClauseCount(2);
    try {
      PrecedenceQueryParser.parse("one two three", "field", new WhitespaceAnalyzer());
      fail("ParseException expected due to too many boolean clauses");
    } catch (ParseException expected) {
      // too many boolean clauses, so ParseException is expected
    }
  }

  /**
   * This test differs from the one in TestQueryParser, showing how the
   * precedence issue has been corrected: explicit AND binds more tightly
   * than OR.
   */
  public void testPrecedence() throws Exception {
    Query query1 = PrecedenceQueryParser.parse("A AND B OR C AND D", "field", new WhitespaceAnalyzer());
    Query query2 = PrecedenceQueryParser.parse("(A AND B) OR (C AND D)", "field", new WhitespaceAnalyzer());
    assertEquals(query1, query2);
  }


  public void tearDown() {
    BooleanQuery.setMaxClauseCount(originalMaxClauses);
  }

}
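
For reference, instance-level usage of the new parser, mirroring the helpers
in this test. Every call shown (the constructor, setDefaultOperator,
setLowercaseExpandedTerms, parse) is exercised above; the expected output in
the comment is inferred from the precedence assertions, not guaranteed:

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;

public class PrecedenceParserUsage {
  public static void main(String[] args) throws Exception {
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new SimpleAnalyzer());
    qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR); // bare terms become required
    qp.setLowercaseExpandedTerms(true);                        // lowercase wildcard/fuzzy terms
    Query q = qp.parse("apple OR pear AND banana");
    // AND groups before OR is applied; expected: apple (+pear +banana)
    System.out.println(q.toString("field"));
  }
}
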