LUCENE-1938: Precedence query parser using the contrib/queryparser framework

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1025597 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-10-20 14:26:30 +00:00
parent 01390fcefa
commit 405cb198a0
16 changed files with 577 additions and 4642 deletions

View File

@ -248,6 +248,11 @@ New features
* LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball.
(Robert Muir)
* LUCENE-1938: PrecedenceQueryParser is now implemented with the flexible QP framework.
This means that you can also add this functionality to your own QP pipeline by using
BooleanModifiersQueryNodeProcessor (for example, in place of GroupQueryNodeProcessor).
(Adriano Crestani via Robert Muir)
Build

View File

@ -30,7 +30,7 @@
<!--
NOTE: see the README.javacc for details on how to fully regenerate the parser
-->
<target name="javacc" depends="javacc-flexible,javacc-precedence,javacc-surround"/>
<target name="javacc" depends="javacc-flexible,javacc-surround"/>
<target name="javacc-flexible" depends="javacc-check">
<delete>
@ -94,19 +94,6 @@ import org.apache.lucene.queryParser.core.messages.*;"
flags="g"
byline="true"/>
</target>
<!-- Directory holding the precedence query parser grammar and its generated sources. -->
<property name="javacc.precedence.path" location="src/java/org/apache/lucene/queryParser/precedence"/>
<!-- Regenerates the precedence query parser from PrecedenceQueryParser.jj. -->
<target name="javacc-precedence" depends="javacc-check" description="generate precedence query parser from jj (requires javacc 4.1)">
<!-- Remove only the .java files whose content matches the JavaCC "Generated By" marker,
     so hand-written sources in the same directory are left untouched. -->
<delete>
<fileset dir="${javacc.precedence.path}" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
</delete>
<!-- invoke-javacc is not defined in this excerpt; presumably a macro declared elsewhere in the build. -->
<invoke-javacc target="${javacc.precedence.path}/PrecedenceQueryParser.jj"
outputDir="${javacc.precedence.path}"
/>
</target>
<target name="javacc-surround" depends="javacc-check" description="generate surround query parser from jj (requires javacc 4.1">
<invoke-javacc target="src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj"

View File

@ -1,112 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.queryParser.precedence;
/**
 * Describes a character stream that maintains line and column number
 * positions of the characters, and that can be backed up to some extent.
 * An implementation of this interface is used by the TokenManager generated
 * by JavaCC.
 *
 * <p>All methods except {@link #backup(int)} can be implemented in any
 * fashion; {@code backup} must be implemented correctly for the lexer to
 * operate correctly. The remaining methods only report information such as
 * line number, column number and the text of a token; the lexer does not use
 * them, so their implementation does not affect the generated lexer.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. How the input is
   * selected is the responsibility of the implementing class.
   *
   * @return the next character
   * @throws java.io.IOException if the character cannot be read
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   *
   * @return the column position
   * @deprecated use {@link #getEndColumn} instead
   * @see #getEndColumn
   */
  @Deprecated
  int getColumn();

  /**
   * Returns the line number of the character last read.
   *
   * @return the line number
   * @deprecated use {@link #getEndLine} instead
   * @see #getEndLine
   */
  @Deprecated
  int getLine();

  /**
   * Returns the column number of the last character of the current token
   * (the one being matched since the last call to {@link #BeginToken}).
   *
   * @return the end column of the current token
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character of the current token
   * (the one being matched since the last call to {@link #BeginToken}).
   *
   * @return the end line of the current token
   */
  int getEndLine();

  /**
   * Returns the column number of the first character of the current token
   * (the one being matched since the last call to {@link #BeginToken}).
   *
   * @return the begin column of the current token
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character of the current token
   * (the one being matched since the last call to {@link #BeginToken}).
   *
   * @return the begin line of the current token
   */
  int getBeginLine();

  /**
   * Backs up the input stream by {@code amount} steps. The lexer calls this
   * when it has already read some characters but could not use them to match
   * a (longer) token. Those characters are used again as the prefix of the
   * next token, and it is the implementation's responsibility to do this
   * right.
   *
   * @param amount number of characters to step back
   */
  void backup(int amount);

  /**
   * Returns the next character, which marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls to
   * this method so that {@link #backup(int)} can be implemented correctly.
   *
   * @return the first character of the next token
   * @throws java.io.IOException if the character cannot be read
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of the characters from the marked token
   * beginning to the current buffer position. Implementations may return
   * anything they want; for efficiency an implementation might simply return
   * {@code null}, which is valid.
   *
   * @return the text of the current token, or an implementation-defined value
   */
  String GetImage();

  /**
   * Returns the characters that make up the suffix of length {@code len} of
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation is:
   *
   * <pre>
   *   {
   *     String t = GetImage();
   *     return t.substring(t.length() - len, t.length()).toCharArray();
   *   }
   * </pre>
   *
   * @param len number of trailing characters to return
   * @return the last {@code len} characters of the current token
   */
  char[] GetSuffix(int len);

  /**
   * Called by the lexer to indicate that it is done with the stream, so the
   * implementation can free any resources it holds. The body may be empty
   * without affecting the lexer's operation.
   */
  void Done();
}
/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */

View File

@ -1,123 +0,0 @@
// FastCharStream.java
package org.apache.lucene.queryParser.precedence;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.*;
/**
 * A {@link CharStream} backed by a single growable {@code char[]} that is
 * filled from a {@link Reader}.
 *
 * <p>No line counting is done: every line getter returns 1 and the column
 * getters return absolute character offsets into the input
 * ({@code bufferStart} plus an offset inside the buffer).
 */
public final class FastCharStream implements CharStream {
  char[] buffer = null;

  int bufferLength = 0;          // end of valid chars
  int bufferPosition = 0;        // next char to read

  int tokenStart = 0;            // offset in buffer
  int bufferStart = 0;           // position in file of buffer

  Reader input;                  // source of chars

  /**
   * Creates a stream that reads from the given reader.
   *
   * @param r the character source; closed by {@link #Done()}
   */
  public FastCharStream(Reader r) {
    this.input = r;
  }

  /** Returns the next character, refilling the buffer first when it is exhausted. */
  public final char readChar() throws IOException {
    if (bufferPosition >= bufferLength) {
      refill();
    }
    final char next = buffer[bufferPosition];
    bufferPosition++;
    return next;
  }

  /**
   * Makes room for more input and reads it. The characters of the current
   * token (from {@code tokenStart} on) are always retained.
   *
   * @throws IOException with message "read past eof" when the reader is exhausted
   */
  private final void refill() throws IOException {
    final int retained = bufferLength - tokenStart;

    if (tokenStart != 0) {
      // shift the current token to the front of the buffer
      System.arraycopy(buffer, tokenStart, buffer, 0, retained);
    } else if (buffer == null) {
      // first use: allocate the buffer
      buffer = new char[2048];
    } else if (bufferLength == buffer.length) {
      // the token fills the whole buffer: double its size
      final char[] grown = new char[buffer.length * 2];
      System.arraycopy(buffer, 0, grown, 0, bufferLength);
      buffer = grown;
    }

    // update state to reflect the (possibly shifted) contents
    bufferLength = retained;
    bufferPosition = retained;
    bufferStart += tokenStart;
    tokenStart = 0;

    // fill the free space at the end of the buffer
    final int charsRead = input.read(buffer, retained, buffer.length - retained);
    if (charsRead == -1) {
      throw new IOException("read past eof");
    }
    bufferLength += charsRead;
  }

  /** Marks the current position as the start of a token and returns its first character. */
  public final char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  /** Moves the read position back by {@code amount} characters. */
  public final void backup(int amount) {
    bufferPosition = bufferPosition - amount;
  }

  /** Returns the characters from the token start up to the read position. */
  public final String GetImage() {
    final int length = bufferPosition - tokenStart;
    return new String(buffer, tokenStart, length);
  }

  /** Returns a copy of the {@code len} characters that precede the read position. */
  public final char[] GetSuffix(int len) {
    final char[] suffix = new char[len];
    final int from = bufferPosition - len;
    System.arraycopy(buffer, from, suffix, 0, len);
    return suffix;
  }

  /** Closes the underlying reader; a failure is reported on stderr and otherwise ignored. */
  public final void Done() {
    try {
      input.close();
    } catch (IOException e) {
      System.err.println("Caught: " + e + "; ignoring.");
    }
  }

  /** Returns the absolute offset of the read position. */
  public final int getColumn() {
    return bufferStart + bufferPosition;
  }

  /** Always returns 1; lines are not tracked. */
  public final int getLine() {
    return 1;
  }

  /** Returns the absolute offset of the read position. */
  public final int getEndColumn() {
    return bufferStart + bufferPosition;
  }

  /** Always returns 1; lines are not tracked. */
  public final int getEndLine() {
    return 1;
  }

  /** Returns the absolute offset of the current token start. */
  public final int getBeginColumn() {
    return bufferStart + tokenStart;
  }

  /** Always returns 1; lines are not tracked. */
  public final int getBeginLine() {
    return 1;
  }
}

View File

@ -1,198 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
/* JavaCCOptions:KEEP_LINE_COL=null */
package org.apache.lucene.queryParser.precedence;
/**
 * Thrown when a parse error is encountered. Instances can also be created
 * explicitly by calling {@code generateParseException} in the generated
 * parser.
 *
 * <p>This class may be customized for error reporting as long as the public
 * fields are retained.
 */
public class ParseException extends Exception {

  /**
   * Records which constructor created this object; {@link #getMessage()}
   * builds a detailed message only when this is {@code true}.
   */
  protected boolean specialConstructor;

  /**
   * The last token that was consumed successfully. When this exception
   * reports a parse error, the token following this one is the first error
   * token.
   */
  public Token currentToken;

  /**
   * Each entry is an array of token kinds (ordinal values) describing one
   * token sequence that was expected at the point of the error.
   */
  public int[][] expectedTokenSequences;

  /**
   * Reference to the {@code tokenImage} array of the generated parser in
   * which the error occurred.
   */
  public String[] tokenImage;

  /** The end of line string for this machine. */
  protected String eol = System.getProperty("line.separator", "\n");

  /**
   * Constructor used by {@code generateParseException} in the generated
   * parser. It stores the three arguments in the corresponding fields and
   * sets {@code specialConstructor} to {@code true}. The superclass is given
   * the empty string as its message.
   */
  public ParseException(Token currentTokenVal,
                        int[][] expectedTokenSequencesVal,
                        String[] tokenImageVal
                       )
  {
    super("");
    this.specialConstructor = true;
    this.currentToken = currentTokenVal;
    this.expectedTokenSequences = expectedTokenSequencesVal;
    this.tokenImage = tokenImageVal;
  }

  /**
   * General-purpose constructor without a message. The exception then
   * behaves as a plain {@link Exception}; the token-related fields are left
   * unset.
   */
  public ParseException() {
    super();
    this.specialConstructor = false;
  }

  /** General-purpose constructor with a message. */
  public ParseException(String message) {
    super(message);
    this.specialConstructor = false;
  }

  /**
   * Returns the plain superclass message when one of the general-purpose
   * constructors was used. Otherwise builds a message from
   * {@code currentToken}, {@code expectedTokenSequences} and
   * {@code tokenImage} that names the tokens encountered, their position,
   * and the token sequences that were expected.
   */
  public String getMessage() {
    if (!specialConstructor) {
      return super.getMessage();
    }

    // Render every expected sequence and remember the longest one.
    final StringBuilder expected = new StringBuilder();
    int maxSize = 0;
    for (int[] sequence : expectedTokenSequences) {
      maxSize = Math.max(maxSize, sequence.length);
      for (int kind : sequence) {
        expected.append(tokenImage[kind]).append(' ');
      }
      if (sequence[sequence.length - 1] != 0) {
        expected.append("...");
      }
      expected.append(eol).append(" ");
    }

    // Render up to maxSize of the tokens that were actually found.
    final StringBuilder message = new StringBuilder("Encountered \"");
    Token tok = currentToken.next;
    for (int i = 0; i < maxSize; i++) {
      if (i != 0) {
        message.append(" ");
      }
      if (tok.kind == 0) {
        // kind 0 ends the listing
        message.append(tokenImage[0]);
        break;
      }
      message.append(" ").append(tokenImage[tok.kind]);
      message.append(" \"");
      message.append(add_escapes(tok.image));
      message.append(" \"");
      tok = tok.next;
    }

    message.append("\" at line ").append(currentToken.next.beginLine)
           .append(", column ").append(currentToken.next.beginColumn);
    message.append(".").append(eol);
    if (expectedTokenSequences.length == 1) {
      message.append("Was expecting:").append(eol).append(" ");
    } else {
      message.append("Was expecting one of:").append(eol).append(" ");
    }
    message.append(expected.toString());
    return message.toString();
  }

  /**
   * Converts raw characters to their escaped form where the raw form cannot
   * appear in an ASCII string literal. NUL characters are dropped; characters
   * outside 0x20..0x7e become four-digit {@code \\u} escapes.
   */
  protected String add_escapes(String str) {
    final StringBuilder escaped = new StringBuilder();
    for (int i = 0; i < str.length(); i++) {
      final char ch = str.charAt(i);
      switch (ch) {
        case 0:
          break;
        case '\b':
          escaped.append("\\b");
          break;
        case '\t':
          escaped.append("\\t");
          break;
        case '\n':
          escaped.append("\\n");
          break;
        case '\f':
          escaped.append("\\f");
          break;
        case '\r':
          escaped.append("\\r");
          break;
        case '\"':
          escaped.append("\\\"");
          break;
        case '\'':
          escaped.append("\\\'");
          break;
        case '\\':
          escaped.append("\\\\");
          break;
        default:
          if (ch < 0x20 || ch > 0x7e) {
            final String hex = "0000" + Integer.toString(ch, 16);
            escaped.append("\\u" + hex.substring(hex.length() - 4));
          } else {
            escaped.append(ch);
          }
          break;
      }
    }
    return escaped.toString();
  }
}
/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */

View File

@ -1,982 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
options {
STATIC=false;
JAVA_UNICODE_ESCAPE=true;
USER_CHAR_STREAM=true;
}
PARSER_BEGIN(PrecedenceQueryParser)
package org.apache.lucene.queryParser.precedence;
import java.io.IOException;
import java.io.StringReader;
import java.text.DateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.AttributeSource;
/**
* Experimental query parser variant designed to handle operator precedence
* in a more sensible fashion than QueryParser. There are still some
* open issues with this parser. The following tests are currently failing
* in TestPrecedenceQueryParser and are disabled to make this test pass:
* <ul>
* <li> testSimple
* <li> testWildcard
* <li> testPrecedence
* </ul>
*
* This class is generated by JavaCC. The only method that clients should need
* to call is {@link #parse(String)}.
*
* The syntax for query strings is as follows:
* A Query is a series of clauses.
* A clause may be prefixed by:
* <ul>
* <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
* that the clause is required or prohibited respectively; or
* <li> a term followed by a colon, indicating the field to be searched.
* This enables one to construct queries which search multiple fields.
* </ul>
*
* A clause may be either:
* <ul>
* <li> a term, indicating all the documents that contain this term; or
* <li> a nested query, enclosed in parentheses. Note that this may be used
* with a <code>+</code>/<code>-</code> prefix to require any of a set of
* terms.
* </ul>
*
* Thus, in BNF, the query grammar is:
* <pre>
* Query ::= ( Clause )*
* Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
* </pre>
*
* <p>
* Examples of appropriately formatted queries can be found in the <a
* href="../../../../../../../queryparsersyntax.html">query syntax
* documentation</a>.
* </p>
*/
public class PrecedenceQueryParser {
// Conjunction codes: addClause compares its conj argument against CONJ_AND and CONJ_OR.
// CONJ_NONE is presumably used by the grammar productions when no conjunction was parsed.
private static final int CONJ_NONE = 0;
private static final int CONJ_AND = 1;
private static final int CONJ_OR = 2;
// Modifier codes: addClause treats MOD_NOT as "prohibited" and MOD_REQ as "required".
// MOD_NONE is presumably used by the grammar productions when no modifier was parsed.
private static final int MOD_NONE = 0;
private static final int MOD_NOT = 10;
private static final int MOD_REQ = 11;
// make it possible to call setDefaultOperator() without accessing
// the nested class:
public static final Operator AND_OPERATOR = Operator.AND;
public static final Operator OR_OPERATOR = Operator.OR;
/** The actual operator that parser uses to combine query terms */
private Operator operator = OR_OPERATOR;
// When true, the range/wildcard/prefix/regexp/fuzzy factory methods lower-case their term text.
boolean lowercaseExpandedTerms = true;
// Rewrite method applied to the range, wildcard, prefix and regexp queries this parser creates.
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
// Analyzer used by getFieldQuery to tokenize query text.
Analyzer analyzer;
// Default field, handed to the top-level Query production by parse().
String field;
// Slop applied by getFieldQuery to the PhraseQuery it builds.
int phraseSlop = 0;
// Fuzzy query settings; both start from FuzzyQuery's defaults.
float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
// Locale used by getRangeQuery when it tries to parse range bounds as dates.
Locale locale = Locale.getDefault();
// Implicit operator choices; see AND_OPERATOR / OR_OPERATOR above.
static enum Operator { OR, AND }
/**
 * Creates a parser bound to a default field and an analyzer. The parser is
 * first initialised on an empty input; the real input is supplied later via
 * {@link #parse(String)}.
 *
 * @param f the default field for query terms.
 * @param a used to find terms in the query text.
 */
public PrecedenceQueryParser(String f, Analyzer a) {
  this(new FastCharStream(new StringReader("")));
  this.field = f;
  this.analyzer = a;
}
/**
 * Parses a query string, returning a {@link org.apache.lucene.search.Query}.
 *
 * <p>A {@code null}, empty or whitespace-only expression yields an empty
 * {@link BooleanQuery} without invoking the grammar. An empty
 * {@link BooleanQuery} is also returned when the grammar produces no query.
 *
 * <p>Lexer errors ({@code TokenMgrError}) and
 * {@link BooleanQuery.TooManyClauses} are reported as {@link ParseException};
 * the original throwable is attached as the cause so the stack trace of the
 * real failure is not lost.
 *
 * @param expression the query string to be parsed.
 * @return the parsed query, never {@code null}
 * @throws ParseException if the parsing fails
 */
public Query parse(String expression) throws ParseException {
  // optimize empty query to be empty BooleanQuery
  if (expression == null || expression.trim().length() == 0) {
    return new BooleanQuery();
  }

  ReInit(new FastCharStream(new StringReader(expression)));
  try {
    Query query = Query(field);
    return (query != null) ? query : new BooleanQuery();
  }
  catch (TokenMgrError tme) {
    ParseException failure = new ParseException(tme.getMessage());
    failure.initCause(tme);
    throw failure;
  }
  catch (BooleanQuery.TooManyClauses tmc) {
    ParseException failure = new ParseException("Too many boolean clauses");
    failure.initCause(tmc);
    throw failure;
  }
}
/**
 * Returns the analyzer this parser uses to tokenize query text.
 *
 * @return the analyzer.
 */
public Analyzer getAnalyzer() {
  return this.analyzer;
}
/**
 * Returns the default field used for query terms.
 *
 * @return the default field.
 */
public String getField() {
  return this.field;
}
/**
 * Returns the minimal similarity currently configured for fuzzy queries.
 *
 * @return the minimal similarity.
 */
public float getFuzzyMinSim() {
  return this.fuzzyMinSim;
}
/**
 * Set the minimum similarity for fuzzy queries.
 * The initial value is {@link FuzzyQuery#defaultMinSimilarity}.
 *
 * @param fuzzyMinSim the minimum similarity to store
 */
public void setFuzzyMinSim(float fuzzyMinSim) {
this.fuzzyMinSim = fuzzyMinSim;
}
/**
 * Returns the prefix length currently configured for fuzzy queries.
 *
 * @return the fuzzyPrefixLength.
 */
public int getFuzzyPrefixLength() {
  return this.fuzzyPrefixLength;
}
/**
 * Set the prefix length for fuzzy queries.
 * The initial value is {@link FuzzyQuery#defaultPrefixLength}.
 *
 * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
 */
public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
/**
 * Sets the default slop for phrases. If zero, then exact phrase matches
 * are required. Default value is zero.
 *
 * @param phraseSlop the slop to apply to phrase queries built by this parser
 */
public void setPhraseSlop(int phraseSlop) {
this.phraseSlop = phraseSlop;
}
/**
 * Returns the default slop for phrases.
 *
 * @return the configured phrase slop
 */
public int getPhraseSlop() {
  return this.phraseSlop;
}
/**
 * Sets the boolean operator used to combine terms that carry no explicit
 * operator.
 * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
 * are considered optional: for example <code>capital of Hungary</code> is equal to
 * <code>capital OR of OR Hungary</code>.<br/>
 * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
 * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
 *
 * @param op the operator to use, <code>AND_OPERATOR</code> or <code>OR_OPERATOR</code>
 */
public void setDefaultOperator(Operator op) {
this.operator = op;
}
/**
 * Returns the implicit operator setting, which will be either
 * <code>AND_OPERATOR</code> or <code>OR_OPERATOR</code>.
 *
 * @return the operator applied between terms that carry no explicit operator
 */
public Operator getDefaultOperator() {
  return this.operator;
}
/**
 * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
 * lower-cased or not. Default is <code>true</code>.
 *
 * @param lowercaseExpandedTerms <code>true</code> to lower-case such terms
 */
public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
this.lowercaseExpandedTerms = lowercaseExpandedTerms;
}
/**
 * Reports whether expanded terms are lower-cased automatically.
 *
 * @return the current setting
 * @see #setLowercaseExpandedTerms(boolean)
 */
public boolean getLowercaseExpandedTerms() {
  return this.lowercaseExpandedTerms;
}
/**
 * Changes the rewrite method applied to the multi-term queries this parser
 * creates.
 *
 * <p>By default PrecedenceQueryParser uses
 * {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when creating a
 * PrefixQuery, WildcardQuery or RangeQuery. That implementation is generally
 * preferable because it a) runs faster, b) does not have the scarcity of
 * terms unduly influence score and c) avoids any "TooManyBooleanClauses"
 * exception. If your application really needs the old-fashioned BooleanQuery
 * expansion rewriting and the above points are not relevant, use this method
 * to change the rewrite method.
 *
 * @param method the rewrite method to apply from now on
 */
public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
  this.multiTermRewriteMethod = method;
}
/**
 * Returns the rewrite method applied to multi-term queries.
 *
 * @return the current rewrite method
 * @see #setMultiTermRewriteMethod
 */
public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
  return this.multiTermRewriteMethod;
}
/**
 * Set locale used by date range parsing.
 * The initial value is {@link Locale#getDefault()}.
 *
 * @param locale the locale used when range bounds are parsed as dates
 */
public void setLocale(Locale locale) {
this.locale = locale;
}
/**
 * Returns the locale used by date range parsing, allowing access by
 * subclasses.
 *
 * @return the current locale
 */
public Locale getLocale() {
  return this.locale;
}
/**
 * Appends {@code q} to {@code clauses} with the occurrence implied by the
 * conjunction, the modifier and the default operator, and adjusts the
 * previous clause where the conjunction demands it.
 *
 * @param clauses  clause list being built; its last element may be modified
 * @param conj     conjunction that introduced the clause (compared against
 *                 {@code CONJ_AND} / {@code CONJ_OR})
 * @param modifier modifier on the clause (compared against {@code MOD_NOT} /
 *                 {@code MOD_REQ})
 * @param q        query to add; when {@code null} only the previous clause is
 *                 adjusted
 */
protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
  final boolean hasPrevious = clauses.size() > 0;

  // Introduced by AND: the preceding clause becomes required,
  // unless it is already prohibited.
  if (hasPrevious && conj == CONJ_AND) {
    final BooleanClause previous = clauses.get(clauses.size() - 1);
    if (!previous.isProhibited()) {
      previous.setOccur(BooleanClause.Occur.MUST);
    }
  }

  // Introduced by OR while AND is the default operator: the preceding clause
  // becomes optional, unless it is prohibited (so -a OR b is left alone but
  // +a OR b becomes a OR b). Without this, "a OR b" would parse as +a OR b
  // because the first term is parsed as required.
  if (hasPrevious && operator == AND_OPERATOR && conj == CONJ_OR) {
    final BooleanClause previous = clauses.get(clauses.size() - 1);
    if (!previous.isProhibited()) {
      previous.setOccur(BooleanClause.Occur.SHOULD);
    }
  }

  // A null query can be passed in: the term might have been filtered away
  // by the analyzer.
  if (q == null) {
    return;
  }

  // PROHIBITED when introduced by NOT or -, whatever the default operator.
  final boolean prohibited = (modifier == MOD_NOT);
  final boolean required;
  if (operator == OR_OPERATOR) {
    // REQUIRED when introduced by + or by AND (and not prohibited).
    required = (modifier == MOD_REQ) || (conj == CONJ_AND && !prohibited);
  } else {
    // REQUIRED when not prohibited and not introduced by OR.
    required = (!prohibited && conj != CONJ_OR);
  }

  final BooleanClause.Occur occur;
  if (prohibited) {
    if (required) {
      throw new RuntimeException("Clause cannot be both required and prohibited");
    }
    occur = BooleanClause.Occur.MUST_NOT;
  } else {
    occur = required ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
  }
  clauses.add(new BooleanClause(q, occur));
}
/**
 * Analyzes {@code queryText} with this parser's analyzer and builds a query
 * from the resulting tokens:
 * <ul>
 * <li>no tokens: {@code null};
 * <li>one token: a {@link TermQuery};
 * <li>several tokens, unquoted, or quoted but all at a single position: a
 *     {@link BooleanQuery} of term queries (coord disabled when there is only
 *     one position; clauses are MUST when there is more than one position and
 *     the default operator is AND, SHOULD otherwise);
 * <li>several tokens, quoted, with stacked tokens over more than one
 *     position: a {@link MultiPhraseQuery};
 * <li>several tokens, quoted, one token per position: a {@link PhraseQuery}.
 * </ul>
 * Both phrase query types receive the default phrase slop.
 *
 * <p>A token counts as "stacked" only when its position increment is zero.
 * Increments greater than one (for example the gap left by a removed stop
 * word) start a new position, so a quoted phrase containing a stop word is
 * still built as a phrase query.
 *
 * @param field     field the query is built for
 * @param queryText raw text to analyze
 * @param quoted    whether the text was quoted in the query string
 * @return the query, or {@code null} when analysis produced no tokens
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
  // Use the analyzer to get all the tokens, and then build a TermQuery,
  // PhraseQuery, or nothing based on the term count
  TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
  List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
  int positionCount = 0;
  boolean severalTokensAtSamePosition = false;
  TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class);
  PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);

  try {
    while (source.incrementToken()) {
      list.add(source.captureState());
      // Any non-zero increment starts a new position; only a zero increment
      // means the token shares the position of the previous one.
      if (posincrAtt.getPositionIncrement() > 0) {
        positionCount++;
      } else {
        severalTokensAtSamePosition = true;
      }
    }
    source.end();
  } catch (IOException e) {
    // ignore, should never happen for StringReaders
  } finally {
    // Always release the stream, even when tokenization failed part-way.
    try {
      source.close();
    } catch (IOException e) {
      // ignore, nothing useful can be done if close fails
    }
  }

  if (list.size() == 0) {
    return null;
  }

  if (list.size() == 1) {
    source.restoreState(list.get(0));
    BytesRef term = new BytesRef();
    termAtt.toBytesRef(term);
    return new TermQuery(new Term(field, term));
  }

  if (severalTokensAtSamePosition || !quoted) {
    if (positionCount == 1 || !quoted) {
      // no phrase query:
      BooleanQuery q = new BooleanQuery(positionCount == 1);
      BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ?
          BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
      for (int i = 0; i < list.size(); i++) {
        BytesRef term = new BytesRef();
        source.restoreState(list.get(i));
        termAtt.toBytesRef(term);
        q.add(new TermQuery(new Term(field, term)), occur);
      }
      return q;
    }

    // phrase query with stacked tokens:
    MultiPhraseQuery mpq = new MultiPhraseQuery();
    // Apply the default slop here as well, matching the PhraseQuery branch below.
    mpq.setSlop(phraseSlop);
    List<Term> multiTerms = new ArrayList<Term>();
    for (int i = 0; i < list.size(); i++) {
      BytesRef term = new BytesRef();
      source.restoreState(list.get(i));
      // A non-zero increment closes the group of terms collected for the
      // previous position.
      if (posincrAtt.getPositionIncrement() > 0 && multiTerms.size() > 0) {
        mpq.add(multiTerms.toArray(new Term[0]));
        multiTerms.clear();
      }
      termAtt.toBytesRef(term);
      multiTerms.add(new Term(field, term));
    }
    mpq.add(multiTerms.toArray(new Term[0]));
    return mpq;
  }

  // plain phrase query: exactly one token per position
  PhraseQuery q = new PhraseQuery();
  q.setSlop(phraseSlop);
  for (int i = 0; i < list.size(); i++) {
    BytesRef term = new BytesRef();
    source.restoreState(list.get(i));
    termAtt.toBytesRef(term);
    q.add(new Term(field, term));
  }
  return q;
}
/**
 * Builds the query for quoted text that carries an explicit slop. The query
 * is obtained from {@link #getFieldQuery(String,String,boolean)} with
 * {@code quoted = true}; when the result is a {@link PhraseQuery} or a
 * {@link MultiPhraseQuery} its slop is set to {@code slop}.
 * This method may be overridden, for example, to return
 * a SpanNearQuery instead of a PhraseQuery.
 *
 * @param field     field the query is built for
 * @param queryText raw phrase text
 * @param slop      slop to apply to a phrase result
 * @return the query produced by the delegate, possibly {@code null}
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText, int slop)
    throws ParseException {
  final Query query = getFieldQuery(field, queryText, true);

  // The two phrase types are unrelated classes, so at most one branch applies.
  if (query instanceof PhraseQuery) {
    final PhraseQuery phrase = (PhraseQuery) query;
    phrase.setSlop(slop);
  } else if (query instanceof MultiPhraseQuery) {
    final MultiPhraseQuery multiPhrase = (MultiPhraseQuery) query;
    multiPhrase.setSlop(slop);
  }

  return query;
}
/**
 * Builds a {@link TermRangeQuery} between two bounds.
 *
 * <p>The bounds are lower-cased when {@code lowercaseExpandedTerms} is set.
 * They are then tentatively parsed as short-format dates in the parser's
 * locale; if both parse, they are replaced by their day-resolution
 * {@link DateTools} strings. Any failure of that attempt is ignored and the
 * bounds are used as plain terms.
 *
 * @param field     field the range applies to
 * @param part1     lower bound
 * @param part2     upper bound
 * @param inclusive whether both bounds are inclusive
 * @return the range query, using the configured multi-term rewrite method
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getRangeQuery(String field,
                              String part1,
                              String part2,
                              boolean inclusive) throws ParseException
{
  String lower = lowercaseExpandedTerms ? part1.toLowerCase() : part1;
  String upper = lowercaseExpandedTerms ? part2.toLowerCase() : part2;

  try {
    final DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT, locale);
    dateFormat.setLenient(true);
    final Date lowerDate = dateFormat.parse(lower);
    final Date upperDate = dateFormat.parse(upper);
    lower = DateTools.dateToString(lowerDate, DateTools.Resolution.DAY);
    upper = DateTools.dateToString(upperDate, DateTools.Resolution.DAY);
  }
  catch (Exception e) {
    // deliberately ignored: the bounds are not dates, keep them as plain terms
  }

  final TermRangeQuery rangeQuery = new TermRangeQuery(field, lower, upper, inclusive, inclusive);
  rangeQuery.setRewriteMethod(multiTermRewriteMethod);
  return rangeQuery;
}
/**
 * Factory method that turns a list of clauses into a query, with coord
 * scoring left enabled. Delegates to
 * {@link #getBooleanQuery(List, boolean)}.
 *
 * Can be overridden by extending classes, to modify query being
 * returned.
 *
 * @param clauses List that contains {@link BooleanClause} instances
 *    to join.
 *
 * @return Resulting {@link Query} object.
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
{
  final boolean disableCoord = false;
  return getBooleanQuery(clauses, disableCoord);
}
/**
 * Factory method that turns a list of clauses into a {@link BooleanQuery}
 * holding those clauses in list order.
 *
 * Can be overridden by extending classes, to modify query being
 * returned.
 *
 * @param clauses List that contains {@link BooleanClause} instances
 *    to join.
 * @param disableCoord true if coord scoring should be disabled.
 *
 * @return Resulting {@link Query} object, or {@code null} when
 *    {@code clauses} is {@code null} or empty.
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
    throws ParseException {
  if (clauses == null || clauses.isEmpty()) {
    return null;
  }

  final BooleanQuery combined = new BooleanQuery(disableCoord);
  for (BooleanClause clause : clauses) {
    combined.add(clause);
  }
  return combined;
}
/**
 * Factory method for generating a query. Called when parser
 * parses an input term token that contains one or more wildcard
 * characters (? and *), but is not a prefix term token (one
 * that has just a single * character at the end)
 *<p>
 * The term is lower-cased when {@code lowercaseExpandedTerms} is set. It
 * does not go through the Analyzer, since normal Analyzers are unlikely to
 * work properly with wildcard templates.
 *<p>
 * Can be overridden by extending classes, to provide custom handling for
 * wildcard queries, which may be necessary due to missing analyzer calls.
 *
 * @param  field   Name of the field query will use.
 * @param  termStr Term token that contains one or more wild card
 *   characters (? or *), but is not simple prefix term
 *
 * @return Resulting {@link Query} built for the term
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getWildcardQuery(String field, String termStr) throws ParseException
{
  final String pattern = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  final WildcardQuery wildcard = new WildcardQuery(new Term(field, pattern));
  wildcard.setRewriteMethod(multiTermRewriteMethod);
  return wildcard;
}
/**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses an input term
* token that uses prefix notation; that is, contains a single '*' wildcard
* character as its last character. Since this is a special case
* of generic wildcard term, and such a query can be optimized easily,
* this usually results in a different query object.
*<p>
* Depending on settings, a prefix term may be lower-cased
* automatically. It will not go through the default Analyzer,
* however, since normal Analyzers are unlikely to work properly
* with wildcard templates.
*<p>
* Can be overridden by extending classes, to provide custom handling for
* wild card queries, which may be necessary due to missing analyzer calls.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
* (<b>without</b> trailing '*' character!)
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
  // termStr arrives without its trailing '*'; it is only case-folded (when enabled),
  // never analyzed.
  // NOTE(review): String.toLowerCase() folds using the JVM default locale.
  final String prefix = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  final PrefixQuery prefixQuery = new PrefixQuery(new Term(field, prefix));
  prefixQuery.setRewriteMethod(multiTermRewriteMethod);
  return prefixQuery;
}
/**
* Factory method for generating a query. Called when parser
* parses an input term token that contains a regular expression
* query.
*<p>
* Depending on settings, pattern term may be lower-cased
* automatically. It will not go through the default Analyzer,
* however, since normal Analyzers are unlikely to work properly
* with regular expression templates.
*<p>
* Can be overridden by extending classes, to provide custom handling for
* regular expression queries, which may be necessary due to missing analyzer
* calls.
*
* @param field Name of the field query will use.
* @param termStr Term token that contains a regular expression
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getRegexpQuery(String field, String termStr) throws ParseException {
  // The expression is only case-folded (when enabled), never analyzed.
  // NOTE(review): String.toLowerCase() folds using the JVM default locale.
  final String expression = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  final RegexpQuery regexpQuery = new RegexpQuery(new Term(field, expression));
  regexpQuery.setRewriteMethod(multiTermRewriteMethod);
  return regexpQuery;
}
/**
* Factory method for generating a query (similar to
* {@link #getWildcardQuery}). Called when parser parses
* an input term token that has the fuzzy suffix (~) appended.
*
* @param field Name of the field query will use.
* @param termStr Term token to use for building term for the query
*
* @return Resulting {@link Query} built for the term
* @exception ParseException throw in overridden method to disallow
*/
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
  // minSimilarity is handed to FuzzyQuery unchanged, together with the parser-wide
  // fuzzyPrefixLength; the term text is only case-folded (when enabled).
  // NOTE(review): String.toLowerCase() folds using the JVM default locale.
  final String text = lowercaseExpandedTerms ? termStr.toLowerCase() : termStr;
  return new FuzzyQuery(new Term(field, text), minSimilarity, fuzzyPrefixLength);
}
/**
 * Returns a String where the escape char (<code>\</code>) has been
 * removed, or kept only once if there was a double escape.
 * <p>
 * The input is scanned left to right: an unescaped backslash is dropped and
 * marks the following character as escaped; an escaped character (including
 * a backslash) is copied literally. A trailing, unpaired backslash is dropped.
 *
 * @param input raw token image, possibly containing escape sequences
 * @return the image with one level of escaping removed
 */
private String discardEscapeChar(String input) {
  final int length = input.length();
  final StringBuilder unescaped = new StringBuilder(length);
  // True while the previous character was a backslash acting as an escape.
  // Tracking this (instead of peeking at input[i-1]) keeps the second half of
  // an escaped backslash from being treated as the start of another escape.
  boolean escapePending = false;
  for (int i = 0; i < length; i++) {
    final char c = input.charAt(i);
    if (c == '\\' && !escapePending) {
      escapePending = true; // drop the escape char itself
    } else {
      unescaped.append(c);
      escapePending = false;
    }
  }
  return unescaped.toString();
}
/**
* Returns a String where those characters that QueryParser
* expects to be escaped are escaped by a preceding <code>\</code>.
*/
public static String escape(String s) {
  // NOTE: keep this in sync with _ESCAPED_CHAR below!
  final String special = "\\+-!():^[]\"{}~*?";
  final StringBuilder escaped = new StringBuilder();
  for (final char c : s.toCharArray()) {
    // Every character the grammar treats as syntax gets a backslash in front.
    if (special.indexOf(c) >= 0) {
      escaped.append('\\');
    }
    escaped.append(c);
  }
  return escaped.toString();
}
/**
* Command line tool to test PrecedenceQueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.precedence.PrecedenceQueryParser &lt;input&gt;</code>
*/
// public static void main(String[] args) throws Exception {
// if (args.length == 0) {
// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
// System.exit(0);
// }
// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
// new org.apache.lucene.analysis.SimpleAnalyzer());
// Query q = qp.parse(args[0]);
// System.out.println(q.toString("field"));
// }
}
PARSER_END(PrecedenceQueryParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
// Private ("#") helper expressions, declared for every lexical state (<*>).
// They are building blocks for the token definitions and are not produced as
// tokens themselves.
<*> TOKEN : {
// a single decimal digit
<#_NUM_CHAR: ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
// a backslash followed by one of the query-syntax characters
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ] >
// first character of a term: anything except whitespace and the listed syntax
// characters, or an escaped syntax character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?" ]
| <_ESCAPED_CHAR> ) >
// subsequent term characters: as above, plus unescaped "-" and "+"
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}
// Whitespace is skipped in the DEFAULT, RangeIn and RangeEx states; the Boost
// state is not listed here.
<DEFAULT, RangeIn, RangeEx> SKIP : {
< <_WHITESPACE>>
}
// OG: to support prefix queries:
// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137
// Change from:
// | <WILDTERM: <_TERM_START_CHAR>
// (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// To:
//
// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// Tokens recognized in the DEFAULT lexical state. Three of them switch the
// lexer to another state: CARAT -> Boost, RANGEIN_START -> RangeIn,
// RANGEEX_START -> RangeEx.
// Declaration order matters: per JavaCC's matching rules, when several
// expressions match the same longest input the one declared first is chosen
// (e.g. "ab*" matches both PREFIXTERM and WILDTERM).
<DEFAULT> TOKEN : {
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
// a boost marker; the number that follows is lexed in the Boost state
| <CARAT: "^" > : Boost
// a non-empty double-quoted string (no embedded double quotes)
| <QUOTED: "\"" (~["\""])+ "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
// "~" optionally followed by an integer or decimal number
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
// a term immediately followed by a single trailing "*"
| <PREFIXTERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
// a term that may contain "*" or "?" after its first character
| <WILDTERM: <_TERM_START_CHAR>
(<_TERM_CHAR> | ( [ "*", "?" ] ))* >
// text delimited by "/"; a "/" inside the expression is written as "\/"
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
// Boost state: entered after "^". The only token is an integer or decimal
// number, after which the lexer returns to DEFAULT.
<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
// RangeIn state: contents of an inclusive range "[ ... ]". The closing "]"
// returns the lexer to DEFAULT.
<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
// any run of characters other than a space or "]"
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}
// RangeEx state: contents of an exclusive range "{ ... }". The closing "}"
// returns the lexer to DEFAULT.
<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
// any run of characters other than a space or "}"
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
// Matches an optional AND / OR token and returns CONJ_AND, CONJ_OR, or
// CONJ_NONE when neither is present.
// NOTE(review): the Query and andExpression productions below match <AND> and
// <OR> directly and do not call this production.
int Conjunction() : {
int ret = CONJ_NONE;
}
{
[
<AND> { ret = CONJ_AND; }
| <OR> { ret = CONJ_OR; }
]
{ return ret; }
}
// Matches an optional clause modifier and returns its code: "+" gives
// MOD_REQ, "-" and NOT both give MOD_NOT, and no modifier gives MOD_NONE.
int Modifier() : {
int ret = MOD_NONE;
}
{
[
<PLUS> { ret = MOD_REQ; }
| <MINUS> { ret = MOD_NOT; }
| <NOT> { ret = MOD_NOT; }
]
{ return ret; }
}
// Lowest-precedence level: a sequence of AND-expressions, each with an
// optional modifier, optionally separated by OR. Because every operand is
// parsed by andExpression() first, AND binds tighter than OR.
// Returns the sole operand itself when there is exactly one clause and it
// carried no modifier; otherwise returns getBooleanQuery(clauses).
Query Query(String field) :
{
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
boolean orPresent = false;
int modifier;
}
{
modifier=Modifier() q=andExpression(field)
{
addClause(clauses, CONJ_NONE, modifier, q);
// firstQuery is only remembered for an unmodified first clause, so a single
// "+a" or "-a" still goes through getBooleanQuery() below
if (modifier == MOD_NONE)
firstQuery = q;
}
(
// NOTE(review): orPresent is never reset to false, so once one OR has been
// seen every later clause is added with CONJ_OR, even without its own OR.
[<OR> { orPresent=true; }] modifier=Modifier() q=andExpression(field)
{ addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); }
)*
{
if (clauses.size() == 1 && firstQuery != null)
return firstQuery;
else {
return getBooleanQuery(clauses);
}
}
}
// Middle precedence level: one Clause followed by zero or more
// "AND [modifier] Clause" continuations. The first clause is added with
// CONJ_NONE / MOD_NONE, later ones with CONJ_AND and their parsed modifier.
// Returns the single clause unchanged when no AND follows it; otherwise
// returns getBooleanQuery(clauses).
Query andExpression(String field) :
{
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int modifier;
}
{
q=Clause(field)
{
addClause(clauses, CONJ_NONE, MOD_NONE, q);
firstQuery = q;
}
(
<AND> modifier=Modifier() q=Clause(field)
{ addClause(clauses, CONJ_AND, modifier, q); }
)*
{
if (clauses.size() == 1 && firstQuery != null)
return firstQuery;
else {
return getBooleanQuery(clauses);
}
}
}
// Highest precedence level: an optional "field:" prefix followed by either a
// single Term or a parenthesized sub-query with an optional "^boost".
// A field prefix replaces the incoming field for everything parsed inside
// this clause.
Query Clause(String field) : {
Query q;
Token fieldToken=null, boost=null;
}
{
[
// two tokens of lookahead distinguish "field:" from a plain TERM
LOOKAHEAD(2)
fieldToken=<TERM> <COLON> {
field=discardEscapeChar(fieldToken.image);
}
]
(
q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
)
{
// boost is only ever set by the parenthesized alternative above
if (boost != null) {
float f = (float)1.0;
try {
f = Float.valueOf(boost.image).floatValue();
q.setBoost(f);
// any failure while parsing or applying the boost is swallowed and the
// query is returned as it is
} catch (Exception ignored) { }
}
return q;
}
}
// Parses one leaf expression and builds its query through the get*Query
// factory methods. Four alternatives are accepted:
//   1. a plain / prefix / wildcard / regexp / number term, with optional
//      fuzzy suffix and boost
//   2. an inclusive range  [a TO b]
//   3. an exclusive range  {a TO b}
//   4. a quoted phrase, with optional slop and boost
// A boost, if present, is applied at the end to whatever query was built.
Query Term(String field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
boolean wildcard = false;
boolean fuzzy = false;
boolean regexp = false;
Query q;
}
{
(
(
term=<TERM>
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
)
// the fuzzy suffix may appear before the boost or after it
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
String termImage=discardEscapeChar(term.image);
// The checks below are exclusive and ordered: wildcard, prefix and regexp
// take priority, so a fuzzy suffix on such a term is ignored.
if (wildcard) {
q = getWildcardQuery(field, termImage);
} else if (prefix) {
// strip the trailing '*' before un-escaping
q = getPrefixQuery(field,
discardEscapeChar(term.image.substring
(0, term.image.length()-1)));
} else if (regexp) {
// strip the surrounding '/' delimiters; the body is passed on without
// going through discardEscapeChar
q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1));
} else if (fuzzy) {
float fms = fuzzyMinSim;
try {
// text after '~'; a bare "~" fails to parse and keeps fuzzyMinSim
fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
} catch (Exception ignored) { }
if(fms < 0.0f){
throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
// values >= 1.0 are accepted only when they are whole numbers
} else if (fms >= 1.0f && fms != (int) fms) {
throw new ParseException("Fractional edit distances are not allowed!");
}
q = getFuzzyQuery(field, termImage, fms);
} else {
q = getFieldQuery(field, termImage, false);
}
}
// inclusive range; the TO keyword between the two bounds is optional
| ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
[ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
<RANGEIN_END> )
[ <CARAT> boost=<NUMBER> ]
{
// quoted bounds lose their quotes; unquoted bounds are un-escaped
if (goop1.kind == RANGEIN_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEIN_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, goop1.image, goop2.image, true);
}
// exclusive range; same handling as above with the RangeEx token kinds
| ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
[ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
<RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{
if (goop1.kind == RANGEEX_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else {
goop1.image = discardEscapeChar(goop1.image);
}
if (goop2.kind == RANGEEX_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else {
goop2.image = discardEscapeChar(goop2.image);
}
q = getRangeQuery(field, goop1.image, goop2.image, false);
}
// quoted phrase; "~n" here is a phrase slop rather than a fuzzy similarity
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
int s = phraseSlop;
if (fuzzySlop != null) {
try {
// the slop is truncated to an int; an unparsable value keeps phraseSlop
s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
}
catch (Exception ignored) { }
}
// the surrounding quotes are stripped before building the field query
q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
}
)
{
if (boost != null) {
float f = (float) 1.0;
try {
f = Float.valueOf(boost.image).floatValue();
}
catch (Exception ignored) {
/* Should this be handled somehow? (defaults to "no boost", if
* boost number is invalid)
*/
}
// avoid boosting null queries, such as those caused by stop words
if (q != null) {
q.setBoost(f);
}
}
return q;
}
}

View File

@ -1,122 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */
package org.apache.lucene.queryParser.precedence;
/**
* Token literal values and constants.
* Generated by org.javacc.parser.OtherFilesGen#start()
*/
public interface PrecedenceQueryParserConstants {

  /** Token kind reported at the end of the input. */
  int EOF = 0;

  /* Kinds of the underscore-prefixed helper expressions. */
  int _NUM_CHAR = 1,
      _ESCAPED_CHAR = 2,
      _TERM_START_CHAR = 3,
      _TERM_CHAR = 4,
      _WHITESPACE = 5;

  // Kind 6 has no named constant; it only shows up as "<token of kind 6>"
  // in tokenImage (presumably the unnamed whitespace SKIP expression).

  /* Boolean operators and clause modifiers. */
  int AND = 7,
      OR = 8,
      NOT = 9,
      PLUS = 10,
      MINUS = 11;

  /* Grouping, field separator and boost marker. */
  int LPAREN = 12,
      RPAREN = 13,
      COLON = 14,
      CARAT = 15;

  /* Term-like tokens and the fuzzy/slop suffix. */
  int QUOTED = 16,
      TERM = 17,
      FUZZY_SLOP = 18,
      PREFIXTERM = 19,
      WILDTERM = 20,
      REGEXPTERM = 21;

  /* Range openers. */
  int RANGEIN_START = 22,
      RANGEEX_START = 23;

  /* Numeric literal. */
  int NUMBER = 24;

  /* Tokens inside an inclusive range. */
  int RANGEIN_TO = 25,
      RANGEIN_END = 26,
      RANGEIN_QUOTED = 27,
      RANGEIN_GOOP = 28;

  /* Tokens inside an exclusive range. */
  int RANGEEX_TO = 29,
      RANGEEX_END = 30,
      RANGEEX_QUOTED = 31,
      RANGEEX_GOOP = 32;

  /* Lexical state identifiers. */
  int Boost = 0,
      RangeEx = 1,
      RangeIn = 2,
      DEFAULT = 3;

  /** Printable image of every token kind, indexed by the kind constants above. */
  String[] tokenImage = {
    "<EOF>", "<_NUM_CHAR>", "<_ESCAPED_CHAR>", "<_TERM_START_CHAR>", "<_TERM_CHAR>",
    "<_WHITESPACE>", "<token of kind 6>",
    "<AND>", "<OR>", "<NOT>", "\"+\"", "\"-\"",
    "\"(\"", "\")\"", "\":\"", "\"^\"",
    "<QUOTED>", "<TERM>", "<FUZZY_SLOP>", "<PREFIXTERM>", "<WILDTERM>", "<REGEXPTERM>",
    "\"[\"", "\"{\"",
    "<NUMBER>",
    "\"TO\"", "\"]\"", "<RANGEIN_QUOTED>", "<RANGEIN_GOOP>",
    "\"TO\"", "\"}\"", "<RANGEEX_QUOTED>", "<RANGEEX_GOOP>",
  };
}

View File

@ -1,124 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
package org.apache.lucene.queryParser.precedence;
/**
* Describes the input token stream.
*/
public class Token {

  /**
   * Kind of this token. The numbering is assigned by the parser generator;
   * the table of kinds lives in the generated ...Constants.java file.
   */
  public int kind;

  /** Line on which the first character of this token appears. */
  public int beginLine;

  /** Column at which the first character of this token appears. */
  public int beginColumn;

  /** Line on which the last character of this token appears. */
  public int endLine;

  /** Column at which the last character of this token appears. */
  public int endColumn;

  /** The matched text of this token. */
  public String image;

  /**
   * For a regular (non-special) token: the next regular token from the
   * input stream, or null if this is the last token or the token manager
   * has not read past it yet. For special tokens see {@link #specialToken}.
   */
  public Token next;

  /**
   * The last special token that occurs before this token and after the
   * preceding regular token, or null if there is none. Earlier special
   * tokens are reached by following each one's own specialToken field until
   * it is null. For special tokens, the next field links to the special
   * token that immediately follows (with no regular token in between), or
   * is null.
   */
  public Token specialToken;

  /** Creates an empty token; all fields keep their default values. */
  public Token() {}

  /** Creates a token of the given kind with a null image. */
  public Token(int kind) {
    this(kind, null);
  }

  /** Creates a token of the given kind and image. */
  public Token(int kind, String image) {
    this.kind = kind;
    this.image = image;
  }

  /**
   * Optional attribute value of this token, which may differ from the
   * image. This implementation always returns null; subclasses that carry a
   * value override it.
   */
  public Object getValue() {
    return null;
  }

  /** Returns the image (which may be null). */
  @Override
  public String toString() {
    return image;
  }

  /**
   * Factory for tokens. Every kind currently yields a plain Token. To hand
   * out subclass instances for particular kinds (for example an IDToken
   * when ofKind is MyParserConstants.ID), branch on ofKind here; the
   * matched token can then be cast to that type and used in lexical
   * actions.
   */
  public static Token newToken(int ofKind, String image) {
    return new Token(ofKind, image);
  }

  /** Same as {@link #newToken(int, String)} with a null image. */
  public static Token newToken(int ofKind) {
    return newToken(ofKind, null);
  }
}
/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */

View File

@ -1,141 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
/* JavaCCOptions: */
package org.apache.lucene.queryParser.precedence;
/** Token Manager Error. */
@SuppressWarnings("serial")
public class TokenMgrError extends Error
{
  /*
   * Reason codes stored in errorCode.
   */

  /** A lexical error occurred. */
  static final int LEXICAL_ERROR = 0;

  /** A second instance of a static token manager was requested. */
  static final int STATIC_LEXER_ERROR = 1;

  /** A switch to an invalid lexical state was attempted. */
  static final int INVALID_LEXICAL_STATE = 2;

  /** An infinite loop in the token manager was detected and aborted. */
  static final int LOOP_DETECTED = 3;

  /** Why this error was thrown; one of the four reason codes above. */
  int errorCode;

  /**
   * Returns a copy of str in which control characters, quotes and the
   * backslash are written as Java-style escape sequences and every other
   * character outside 0x20..0x7e as a four-digit hexadecimal unicode
   * escape. NUL characters are dropped.
   */
  protected static final String addEscapes(String str) {
    final StringBuilder escaped = new StringBuilder();
    for (int i = 0; i < str.length(); i++) {
      final char ch = str.charAt(i);
      switch (ch) {
        case 0:
          // NUL produces no output at all
          break;
        case '\b':
          escaped.append("\\b");
          break;
        case '\t':
          escaped.append("\\t");
          break;
        case '\n':
          escaped.append("\\n");
          break;
        case '\f':
          escaped.append("\\f");
          break;
        case '\r':
          escaped.append("\\r");
          break;
        case '\"':
          escaped.append("\\\"");
          break;
        case '\'':
          escaped.append("\\\'");
          break;
        case '\\':
          escaped.append("\\\\");
          break;
        default:
          if (ch < 0x20 || ch > 0x7e) {
            // left-pad the hex code to exactly four digits
            final String hex = "0000" + Integer.toString(ch, 16);
            escaped.append("\\u").append(hex.substring(hex.length() - 4, hex.length()));
          } else {
            escaped.append(ch);
          }
          break;
      }
    }
    return escaped.toString();
  }

  /**
   * Builds the detail message for a lexical error.
   *
   * @param EOFSeen     whether end of input caused the error
   * @param lexState    lexical state in which the error occurred (not part of the message)
   * @param errorLine   line at which the error occurred
   * @param errorColumn column at which the error occurred
   * @param errorAfter  text seen before the error
   * @param curChar     the offending character (only reported when EOFSeen is false)
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    final String encountered;
    if (EOFSeen) {
      encountered = "<EOF> ";
    } else {
      encountered = "\"" + addEscapes(String.valueOf(curChar)) + "\"" + " (" + (int) curChar + "), ";
    }
    return "Lexical error at line " + errorLine + ", column " + errorColumn
        + ". Encountered: " + encountered
        + "after : \"" + addEscapes(errorAfter) + "\"";
  }

  /**
   * Returns the message of this error. This simply delegates to the
   * superclass; it is the place to substitute a friendlier text for
   * internal conditions such as LOOP_DETECTED or INVALID_LEXICAL_STATE.
   */
  @Override
  public String getMessage() {
    return super.getMessage();
  }

  /** Creates an error with no message and the default reason code. */
  public TokenMgrError() {
  }

  /** Creates an error with the given message and reason code. */
  public TokenMgrError(String message, int reason) {
    super(message);
    errorCode = reason;
  }

  /** Creates an error whose message is built by LexicalError from the given details. */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}
/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */

View File

@ -16,7 +16,24 @@
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
QueryParser designed to handle operator precedence in a more sensible fashion than the default QueryParser.
This package contains the Precedence Query Parser Implementation
<h2>Lucene Precedence Query Parser</h2>
<p>
The Precedence Query Parser extends the Standard Query Parser and enables
boolean operator precedence. So, the query <code>a AND b OR c AND d</code> is parsed to
<code>(+a +b) (+c +d)</code> instead of <code>+a +b +c +d</code>.
</p>
<p>
Check {@link org.apache.lucene.queryParser.standard.StandardQueryParser} for more details about the
supported syntax and query parser functionalities.
</p>
</body>
</html>

View File

@ -0,0 +1,138 @@
package org.apache.lucene.queryParser.precedence.processors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.nodes.AndQueryNode;
import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode;
import org.apache.lucene.queryParser.core.nodes.OrQueryNode;
import org.apache.lucene.queryParser.core.nodes.QueryNode;
import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode.Modifier;
import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
/**
* <p>
* This processor is used to apply the correct {@link ModifierQueryNode} to {@link BooleanQueryNode}s children.
* </p>
* <p>
* It walks through the query node tree looking for {@link BooleanQueryNode}s. If an {@link AndQueryNode} is found,
* every child, which is not a {@link ModifierQueryNode} or the {@link ModifierQueryNode}
* is {@link Modifier#MOD_NONE}, becomes a {@link Modifier#MOD_REQ}. For any other
* {@link BooleanQueryNode} which is not an {@link OrQueryNode}, it checks the default operator is {@link Operator#AND},
* if it is, the same operation when an {@link AndQueryNode} is found is applied to it.
* </p>
*
* @see DefaultOperatorAttribute
* @see PrecedenceQueryParser#setDefaultOperator
*/
public class BooleanModifiersQueryNodeProcessor extends QueryNodeProcessorImpl {

  /** Scratch list reused for every node whose children are rewritten. */
  private final ArrayList<QueryNode> childrenBuffer = new ArrayList<QueryNode>();

  /**
   * Whether the configured default operator is {@link Operator#AND}.
   * Refreshed at the start of every {@link #process(QueryNode)} call.
   */
  private boolean usingAnd = false;

  public BooleanModifiersQueryNodeProcessor() {
    // empty constructor
  }

  /**
   * Reads the default operator from the query configuration and then
   * processes the tree.
   *
   * @throws IllegalArgumentException if no {@link DefaultOperatorAttribute}
   *         is set on the query config handler
   */
  @Override
  public QueryNode process(QueryNode queryTree) throws QueryNodeException {

    if (!getQueryConfigHandler().hasAttribute(DefaultOperatorAttribute.class)) {
      throw new IllegalArgumentException(
          "DefaultOperatorAttribute should be set on the QueryConfigHandler");
    }

    this.usingAnd = Operator.AND == getQueryConfigHandler().getAttribute(
        DefaultOperatorAttribute.class).getOperator();

    return super.process(queryTree);
  }

  /**
   * Marks the children of conjunctive nodes as required: every child is
   * passed through {@link #applyModifier} with {@link Modifier#MOD_REQ} and
   * the node's children are replaced by the results.
   */
  @Override
  protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {

    if (isConjunctive(node)) {
      this.childrenBuffer.clear();

      for (QueryNode child : node.getChildren()) {
        this.childrenBuffer.add(applyModifier(child, Modifier.MOD_REQ));
      }

      node.set(this.childrenBuffer);
    }

    return node;
  }

  /**
   * Tells whether a node's children must all match: either it is an explicit
   * {@link AndQueryNode}, or the default operator is AND and the node is a
   * {@link BooleanQueryNode} other than an {@link OrQueryNode}.
   */
  private boolean isConjunctive(QueryNode node) {
    return node instanceof AndQueryNode
        || (this.usingAnd && node instanceof BooleanQueryNode
            && !(node instanceof OrQueryNode));
  }

  /**
   * Wraps the node in a {@link ModifierQueryNode} carrying mod, unless it
   * already has an explicit modifier. A node already wrapped with
   * {@link Modifier#MOD_NONE} has its child re-wrapped with mod; any other
   * {@link ModifierQueryNode} is returned unchanged.
   */
  private QueryNode applyModifier(QueryNode node, Modifier mod) {

    if (!(node instanceof ModifierQueryNode)) {
      return new ModifierQueryNode(node, mod);
    }

    ModifierQueryNode modNode = (ModifierQueryNode) node;

    if (modNode.getModifier() == Modifier.MOD_NONE) {
      return new ModifierQueryNode(modNode.getChild(), mod);
    }

    return node;
  }

  /** No pre-processing is needed; the node is returned as is. */
  @Override
  protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException {
    return node;
  }

  /** Children order is left untouched. */
  @Override
  protected List<QueryNode> setChildrenOrder(List<QueryNode> children)
      throws QueryNodeException {
    return children;
  }
}

View File

@ -0,0 +1,59 @@
package org.apache.lucene.queryParser.precedence.processors;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.queryParser.standard.processors.GroupQueryNodeProcessor;
import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline;
/**
* <p>
* This processor pipeline extends {@link StandardQueryNodeProcessorPipeline} and enables
* boolean precedence on it.
* </p>
* <p>
* EXPERT: the precedence is enabled by removing {@link GroupQueryNodeProcessor} from the
* {@link StandardQueryNodeProcessorPipeline} and appending {@link BooleanModifiersQueryNodeProcessor}
* to the pipeline.
* </p>
*
* @see PrecedenceQueryParser
* @see StandardQueryNodeProcessorPipeline
*/
public class PrecedenceQueryNodeProcessorPipeline extends StandardQueryNodeProcessorPipeline {

  /**
   * Builds the standard pipeline, drops every processor whose class is
   * exactly {@link GroupQueryNodeProcessor}, and appends a
   * {@link BooleanModifiersQueryNodeProcessor} at the end.
   *
   * @see StandardQueryNodeProcessorPipeline#StandardQueryNodeProcessorPipeline(QueryConfigHandler)
   */
  public PrecedenceQueryNodeProcessorPipeline(QueryConfigHandler queryConfig) {
    super(queryConfig);

    int position = 0;
    while (position < size()) {
      if (get(position).getClass().equals(GroupQueryNodeProcessor.class)) {
        // the next processor slides into this slot, so stay on the same index
        remove(position);
      } else {
        position++;
      }
    }

    add(new BooleanModifiersQueryNodeProcessor());
  }
}

View File

@ -0,0 +1,47 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
This package contains the processors used by Precedence Query Parser
<h2>Lucene Precedence Query Parser Processors</h2>
<p>
This package contains the 2 {@link org.apache.lucene.queryParser.core.processors.QueryNodeProcessor}s used by
{@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}.
</p>
<p>
{@link org.apache.lucene.queryParser.precedence.processors.BooleanModifiersQueryNodeProcessor}: this processor
is used to apply {@link org.apache.lucene.queryParser.core.nodes.ModifierQueryNode}s on
{@link org.apache.lucene.queryParser.core.nodes.BooleanQueryNode} children according to the boolean type
or the default operator.
</p>
<p>
{@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}: this
processor pipeline is used by {@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}. It extends
{@link org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline} and rearranges
the pipeline so the boolean precedence is processed correctly. Check {@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}
for more details.
</p>
</body>
</html>

View File

@ -17,67 +17,82 @@ package org.apache.lucene.queryParser.precedence;
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.queryParser.TestQueryParser;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.QueryNodeParseException;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
import org.apache.lucene.queryParser.standard.parser.ParseException;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import java.io.IOException;
import java.io.Reader;
import java.text.DateFormat;
import java.util.Calendar;
import java.util.GregorianCalendar;
/**
* <p>
* This test case tests {@link PrecedenceQueryParser}.
* </p>
* <p>
* It contains all tests from {@link TestQueryParser} with some adjusted to
* fit the precedence requirement, plus some precedence test cases.
* </p>
*
* @see TestQueryParser
*/
public class TestPrecedenceQueryParser extends LuceneTestCase {
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
public static final class QPTestFilter extends TokenFilter {
/**
* Filter which discards the token 'stop' and which expands the token
* 'phrase' into 'phrase1 phrase2'
*/
public QPTestFilter(TokenStream in) {
super(in);
}
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() throws IOException {
clearAttributes();
if (inPhrase) {
inPhrase = false;
termAtt.setEmpty().append("phrase2");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else
while(input.incrementToken())
while (input.incrementToken())
if (termAtt.toString().equals("phrase")) {
inPhrase = true;
savedStart = offsetAtt.startOffset();
@ -94,31 +109,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
/**
 * Analyzer used by these tests: the reader is tokenized by a
 * {@link MockTokenizer} and the resulting stream is wrapped in a
 * {@link QPTestFilter}.
 */
public static final class QPTestAnalyzer extends Analyzer {

  /** Builds the MockTokenizer first, then hands it to QPTestFilter. */
  @Override
  public final TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream source = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
    return new QPTestFilter(source);
  }
}
/**
 * Parser subclass whose fuzzy and wildcard query factories always throw a
 * {@link ParseException}; used to check that a subclass can reject those
 * query types.
 */
public static class QPTestParser extends PrecedenceQueryParser {

  public QPTestParser(String f, Analyzer a) {
    super(f, a);
  }

  /** Always fails: fuzzy queries are rejected by this parser. */
  @Override
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
    final String reason = "Fuzzy queries not allowed";
    throw new ParseException(reason);
  }

  /** Always fails: wildcard queries are rejected by this parser. */
  @Override
  protected Query getWildcardQuery(String field, String termStr) throws ParseException {
    final String reason = "Wildcard queries not allowed";
    throw new ParseException(reason);
  }
}
private int originalMaxClauses;
@Override
public void setUp() throws Exception {
super.setUp();
originalMaxClauses = BooleanQuery.getMaxClauseCount();
@ -127,40 +124,31 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
/**
 * Creates a parser that uses the given analyzer and OR as its default
 * operator.
 *
 * @param a analyzer to use; when {@code null} a
 *          {@code MockAnalyzer(MockTokenizer.SIMPLE, true)} is used instead
 * @return the configured parser
 */
public PrecedenceQueryParser getParser(Analyzer a) throws Exception {
  if (a == null) {
    a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
  }
  // Only one parser instance is created; the stale two-argument constructor
  // and OR_OPERATOR lines declared 'qp' a second time.
  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(a);
  qp.setDefaultOperator(Operator.OR);
  return qp;
}
/**
 * Parses {@code query} against the default field "field" with a parser
 * obtained from {@link #getParser(Analyzer)}.
 *
 * @param query query text to parse
 * @param a     analyzer to use, or {@code null} for getParser's default
 * @return the parsed query
 */
public Query getQuery(String query, Analyzer a) throws Exception {
  // Single return: the former one-argument parse(query) call made the
  // following statement unreachable.
  return getParser(a).parse(query, "field");
}
public void assertQueryEquals(String query, Analyzer a, String result)
throws Exception {
throws Exception {
Query q = getQuery(query, a);
String s = q.toString("field");
if (!s.equals(result)) {
fail("Query /" + query + "/ yielded /" + s
+ "/, expecting /" + result + "/");
fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result
+ "/");
}
}
public void assertWildcardQueryEquals(String query, boolean lowercase, String result)
throws Exception {
public void assertWildcardQueryEquals(String query, boolean lowercase,
String result) throws Exception {
PrecedenceQueryParser qp = getParser(null);
qp.setLowercaseExpandedTerms(lowercase);
Query q = qp.parse(query);
String s = q.toString("field");
if (!s.equals(result)) {
fail("WildcardQuery /" + query + "/ yielded /" + s
+ "/, expecting /" + result + "/");
}
}
public void assertWildcardQueryEquals(String query, String result) throws Exception {
PrecedenceQueryParser qp = getParser(null);
Query q = qp.parse(query);
Query q = qp.parse(query, "field");
String s = q.toString("field");
if (!s.equals(result)) {
fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /"
@ -168,36 +156,41 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
}
}
public Query getQueryDOA(String query, Analyzer a)
throws Exception {
if (a == null)
a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
return qp.parse(query);
}
public void assertQueryEqualsDOA(String query, Analyzer a, String result)
throws Exception {
Query q = getQueryDOA(query, a);
public void assertWildcardQueryEquals(String query, String result)
throws Exception {
PrecedenceQueryParser qp = getParser(null);
Query q = qp.parse(query, "field");
String s = q.toString("field");
if (!s.equals(result)) {
fail("Query /" + query + "/ yielded /" + s
+ "/, expecting /" + result + "/");
fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /"
+ result + "/");
}
}
// failing tests disabled since PrecedenceQueryParser
// is currently unmaintained
public void _testSimple() throws Exception {
assertQueryEquals("", null, "");
/**
 * Parses {@code query} against "field" with AND as the default operator
 * ("DOA" = default operator AND).
 *
 * @param query query text to parse
 * @param a     analyzer to use; {@code null} selects
 *              {@code MockAnalyzer(MockTokenizer.SIMPLE, true)}
 * @return the parsed query
 */
public Query getQueryDOA(String query, Analyzer a) throws Exception {
  final Analyzer analyzer =
      (a != null) ? a : new MockAnalyzer(MockTokenizer.SIMPLE, true);
  final PrecedenceQueryParser parser = new PrecedenceQueryParser();
  parser.setAnalyzer(analyzer);
  parser.setDefaultOperator(Operator.AND);
  return parser.parse(query, "field");
}
/**
 * Same check as assertQueryEquals, but the query is parsed through
 * {@link #getQueryDOA(String, Analyzer)} (default operator AND).
 *
 * @param query  query text to parse
 * @param a      analyzer to use, or {@code null} for the default
 * @param result expected {@code toString("field")} of the parsed query
 */
public void assertQueryEqualsDOA(String query, Analyzer a, String result)
    throws Exception {
  final String actual = getQueryDOA(query, a).toString("field");
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
public void testSimple() throws Exception {
assertQueryEquals("term term term", null, "term term term");
assertQueryEquals("türm term term", null, "türm term term");
assertQueryEquals("ümlaut", null, "ümlaut");
assertQueryEquals("+a", null, "+a");
assertQueryEquals("-a", null, "-a");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");
assertQueryEquals("c OR (a AND b)", null, "c (+a +b)");
@ -212,9 +205,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEquals("+term -term term", null, "+term -term term");
assertQueryEquals("foo:term AND field:anotherTerm", null,
"+foo:term +anotherterm");
"+foo:term +anotherterm");
assertQueryEquals("term AND \"phrase phrase\"", null,
"+term +\"phrase phrase\"");
"+term +\"phrase phrase\"");
assertQueryEquals("\"hello there\"", null, "\"hello there\"");
assertTrue(getQuery("a AND b", null) instanceof BooleanQuery);
assertTrue(getQuery("hello", null) instanceof TermQuery);
@ -229,25 +222,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0");
assertQueryEquals("(foo OR bar) AND (baz OR boo)", null,
"+(foo bar) +(baz boo)");
assertQueryEquals("((a OR b) AND NOT c) OR d", null,
"(+(a b) -c) d");
"+(foo bar) +(baz boo)");
assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d");
assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null,
"+(apple \"steve jobs\") -(foo bar baz)");
"+(apple \"steve jobs\") -(foo bar baz)");
assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
"+(title:dog title:cat) -author:\"bob dole\"");
PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer());
// make sure OR is the default:
assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
assertEquals(PrecedenceQueryParser.AND_OPERATOR, qp.getDefaultOperator());
qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator());
"+(title:dog title:cat) -author:\"bob dole\"");
assertQueryEquals("a OR !b", null, "a (-b)");
assertQueryEquals("a OR ! b", null, "a (-b)");
assertQueryEquals("a OR -b", null, "a (-b)");
PrecedenceQueryParser qp = new PrecedenceQueryParser();
qp.setAnalyzer(new MockAnalyzer());
// make sure OR is the default:
assertEquals(Operator.OR, qp.getDefaultOperator());
qp.setDefaultOperator(Operator.AND);
assertEquals(Operator.AND, qp.getDefaultOperator());
qp.setDefaultOperator(Operator.OR);
assertEquals(Operator.OR, qp.getDefaultOperator());
assertQueryEquals("a OR !b", null, "a -b");
assertQueryEquals("a OR ! b", null, "a -b");
assertQueryEquals("a OR -b", null, "a -b");
}
public void testPunct() throws Exception {
@ -266,110 +259,24 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
}
/**
 * Checks how numeric tokens are treated: dropped with the default (null)
 * analyzer, kept with a plain {@code MockAnalyzer()}.
 */
public void testNumber() throws Exception {
  // With a null analyzer getParser falls back to
  // MockAnalyzer(MockTokenizer.SIMPLE, true); the expectations below show
  // the numbers being dropped.
  assertQueryEquals("3", null, "");
  assertQueryEquals("term 1.0 1 2", null, "term");
  assertQueryEquals("term term1 term2", null, "term term term");

  // 'a' is declared once; the stale MockAnalyzer(WHITESPACE, true) line
  // declared the same local a second time.
  Analyzer a = new MockAnalyzer();
  assertQueryEquals("3", a, "3");
  assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2");
  assertQueryEquals("term term1 term2", a, "term term1 term2");
}
/**
 * Tokenizer that turns every char read from the input into its own
 * one-character term (individual CJK chars as terms, like StandardAnalyzer).
 */
private class SimpleCJKTokenizer extends Tokenizer {
  private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public SimpleCJKTokenizer(Reader input) {
    super(input);
  }

  /** Emits the next char as a term; returns false once the reader is exhausted. */
  @Override
  public boolean incrementToken() throws IOException {
    final int next = input.read();
    if (next >= 0) {
      clearAttributes();
      termAtt.setEmpty().append((char) next);
      return true;
    }
    return false;
  }
}
/** Analyzer whose token stream is a bare {@link SimpleCJKTokenizer}. */
private class SimpleCJKAnalyzer extends Analyzer {

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream perCharStream = new SimpleCJKTokenizer(reader);
    return perCharStream;
  }
}
/**
 * An unquoted two-character CJK string is expected to parse to a
 * BooleanQuery with one SHOULD term clause per character.
 */
public void testCJKTerm() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

  // The expected terms are the two characters of the query string
  // (U+4E2D, U+56FD). They were empty strings before, which can never match
  // what SimpleCJKTokenizer emits. Unicode escapes keep them safe from
  // file-encoding damage.
  BooleanQuery expected = new BooleanQuery();
  expected.add(new TermQuery(new Term("field", "\u4e2d")), BooleanClause.Occur.SHOULD);
  expected.add(new TermQuery(new Term("field", "\u56fd")), BooleanClause.Occur.SHOULD);

  assertEquals(expected, getQuery("中国", analyzer));
}
/**
 * Same as testCJKTerm, with a boost of 0.5 applied to the whole
 * BooleanQuery.
 */
public void testCJKBoostedTerm() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

  // Expected terms restored to the two query characters (U+4E2D, U+56FD);
  // they were empty strings before.
  BooleanQuery expected = new BooleanQuery();
  expected.setBoost(0.5f);
  expected.add(new TermQuery(new Term("field", "\u4e2d")), BooleanClause.Occur.SHOULD);
  expected.add(new TermQuery(new Term("field", "\u56fd")), BooleanClause.Occur.SHOULD);

  assertEquals(expected, getQuery("中国^0.5", analyzer));
}
/**
 * A quoted two-character CJK string is expected to parse to a PhraseQuery
 * with one term per character.
 */
public void testCJKPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

  // Expected terms restored to the two query characters (U+4E2D, U+56FD);
  // they were empty strings before.
  PhraseQuery expected = new PhraseQuery();
  expected.add(new Term("field", "\u4e2d"));
  expected.add(new Term("field", "\u56fd"));

  assertEquals(expected, getQuery("\"中国\"", analyzer));
}
/** Same as testCJKPhrase, with a boost of 0.5 on the PhraseQuery. */
public void testCJKBoostedPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

  // Expected terms restored to the two query characters (U+4E2D, U+56FD);
  // they were empty strings before.
  PhraseQuery expected = new PhraseQuery();
  expected.setBoost(0.5f);
  expected.add(new Term("field", "\u4e2d"));
  expected.add(new Term("field", "\u56fd"));

  assertEquals(expected, getQuery("\"中国\"^0.5", analyzer));
}
/** Same as testCJKPhrase, with a slop of 3 on the PhraseQuery. */
public void testCJKSloppyPhrase() throws Exception {
  // individual CJK chars as terms
  SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer();

  // Expected terms restored to the two query characters (U+4E2D, U+56FD);
  // they were empty strings before.
  PhraseQuery expected = new PhraseQuery();
  expected.setSlop(3);
  expected.add(new Term("field", "\u4e2d"));
  expected.add(new Term("field", "\u56fd"));

  assertEquals(expected, getQuery("\"中国\"~3", analyzer));
}
// failing tests disabled since PrecedenceQueryParser
// is currently unmaintained
public void _testWildcard() throws Exception {
public void testWildcard() throws Exception {
assertQueryEquals("term*", null, "term*");
assertQueryEquals("term*^2", null, "term*^2.0");
assertQueryEquals("term~", null, "term~0.5");
assertQueryEquals("term~", null, "term~2.0");
assertQueryEquals("term~0.7", null, "term~0.7");
assertQueryEquals("term~^2", null, "term^2.0~0.5");
assertQueryEquals("term^2~", null, "term^2.0~0.5");
assertQueryEquals("term~^3", null, "term~2.0^3.0");
assertQueryEquals("term^3~", null, "term~2.0^3.0");
assertQueryEquals("term*germ", null, "term*germ");
assertQueryEquals("term*germ^3", null, "term*germ^3.0");
@ -377,24 +284,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertTrue(getQuery("term*^2", null) instanceof PrefixQuery);
assertTrue(getQuery("term~", null) instanceof FuzzyQuery);
assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery);
FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null);
FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null);
assertEquals(0.7f, fq.getMinSimilarity(), 0.1f);
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
fq = (FuzzyQuery)getQuery("term~", null);
assertEquals(0.5f, fq.getMinSimilarity(), 0.1f);
fq = (FuzzyQuery) getQuery("term~", null);
assertEquals(2.0f, fq.getMinSimilarity(), 0.1f);
assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength());
try {
getQuery("term~1.1", null); // value > 1, throws exception
getQuery("term~1.1", null); // value > 1, throws exception
fail();
} catch(ParseException pe) {
} catch (ParseException pe) {
// expected exception
}
assertTrue(getQuery("term*germ", null) instanceof WildcardQuery);
/*
 * Tests to see that wild card terms are (or are not) properly lower-cased
 * with proper parser configuration
 */
// First prefix queries:
// by default, convert to lowercase:
assertWildcardQueryEquals("Term*", true, "term*");
// explicitly set lowercase:
@ -405,7 +313,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertWildcardQueryEquals("term*", false, "term*");
assertWildcardQueryEquals("Term*", false, "Term*");
assertWildcardQueryEquals("TERM*", false, "TERM*");
// Then 'full' wildcard queries:
// Then 'full' wildcard queries:
// by default, convert to lowercase:
assertWildcardQueryEquals("Te?m", "te?m");
// explicitly set lowercase:
@ -418,11 +326,11 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertWildcardQueryEquals("Te?m", false, "Te?m");
assertWildcardQueryEquals("TE?M", false, "TE?M");
assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM");
// Fuzzy queries:
assertWildcardQueryEquals("Term~", "term~0.5");
assertWildcardQueryEquals("Term~", true, "term~0.5");
assertWildcardQueryEquals("Term~", false, "Term~0.5");
// Range queries:
// Fuzzy queries:
assertWildcardQueryEquals("Term~", "term~2.0");
assertWildcardQueryEquals("Term~", true, "term~2.0");
assertWildcardQueryEquals("Term~", false, "Term~2.0");
// Range queries:
assertWildcardQueryEquals("[A TO C]", "[a TO c]");
assertWildcardQueryEquals("[A TO C]", true, "[a TO c]");
assertWildcardQueryEquals("[A TO C]", false, "[A TO C]");
@ -434,11 +342,11 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEquals("term -stop term", qpAnalyzer, "term term");
assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
assertQueryEquals("term phrase term", qpAnalyzer,
"term (phrase1 phrase2) term");
"term (phrase1 phrase2) term");
// note the parens in this next assertion differ from the original
// QueryParser behavior
assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
"(+term -(phrase1 phrase2)) term");
"(+term -(phrase1 phrase2)) term");
assertQueryEquals("stop", qpAnalyzer, "");
assertQueryEquals("stop OR stop AND stop", qpAnalyzer, "");
assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery);
@ -455,9 +363,10 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar");
assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar");
assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}");
assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})");
assertQueryEquals("gack ( bar blar { a TO z}) ", null,
"gack (bar blar {a TO z})");
}
private String escapeDateString(String s) {
if (s.contains(" ")) {
return "\"" + s + "\"";
@ -471,53 +380,106 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY);
}
public String getLocalizedDate(int year, int month, int day) {
private String getLocalizedDate(int year, int month, int day,
boolean extendLastDate) {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
Calendar calendar = new GregorianCalendar();
calendar.clear();
calendar.set(year, month, day);
calendar.set(Calendar.HOUR_OF_DAY, 23);
calendar.set(Calendar.MINUTE, 59);
calendar.set(Calendar.SECOND, 59);
calendar.set(Calendar.MILLISECOND, 999);
if (extendLastDate) {
calendar.set(Calendar.HOUR_OF_DAY, 23);
calendar.set(Calendar.MINUTE, 59);
calendar.set(Calendar.SECOND, 59);
calendar.set(Calendar.MILLISECOND, 999);
}
return df.format(calendar.getTime());
}
/**
 * Checks date range queries with no date resolution configured, with a
 * default resolution, and with per-field resolutions.
 */
public void testDateRange() throws Exception {
  // startDate/endDate are declared once, through the four-argument helper;
  // the stale three-argument calls and their two assertions declared the
  // same locals a second time.
  String startDate = getLocalizedDate(2002, 1, 1, false);
  String endDate = getLocalizedDate(2002, 1, 4, false);

  // Inclusive upper bound: the end date moved to 23:59:59.999.
  Calendar endDateExpected = new GregorianCalendar();
  endDateExpected.set(2002, 1, 4, 23, 59, 59);
  endDateExpected.set(Calendar.MILLISECOND, 999);

  final String defaultField = "default";
  final String monthField = "month";
  final String hourField = "hour";

  PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer());

  // Don't set any date resolution and verify if DateField is used
  assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
      endDateExpected.getTime(), null);

  Map<CharSequence, DateTools.Resolution> fieldMap = new HashMap<CharSequence, DateTools.Resolution>();
  // set a field specific date resolution
  fieldMap.put(monthField, DateTools.Resolution.MONTH);
  qp.setDateResolution(fieldMap);

  // DateField should still be used for defaultField
  assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
      endDateExpected.getTime(), null);

  // set default date resolution to MILLISECOND
  qp.setDateResolution(DateTools.Resolution.MILLISECOND);

  // set second field specific date resolution
  fieldMap.put(hourField, DateTools.Resolution.HOUR);
  qp.setDateResolution(fieldMap);

  // for this field no field specific date resolution has been set,
  // so verify if the default resolution is used
  assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
      endDateExpected.getTime(), DateTools.Resolution.MILLISECOND);

  // verify if field specific date resolutions are used for these two fields
  assertDateRangeQueryEquals(qp, monthField, startDate, endDate,
      endDateExpected.getTime(), DateTools.Resolution.MONTH);
  assertDateRangeQueryEquals(qp, hourField, startDate, endDate,
      endDateExpected.getTime(), DateTools.Resolution.HOUR);
}
/**
 * Parses {@code s} with the default locale's SHORT date format and converts
 * the result with {@code getDate(Date, DateTools.Resolution)}.
 */
private String getDate(String s, DateTools.Resolution resolution)
    throws Exception {
  return getDate(DateFormat.getDateInstance(DateFormat.SHORT).parse(s), resolution);
}
/**
 * Converts a date to its indexed string form: through {@link DateTools} at
 * the given resolution, or through {@link DateField} when no resolution is
 * given.
 */
private String getDate(Date d, DateTools.Resolution resolution)
    throws Exception {
  return (resolution != null)
      ? DateTools.dateToString(d, resolution)
      : DateField.dateToString(d);
}
/**
 * Parses {@code query} with the supplied parser against {@code field} and
 * fails the test unless the query's string form (relative to that field)
 * equals {@code result}.
 */
public void assertQueryEquals(PrecedenceQueryParser qp, String field, String query,
    String result) throws Exception {
  final String actual = qp.parse(query, field).toString(field);
  if (actual.equals(result)) {
    return;
  }
  fail("Query /" + query + "/ yielded /" + actual + "/, expecting /" + result
      + "/");
}
/**
 * Checks an inclusive ({@code [a TO b]}) and an exclusive ({@code {a TO b}})
 * date range query on {@code field}.
 *
 * For the inclusive form the expected upper bound is derived from
 * {@code endDateInclusive}; for the exclusive form it is derived from
 * {@code endDate}. Both bounds are rendered through getDate at the given
 * resolution.
 */
public void assertDateRangeQueryEquals(PrecedenceQueryParser qp, String field,
    String startDate, String endDate, Date endDateInclusive,
    DateTools.Resolution resolution) throws Exception {
  final String range = escapeDateString(startDate) + " TO " + escapeDateString(endDate);
  final String lowerBound = getDate(startDate, resolution);

  final String inclusiveExpected =
      "[" + lowerBound + " TO " + getDate(endDateInclusive, resolution) + "]";
  assertQueryEquals(qp, field, field + ":[" + range + "]", inclusiveExpected);

  final String exclusiveExpected =
      "{" + lowerBound + " TO " + getDate(endDate, resolution) + "}";
  assertQueryEquals(qp, field, field + ":{" + range + "}", exclusiveExpected);
}
public void testEscaped() throws Exception {
Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
/*assertQueryEquals("\\[brackets", a, "\\[brackets");
assertQueryEquals("\\[brackets", null, "brackets");
assertQueryEquals("\\\\", a, "\\\\");
assertQueryEquals("\\+blah", a, "\\+blah");
assertQueryEquals("\\(blah", a, "\\(blah");
assertQueryEquals("\\-blah", a, "\\-blah");
assertQueryEquals("\\!blah", a, "\\!blah");
assertQueryEquals("\\{blah", a, "\\{blah");
assertQueryEquals("\\}blah", a, "\\}blah");
assertQueryEquals("\\:blah", a, "\\:blah");
assertQueryEquals("\\^blah", a, "\\^blah");
assertQueryEquals("\\[blah", a, "\\[blah");
assertQueryEquals("\\]blah", a, "\\]blah");
assertQueryEquals("\\\"blah", a, "\\\"blah");
assertQueryEquals("\\(blah", a, "\\(blah");
assertQueryEquals("\\)blah", a, "\\)blah");
assertQueryEquals("\\~blah", a, "\\~blah");
assertQueryEquals("\\*blah", a, "\\*blah");
assertQueryEquals("\\?blah", a, "\\?blah");
//assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar");
//assertQueryEquals("foo \\|| bar", a, "foo \\|| bar");
//assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/
assertQueryEquals("a\\-b:c", a, "a-b:c");
assertQueryEquals("a\\+b:c", a, "a+b:c");
@ -551,44 +513,29 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]");
}
public void testTabNewlineCarriageReturn()
throws Exception {
assertQueryEqualsDOA("+weltbank +worlbank", null,
"+weltbank +worlbank");
public void testTabNewlineCarriageReturn() throws Exception {
assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\n+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \n+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \n +worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\n+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \n+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \n +worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\r+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r +worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\r+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r +worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\r\n+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r\n+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r\n +worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r\n+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r\n +worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \r \n +worlbank", null,
"+weltbank +worlbank");
"+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\t+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \t+worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \t +worlbank", null,
"+weltbank +worlbank");
assertQueryEqualsDOA("+weltbank\t+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \t+worlbank", null, "+weltbank +worlbank");
assertQueryEqualsDOA("weltbank \t +worlbank", null, "+weltbank +worlbank");
}
public void testSimpleDAO()
throws Exception {
public void testSimpleDAO() throws Exception {
assertQueryEqualsDOA("term term term", null, "+term +term +term");
assertQueryEqualsDOA("term +term term", null, "+term +term +term");
assertQueryEqualsDOA("term term +term", null, "+term +term +term");
@ -596,23 +543,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
assertQueryEqualsDOA("-term term term", null, "-term +term +term");
}
public void testBoost()
throws Exception {
public void testBoost() throws Exception {
CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on"));
Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true);
PrecedenceQueryParser qp = new PrecedenceQueryParser("field", oneStopAnalyzer);
Query q = qp.parse("on^1.0");
PrecedenceQueryParser qp = new PrecedenceQueryParser();
qp.setAnalyzer(oneStopAnalyzer);
Query q = qp.parse("on^1.0", "field");
assertNotNull(q);
q = qp.parse("\"hello\"^2.0");
q = qp.parse("\"hello\"^2.0", "field");
assertNotNull(q);
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("hello^2.0");
q = qp.parse("hello^2.0", "field");
assertNotNull(q);
assertEquals(q.getBoost(), (float) 2.0, (float) 0.5);
q = qp.parse("\"on\"^1.0");
q = qp.parse("\"on\"^1.0", "field");
assertNotNull(q);
q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3");
q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3",
"field");
assertNotNull(q);
}
@ -620,105 +569,75 @@ public class TestPrecedenceQueryParser extends LuceneTestCase {
try {
assertQueryEquals("\"some phrase", null, "abc");
fail("ParseException expected, not thrown");
} catch (ParseException expected) {
} catch (QueryNodeParseException expected) {
}
}
/**
 * A QPTestParser must reject a wildcard query with a ParseException; the
 * test fails if parsing succeeds.
 */
public void testCustomQueryParserWildcard() {
  final Analyzer whitespace = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
  try {
    new QPTestParser("contents", whitespace).parse("a?t");
    fail("Wildcard queries should not be allowed");
  } catch (ParseException expected) {
    // expected: the parser refuses wildcard queries
  }
}
/**
 * A QPTestParser must reject a fuzzy query with a ParseException; the test
 * fails if parsing succeeds.
 */
public void testCustomQueryParserFuzzy() throws Exception {
  final Analyzer whitespace = new MockAnalyzer(MockTokenizer.WHITESPACE, false);
  try {
    new QPTestParser("contents", whitespace).parse("xunit~");
    fail("Fuzzy queries should not be allowed");
  } catch (ParseException expected) {
    // expected: the parser refuses fuzzy queries
  }
}
/**
 * With the maximum clause count lowered to 2, parsing a three-term query
 * must fail with a QueryNodeException.
 */
public void testBooleanQuery() throws Exception {
  BooleanQuery.setMaxClauseCount(2);
  try {
    // Single parse call and single catch clause; the stale one-argument
    // call and the second catch header are gone.
    getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three", "field");
    fail("ParseException expected due to too many boolean clauses");
  } catch (QueryNodeException expected) {
    // too many boolean clauses, so QueryNodeException is expected
  }
}
/**
* This test differs from the original QueryParser, showing how the
* precedence issue has been corrected.
* This test differs from the original QueryParser, showing how the precedence
* issue has been corrected.
*/
// failing tests disabled since PrecedenceQueryParser
// is currently unmaintained
public void _testPrecedence() throws Exception {
public void testPrecedence() throws Exception {
PrecedenceQueryParser parser = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
Query query1 = parser.parse("A AND B OR C AND D");
Query query2 = parser.parse("(A AND B) OR (C AND D)");
Query query1 = parser.parse("A AND B OR C AND D", "field");
Query query2 = parser.parse("(A AND B) OR (C AND D)", "field");
assertEquals(query1, query2);
query1 = parser.parse("A OR B C");
query2 = parser.parse("A B C");
query1 = parser.parse("A OR B C", "field");
query2 = parser.parse("(A B) C", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND B C");
query2 = parser.parse("(+A +B) C");
query1 = parser.parse("A AND B C", "field");
query2 = parser.parse("(+A +B) C", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND NOT B");
query2 = parser.parse("+A -B");
query1 = parser.parse("A AND NOT B", "field");
query2 = parser.parse("+A -B", "field");
assertEquals(query1, query2);
query1 = parser.parse("A OR NOT B");
query2 = parser.parse("A -B");
query1 = parser.parse("A OR NOT B", "field");
query2 = parser.parse("A -B", "field");
assertEquals(query1, query2);
query1 = parser.parse("A OR NOT B AND C");
query2 = parser.parse("A (-B +C)");
query1 = parser.parse("A OR NOT B AND C", "field");
query2 = parser.parse("A (-B +C)", "field");
assertEquals(query1, query2);
}
public void testRegexps() throws Exception {
PrecedenceQueryParser qp = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]"));
assertEquals(q, qp.parse("/[a-z][123]/"));
qp.setLowercaseExpandedTerms(true);
assertEquals(q, qp.parse("/[A-Z][123]/"));
q.setBoost(0.5f);
assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery);
assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod());
assertEquals(q, qp.parse("/[A-Z][123]/^0.5"));
qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
assertEquals(escaped, qp.parse("/[a-z]\\/[123]/"));
Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]"));
assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/"));
parser.setDefaultOperator(Operator.AND);
query1 = parser.parse("A AND B OR C AND D", "field");
query2 = parser.parse("(A AND B) OR (C AND D)", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND B C", "field");
query2 = parser.parse("(A B) C", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND B C", "field");
query2 = parser.parse("(+A +B) C", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND NOT B", "field");
query2 = parser.parse("+A -B", "field");
assertEquals(query1, query2);
query1 = parser.parse("A AND NOT B OR C", "field");
query2 = parser.parse("(+A -B) OR C", "field");
assertEquals(query1, query2);
BooleanQuery complex = new BooleanQuery();
BooleanQuery inner = new BooleanQuery();
inner.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST);
inner.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST);
complex.add(inner, Occur.SHOULD);
complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD);
assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ "));
}
@Override
public void tearDown() throws Exception {
public void tearDown() {
BooleanQuery.setMaxClauseCount(originalMaxClauses);
super.tearDown();
}
}