From 405cb198a0202dfb688c83d5b4ec6d8d078c18ee Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 20 Oct 2010 14:26:30 +0000 Subject: [PATCH] LUCENE-1938: Precedence query parser using the contrib/queryparser framework git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1025597 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/contrib/CHANGES.txt | 5 + lucene/contrib/queryparser/build.xml | 15 +- .../queryParser/precedence/CharStream.java | 112 -- .../precedence/FastCharStream.java | 123 -- .../precedence/ParseException.java | 198 --- .../precedence/PrecedenceQueryParser.java | 1405 +---------------- .../precedence/PrecedenceQueryParser.jj | 982 ------------ .../PrecedenceQueryParserConstants.java | 122 -- .../PrecedenceQueryParserTokenManager.java | 1110 ------------- .../lucene/queryParser/precedence/Token.java | 124 -- .../queryParser/precedence/TokenMgrError.java | 141 -- .../queryParser/precedence/package.html | 19 +- .../BooleanModifiersQueryNodeProcessor.java | 138 ++ .../PrecedenceQueryNodeProcessorPipeline.java | 59 + .../precedence/processors/package.html | 47 + .../precedence/TestPrecedenceQueryParser.java | 619 ++++---- 16 files changed, 577 insertions(+), 4642 deletions(-) delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/CharStream.java delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java delete mode 100644 
lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java delete mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java create mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/BooleanModifiersQueryNodeProcessor.java create mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/PrecedenceQueryNodeProcessorPipeline.java create mode 100644 lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/package.html diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt index f001c73cf04..a15d19e4a9c 100644 --- a/lucene/contrib/CHANGES.txt +++ b/lucene/contrib/CHANGES.txt @@ -248,6 +248,11 @@ New features * LUCENE-2624: Add Analyzers for Armenian, Basque, and Catalan, from snowball. (Robert Muir) + + * LUCENE-1938: PrecedenceQueryParser is now implemented with the flexible QP framework. + This means that you can also add this functionality to your own QP pipeline by using + BooleanModifiersQueryNodeProcessor, for example instead of GroupQueryNodeProcessor. + (Adriano Crestani via Robert Muir) Build diff --git a/lucene/contrib/queryparser/build.xml b/lucene/contrib/queryparser/build.xml index 44e74a2a2b9..98eb8625147 100644 --- a/lucene/contrib/queryparser/build.xml +++ b/lucene/contrib/queryparser/build.xml @@ -30,7 +30,7 @@ - + @@ -94,19 +94,6 @@ import org.apache.lucene.queryParser.core.messages.*;" flags="g" byline="true"/> - - - - - - - - - - - Note that - * this does not do line-number counting, but instead keeps track of the - * character position of the token in the input, as required by Lucene's {@link - * org.apache.lucene.analysis.Token} API. 
*/ -public final class FastCharStream implements CharStream { - char[] buffer = null; - - int bufferLength = 0; // end of valid chars - int bufferPosition = 0; // next char to read - - int tokenStart = 0; // offset in buffer - int bufferStart = 0; // position in file of buffer - - Reader input; // source of chars - - /** Constructs from a Reader. */ - public FastCharStream(Reader r) { - input = r; - } - - public final char readChar() throws IOException { - if (bufferPosition >= bufferLength) - refill(); - return buffer[bufferPosition++]; - } - - private final void refill() throws IOException { - int newPosition = bufferLength - tokenStart; - - if (tokenStart == 0) { // token won't fit in buffer - if (buffer == null) { // first time: alloc buffer - buffer = new char[2048]; - } else if (bufferLength == buffer.length) { // grow buffer - char[] newBuffer = new char[buffer.length*2]; - System.arraycopy(buffer, 0, newBuffer, 0, bufferLength); - buffer = newBuffer; - } - } else { // shift token to front - System.arraycopy(buffer, tokenStart, buffer, 0, newPosition); - } - - bufferLength = newPosition; // update state - bufferPosition = newPosition; - bufferStart += tokenStart; - tokenStart = 0; - - int charsRead = // fill space in buffer - input.read(buffer, newPosition, buffer.length-newPosition); - if (charsRead == -1) - throw new IOException("read past eof"); - else - bufferLength += charsRead; - } - - public final char BeginToken() throws IOException { - tokenStart = bufferPosition; - return readChar(); - } - - public final void backup(int amount) { - bufferPosition -= amount; - } - - public final String GetImage() { - return new String(buffer, tokenStart, bufferPosition - tokenStart); - } - - public final char[] GetSuffix(int len) { - char[] value = new char[len]; - System.arraycopy(buffer, bufferPosition - len, value, 0, len); - return value; - } - - public final void Done() { - try { - input.close(); - } catch (IOException e) { - System.err.println("Caught: " + e + 
"; ignoring."); - } - } - - public final int getColumn() { - return bufferStart + bufferPosition; - } - public final int getLine() { - return 1; - } - public final int getEndColumn() { - return bufferStart + bufferPosition; - } - public final int getEndLine() { - return 1; - } - public final int getBeginColumn() { - return bufferStart + tokenStart; - } - public final int getBeginLine() { - return 1; - } -} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java deleted file mode 100644 index 6e9ec487912..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/ParseException.java +++ /dev/null @@ -1,198 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */ -/* JavaCCOptions:KEEP_LINE_COL=null */ -package org.apache.lucene.queryParser.precedence; - -/** - * This exception is thrown when parse errors are encountered. - * You can explicitly create objects of this exception type by - * calling the method generateParseException in the generated - * parser. - * - * You can modify this class to customize your error reporting - * mechanisms so long as you retain the public fields. - */ -public class ParseException extends Exception { - - /** - * This constructor is used by the method "generateParseException" - * in the generated parser. Calling this constructor generates - * a new object of this type with the fields "currentToken", - * "expectedTokenSequences", and "tokenImage" set. The boolean - * flag "specialConstructor" is also set to true to indicate that - * this constructor was used to create this object. 
- * This constructor calls its super class with the empty string - * to force the "toString" method of parent class "Throwable" to - * print the error message in the form: - * ParseException: - */ - public ParseException(Token currentTokenVal, - int[][] expectedTokenSequencesVal, - String[] tokenImageVal - ) - { - super(""); - specialConstructor = true; - currentToken = currentTokenVal; - expectedTokenSequences = expectedTokenSequencesVal; - tokenImage = tokenImageVal; - } - - /** - * The following constructors are for use by you for whatever - * purpose you can think of. Constructing the exception in this - * manner makes the exception behave in the normal way - i.e., as - * documented in the class "Throwable". The fields "errorToken", - * "expectedTokenSequences", and "tokenImage" do not contain - * relevant information. The JavaCC generated code does not use - * these constructors. - */ - - public ParseException() { - super(); - specialConstructor = false; - } - - /** Constructor with message. */ - public ParseException(String message) { - super(message); - specialConstructor = false; - } - - /** - * This variable determines which constructor was used to create - * this object and thereby affects the semantics of the - * "getMessage" method (see below). - */ - protected boolean specialConstructor; - - /** - * This is the last token that has been consumed successfully. If - * this object has been created due to a parse error, the token - * followng this token will (therefore) be the first error token. - */ - public Token currentToken; - - /** - * Each entry in this array is an array of integers. Each array - * of integers represents a sequence of tokens (by their ordinal - * values) that is expected at this point of the parse. - */ - public int[][] expectedTokenSequences; - - /** - * This is a reference to the "tokenImage" array of the generated - * parser within which the parse error occurred. This array is - * defined in the generated ...Constants interface. 
- */ - public String[] tokenImage; - - /** - * This method has the standard behavior when this object has been - * created using the standard constructors. Otherwise, it uses - * "currentToken" and "expectedTokenSequences" to generate a parse - * error message and returns it. If this object has been created - * due to a parse error, and you do not catch it (it gets thrown - * from the parser), then this method is called during the printing - * of the final stack trace, and hence the correct error message - * gets displayed. - */ - public String getMessage() { - if (!specialConstructor) { - return super.getMessage(); - } - StringBuffer expected = new StringBuffer(); - int maxSize = 0; - for (int i = 0; i < expectedTokenSequences.length; i++) { - if (maxSize < expectedTokenSequences[i].length) { - maxSize = expectedTokenSequences[i].length; - } - for (int j = 0; j < expectedTokenSequences[i].length; j++) { - expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' '); - } - if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { - expected.append("..."); - } - expected.append(eol).append(" "); - } - String retval = "Encountered \""; - Token tok = currentToken.next; - for (int i = 0; i < maxSize; i++) { - if (i != 0) retval += " "; - if (tok.kind == 0) { - retval += tokenImage[0]; - break; - } - retval += " " + tokenImage[tok.kind]; - retval += " \""; - retval += add_escapes(tok.image); - retval += " \""; - tok = tok.next; - } - retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; - retval += "." + eol; - if (expectedTokenSequences.length == 1) { - retval += "Was expecting:" + eol + " "; - } else { - retval += "Was expecting one of:" + eol + " "; - } - retval += expected.toString(); - return retval; - } - - /** - * The end of line string for this machine. 
- */ - protected String eol = System.getProperty("line.separator", "\n"); - - /** - * Used to convert raw characters to their escaped version - * when these raw version cannot be used as part of an ASCII - * string literal. - */ - protected String add_escapes(String str) { - StringBuffer retval = new StringBuffer(); - char ch; - for (int i = 0; i < str.length(); i++) { - switch (str.charAt(i)) - { - case 0 : - continue; - case '\b': - retval.append("\\b"); - continue; - case '\t': - retval.append("\\t"); - continue; - case '\n': - retval.append("\\n"); - continue; - case '\f': - retval.append("\\f"); - continue; - case '\r': - retval.append("\\r"); - continue; - case '\"': - retval.append("\\\""); - continue; - case '\'': - retval.append("\\\'"); - continue; - case '\\': - retval.append("\\\\"); - continue; - default: - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { - String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u" + s.substring(s.length() - 4, s.length())); - } else { - retval.append(ch); - } - continue; - } - } - return retval.toString(); - } - -} -/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java index e4ec9b033a0..68d1bff7575 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java @@ -1,1382 +1,57 @@ -/* Generated By:JavaCC: Do not edit this line. 
PrecedenceQueryParser.java */ package org.apache.lucene.queryParser.precedence; -import java.io.IOException; -import java.io.StringReader; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Locale; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.*; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.MultiPhraseQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline; +import org.apache.lucene.queryParser.standard.StandardQueryParser; +import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; /** - * Experimental query parser variant designed to handle operator precedence - * in a more sensible fashion than QueryParser. There are still some - * open issues with this parser. The following tests are currently failing - * in TestPrecedenceQueryParser and are disabled to make this test pass: - *
    - *
  • testSimple - *
  • testWildcard - *
  • testPrecedence - *
- * - * This class is generated by JavaCC. The only method that clients should need - * to call is {@link #parse(String)}. - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. - * A clause may be prefixed by: - *
    - *
  • a plus (+) or a minus (-) sign, indicating - * that the clause is required or prohibited respectively; or - *
  • a term followed by a colon, indicating the field to be searched. - * This enables one to construct queries which search multiple fields. - *
- * - * A clause may be either: - *
    - *
  • a term, indicating all the documents that contain this term; or - *
  • a nested query, enclosed in parentheses. Note that this may be used - * with a +/- prefix to require any of a set of - * terms. - *
- * - * Thus, in BNF, the query grammar is: - *
- *   Query  ::= ( Clause )*
- *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- * 
- * *

- * Examples of appropriately formatted queries can be found in the query syntax - * documentation. + * This query parser works exactly as the standard query parser ( {@link StandardQueryParser} ), + * except that it respects the boolean precedence, so <a AND b OR c AND d> is parsed to <(+a +b) (+c +d)> + * instead of <+a +b +c +d>. *

+ *

+ * EXPERT: This class extends {@link StandardQueryParser}, but uses {@link PrecedenceQueryNodeProcessorPipeline} + * instead of {@link StandardQueryNodeProcessorPipeline} to process the query tree. + *

+ * + * @see StandardQueryParser */ -public class PrecedenceQueryParser implements PrecedenceQueryParserConstants { - - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; - - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; - - // make it possible to call setDefaultOperator() without accessing - // the nested class: - public static final Operator AND_OPERATOR = Operator.AND; - public static final Operator OR_OPERATOR = Operator.OR; - - /** The actual operator that parser uses to combine query terms */ - private Operator operator = OR_OPERATOR; - - boolean lowercaseExpandedTerms = true; - MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; - int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; - Locale locale = Locale.getDefault(); - - static enum Operator { OR, AND } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - */ - public PrecedenceQueryParser(String f, Analyzer a) { - this(new FastCharStream(new StringReader(""))); - analyzer = a; - field = f; - } - - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param expression the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String expression) throws ParseException { - // optimize empty query to be empty BooleanQuery - if (expression == null || expression.trim().length() == 0) { - return new BooleanQuery(); - } - - ReInit(new FastCharStream(new StringReader(expression))); - try { - Query query = Query(field); - return (query != null) ? 
query : new BooleanQuery(); - } - catch (TokenMgrError tme) { - throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { - throw new ParseException("Too many boolean clauses"); - } - } - - /** - * @return Returns the analyzer. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - +public class PrecedenceQueryParser extends StandardQueryParser { + /** - * @return Returns the field. + * @see StandardQueryParser#StandardQueryParser() */ - public String getField() { - return field; + public PrecedenceQueryParser() { + setQueryNodeProcessor(new PrecedenceQueryNodeProcessorPipeline(getQueryConfigHandler())); } - - /** - * Get the minimal similarity for fuzzy queries. - */ - public float getFuzzyMinSim() { - return fuzzyMinSim; - } - + /** - * Set the minimum similarity for fuzzy queries. - * Default is 2f. + * @see StandardQueryParser#StandardQueryParser(Analyzer) */ - public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; + public PrecedenceQueryParser(Analyzer analyer) { + super(analyer); + + setQueryNodeProcessor(new PrecedenceQueryNodeProcessorPipeline(getQueryConfigHandler())); + } - /** - * Get the prefix length for fuzzy queries. - * @return Returns the fuzzyPrefixLength. - */ - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; - } - - /** - * Set the prefix length for fuzzy queries. Default is 0. - * @param fuzzyPrefixLength The fuzzyPrefixLength to set. - */ - public void setFuzzyPrefixLength(int fuzzyPrefixLength) { - this.fuzzyPrefixLength = fuzzyPrefixLength; - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - /** - * Sets the boolean operator of the QueryParser. 
- * In default mode (OR_OPERATOR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
- * In AND_OPERATOR mode terms are considered to be in conjunction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setDefaultOperator(Operator op) { - this.operator = op; - } - - /** - * Gets implicit operator setting, which will be either AND_OPERATOR - * or OR_OPERATOR. - */ - public Operator getDefaultOperator() { - return operator; - } - - /** - * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically - * lower-cased or not. Default is true. - */ - public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; - } - - /** - * @see #setLowercaseExpandedTerms(boolean) - */ - public boolean getLowercaseExpandedTerms() { - return lowercaseExpandedTerms; - } - /** - * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} - * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the - * old-fashioned BooleanQuery expansion rewriting and the above - * points are not relevant then use this to change - * the rewrite method. - */ - public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { - multiTermRewriteMethod = method; - } - - - /** - * @see #setMultiTermRewriteMethod - */ - public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { - return multiTermRewriteMethod; - } - - /** - * Set locale used by date range parsing. - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * Returns current locale, allowing access by subclasses. 
- */ - public Locale getLocale() { - return locale; - } - - protected void addClause(List clauses, int conj, int modifier, Query q) { - boolean required, prohibited; - - // If this term is introduced by AND, make the preceding term required, - // unless it's already prohibited - if (clauses.size() > 0 && conj == CONJ_AND) { - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.MUST); - } - - if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { - // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.SHOULD); - } - - // We might have been passed a null query; the term might have been - // filtered away by the analyzer. - if (q == null) - return; - - if (operator == OR_OPERATOR) { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. 
- prohibited = (modifier == MOD_NOT); - required = (modifier == MOD_REQ); - if (conj == CONJ_AND && !prohibited) { - required = true; - } - } else { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED - // if not PROHIBITED and not introduced by OR - prohibited = (modifier == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - if (required && !prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST)); - else if (!required && !prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); - else if (!required && prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); - else - throw new RuntimeException("Clause cannot be both required and prohibited"); - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, - // PhraseQuery, or nothing based on the term count - - TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); - List list = new ArrayList(); - int positionCount = 0; - boolean severalTokensAtSamePosition = false; - TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class); - - try { - while (source.incrementToken()) { - list.add(source.captureState()); - if (posincrAtt.getPositionIncrement() == 1) - positionCount++; - else - severalTokensAtSamePosition = true; - } - source.end(); - source.close(); - } catch (IOException e) { - // ignore, should never happen for StringReaders - } - - if (list.size() == 0) - return null; - else if (list.size() == 1) { - source.restoreState(list.get(0)); - BytesRef term = new BytesRef(); - termAtt.toBytesRef(term); - return new TermQuery(new Term(field, term)); - } else { - if 
(severalTokensAtSamePosition || !quoted) { - if (positionCount == 1 || !quoted) { - // no phrase query: - BooleanQuery q = new BooleanQuery(positionCount == 1); - - BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? - BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; - - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - termAtt.toBytesRef(term); - TermQuery currentQuery = new TermQuery( - new Term(field, term)); - q.add(currentQuery, occur); - } - return q; - } - else { - // phrase query: - MultiPhraseQuery mpq = new MultiPhraseQuery(); - List multiTerms = new ArrayList(); - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) { - mpq.add(multiTerms.toArray(new Term[0])); - multiTerms.clear(); - } - termAtt.toBytesRef(term); - multiTerms.add(new Term(field, term)); - } - mpq.add(multiTerms.toArray(new Term[0])); - return mpq; - } - } - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - termAtt.toBytesRef(term); - q.add(new Term(field, term)); - } - return q; - } - } - } - - /** - * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. - * This method may be overridden, for example, to return - * a SpanNearQuery instead of a PhraseQuery. 
- * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, int slop) - throws ParseException { - Query query = getFieldQuery(field, queryText, true); - - if (query instanceof PhraseQuery) { - ((PhraseQuery) query).setSlop(slop); - } - if (query instanceof MultiPhraseQuery) { - ((MultiPhraseQuery) query).setSlop(slop); - } - - return query; - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRangeQuery(String field, - String part1, - String part2, - boolean inclusive) throws ParseException - { - if (lowercaseExpandedTerms) { - part1 = part1.toLowerCase(); - part2 = part2.toLowerCase(); - } - try { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); - df.setLenient(true); - Date d1 = df.parse(part1); - Date d2 = df.parse(part2); - part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY); - part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY); - } - catch (Exception e) { } - - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * - * @return Resulting {@link Query} object. - * @exception ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses) throws ParseException - { - return getBooleanQuery(clauses, false); - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. 
- * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * @param disableCoord true if coord scoring should be disabled. - * - * @return Resulting {@link Query} object. - * @exception ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses, boolean disableCoord) - throws ParseException { - if (clauses == null || clauses.size() == 0) - return null; - - BooleanQuery query = new BooleanQuery(disableCoord); - for (int i = 0; i < clauses.size(); i++) { - query.add(clauses.get(i)); - } - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains one or more wildcard - * characters (? and *), but is not a prefix term token (one - * that has just a single * character at the end) - *

- * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final WildcardQuery query = new WildcardQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *

- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final PrefixQuery query = new PrefixQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains a regular expression - * query. - *

- * Depending on settings, pattern term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with regular expression templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * regular expression queries, which may be necessary due to missing analyzer - * calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains a regular expression - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRegexpQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - final Term regexp = new Term(field, termStr); - final RegexpQuery query = new RegexpQuery(regexp); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); - } - - /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. 
- */ - private String discardEscapeChar(String input) { - char[] caSource = input.toCharArray(); - char[] caDest = new char[caSource.length]; - int j = 0; - for (int i = 0; i < caSource.length; i++) { - if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { - caDest[j++]=caSource[i]; - } - } - return new String(caDest, 0, j); - } - - /** - * Returns a String where those characters that QueryParser - * expects to be escaped are escaped by a preceding \. - */ - public static String escape(String s) { - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - // NOTE: keep this in sync with _ESCAPED_CHAR below! - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' - || c == '*' || c == '?') { - sb.append('\\'); - } - sb.append(c); - } - return sb.toString(); - } - -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) - final public int Conjunction() throws ParseException { - int ret = CONJ_NONE; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case AND: - case OR: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case AND: - jj_consume_token(AND); - ret = CONJ_AND; - break; - case OR: - jj_consume_token(OR); - ret = CONJ_OR; - break; - default: - jj_la1[0] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - break; - default: - jj_la1[1] = jj_gen; - ; - } - {if (true) return ret;} - throw new Error("Missing return statement in function"); - } - - final public int Modifier() throws ParseException { - int ret = MOD_NONE; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case NOT: - case PLUS: - case MINUS: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case PLUS: - jj_consume_token(PLUS); - ret = MOD_REQ; - break; - case MINUS: - jj_consume_token(MINUS); - ret = MOD_NOT; - break; - case NOT: - jj_consume_token(NOT); - ret = MOD_NOT; - break; - default: - jj_la1[2] = 
jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - break; - default: - jj_la1[3] = jj_gen; - ; - } - {if (true) return ret;} - throw new Error("Missing return statement in function"); - } - - final public Query Query(String field) throws ParseException { - List clauses = new ArrayList(); - Query q, firstQuery=null; - boolean orPresent = false; - int modifier; - modifier = Modifier(); - q = andExpression(field); - addClause(clauses, CONJ_NONE, modifier, q); - if (modifier == MOD_NONE) - firstQuery = q; - label_1: - while (true) { - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case OR: - case NOT: - case PLUS: - case MINUS: - case LPAREN: - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case REGEXPTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - ; - break; - default: - jj_la1[4] = jj_gen; - break label_1; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case OR: - jj_consume_token(OR); - orPresent=true; - break; - default: - jj_la1[5] = jj_gen; - ; - } - modifier = Modifier(); - q = andExpression(field); - addClause(clauses, orPresent ? 
CONJ_OR : CONJ_NONE, modifier, q); - } - if (clauses.size() == 1 && firstQuery != null) - {if (true) return firstQuery;} - else { - {if (true) return getBooleanQuery(clauses);} - } - throw new Error("Missing return statement in function"); - } - - final public Query andExpression(String field) throws ParseException { - List clauses = new ArrayList(); - Query q, firstQuery=null; - int modifier; - q = Clause(field); - addClause(clauses, CONJ_NONE, MOD_NONE, q); - firstQuery = q; - label_2: - while (true) { - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case AND: - ; - break; - default: - jj_la1[6] = jj_gen; - break label_2; - } - jj_consume_token(AND); - modifier = Modifier(); - q = Clause(field); - addClause(clauses, CONJ_AND, modifier, q); - } - if (clauses.size() == 1 && firstQuery != null) - {if (true) return firstQuery;} - else { - {if (true) return getBooleanQuery(clauses);} - } - throw new Error("Missing return statement in function"); - } - - final public Query Clause(String field) throws ParseException { - Query q; - Token fieldToken=null, boost=null; - if (jj_2_1(2)) { - fieldToken = jj_consume_token(TERM); - jj_consume_token(COLON); - field=discardEscapeChar(fieldToken.image); - } else { - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case REGEXPTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - q = Term(field); - break; - case LPAREN: - jj_consume_token(LPAREN); - q = Query(field); - jj_consume_token(RPAREN); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[7] = jj_gen; - ; - } - break; - default: - jj_la1[8] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - q.setBoost(f); - } catch (Exception ignored) { } - } - {if (true) return q;} - throw new 
Error("Missing return statement in function"); - } - - final public Query Term(String field) throws ParseException { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean prefix = false; - boolean wildcard = false; - boolean fuzzy = false; - boolean regexp = false; - - Query q; - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case TERM: - case PREFIXTERM: - case WILDTERM: - case REGEXPTERM: - case NUMBER: - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case TERM: - term = jj_consume_token(TERM); - break; - case PREFIXTERM: - term = jj_consume_token(PREFIXTERM); - prefix=true; - break; - case WILDTERM: - term = jj_consume_token(WILDTERM); - wildcard=true; - break; - case REGEXPTERM: - term = jj_consume_token(REGEXPTERM); - regexp=true; - break; - case NUMBER: - term = jj_consume_token(NUMBER); - break; - default: - jj_la1[9] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; - break; - default: - jj_la1[10] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; - break; - default: - jj_la1[11] = jj_gen; - ; - } - break; - default: - jj_la1[12] = jj_gen; - ; - } - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(field, termImage); - } else if (prefix) { - q = getPrefixQuery(field, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (regexp) { - q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); - } else if (fuzzy) { - float fms = fuzzyMinSim; - try { - fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); - } catch (Exception ignored) { } - if(fms < 0.0f){ - {if (true) throw new ParseException("Minimum similarity for a 
FuzzyQuery has to be between 0.0f and 1.0f !");} - } else if (fms >= 1.0f && fms != (int) fms) { - {if (true) throw new ParseException("Fractional edit distances are not allowed!");} - } - q = getFuzzyQuery(field, termImage, fms); - } else { - q = getFieldQuery(field, termImage, false); - } - break; - case RANGEIN_START: - jj_consume_token(RANGEIN_START); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_GOOP: - goop1 = jj_consume_token(RANGEIN_GOOP); - break; - case RANGEIN_QUOTED: - goop1 = jj_consume_token(RANGEIN_QUOTED); - break; - default: - jj_la1[13] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_TO: - jj_consume_token(RANGEIN_TO); - break; - default: - jj_la1[14] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEIN_GOOP: - goop2 = jj_consume_token(RANGEIN_GOOP); - break; - case RANGEIN_QUOTED: - goop2 = jj_consume_token(RANGEIN_QUOTED); - break; - default: - jj_la1[15] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - jj_consume_token(RANGEIN_END); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[16] = jj_gen; - ; - } - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - q = getRangeQuery(field, goop1.image, goop2.image, true); - break; - case RANGEEX_START: - jj_consume_token(RANGEEX_START); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_GOOP: - goop1 = jj_consume_token(RANGEEX_GOOP); - break; - case RANGEEX_QUOTED: - goop1 = jj_consume_token(RANGEEX_QUOTED); - break; - default: - jj_la1[17] = jj_gen; - jj_consume_token(-1); - throw new 
ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_TO: - jj_consume_token(RANGEEX_TO); - break; - default: - jj_la1[18] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case RANGEEX_GOOP: - goop2 = jj_consume_token(RANGEEX_GOOP); - break; - case RANGEEX_QUOTED: - goop2 = jj_consume_token(RANGEEX_QUOTED); - break; - default: - jj_la1[19] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - jj_consume_token(RANGEEX_END); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[20] = jj_gen; - ; - } - if (goop1.kind == RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - - q = getRangeQuery(field, goop1.image, goop2.image, false); - break; - case QUOTED: - term = jj_consume_token(QUOTED); - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - break; - default: - jj_la1[21] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[22] = jj_gen; - ; - } - int s = phraseSlop; - - if (fuzzySlop != null) { - try { - s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); - break; - default: - jj_la1[23] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - if (boost != null) { - float f = (float) 1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - } - catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ - } - - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q.setBoost(f); - } - } - {if (true) return q;} - throw new Error("Missing return statement in function"); - } - - private boolean jj_2_1(int xla) { - jj_la = xla; jj_lastpos = jj_scanpos = token; - try { return !jj_3_1(); } - catch(LookaheadSuccess ls) { return true; } - finally { jj_save(0, xla); } - } - - private boolean jj_3_1() { - if (jj_scan_token(TERM)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - - /** Generated Token Manager. */ - public PrecedenceQueryParserTokenManager token_source; - /** Current token. */ - public Token token; - /** Next token. */ - public Token jj_nt; - private int jj_ntk; - private Token jj_scanpos, jj_lastpos; - private int jj_la; - private int jj_gen; - final private int[] jj_la1 = new int[24]; - static private int[] jj_la1_0; - static private int[] jj_la1_1; - static { - jj_la1_init_0(); - jj_la1_init_1(); - } - private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0x1fb1f00,0x100,0x80,0x8000,0x1fb1000,0x13a0000,0x40000,0x40000,0x8000,0x18000000,0x2000000,0x18000000,0x8000,0x80000000,0x20000000,0x80000000,0x8000,0x40000,0x8000,0x1fb0000,}; - } - private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,}; - } - final private JJCalls[] jj_2_rtns = new JJCalls[1]; - private boolean jj_rescan = false; - private int jj_gc = 0; - - /** Constructor with user supplied CharStream. */ - public PrecedenceQueryParser(CharStream stream) { - token_source = new PrecedenceQueryParserTokenManager(stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - /** Reinitialise. 
*/ - public void ReInit(CharStream stream) { - token_source.ReInit(stream); - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - /** Constructor with generated Token Manager. */ - public PrecedenceQueryParser(PrecedenceQueryParserTokenManager tm) { - token_source = tm; - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - /** Reinitialise. */ - public void ReInit(PrecedenceQueryParserTokenManager tm) { - token_source = tm; - token = new Token(); - jj_ntk = -1; - jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; - for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); - } - - private Token jj_consume_token(int kind) throws ParseException { - Token oldToken; - if ((oldToken = token).next != null) token = token.next; - else token = token.next = token_source.getNextToken(); - jj_ntk = -1; - if (token.kind == kind) { - jj_gen++; - if (++jj_gc > 100) { - jj_gc = 0; - for (int i = 0; i < jj_2_rtns.length; i++) { - JJCalls c = jj_2_rtns[i]; - while (c != null) { - if (c.gen < jj_gen) c.first = null; - c = c.next; - } - } - } - return token; - } - token = oldToken; - jj_kind = kind; - throw generateParseException(); - } - - static private final class LookaheadSuccess extends java.lang.Error { } - final private LookaheadSuccess jj_ls = new LookaheadSuccess(); - private boolean jj_scan_token(int kind) { - if (jj_scanpos == jj_lastpos) { - jj_la--; - if (jj_scanpos.next == null) { - jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken(); - } else { - jj_lastpos = jj_scanpos = jj_scanpos.next; - } - } else { - jj_scanpos = jj_scanpos.next; - } - if (jj_rescan) { - int i = 0; Token tok = token; - while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } - if (tok != null) 
jj_add_error_token(kind, i); - } - if (jj_scanpos.kind != kind) return true; - if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; - return false; - } - - -/** Get the next Token. */ - final public Token getNextToken() { - if (token.next != null) token = token.next; - else token = token.next = token_source.getNextToken(); - jj_ntk = -1; - jj_gen++; - return token; - } - -/** Get the specific Token. */ - final public Token getToken(int index) { - Token t = token; - for (int i = 0; i < index; i++) { - if (t.next != null) t = t.next; - else t = t.next = token_source.getNextToken(); - } - return t; - } - - private int jj_ntk() { - if ((jj_nt=token.next) == null) - return (jj_ntk = (token.next=token_source.getNextToken()).kind); - else - return (jj_ntk = jj_nt.kind); - } - - private java.util.List jj_expentries = new java.util.ArrayList(); - private int[] jj_expentry; - private int jj_kind = -1; - private int[] jj_lasttokens = new int[100]; - private int jj_endpos; - - private void jj_add_error_token(int kind, int pos) { - if (pos >= 100) return; - if (pos == jj_endpos + 1) { - jj_lasttokens[jj_endpos++] = kind; - } else if (jj_endpos != 0) { - jj_expentry = new int[jj_endpos]; - for (int i = 0; i < jj_endpos; i++) { - jj_expentry[i] = jj_lasttokens[i]; - } - jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) { - int[] oldentry = (int[])(it.next()); - if (oldentry.length == jj_expentry.length) { - for (int i = 0; i < jj_expentry.length; i++) { - if (oldentry[i] != jj_expentry[i]) { - continue jj_entries_loop; - } - } - jj_expentries.add(jj_expentry); - break jj_entries_loop; - } - } - if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; - } - } - - /** Generate ParseException. 
*/ - public ParseException generateParseException() { - jj_expentries.clear(); - boolean[] la1tokens = new boolean[33]; - if (jj_kind >= 0) { - la1tokens[jj_kind] = true; - jj_kind = -1; - } - for (int i = 0; i < 24; i++) { - if (jj_la1[i] == jj_gen) { - for (int j = 0; j < 32; j++) { - if ((jj_la1_0[i] & (1< jj_gen) { - jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; - switch (i) { - case 0: jj_3_1(); break; - } - } - p = p.next; - } while (p != null); - } catch(LookaheadSuccess ls) { } - } - jj_rescan = false; - } - - private void jj_save(int index, int xla) { - JJCalls p = jj_2_rtns[index]; - while (p.gen > jj_gen) { - if (p.next == null) { p = p.next = new JJCalls(); break; } - p = p.next; - } - p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; - } - - static final class JJCalls { - int gen; - Token first; - int arg; - JJCalls next; - } - - /** - * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. - * Usage:
- * java org.apache.lucene.queryParser.QueryParser <input> - */ -// public static void main(String[] args) throws Exception { -// if (args.length == 0) { -// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); -// System.exit(0); -// } -// PrecedenceQueryParser qp = new PrecedenceQueryParser("field", -// new org.apache.lucene.analysis.SimpleAnalyzer()); -// Query q = qp.parse(args[0]); -// System.out.println(q.toString("field")); -// } } diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj deleted file mode 100644 index c7151c17a22..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj +++ /dev/null @@ -1,982 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -options { - STATIC=false; - JAVA_UNICODE_ESCAPE=true; - USER_CHAR_STREAM=true; -} - -PARSER_BEGIN(PrecedenceQueryParser) - -package org.apache.lucene.queryParser.precedence; - -import java.io.IOException; -import java.io.StringReader; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Locale; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.*; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.MultiPhraseQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.AttributeSource; - -/** - * Experimental query parser variant designed to handle operator precedence - * in a more sensible fashion than QueryParser. There are still some - * open issues with this parser. The following tests are currently failing - * in TestPrecedenceQueryParser and are disabled to make this test pass: - *

    - *
  • testSimple - *
  • testWildcard - *
  • testPrecedence - *
- * - * This class is generated by JavaCC. The only method that clients should need - * to call is {@link #parse(String)}. - * - * The syntax for query strings is as follows: - * A Query is a series of clauses. - * A clause may be prefixed by: - *
    - *
  • a plus (+) or a minus (-) sign, indicating - * that the clause is required or prohibited respectively; or - *
  • a term followed by a colon, indicating the field to be searched. - * This enables one to construct queries which search multiple fields. - *
- * - * A clause may be either: - *
    - *
  • a term, indicating all the documents that contain this term; or - *
  • a nested query, enclosed in parentheses. Note that this may be used - * with a +/- prefix to require any of a set of - * terms. - *
- * - * Thus, in BNF, the query grammar is: - *
- *   Query  ::= ( Clause )*
- *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
- * 
- * - *

- * Examples of appropriately formatted queries can be found in the query syntax - * documentation. - *

- */ -public class PrecedenceQueryParser { - - private static final int CONJ_NONE = 0; - private static final int CONJ_AND = 1; - private static final int CONJ_OR = 2; - - private static final int MOD_NONE = 0; - private static final int MOD_NOT = 10; - private static final int MOD_REQ = 11; - - // make it possible to call setDefaultOperator() without accessing - // the nested class: - public static final Operator AND_OPERATOR = Operator.AND; - public static final Operator OR_OPERATOR = Operator.OR; - - /** The actual operator that parser uses to combine query terms */ - private Operator operator = OR_OPERATOR; - - boolean lowercaseExpandedTerms = true; - MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - - Analyzer analyzer; - String field; - int phraseSlop = 0; - float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; - int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; - Locale locale = Locale.getDefault(); - - static enum Operator { OR, AND } - - /** Constructs a query parser. - * @param f the default field for query terms. - * @param a used to find terms in the query text. - */ - public PrecedenceQueryParser(String f, Analyzer a) { - this(new FastCharStream(new StringReader(""))); - analyzer = a; - field = f; - } - - /** Parses a query string, returning a {@link org.apache.lucene.search.Query}. - * @param expression the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public Query parse(String expression) throws ParseException { - // optimize empty query to be empty BooleanQuery - if (expression == null || expression.trim().length() == 0) { - return new BooleanQuery(); - } - - ReInit(new FastCharStream(new StringReader(expression))); - try { - Query query = Query(field); - return (query != null) ? 
query : new BooleanQuery(); - } - catch (TokenMgrError tme) { - throw new ParseException(tme.getMessage()); - } - catch (BooleanQuery.TooManyClauses tmc) { - throw new ParseException("Too many boolean clauses"); - } - } - - /** - * @return Returns the analyzer. - */ - public Analyzer getAnalyzer() { - return analyzer; - } - - /** - * @return Returns the field. - */ - public String getField() { - return field; - } - - /** - * Get the minimal similarity for fuzzy queries. - */ - public float getFuzzyMinSim() { - return fuzzyMinSim; - } - - /** - * Set the minimum similarity for fuzzy queries. - * Default is 2f. - */ - public void setFuzzyMinSim(float fuzzyMinSim) { - this.fuzzyMinSim = fuzzyMinSim; - } - - /** - * Get the prefix length for fuzzy queries. - * @return Returns the fuzzyPrefixLength. - */ - public int getFuzzyPrefixLength() { - return fuzzyPrefixLength; - } - - /** - * Set the prefix length for fuzzy queries. Default is 0. - * @param fuzzyPrefixLength The fuzzyPrefixLength to set. - */ - public void setFuzzyPrefixLength(int fuzzyPrefixLength) { - this.fuzzyPrefixLength = fuzzyPrefixLength; - } - - /** - * Sets the default slop for phrases. If zero, then exact phrase matches - * are required. Default value is zero. - */ - public void setPhraseSlop(int phraseSlop) { - this.phraseSlop = phraseSlop; - } - - /** - * Gets the default slop for phrases. - */ - public int getPhraseSlop() { - return phraseSlop; - } - - /** - * Sets the boolean operator of the QueryParser. - * In default mode (OR_OPERATOR) terms without any modifiers - * are considered optional: for example capital of Hungary is equal to - * capital OR of OR Hungary.
- * In AND_OPERATOR mode terms are considered to be in conjunction: the - * above mentioned query is parsed as capital AND of AND Hungary - */ - public void setDefaultOperator(Operator op) { - this.operator = op; - } - - /** - * Gets implicit operator setting, which will be either AND_OPERATOR - * or OR_OPERATOR. - */ - public Operator getDefaultOperator() { - return operator; - } - - /** - * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically - * lower-cased or not. Default is true. - */ - public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { - this.lowercaseExpandedTerms = lowercaseExpandedTerms; - } - - /** - * @see #setLowercaseExpandedTerms(boolean) - */ - public boolean getLowercaseExpandedTerms() { - return lowercaseExpandedTerms; - } - /** - * By default PrecedenceQueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} - * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it - * a) Runs faster b) Does not have the scarcity of terms unduly influence score - * c) avoids any "TooManyBooleanClauses" exception. - * However, if your application really needs to use the - * old-fashioned BooleanQuery expansion rewriting and the above - * points are not relevant then use this to change - * the rewrite method. - */ - public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { - multiTermRewriteMethod = method; - } - - - /** - * @see #setMultiTermRewriteMethod - */ - public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { - return multiTermRewriteMethod; - } - - /** - * Set locale used by date range parsing. - */ - public void setLocale(Locale locale) { - this.locale = locale; - } - - /** - * Returns current locale, allowing access by subclasses. 
- */ - public Locale getLocale() { - return locale; - } - - protected void addClause(List clauses, int conj, int modifier, Query q) { - boolean required, prohibited; - - // If this term is introduced by AND, make the preceding term required, - // unless it's already prohibited - if (clauses.size() > 0 && conj == CONJ_AND) { - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.MUST); - } - - if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { - // If this term is introduced by OR, make the preceding term optional, - // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) - // notice if the input is a OR b, first term is parsed as required; without - // this modification a OR b would parsed as +a OR b - BooleanClause c = clauses.get(clauses.size()-1); - if (!c.isProhibited()) - c.setOccur(BooleanClause.Occur.SHOULD); - } - - // We might have been passed a null query; the term might have been - // filtered away by the analyzer. - if (q == null) - return; - - if (operator == OR_OPERATOR) { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if - // introduced by NOT or -; make sure not to set both. 
- prohibited = (modifier == MOD_NOT); - required = (modifier == MOD_REQ); - if (conj == CONJ_AND && !prohibited) { - required = true; - } - } else { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED - // if not PROHIBITED and not introduced by OR - prohibited = (modifier == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - if (required && !prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST)); - else if (!required && !prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); - else if (!required && prohibited) - clauses.add(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); - else - throw new RuntimeException("Clause cannot be both required and prohibited"); - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { - // Use the analyzer to get all the tokens, and then build a TermQuery, - // PhraseQuery, or nothing based on the term count - - TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); - List list = new ArrayList(); - int positionCount = 0; - boolean severalTokensAtSamePosition = false; - TermToBytesRefAttribute termAtt = source.addAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class); - - try { - while (source.incrementToken()) { - list.add(source.captureState()); - if (posincrAtt.getPositionIncrement() == 1) - positionCount++; - else - severalTokensAtSamePosition = true; - } - source.end(); - source.close(); - } catch (IOException e) { - // ignore, should never happen for StringReaders - } - - if (list.size() == 0) - return null; - else if (list.size() == 1) { - source.restoreState(list.get(0)); - BytesRef term = new BytesRef(); - termAtt.toBytesRef(term); - return new TermQuery(new Term(field, term)); - } else { - if 
(severalTokensAtSamePosition || !quoted) { - if (positionCount == 1 || !quoted) { - // no phrase query: - BooleanQuery q = new BooleanQuery(positionCount == 1); - - BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? - BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; - - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - termAtt.toBytesRef(term); - TermQuery currentQuery = new TermQuery( - new Term(field, term)); - q.add(currentQuery, occur); - } - return q; - } - else { - // phrase query: - MultiPhraseQuery mpq = new MultiPhraseQuery(); - List multiTerms = new ArrayList(); - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) { - mpq.add(multiTerms.toArray(new Term[0])); - multiTerms.clear(); - } - termAtt.toBytesRef(term); - multiTerms.add(new Term(field, term)); - } - mpq.add(multiTerms.toArray(new Term[0])); - return mpq; - } - } - else { - PhraseQuery q = new PhraseQuery(); - q.setSlop(phraseSlop); - for (int i = 0; i < list.size(); i++) { - BytesRef term = new BytesRef(); - source.restoreState(list.get(i)); - termAtt.toBytesRef(term); - q.add(new Term(field, term)); - } - return q; - } - } - } - - /** - * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. - * This method may be overridden, for example, to return - * a SpanNearQuery instead of a PhraseQuery. 
- * - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFieldQuery(String field, String queryText, int slop) - throws ParseException { - Query query = getFieldQuery(field, queryText, true); - - if (query instanceof PhraseQuery) { - ((PhraseQuery) query).setSlop(slop); - } - if (query instanceof MultiPhraseQuery) { - ((MultiPhraseQuery) query).setSlop(slop); - } - - return query; - } - - /** - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRangeQuery(String field, - String part1, - String part2, - boolean inclusive) throws ParseException - { - if (lowercaseExpandedTerms) { - part1 = part1.toLowerCase(); - part2 = part2.toLowerCase(); - } - try { - DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); - df.setLenient(true); - Date d1 = df.parse(part1); - Date d2 = df.parse(part2); - part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY); - part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY); - } - catch (Exception e) { } - - final TermRangeQuery query = new TermRangeQuery(field, part1, part2, inclusive, inclusive); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. - * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * - * @return Resulting {@link Query} object. - * @exception ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses) throws ParseException - { - return getBooleanQuery(clauses, false); - } - - /** - * Factory method for generating query, given a set of clauses. - * By default creates a boolean query composed of clauses passed in. 
- * - * Can be overridden by extending classes, to modify query being - * returned. - * - * @param clauses List that contains {@link BooleanClause} instances - * to join. - * @param disableCoord true if coord scoring should be disabled. - * - * @return Resulting {@link Query} object. - * @exception ParseException throw in overridden method to disallow - */ - protected Query getBooleanQuery(List clauses, boolean disableCoord) - throws ParseException { - if (clauses == null || clauses.size() == 0) - return null; - - BooleanQuery query = new BooleanQuery(disableCoord); - for (int i = 0; i < clauses.size(); i++) { - query.add(clauses.get(i)); - } - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains one or more wildcard - * characters (? and *), but is not a prefix term token (one - * that has just a single * character at the end) - *

- * Depending on settings, prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wildcard queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains one or more wild card - * characters (? or *), but is not simple prefix term - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getWildcardQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final WildcardQuery query = new WildcardQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses an input term - * token that uses prefix notation; that is, contains a single '*' wildcard - * character as its last character. Since this is a special case - * of generic wildcard term, and such a query can be optimized easily, - * this usually results in a different query object. - *

- * Depending on settings, a prefix term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with wildcard templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * wild card queries, which may be necessary due to missing analyzer calls. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * (without trailing '*' character!) - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getPrefixQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - final PrefixQuery query = new PrefixQuery(t); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query. Called when parser - * parses an input term token that contains a regular expression - * query. - *

- * Depending on settings, pattern term may be lower-cased - * automatically. It will not go through the default Analyzer, - * however, since normal Analyzers are unlikely to work properly - * with regular expression templates. - *

- * Can be overridden by extending classes, to provide custom handling for - * regular expression queries, which may be necessary due to missing analyzer - * calls. - * - * @param field Name of the field query will use. - * @param termStr Term token that contains a regular expression - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getRegexpQuery(String field, String termStr) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - final Term regexp = new Term(field, termStr); - final RegexpQuery query = new RegexpQuery(regexp); - query.setRewriteMethod(multiTermRewriteMethod); - return query; - } - - /** - * Factory method for generating a query (similar to - * {@link #getWildcardQuery}). Called when parser parses - * an input term token that has the fuzzy suffix (~) appended. - * - * @param field Name of the field query will use. - * @param termStr Term token to use for building term for the query - * - * @return Resulting {@link Query} built for the term - * @exception ParseException throw in overridden method to disallow - */ - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException - { - if (lowercaseExpandedTerms) { - termStr = termStr.toLowerCase(); - } - Term t = new Term(field, termStr); - return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); - } - - /** - * Returns a String where the escape char has been - * removed, or kept only once if there was a double escape. 
- */ - private String discardEscapeChar(String input) { - char[] caSource = input.toCharArray(); - char[] caDest = new char[caSource.length]; - int j = 0; - for (int i = 0; i < caSource.length; i++) { - if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { - caDest[j++]=caSource[i]; - } - } - return new String(caDest, 0, j); - } - - /** - * Returns a String where those characters that QueryParser - * expects to be escaped are escaped by a preceding \. - */ - public static String escape(String s) { - StringBuffer sb = new StringBuffer(); - for (int i = 0; i < s.length(); i++) { - char c = s.charAt(i); - // NOTE: keep this in sync with _ESCAPED_CHAR below! - if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' - || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' - || c == '*' || c == '?') { - sb.append('\\'); - } - sb.append(c); - } - return sb.toString(); - } - - /** - * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}. - * Usage:
- * java org.apache.lucene.queryParser.QueryParser <input> - */ -// public static void main(String[] args) throws Exception { -// if (args.length == 0) { -// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser "); -// System.exit(0); -// } -// PrecedenceQueryParser qp = new PrecedenceQueryParser("field", -// new org.apache.lucene.analysis.SimpleAnalyzer()); -// Query q = qp.parse(args[0]); -// System.out.println(q.toString("field")); -// } -} - -PARSER_END(PrecedenceQueryParser) - -/* ***************** */ -/* Token Definitions */ -/* ***************** */ - -<*> TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > -// NOTE: keep this in sync with escape(String) above! -| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?" ] - | <_ESCAPED_CHAR> ) > -| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > -| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > -} - - SKIP : { - < <_WHITESPACE>> -} - -// OG: to support prefix queries: -// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 -// Change from: -// | -// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -// To: -// -// | | ( [ "*", "?" ] ))* > - - TOKEN : { - -| -| -| -| -| -| -| -| : Boost -| -| (<_TERM_CHAR>)* > -| )+ ( "." (<_NUM_CHAR>)+ )? )? > -| (<_TERM_CHAR>)* "*" > -| - (<_TERM_CHAR> | ( [ "*", "?" ] ))* > -| -| : RangeIn -| : RangeEx -} - - TOKEN : { -)+ ( "." (<_NUM_CHAR>)+ )? 
> : DEFAULT -} - - TOKEN : { - -| : DEFAULT -| -| -} - - TOKEN : { - -| : DEFAULT -| -| -} - -// * Query ::= ( Clause )* -// * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) - -int Conjunction() : { - int ret = CONJ_NONE; -} -{ - [ - { ret = CONJ_AND; } - | { ret = CONJ_OR; } - ] - { return ret; } -} - -int Modifier() : { - int ret = MOD_NONE; -} -{ - [ - { ret = MOD_REQ; } - | { ret = MOD_NOT; } - | { ret = MOD_NOT; } - ] - { return ret; } -} - -Query Query(String field) : -{ - List clauses = new ArrayList(); - Query q, firstQuery=null; - boolean orPresent = false; - int modifier; -} -{ - modifier=Modifier() q=andExpression(field) - { - addClause(clauses, CONJ_NONE, modifier, q); - if (modifier == MOD_NONE) - firstQuery = q; - } - ( - [ { orPresent=true; }] modifier=Modifier() q=andExpression(field) - { addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q); } - )* - { - if (clauses.size() == 1 && firstQuery != null) - return firstQuery; - else { - return getBooleanQuery(clauses); - } - } -} - -Query andExpression(String field) : -{ - List clauses = new ArrayList(); - Query q, firstQuery=null; - int modifier; -} -{ - q=Clause(field) - { - addClause(clauses, CONJ_NONE, MOD_NONE, q); - firstQuery = q; - } - ( - modifier=Modifier() q=Clause(field) - { addClause(clauses, CONJ_AND, modifier, q); } - )* - { - if (clauses.size() == 1 && firstQuery != null) - return firstQuery; - else { - return getBooleanQuery(clauses); - } - } -} - -Query Clause(String field) : { - Query q; - Token fieldToken=null, boost=null; -} -{ - [ - LOOKAHEAD(2) - fieldToken= { - field=discardEscapeChar(fieldToken.image); - } - ] - - ( - q=Term(field) - | q=Query(field) ( boost=)? 
- - ) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - q.setBoost(f); - } catch (Exception ignored) { } - } - return q; - } -} - - -Query Term(String field) : { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean prefix = false; - boolean wildcard = false; - boolean fuzzy = false; - boolean regexp = false; - - Query q; -} -{ - ( - ( - term= - | term= { prefix=true; } - | term= { wildcard=true; } - | term= { regexp=true; } - | term= - ) - [ fuzzySlop= { fuzzy=true; } ] - [ boost= [ fuzzySlop= { fuzzy=true; } ] ] - { - String termImage=discardEscapeChar(term.image); - if (wildcard) { - q = getWildcardQuery(field, termImage); - } else if (prefix) { - q = getPrefixQuery(field, - discardEscapeChar(term.image.substring - (0, term.image.length()-1))); - } else if (regexp) { - q = getRegexpQuery(field, term.image.substring(1, term.image.length()-1)); - } else if (fuzzy) { - float fms = fuzzyMinSim; - try { - fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); - } catch (Exception ignored) { } - if(fms < 0.0f){ - throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); - } else if (fms >= 1.0f && fms != (int) fms) { - throw new ParseException("Fractional edit distances are not allowed!"); - } - q = getFuzzyQuery(field, termImage, fms); - } else { - q = getFieldQuery(field, termImage, false); - } - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == RANGEIN_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEIN_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - q = getRangeQuery(field, goop1.image, goop2.image, true); - } - | ( ( goop1=|goop1= ) - [ ] ( goop2=|goop2= ) - ) - [ boost= ] - { - if (goop1.kind == 
RANGEEX_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else { - goop1.image = discardEscapeChar(goop1.image); - } - if (goop2.kind == RANGEEX_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else { - goop2.image = discardEscapeChar(goop2.image); - } - - q = getRangeQuery(field, goop1.image, goop2.image, false); - } - | term= - [ fuzzySlop= ] - [ boost= ] - { - int s = phraseSlop; - - if (fuzzySlop != null) { - try { - s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); - } - catch (Exception ignored) { } - } - q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s); - } - ) - { - if (boost != null) { - float f = (float) 1.0; - try { - f = Float.valueOf(boost.image).floatValue(); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q.setBoost(f); - } - } - return q; - } -} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java deleted file mode 100644 index be8a0ffda92..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java +++ /dev/null @@ -1,122 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */ -package org.apache.lucene.queryParser.precedence; - - -/** - * Token literal values and constants. - * Generated by org.javacc.parser.OtherFilesGen#start() - */ -public interface PrecedenceQueryParserConstants { - - /** End of File. */ - int EOF = 0; - /** RegularExpression Id. */ - int _NUM_CHAR = 1; - /** RegularExpression Id. */ - int _ESCAPED_CHAR = 2; - /** RegularExpression Id. 
*/ - int _TERM_START_CHAR = 3; - /** RegularExpression Id. */ - int _TERM_CHAR = 4; - /** RegularExpression Id. */ - int _WHITESPACE = 5; - /** RegularExpression Id. */ - int AND = 7; - /** RegularExpression Id. */ - int OR = 8; - /** RegularExpression Id. */ - int NOT = 9; - /** RegularExpression Id. */ - int PLUS = 10; - /** RegularExpression Id. */ - int MINUS = 11; - /** RegularExpression Id. */ - int LPAREN = 12; - /** RegularExpression Id. */ - int RPAREN = 13; - /** RegularExpression Id. */ - int COLON = 14; - /** RegularExpression Id. */ - int CARAT = 15; - /** RegularExpression Id. */ - int QUOTED = 16; - /** RegularExpression Id. */ - int TERM = 17; - /** RegularExpression Id. */ - int FUZZY_SLOP = 18; - /** RegularExpression Id. */ - int PREFIXTERM = 19; - /** RegularExpression Id. */ - int WILDTERM = 20; - /** RegularExpression Id. */ - int REGEXPTERM = 21; - /** RegularExpression Id. */ - int RANGEIN_START = 22; - /** RegularExpression Id. */ - int RANGEEX_START = 23; - /** RegularExpression Id. */ - int NUMBER = 24; - /** RegularExpression Id. */ - int RANGEIN_TO = 25; - /** RegularExpression Id. */ - int RANGEIN_END = 26; - /** RegularExpression Id. */ - int RANGEIN_QUOTED = 27; - /** RegularExpression Id. */ - int RANGEIN_GOOP = 28; - /** RegularExpression Id. */ - int RANGEEX_TO = 29; - /** RegularExpression Id. */ - int RANGEEX_END = 30; - /** RegularExpression Id. */ - int RANGEEX_QUOTED = 31; - /** RegularExpression Id. */ - int RANGEEX_GOOP = 32; - - /** Lexical state. */ - int Boost = 0; - /** Lexical state. */ - int RangeEx = 1; - /** Lexical state. */ - int RangeIn = 2; - /** Lexical state. */ - int DEFAULT = 3; - - /** Literal token values. 
*/ - String[] tokenImage = { - "", - "<_NUM_CHAR>", - "<_ESCAPED_CHAR>", - "<_TERM_START_CHAR>", - "<_TERM_CHAR>", - "<_WHITESPACE>", - "", - "", - "", - "", - "\"+\"", - "\"-\"", - "\"(\"", - "\")\"", - "\":\"", - "\"^\"", - "", - "", - "", - "", - "", - "", - "\"[\"", - "\"{\"", - "", - "\"TO\"", - "\"]\"", - "", - "", - "\"TO\"", - "\"}\"", - "", - "", - }; - -} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java deleted file mode 100644 index 4f3ff70994a..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java +++ /dev/null @@ -1,1110 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserTokenManager.java */ -package org.apache.lucene.queryParser.precedence; -import java.io.IOException; -import java.io.StringReader; -import java.text.DateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import java.util.Locale; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.*; -import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.MultiPhraseQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.BytesRef; -import 
org.apache.lucene.util.AttributeSource; - -/** Token Manager. */ -public class PrecedenceQueryParserTokenManager implements PrecedenceQueryParserConstants -{ - - /** Debug output. */ - public java.io.PrintStream debugStream = System.out; - /** Set debug output. */ - public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } -private final int jjStopStringLiteralDfa_3(int pos, long active0) -{ - switch (pos) - { - default : - return -1; - } -} -private final int jjStartNfa_3(int pos, long active0) -{ - return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); -} -private int jjStopAtPos(int pos, int kind) -{ - jjmatchedKind = kind; - jjmatchedPos = pos; - return pos + 1; -} -private int jjMoveStringLiteralDfa0_3() -{ - switch(curChar) - { - case 40: - return jjStopAtPos(0, 12); - case 41: - return jjStopAtPos(0, 13); - case 43: - return jjStopAtPos(0, 10); - case 45: - return jjStopAtPos(0, 11); - case 58: - return jjStopAtPos(0, 14); - case 91: - return jjStopAtPos(0, 22); - case 94: - return jjStopAtPos(0, 15); - case 123: - return jjStopAtPos(0, 23); - default : - return jjMoveNfa_3(0, 0); - } -} -static final long[] jjbitVec0 = { - 0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL -}; -static final long[] jjbitVec2 = { - 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL -}; -private int jjMoveNfa_3(int startState, int curPos) -{ - int startsAt = 0; - jjnewStateCnt = 38; - int i = 1; - jjstateSet[0] = startState; - int kind = 0x7fffffff; - for (;;) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - long l = 1L << curChar; - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0x7bffd0f8ffffd9ffL & l) != 0L) - { - if (kind > 17) - kind = 17; - jjCheckNAddStates(0, 6); - } - else if ((0x100002600L & l) != 0L) - { - if (kind > 6) - kind = 6; - } - else if (curChar == 34) - jjCheckNAdd(15); - else if (curChar == 33) - { - if (kind > 9) - kind = 9; - } - if (curChar == 47) - 
jjCheckNAddStates(7, 9); - else if (curChar == 38) - jjstateSet[jjnewStateCnt++] = 4; - break; - case 4: - if (curChar == 38 && kind > 7) - kind = 7; - break; - case 5: - if (curChar == 38) - jjstateSet[jjnewStateCnt++] = 4; - break; - case 13: - if (curChar == 33 && kind > 9) - kind = 9; - break; - case 14: - if (curChar == 34) - jjCheckNAdd(15); - break; - case 15: - if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddTwoStates(15, 16); - break; - case 16: - if (curChar == 34 && kind > 16) - kind = 16; - break; - case 18: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 18) - kind = 18; - jjAddStates(10, 11); - break; - case 19: - if (curChar == 46) - jjCheckNAdd(20); - break; - case 20: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 18) - kind = 18; - jjCheckNAdd(20); - break; - case 21: - case 23: - if (curChar == 47) - jjCheckNAddStates(7, 9); - break; - case 22: - if ((0xffff7fffffffffffL & l) != 0L) - jjCheckNAddStates(7, 9); - break; - case 25: - if (curChar == 47 && kind > 21) - kind = 21; - break; - case 26: - if ((0x7bffd0f8ffffd9ffL & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddStates(0, 6); - break; - case 27: - if ((0x7bfff8f8ffffd9ffL & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(27, 28); - break; - case 29: - if ((0x84002f0600000000L & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(27, 28); - break; - case 30: - if ((0x7bfff8f8ffffd9ffL & l) != 0L) - jjCheckNAddStates(12, 14); - break; - case 31: - if (curChar == 42 && kind > 19) - kind = 19; - break; - case 33: - if ((0x84002f0600000000L & l) != 0L) - jjCheckNAddStates(12, 14); - break; - case 34: - if ((0xfbfffcf8ffffd9ffL & l) == 0L) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(34, 35); - break; - case 36: - if ((0x84002f0600000000L & l) == 0L) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(34, 35); - break; - default : break; - } - } while(i != startsAt); - } - else if 
(curChar < 128) - { - long l = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0x97ffffff97ffffffL & l) != 0L) - { - if (kind > 17) - kind = 17; - jjCheckNAddStates(0, 6); - } - else if (curChar == 126) - { - if (kind > 18) - kind = 18; - jjstateSet[jjnewStateCnt++] = 18; - } - if (curChar == 92) - jjCheckNAddStates(15, 17); - else if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 11; - else if (curChar == 124) - jjstateSet[jjnewStateCnt++] = 8; - else if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 6; - else if (curChar == 65) - jjstateSet[jjnewStateCnt++] = 2; - break; - case 1: - if (curChar == 68 && kind > 7) - kind = 7; - break; - case 2: - if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 1; - break; - case 3: - if (curChar == 65) - jjstateSet[jjnewStateCnt++] = 2; - break; - case 6: - if (curChar == 82 && kind > 8) - kind = 8; - break; - case 7: - if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 6; - break; - case 8: - if (curChar == 124 && kind > 8) - kind = 8; - break; - case 9: - if (curChar == 124) - jjstateSet[jjnewStateCnt++] = 8; - break; - case 10: - if (curChar == 84 && kind > 9) - kind = 9; - break; - case 11: - if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 10; - break; - case 12: - if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 11; - break; - case 15: - jjAddStates(18, 19); - break; - case 17: - if (curChar != 126) - break; - if (kind > 18) - kind = 18; - jjstateSet[jjnewStateCnt++] = 18; - break; - case 22: - jjAddStates(7, 9); - break; - case 24: - if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 23; - break; - case 26: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddStates(0, 6); - break; - case 27: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(27, 28); - break; - case 28: - if (curChar == 92) - jjCheckNAddTwoStates(29, 29); - break; - case 29: - if ((0x6800000078000000L & l) == 0L) - break; - if 
(kind > 17) - kind = 17; - jjCheckNAddTwoStates(27, 28); - break; - case 30: - if ((0x97ffffff97ffffffL & l) != 0L) - jjCheckNAddStates(12, 14); - break; - case 32: - if (curChar == 92) - jjCheckNAddTwoStates(33, 33); - break; - case 33: - if ((0x6800000078000000L & l) != 0L) - jjCheckNAddStates(12, 14); - break; - case 34: - if ((0x97ffffff97ffffffL & l) == 0L) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(34, 35); - break; - case 35: - if (curChar == 92) - jjCheckNAddTwoStates(36, 36); - break; - case 36: - if ((0x6800000078000000L & l) == 0L) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(34, 35); - break; - case 37: - if (curChar == 92) - jjCheckNAddStates(15, 17); - break; - default : break; - } - } while(i != startsAt); - } - else - { - int hiByte = (int)(curChar >> 8); - int i1 = hiByte >> 6; - long l1 = 1L << (hiByte & 077); - int i2 = (curChar & 0xff) >> 6; - long l2 = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) - break; - if (kind > 17) - kind = 17; - jjCheckNAddStates(0, 6); - break; - case 15: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(18, 19); - break; - case 22: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(7, 9); - break; - case 27: - if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) - break; - if (kind > 17) - kind = 17; - jjCheckNAddTwoStates(27, 28); - break; - case 30: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(12, 14); - break; - case 34: - if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) - break; - if (kind > 20) - kind = 20; - jjCheckNAddTwoStates(34, 35); - break; - default : break; - } - } while(i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt))) - return curPos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return curPos; 
} - } -} -private final int jjStopStringLiteralDfa_1(int pos, long active0) -{ - switch (pos) - { - case 0: - if ((active0 & 0x20000000L) != 0L) - { - jjmatchedKind = 32; - return 4; - } - return -1; - default : - return -1; - } -} -private final int jjStartNfa_1(int pos, long active0) -{ - return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); -} -private int jjMoveStringLiteralDfa0_1() -{ - switch(curChar) - { - case 84: - return jjMoveStringLiteralDfa1_1(0x20000000L); - case 125: - return jjStopAtPos(0, 30); - default : - return jjMoveNfa_1(0, 0); - } -} -private int jjMoveStringLiteralDfa1_1(long active0) -{ - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { - jjStopStringLiteralDfa_1(0, active0); - return 1; - } - switch(curChar) - { - case 79: - if ((active0 & 0x20000000L) != 0L) - return jjStartNfaWithStates_1(1, 29, 4); - break; - default : - break; - } - return jjStartNfa_1(0, active0); -} -private int jjStartNfaWithStates_1(int pos, int kind, int state) -{ - jjmatchedKind = kind; - jjmatchedPos = pos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return pos + 1; } - return jjMoveNfa_1(state, pos + 1); -} -private int jjMoveNfa_1(int startState, int curPos) -{ - int startsAt = 0; - jjnewStateCnt = 5; - int i = 1; - jjstateSet[0] = startState; - int kind = 0x7fffffff; - for (;;) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - long l = 1L << curChar; - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0xfffffffeffffffffL & l) != 0L) - { - if (kind > 32) - kind = 32; - jjCheckNAdd(4); - } - if ((0x100002600L & l) != 0L) - { - if (kind > 6) - kind = 6; - } - else if (curChar == 34) - jjCheckNAdd(2); - break; - case 1: - if (curChar == 34) - jjCheckNAdd(2); - break; - case 2: - if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddTwoStates(2, 3); - break; - case 3: - if (curChar == 34 && kind > 31) - kind = 31; - break; - case 4: - if ((0xfffffffeffffffffL & 
l) == 0L) - break; - if (kind > 32) - kind = 32; - jjCheckNAdd(4); - break; - default : break; - } - } while(i != startsAt); - } - else if (curChar < 128) - { - long l = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - case 4: - if ((0xdfffffffffffffffL & l) == 0L) - break; - if (kind > 32) - kind = 32; - jjCheckNAdd(4); - break; - case 2: - jjAddStates(20, 21); - break; - default : break; - } - } while(i != startsAt); - } - else - { - int hiByte = (int)(curChar >> 8); - int i1 = hiByte >> 6; - long l1 = 1L << (hiByte & 077); - int i2 = (curChar & 0xff) >> 6; - long l2 = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - case 4: - if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) - break; - if (kind > 32) - kind = 32; - jjCheckNAdd(4); - break; - case 2: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(20, 21); - break; - default : break; - } - } while(i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) - return curPos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return curPos; } - } -} -private int jjMoveStringLiteralDfa0_0() -{ - return jjMoveNfa_0(0, 0); -} -private int jjMoveNfa_0(int startState, int curPos) -{ - int startsAt = 0; - jjnewStateCnt = 3; - int i = 1; - jjstateSet[0] = startState; - int kind = 0x7fffffff; - for (;;) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - long l = 1L << curChar; - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 24) - kind = 24; - jjAddStates(22, 23); - break; - case 1: - if (curChar == 46) - jjCheckNAdd(2); - break; - case 2: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 24) - kind = 24; - jjCheckNAdd(2); - break; - default : break; - } - } while(i != startsAt); - } - else if (curChar < 
128) - { - long l = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - default : break; - } - } while(i != startsAt); - } - else - { - int hiByte = (int)(curChar >> 8); - int i1 = hiByte >> 6; - long l1 = 1L << (hiByte & 077); - int i2 = (curChar & 0xff) >> 6; - long l2 = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - default : break; - } - } while(i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) - return curPos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return curPos; } - } -} -private final int jjStopStringLiteralDfa_2(int pos, long active0) -{ - switch (pos) - { - case 0: - if ((active0 & 0x2000000L) != 0L) - { - jjmatchedKind = 28; - return 4; - } - return -1; - default : - return -1; - } -} -private final int jjStartNfa_2(int pos, long active0) -{ - return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); -} -private int jjMoveStringLiteralDfa0_2() -{ - switch(curChar) - { - case 84: - return jjMoveStringLiteralDfa1_2(0x2000000L); - case 93: - return jjStopAtPos(0, 26); - default : - return jjMoveNfa_2(0, 0); - } -} -private int jjMoveStringLiteralDfa1_2(long active0) -{ - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { - jjStopStringLiteralDfa_2(0, active0); - return 1; - } - switch(curChar) - { - case 79: - if ((active0 & 0x2000000L) != 0L) - return jjStartNfaWithStates_2(1, 25, 4); - break; - default : - break; - } - return jjStartNfa_2(0, active0); -} -private int jjStartNfaWithStates_2(int pos, int kind, int state) -{ - jjmatchedKind = kind; - jjmatchedPos = pos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return pos + 1; } - return jjMoveNfa_2(state, pos + 1); -} -private int jjMoveNfa_2(int startState, int curPos) -{ - int startsAt = 0; - jjnewStateCnt = 5; 
- int i = 1; - jjstateSet[0] = startState; - int kind = 0x7fffffff; - for (;;) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - long l = 1L << curChar; - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0xfffffffeffffffffL & l) != 0L) - { - if (kind > 28) - kind = 28; - jjCheckNAdd(4); - } - if ((0x100002600L & l) != 0L) - { - if (kind > 6) - kind = 6; - } - else if (curChar == 34) - jjCheckNAdd(2); - break; - case 1: - if (curChar == 34) - jjCheckNAdd(2); - break; - case 2: - if ((0xfffffffbffffffffL & l) != 0L) - jjCheckNAddTwoStates(2, 3); - break; - case 3: - if (curChar == 34 && kind > 27) - kind = 27; - break; - case 4: - if ((0xfffffffeffffffffL & l) == 0L) - break; - if (kind > 28) - kind = 28; - jjCheckNAdd(4); - break; - default : break; - } - } while(i != startsAt); - } - else if (curChar < 128) - { - long l = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - case 4: - if ((0xffffffffdfffffffL & l) == 0L) - break; - if (kind > 28) - kind = 28; - jjCheckNAdd(4); - break; - case 2: - jjAddStates(20, 21); - break; - default : break; - } - } while(i != startsAt); - } - else - { - int hiByte = (int)(curChar >> 8); - int i1 = hiByte >> 6; - long l1 = 1L << (hiByte & 077); - int i2 = (curChar & 0xff) >> 6; - long l2 = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - case 0: - case 4: - if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) - break; - if (kind > 28) - kind = 28; - jjCheckNAdd(4); - break; - case 2: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - jjAddStates(20, 21); - break; - default : break; - } - } while(i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) - return curPos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return curPos; } - } -} -static final int[] jjnextStates = { - 27, 30, 31, 34, 35, 
32, 28, 22, 24, 25, 18, 19, 30, 31, 32, 29, - 33, 36, 15, 16, 2, 3, 0, 1, -}; -private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) -{ - switch(hiByte) - { - case 0: - return ((jjbitVec2[i2] & l2) != 0L); - default : - if ((jjbitVec0[i1] & l1) != 0L) - return true; - return false; - } -} - -/** Token literal values. */ -public static final String[] jjstrLiteralImages = { -"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50", -"\51", "\72", "\136", null, null, null, null, null, null, "\133", "\173", null, -"\124\117", "\135", null, null, "\124\117", "\175", null, null, }; - -/** Lexer state names. */ -public static final String[] lexStateNames = { - "Boost", - "RangeEx", - "RangeIn", - "DEFAULT", -}; - -/** Lex State array. */ -public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, 2, 1, 3, - -1, 3, -1, -1, -1, 3, -1, -1, -}; -static final long[] jjtoToken = { - 0x1ffffff81L, -}; -static final long[] jjtoSkip = { - 0x40L, -}; -protected CharStream input_stream; -private final int[] jjrounds = new int[38]; -private final int[] jjstateSet = new int[76]; -protected char curChar; -/** Constructor. */ -public PrecedenceQueryParserTokenManager(CharStream stream){ - input_stream = stream; -} - -/** Constructor. */ -public PrecedenceQueryParserTokenManager(CharStream stream, int lexState){ - this(stream); - SwitchTo(lexState); -} - -/** Reinitialise parser. */ -public void ReInit(CharStream stream) -{ - jjmatchedPos = jjnewStateCnt = 0; - curLexState = defaultLexState; - input_stream = stream; - ReInitRounds(); -} -private void ReInitRounds() -{ - int i; - jjround = 0x80000001; - for (i = 38; i-- > 0;) - jjrounds[i] = 0x80000000; -} - -/** Reinitialise parser. */ -public void ReInit(CharStream stream, int lexState) -{ - ReInit(stream); - SwitchTo(lexState); -} - -/** Switch to specified lex state. 
*/ -public void SwitchTo(int lexState) -{ - if (lexState >= 4 || lexState < 0) - throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); - else - curLexState = lexState; -} - -protected Token jjFillToken() -{ - final Token t; - final String curTokenImage; - final int beginLine; - final int endLine; - final int beginColumn; - final int endColumn; - String im = jjstrLiteralImages[jjmatchedKind]; - curTokenImage = (im == null) ? input_stream.GetImage() : im; - beginLine = input_stream.getBeginLine(); - beginColumn = input_stream.getBeginColumn(); - endLine = input_stream.getEndLine(); - endColumn = input_stream.getEndColumn(); - t = Token.newToken(jjmatchedKind, curTokenImage); - - t.beginLine = beginLine; - t.endLine = endLine; - t.beginColumn = beginColumn; - t.endColumn = endColumn; - - return t; -} - -int curLexState = 3; -int defaultLexState = 3; -int jjnewStateCnt; -int jjround; -int jjmatchedPos; -int jjmatchedKind; - -/** Get the next Token. 
*/ -public Token getNextToken() -{ - Token matchedToken; - int curPos = 0; - - EOFLoop : - for (;;) - { - try - { - curChar = input_stream.BeginToken(); - } - catch(java.io.IOException e) - { - jjmatchedKind = 0; - matchedToken = jjFillToken(); - return matchedToken; - } - - switch(curLexState) - { - case 0: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_0(); - break; - case 1: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_1(); - break; - case 2: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_2(); - break; - case 3: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_3(); - break; - } - if (jjmatchedKind != 0x7fffffff) - { - if (jjmatchedPos + 1 < curPos) - input_stream.backup(curPos - jjmatchedPos - 1); - if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) - { - matchedToken = jjFillToken(); - if (jjnewLexState[jjmatchedKind] != -1) - curLexState = jjnewLexState[jjmatchedKind]; - return matchedToken; - } - else - { - if (jjnewLexState[jjmatchedKind] != -1) - curLexState = jjnewLexState[jjmatchedKind]; - continue EOFLoop; - } - } - int error_line = input_stream.getEndLine(); - int error_column = input_stream.getEndColumn(); - String error_after = null; - boolean EOFSeen = false; - try { input_stream.readChar(); input_stream.backup(1); } - catch (java.io.IOException e1) { - EOFSeen = true; - error_after = curPos <= 1 ? "" : input_stream.GetImage(); - if (curChar == '\n' || curChar == '\r') { - error_line++; - error_column = 0; - } - else - error_column++; - } - if (!EOFSeen) { - input_stream.backup(1); - error_after = curPos <= 1 ? 
"" : input_stream.GetImage(); - } - throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); - } -} - -private void jjCheckNAdd(int state) -{ - if (jjrounds[state] != jjround) - { - jjstateSet[jjnewStateCnt++] = state; - jjrounds[state] = jjround; - } -} -private void jjAddStates(int start, int end) -{ - do { - jjstateSet[jjnewStateCnt++] = jjnextStates[start]; - } while (start++ != end); -} -private void jjCheckNAddTwoStates(int state1, int state2) -{ - jjCheckNAdd(state1); - jjCheckNAdd(state2); -} - -private void jjCheckNAddStates(int start, int end) -{ - do { - jjCheckNAdd(jjnextStates[start]); - } while (start++ != end); -} - -} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java deleted file mode 100644 index 8402b3d5017..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/Token.java +++ /dev/null @@ -1,124 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */ -/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */ -package org.apache.lucene.queryParser.precedence; - -/** - * Describes the input token stream. - */ - -public class Token { - - /** - * An integer that describes the kind of this token. This numbering - * system is determined by JavaCCParser, and a table of these numbers is - * stored in the file ...Constants.java. - */ - public int kind; - - /** The line number of the first character of this Token. */ - public int beginLine; - /** The column number of the first character of this Token. */ - public int beginColumn; - /** The line number of the last character of this Token. */ - public int endLine; - /** The column number of the last character of this Token. */ - public int endColumn; - - /** - * The string image of the token. 
- */ - public String image; - - /** - * A reference to the next regular (non-special) token from the input - * stream. If this is the last token from the input stream, or if the - * token manager has not read tokens beyond this one, this field is - * set to null. This is true only if this token is also a regular - * token. Otherwise, see below for a description of the contents of - * this field. - */ - public Token next; - - /** - * This field is used to access special tokens that occur prior to this - * token, but after the immediately preceding regular (non-special) token. - * If there are no such special tokens, this field is set to null. - * When there are more than one such special token, this field refers - * to the last of these special tokens, which in turn refers to the next - * previous special token through its specialToken field, and so on - * until the first special token (whose specialToken field is null). - * The next fields of special tokens refer to other special tokens that - * immediately follow it (without an intervening regular token). If there - * is no such token, this field is null. - */ - public Token specialToken; - - /** - * An optional attribute value of the Token. - * Tokens which are not used as syntactic sugar will often contain - * meaningful values that will be used later on by the compiler or - * interpreter. This attribute value is often different from the image. - * Any subclass of Token that actually wants to return a non-null value can - * override this method as appropriate. - */ - public Object getValue() { - return null; - } - - /** - * No-argument constructor - */ - public Token() {} - - /** - * Constructs a new token for the specified Image. - */ - public Token(int kind) - { - this(kind, null); - } - - /** - * Constructs a new token for the specified Image and Kind. - */ - public Token(int kind, String image) - { - this.kind = kind; - this.image = image; - } - - /** - * Returns the image. 
- */ - public String toString() - { - return image; - } - - /** - * Returns a new Token object, by default. However, if you want, you - * can create and return subclass objects based on the value of ofKind. - * Simply add the cases to the switch for all those special cases. - * For example, if you have a subclass of Token called IDToken that - * you want to create if ofKind is ID, simply add something like : - * - * case MyParserConstants.ID : return new IDToken(ofKind, image); - * - * to the following switch statement. Then you can cast matchedToken - * variable to the appropriate type and use sit in your lexical actions. - */ - public static Token newToken(int ofKind, String image) - { - switch(ofKind) - { - default : return new Token(ofKind, image); - } - } - - public static Token newToken(int ofKind) - { - return newToken(ofKind, null); - } - -} -/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java deleted file mode 100644 index 01e87510c8f..00000000000 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java +++ /dev/null @@ -1,141 +0,0 @@ -/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */ -/* JavaCCOptions: */ -package org.apache.lucene.queryParser.precedence; - -/** Token Manager Error. */ -@SuppressWarnings("serial") -public class TokenMgrError extends Error -{ - - /* - * Ordinals for various reasons why an Error of this type can be thrown. - */ - - /** - * Lexical error occurred. - */ - static final int LEXICAL_ERROR = 0; - - /** - * An attempt was made to create a second instance of a static token manager. - */ - static final int STATIC_LEXER_ERROR = 1; - - /** - * Tried to change to an invalid lexical state. 
- */ - static final int INVALID_LEXICAL_STATE = 2; - - /** - * Detected (and bailed out of) an infinite loop in the token manager. - */ - static final int LOOP_DETECTED = 3; - - /** - * Indicates the reason why the exception is thrown. It will have - * one of the above 4 values. - */ - int errorCode; - - /** - * Replaces unprintable characters by their escaped (or unicode escaped) - * equivalents in the given string - */ - protected static final String addEscapes(String str) { - StringBuffer retval = new StringBuffer(); - char ch; - for (int i = 0; i < str.length(); i++) { - switch (str.charAt(i)) - { - case 0 : - continue; - case '\b': - retval.append("\\b"); - continue; - case '\t': - retval.append("\\t"); - continue; - case '\n': - retval.append("\\n"); - continue; - case '\f': - retval.append("\\f"); - continue; - case '\r': - retval.append("\\r"); - continue; - case '\"': - retval.append("\\\""); - continue; - case '\'': - retval.append("\\\'"); - continue; - case '\\': - retval.append("\\\\"); - continue; - default: - if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { - String s = "0000" + Integer.toString(ch, 16); - retval.append("\\u" + s.substring(s.length() - 4, s.length())); - } else { - retval.append(ch); - } - continue; - } - } - return retval.toString(); - } - - /** - * Returns a detailed message for the Error when it is thrown by the - * token manager to indicate a lexical error. - * Parameters : - * EOFSeen : indicates if EOF caused the lexical error - * curLexState : lexical state in which this error occurred - * errorLine : line number when the error occurred - * errorColumn : column number when the error occurred - * errorAfter : prefix that was seen before this error occurred - * curchar : the offending character - * Note: You can customize the lexical error message by modifying this method. 
- */ - protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { - return("Lexical error at line " + - errorLine + ", column " + - errorColumn + ". Encountered: " + - (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + - "after : \"" + addEscapes(errorAfter) + "\""); - } - - /** - * You can also modify the body of this method to customize your error messages. - * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not - * of end-users concern, so you can return something like : - * - * "Internal Error : Please file a bug report .... " - * - * from this method for such cases in the release version of your parser. - */ - public String getMessage() { - return super.getMessage(); - } - - /* - * Constructors of various flavors follow. - */ - - /** No arg constructor. */ - public TokenMgrError() { - } - - /** Constructor with message and reason. */ - public TokenMgrError(String message, int reason) { - super(message); - errorCode = reason; - } - - /** Full Constructor. */ - public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { - this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); - } -} -/* JavaCC - OriginalChecksum=257b82f2650841e86289a309cb3dae76 (do not edit this line) */ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/package.html b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/package.html index fdc3a30651d..f15ebadb3ea 100644 --- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/package.html +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/package.html @@ -16,7 +16,24 @@ limitations under the License. 
--> + + + -QueryParser designed to handle operator precedence in a more sensible fashion than the default QueryParser. + +This package contains the Precedence Query Parser Implementation + +

Lucene Precedence Query Parser

+ +

+The Precedence Query Parser extends the Standard Query Parser and enables +the boolean precedence. So, the query <a AND b OR c AND d> is parsed to +<(+a +b) (+c +d)> instead of <+a +b +c +d>. +

+

+Check {@link org.apache.lucene.queryParser.standard.StandardQueryParser} for more details about the +supported syntax and query parser functionalities. +

+ diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/BooleanModifiersQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/BooleanModifiersQueryNodeProcessor.java new file mode 100644 index 00000000000..5c5264acacc --- /dev/null +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/BooleanModifiersQueryNodeProcessor.java @@ -0,0 +1,138 @@ +package org.apache.lucene.queryParser.precedence.processors; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.nodes.AndQueryNode; +import org.apache.lucene.queryParser.core.nodes.BooleanQueryNode; +import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode; +import org.apache.lucene.queryParser.core.nodes.OrQueryNode; +import org.apache.lucene.queryParser.core.nodes.QueryNode; +import org.apache.lucene.queryParser.core.nodes.ModifierQueryNode.Modifier; +import org.apache.lucene.queryParser.core.processors.QueryNodeProcessorImpl; +import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser; +import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute; +import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; + +/** + *

+ * This processor is used to apply the correct {@link ModifierQueryNode} to the children of {@link BooleanQueryNode}s. + *

+ *

+ * It walks through the query node tree looking for {@link BooleanQueryNode}s. If an {@link AndQueryNode} is found, + * every child that is not a {@link ModifierQueryNode}, or whose {@link ModifierQueryNode} + * is {@link Modifier#MOD_NONE}, becomes a {@link Modifier#MOD_REQ}. For any other + * {@link BooleanQueryNode} which is not an {@link OrQueryNode}, it checks whether the default operator is {@link Operator#AND}; + * if it is, the same operation that is applied to an {@link AndQueryNode} is applied to it. + *

+ * + * @see DefaultOperatorAttribute + * @see PrecedenceQueryParser#setDefaultOperator + */ +public class BooleanModifiersQueryNodeProcessor extends QueryNodeProcessorImpl { + + private ArrayList childrenBuffer = new ArrayList(); + + private Boolean usingAnd = false; + + public BooleanModifiersQueryNodeProcessor() { + // empty constructor + } + + @Override + public QueryNode process(QueryNode queryTree) throws QueryNodeException { + + if (!getQueryConfigHandler().hasAttribute(DefaultOperatorAttribute.class)) { + throw new IllegalArgumentException( + "DefaultOperatorAttribute should be set on the QueryConfigHandler"); + } + + this.usingAnd = Operator.AND == getQueryConfigHandler().getAttribute( + DefaultOperatorAttribute.class).getOperator(); + + return super.process(queryTree); + + } + + @Override + protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { + + if (node instanceof AndQueryNode) { + this.childrenBuffer.clear(); + List children = node.getChildren(); + + for (QueryNode child : children) { + this.childrenBuffer.add(applyModifier(child, Modifier.MOD_REQ)); + } + + node.set(this.childrenBuffer); + + } else if (this.usingAnd && node instanceof BooleanQueryNode + && !(node instanceof OrQueryNode)) { + + this.childrenBuffer.clear(); + List children = node.getChildren(); + + for (QueryNode child : children) { + this.childrenBuffer.add(applyModifier(child, Modifier.MOD_REQ)); + } + + node.set(this.childrenBuffer); + + } + + return node; + + } + + private QueryNode applyModifier(QueryNode node, Modifier mod) { + + // check if modifier is not already defined and is default + if (!(node instanceof ModifierQueryNode)) { + return new ModifierQueryNode(node, mod); + + } else { + ModifierQueryNode modNode = (ModifierQueryNode) node; + + if (modNode.getModifier() == Modifier.MOD_NONE) { + return new ModifierQueryNode(modNode.getChild(), mod); + } + + } + + return node; + + } + + @Override + protected QueryNode preProcessNode(QueryNode node) 
throws QueryNodeException { + return node; + } + + @Override + protected List setChildrenOrder(List children) + throws QueryNodeException { + + return children; + + } + +} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/PrecedenceQueryNodeProcessorPipeline.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/PrecedenceQueryNodeProcessorPipeline.java new file mode 100644 index 00000000000..724c150ed26 --- /dev/null +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/PrecedenceQueryNodeProcessorPipeline.java @@ -0,0 +1,59 @@ +package org.apache.lucene.queryParser.precedence.processors; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.queryParser.core.config.QueryConfigHandler; +import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser; +import org.apache.lucene.queryParser.standard.processors.GroupQueryNodeProcessor; +import org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline; + +/** + *

+ * This processor pipeline extends {@link StandardQueryNodeProcessorPipeline} and enables + * boolean precedence on it. + *

+ *

+ * EXPERT: the precedence is enabled by removing {@link GroupQueryNodeProcessor} from the + * {@link StandardQueryNodeProcessorPipeline} and appending {@link BooleanModifiersQueryNodeProcessor} + * to the pipeline. + *

+ * + * @see PrecedenceQueryParser + * @see StandardQueryNodeProcessorPipeline + */ +public class PrecedenceQueryNodeProcessorPipeline extends StandardQueryNodeProcessorPipeline { + + /** + * @see StandardQueryNodeProcessorPipeline#StandardQueryNodeProcessorPipeline(QueryConfigHandler) + */ + public PrecedenceQueryNodeProcessorPipeline(QueryConfigHandler queryConfig) { + super(queryConfig); + + for (int i = 0 ; i < size() ; i++) { + + if (get(i).getClass().equals(GroupQueryNodeProcessor.class)) { + remove(i--); + } + + } + + add(new BooleanModifiersQueryNodeProcessor()); + + } + +} diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/package.html b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/package.html new file mode 100644 index 00000000000..3bef35582d1 --- /dev/null +++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/processors/package.html @@ -0,0 +1,47 @@ + + + + + + + + +This package contains the processors used by Precedence Query Parser + +

Lucene Precedence Query Parser Processors

+ +

+This package contains the 2 {@link org.apache.lucene.queryParser.core.processors.QueryNodeProcessor}s used by +{@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}. +

+

+{@link org.apache.lucene.queryParser.precedence.processors.BooleanModifiersQueryNodeProcessor}: this processor +is used to apply {@link org.apache.lucene.queryParser.core.nodes.ModifierQueryNode}s on +{@link org.apache.lucene.queryParser.core.nodes.BooleanQueryNode} children according to the boolean type +or the default operator. +

+

+{@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline}: this +processor pipeline is used by {@link org.apache.lucene.queryParser.precedence.PrecedenceQueryParser}. It extends +{@link org.apache.lucene.queryParser.standard.processors.StandardQueryNodeProcessorPipeline} and rearranges +the pipeline so the boolean precedence is processed correctly. Check {@link org.apache.lucene.queryParser.precedence.processors.PrecedenceQueryNodeProcessorPipeline} +for more details. +

+ + + diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java index 26204227e41..b68ab0bd7aa 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java @@ -17,67 +17,82 @@ package org.apache.lucene.queryParser.precedence; * limitations under the License. */ +import java.io.IOException; +import java.io.Reader; +import java.text.DateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.queryParser.TestQueryParser; +import org.apache.lucene.queryParser.core.QueryNodeException; +import org.apache.lucene.queryParser.core.QueryNodeParseException; +import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator; +import org.apache.lucene.queryParser.standard.parser.ParseException; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; -import 
org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.RegexpQuery; -import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.automaton.BasicAutomata; import org.apache.lucene.util.automaton.CharacterRunAutomaton; -import java.io.IOException; -import java.io.Reader; -import java.text.DateFormat; -import java.util.Calendar; -import java.util.GregorianCalendar; - +/** + *

+ * This test case tests {@link PrecedenceQueryParser}. + *

+ *

+ * It contains all tests from {@link TestQueryParser} with some adjusted to + * fit the precedence requirement, plus some precedence test cases. + *

+ * + * @see TestQueryParser + */ public class TestPrecedenceQueryParser extends LuceneTestCase { + public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { /** - * Filter which discards the token 'stop' and which expands the - * token 'phrase' into 'phrase1 phrase2' + * Filter which discards the token 'stop' and which expands the token + * 'phrase' into 'phrase1 phrase2' */ public QPTestFilter(TokenStream in) { super(in); } boolean inPhrase = false; + int savedStart = 0, savedEnd = 0; CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - - @Override + public boolean incrementToken() throws IOException { - clearAttributes(); if (inPhrase) { inPhrase = false; termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else - while(input.incrementToken()) + while (input.incrementToken()) if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); @@ -94,31 +109,13 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public static final class QPTestAnalyzer extends Analyzer { /** Filters MockTokenizer with StopFilter. 
*/ - @Override public final TokenStream tokenStream(String fieldName, Reader reader) { return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true)); } } - public static class QPTestParser extends PrecedenceQueryParser { - public QPTestParser(String f, Analyzer a) { - super(f, a); - } - - @Override - protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { - throw new ParseException("Fuzzy queries not allowed"); - } - - @Override - protected Query getWildcardQuery(String field, String termStr) throws ParseException { - throw new ParseException("Wildcard queries not allowed"); - } - } - private int originalMaxClauses; - @Override public void setUp() throws Exception { super.setUp(); originalMaxClauses = BooleanQuery.getMaxClauseCount(); @@ -127,40 +124,31 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { public PrecedenceQueryParser getParser(Analyzer a) throws Exception { if (a == null) a = new MockAnalyzer(MockTokenizer.SIMPLE, true); - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a); - qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR); + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(a); + qp.setDefaultOperator(Operator.OR); return qp; } public Query getQuery(String query, Analyzer a) throws Exception { - return getParser(a).parse(query); + return getParser(a).parse(query, "field"); } public void assertQueryEquals(String query, Analyzer a, String result) - throws Exception { + throws Exception { Query q = getQuery(query, a); String s = q.toString("field"); if (!s.equals(result)) { - fail("Query /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); + fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + + "/"); } } - public void assertWildcardQueryEquals(String query, boolean lowercase, String result) - throws Exception { + public void assertWildcardQueryEquals(String query, boolean lowercase, + 
String result) throws Exception { PrecedenceQueryParser qp = getParser(null); qp.setLowercaseExpandedTerms(lowercase); - Query q = qp.parse(query); - String s = q.toString("field"); - if (!s.equals(result)) { - fail("WildcardQuery /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); - } - } - - public void assertWildcardQueryEquals(String query, String result) throws Exception { - PrecedenceQueryParser qp = getParser(null); - Query q = qp.parse(query); + Query q = qp.parse(query, "field"); String s = q.toString("field"); if (!s.equals(result)) { fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /" @@ -168,36 +156,41 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { } } - public Query getQueryDOA(String query, Analyzer a) - throws Exception { - if (a == null) - a = new MockAnalyzer(MockTokenizer.SIMPLE, true); - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a); - qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR); - return qp.parse(query); - } - - public void assertQueryEqualsDOA(String query, Analyzer a, String result) - throws Exception { - Query q = getQueryDOA(query, a); + public void assertWildcardQueryEquals(String query, String result) + throws Exception { + PrecedenceQueryParser qp = getParser(null); + Query q = qp.parse(query, "field"); String s = q.toString("field"); if (!s.equals(result)) { - fail("Query /" + query + "/ yielded /" + s - + "/, expecting /" + result + "/"); + fail("WildcardQuery /" + query + "/ yielded /" + s + "/, expecting /" + + result + "/"); } } - // failing tests disabled since PrecedenceQueryParser - // is currently unmaintained - public void _testSimple() throws Exception { - assertQueryEquals("", null, ""); + public Query getQueryDOA(String query, Analyzer a) throws Exception { + if (a == null) + a = new MockAnalyzer(MockTokenizer.SIMPLE, true); + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(a); + 
qp.setDefaultOperator(Operator.AND); + return qp.parse(query, "field"); + } + public void assertQueryEqualsDOA(String query, Analyzer a, String result) + throws Exception { + Query q = getQueryDOA(query, a); + String s = q.toString("field"); + if (!s.equals(result)) { + fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + + "/"); + } + } + + public void testSimple() throws Exception { assertQueryEquals("term term term", null, "term term term"); assertQueryEquals("türm term term", null, "türm term term"); assertQueryEquals("ümlaut", null, "ümlaut"); - assertQueryEquals("+a", null, "+a"); - assertQueryEquals("-a", null, "-a"); assertQueryEquals("a AND b", null, "+a +b"); assertQueryEquals("(a AND b)", null, "+a +b"); assertQueryEquals("c OR (a AND b)", null, "c (+a +b)"); @@ -212,9 +205,9 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("+term -term term", null, "+term -term term"); assertQueryEquals("foo:term AND field:anotherTerm", null, - "+foo:term +anotherterm"); + "+foo:term +anotherterm"); assertQueryEquals("term AND \"phrase phrase\"", null, - "+term +\"phrase phrase\""); + "+term +\"phrase phrase\""); assertQueryEquals("\"hello there\"", null, "\"hello there\""); assertTrue(getQuery("a AND b", null) instanceof BooleanQuery); assertTrue(getQuery("hello", null) instanceof TermQuery); @@ -229,25 +222,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("\"term germ\"^2", null, "\"term germ\"^2.0"); assertQueryEquals("(foo OR bar) AND (baz OR boo)", null, - "+(foo bar) +(baz boo)"); - assertQueryEquals("((a OR b) AND NOT c) OR d", null, - "(+(a b) -c) d"); + "+(foo bar) +(baz boo)"); + assertQueryEquals("((a OR b) AND NOT c) OR d", null, "(+(a b) -c) d"); assertQueryEquals("+(apple \"steve jobs\") -(foo bar baz)", null, - "+(apple \"steve jobs\") -(foo bar baz)"); + "+(apple \"steve jobs\") -(foo bar baz)"); assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", 
null, - "+(title:dog title:cat) -author:\"bob dole\""); - - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer()); - // make sure OR is the default: - assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator()); - qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR); - assertEquals(PrecedenceQueryParser.AND_OPERATOR, qp.getDefaultOperator()); - qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR); - assertEquals(PrecedenceQueryParser.OR_OPERATOR, qp.getDefaultOperator()); + "+(title:dog title:cat) -author:\"bob dole\""); - assertQueryEquals("a OR !b", null, "a (-b)"); - assertQueryEquals("a OR ! b", null, "a (-b)"); - assertQueryEquals("a OR -b", null, "a (-b)"); + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(new MockAnalyzer()); + // make sure OR is the default: + assertEquals(Operator.OR, qp.getDefaultOperator()); + qp.setDefaultOperator(Operator.AND); + assertEquals(Operator.AND, qp.getDefaultOperator()); + qp.setDefaultOperator(Operator.OR); + assertEquals(Operator.OR, qp.getDefaultOperator()); + + assertQueryEquals("a OR !b", null, "a -b"); + assertQueryEquals("a OR ! 
b", null, "a -b"); + assertQueryEquals("a OR -b", null, "a -b"); } public void testPunct() throws Exception { @@ -266,110 +259,24 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { } public void testNumber() throws Exception { -// The numbers go away because SimpleAnalzyer ignores them + // The numbers go away because SimpleAnalzyer ignores them assertQueryEquals("3", null, ""); assertQueryEquals("term 1.0 1 2", null, "term"); assertQueryEquals("term term1 term2", null, "term term term"); - Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true); + Analyzer a = new MockAnalyzer(); assertQueryEquals("3", a, "3"); assertQueryEquals("term 1.0 1 2", a, "term 1.0 1 2"); assertQueryEquals("term term1 term2", a, "term term1 term2"); } - //individual CJK chars as terms, like StandardAnalyzer - private class SimpleCJKTokenizer extends Tokenizer { - private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - - public SimpleCJKTokenizer(Reader input) { - super(input); - } - - @Override - public boolean incrementToken() throws IOException { - int ch = input.read(); - if (ch < 0) - return false; - clearAttributes(); - termAtt.setEmpty().append((char) ch); - return true; - } - } - - private class SimpleCJKAnalyzer extends Analyzer { - @Override - public TokenStream tokenStream(String fieldName, Reader reader) { - return new SimpleCJKTokenizer(reader); - } - } - - public void testCJKTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - BooleanQuery expected = new BooleanQuery(); - expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); - expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); - - assertEquals(expected, getQuery("中国", analyzer)); - } - - public void testCJKBoostedTerm() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - BooleanQuery expected = new 
BooleanQuery(); - expected.setBoost(0.5f); - expected.add(new TermQuery(new Term("field", "中")), BooleanClause.Occur.SHOULD); - expected.add(new TermQuery(new Term("field", "国")), BooleanClause.Occur.SHOULD); - - assertEquals(expected, getQuery("中国^0.5", analyzer)); - } - - public void testCJKPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"", analyzer)); - } - - public void testCJKBoostedPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.setBoost(0.5f); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"^0.5", analyzer)); - } - - public void testCJKSloppyPhrase() throws Exception { - // individual CJK chars as terms - SimpleCJKAnalyzer analyzer = new SimpleCJKAnalyzer(); - - PhraseQuery expected = new PhraseQuery(); - expected.setSlop(3); - expected.add(new Term("field", "中")); - expected.add(new Term("field", "国")); - - assertEquals(expected, getQuery("\"中国\"~3", analyzer)); - } - - // failing tests disabled since PrecedenceQueryParser - // is currently unmaintained - public void _testWildcard() throws Exception { + public void testWildcard() throws Exception { assertQueryEquals("term*", null, "term*"); assertQueryEquals("term*^2", null, "term*^2.0"); - assertQueryEquals("term~", null, "term~0.5"); + assertQueryEquals("term~", null, "term~2.0"); assertQueryEquals("term~0.7", null, "term~0.7"); - assertQueryEquals("term~^2", null, "term^2.0~0.5"); - assertQueryEquals("term^2~", null, "term^2.0~0.5"); + assertQueryEquals("term~^3", null, "term~2.0^3.0"); + assertQueryEquals("term^3~", null, "term~2.0^3.0"); assertQueryEquals("term*germ", 
null, "term*germ"); assertQueryEquals("term*germ^3", null, "term*germ^3.0"); @@ -377,24 +284,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertTrue(getQuery("term*^2", null) instanceof PrefixQuery); assertTrue(getQuery("term~", null) instanceof FuzzyQuery); assertTrue(getQuery("term~0.7", null) instanceof FuzzyQuery); - FuzzyQuery fq = (FuzzyQuery)getQuery("term~0.7", null); + FuzzyQuery fq = (FuzzyQuery) getQuery("term~0.7", null); assertEquals(0.7f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); - fq = (FuzzyQuery)getQuery("term~", null); - assertEquals(0.5f, fq.getMinSimilarity(), 0.1f); + fq = (FuzzyQuery) getQuery("term~", null); + assertEquals(2.0f, fq.getMinSimilarity(), 0.1f); assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); try { - getQuery("term~1.1", null); // value > 1, throws exception + getQuery("term~1.1", null); // value > 1, throws exception fail(); - } catch(ParseException pe) { + } catch (ParseException pe) { // expected exception } assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); -/* Tests to see that wild card terms are (or are not) properly - * lower-cased with propery parser configuration - */ -// First prefix queries: + /* + * Tests to see that wild card terms are (or are not) properly lower-cased + * with propery parser configuration + */ + // First prefix queries: // by default, convert to lowercase: assertWildcardQueryEquals("Term*", true, "term*"); // explicitly set lowercase: @@ -405,7 +313,7 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertWildcardQueryEquals("term*", false, "term*"); assertWildcardQueryEquals("Term*", false, "Term*"); assertWildcardQueryEquals("TERM*", false, "TERM*"); -// Then 'full' wildcard queries: + // Then 'full' wildcard queries: // by default, convert to lowercase: assertWildcardQueryEquals("Te?m", "te?m"); // explicitly set lowercase: @@ -418,11 +326,11 @@ public class 
TestPrecedenceQueryParser extends LuceneTestCase { assertWildcardQueryEquals("Te?m", false, "Te?m"); assertWildcardQueryEquals("TE?M", false, "TE?M"); assertWildcardQueryEquals("Te?m*gerM", false, "Te?m*gerM"); -// Fuzzy queries: - assertWildcardQueryEquals("Term~", "term~0.5"); - assertWildcardQueryEquals("Term~", true, "term~0.5"); - assertWildcardQueryEquals("Term~", false, "Term~0.5"); -// Range queries: + // Fuzzy queries: + assertWildcardQueryEquals("Term~", "term~2.0"); + assertWildcardQueryEquals("Term~", true, "term~2.0"); + assertWildcardQueryEquals("Term~", false, "Term~2.0"); + // Range queries: assertWildcardQueryEquals("[A TO C]", "[a TO c]"); assertWildcardQueryEquals("[A TO C]", true, "[a TO c]"); assertWildcardQueryEquals("[A TO C]", false, "[A TO C]"); @@ -434,11 +342,11 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("term -stop term", qpAnalyzer, "term term"); assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll"); assertQueryEquals("term phrase term", qpAnalyzer, - "term (phrase1 phrase2) term"); + "term (phrase1 phrase2) term"); // note the parens in this next assertion differ from the original // QueryParser behavior assertQueryEquals("term AND NOT phrase term", qpAnalyzer, - "(+term -(phrase1 phrase2)) term"); + "(+term -(phrase1 phrase2)) term"); assertQueryEquals("stop", qpAnalyzer, ""); assertQueryEquals("stop OR stop AND stop", qpAnalyzer, ""); assertTrue(getQuery("term term term", qpAnalyzer) instanceof BooleanQuery); @@ -455,9 +363,10 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("[ a TO z] OR bar", null, "[a TO z] bar"); assertQueryEquals("[ a TO z] AND bar", null, "+[a TO z] +bar"); assertQueryEquals("( bar blar { a TO z}) ", null, "bar blar {a TO z}"); - assertQueryEquals("gack ( bar blar { a TO z}) ", null, "gack (bar blar {a TO z})"); + assertQueryEquals("gack ( bar blar { a TO z}) ", null, + "gack (bar blar {a TO z})"); } - + private 
String escapeDateString(String s) { if (s.contains(" ")) { return "\"" + s + "\""; @@ -471,53 +380,106 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { return DateTools.dateToString(df.parse(s), DateTools.Resolution.DAY); } - public String getLocalizedDate(int year, int month, int day) { + private String getLocalizedDate(int year, int month, int day, + boolean extendLastDate) { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); Calendar calendar = new GregorianCalendar(); - calendar.clear(); calendar.set(year, month, day); - calendar.set(Calendar.HOUR_OF_DAY, 23); - calendar.set(Calendar.MINUTE, 59); - calendar.set(Calendar.SECOND, 59); - calendar.set(Calendar.MILLISECOND, 999); + if (extendLastDate) { + calendar.set(Calendar.HOUR_OF_DAY, 23); + calendar.set(Calendar.MINUTE, 59); + calendar.set(Calendar.SECOND, 59); + calendar.set(Calendar.MILLISECOND, 999); + } return df.format(calendar.getTime()); } public void testDateRange() throws Exception { - String startDate = getLocalizedDate(2002, 1, 1); - String endDate = getLocalizedDate(2002, 1, 4); - assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, - "[" + getDate(startDate) + " TO " + getDate(endDate) + "]"); - assertQueryEquals("{ " + escapeDateString(startDate) + " " + escapeDateString(endDate) + " }", null, - "{" + getDate(startDate) + " TO " + getDate(endDate) + "}"); + String startDate = getLocalizedDate(2002, 1, 1, false); + String endDate = getLocalizedDate(2002, 1, 4, false); + Calendar endDateExpected = new GregorianCalendar(); + endDateExpected.set(2002, 1, 4, 23, 59, 59); + endDateExpected.set(Calendar.MILLISECOND, 999); + final String defaultField = "default"; + final String monthField = "month"; + final String hourField = "hour"; + PrecedenceQueryParser qp = new PrecedenceQueryParser(new MockAnalyzer()); + + // Don't set any date resolution and verify if DateField is used + assertDateRangeQueryEquals(qp, defaultField, 
startDate, endDate, + endDateExpected.getTime(), null); + + Map fieldMap = new HashMap(); + // set a field specific date resolution + fieldMap.put(monthField, DateTools.Resolution.MONTH); + qp.setDateResolution(fieldMap); + + // DateField should still be used for defaultField + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), null); + + // set default date resolution to MILLISECOND + qp.setDateResolution(DateTools.Resolution.MILLISECOND); + + // set second field specific date resolution + fieldMap.put(hourField, DateTools.Resolution.HOUR); + qp.setDateResolution(fieldMap); + + // for this field no field specific date resolution has been set, + // so verify if the default resolution is used + assertDateRangeQueryEquals(qp, defaultField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.MILLISECOND); + + // verify if field specific date resolutions are used for these two fields + assertDateRangeQueryEquals(qp, monthField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.MONTH); + + assertDateRangeQueryEquals(qp, hourField, startDate, endDate, + endDateExpected.getTime(), DateTools.Resolution.HOUR); + } + + /** for testing DateTools support */ + private String getDate(String s, DateTools.Resolution resolution) + throws Exception { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT); + return getDate(df.parse(s), resolution); + } + + /** for testing DateTools support */ + private String getDate(Date d, DateTools.Resolution resolution) + throws Exception { + if (resolution == null) { + return DateField.dateToString(d); + } else { + return DateTools.dateToString(d, resolution); + } + } + + public void assertQueryEquals(PrecedenceQueryParser qp, String field, String query, + String result) throws Exception { + Query q = qp.parse(query, field); + String s = q.toString(field); + if (!s.equals(result)) { + fail("Query /" + query + "/ yielded /" + s + "/, expecting /" + result + + 
"/"); + } + } + + public void assertDateRangeQueryEquals(PrecedenceQueryParser qp, String field, + String startDate, String endDate, Date endDateInclusive, + DateTools.Resolution resolution) throws Exception { + assertQueryEquals(qp, field, field + ":[" + escapeDateString(startDate) + + " TO " + escapeDateString(endDate) + "]", "[" + + getDate(startDate, resolution) + " TO " + + getDate(endDateInclusive, resolution) + "]"); + assertQueryEquals(qp, field, field + ":{" + escapeDateString(startDate) + + " TO " + escapeDateString(endDate) + "}", "{" + + getDate(startDate, resolution) + " TO " + + getDate(endDate, resolution) + "}"); } public void testEscaped() throws Exception { Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, false); - - /*assertQueryEquals("\\[brackets", a, "\\[brackets"); - assertQueryEquals("\\[brackets", null, "brackets"); - assertQueryEquals("\\\\", a, "\\\\"); - assertQueryEquals("\\+blah", a, "\\+blah"); - assertQueryEquals("\\(blah", a, "\\(blah"); - - assertQueryEquals("\\-blah", a, "\\-blah"); - assertQueryEquals("\\!blah", a, "\\!blah"); - assertQueryEquals("\\{blah", a, "\\{blah"); - assertQueryEquals("\\}blah", a, "\\}blah"); - assertQueryEquals("\\:blah", a, "\\:blah"); - assertQueryEquals("\\^blah", a, "\\^blah"); - assertQueryEquals("\\[blah", a, "\\[blah"); - assertQueryEquals("\\]blah", a, "\\]blah"); - assertQueryEquals("\\\"blah", a, "\\\"blah"); - assertQueryEquals("\\(blah", a, "\\(blah"); - assertQueryEquals("\\)blah", a, "\\)blah"); - assertQueryEquals("\\~blah", a, "\\~blah"); - assertQueryEquals("\\*blah", a, "\\*blah"); - assertQueryEquals("\\?blah", a, "\\?blah"); - //assertQueryEquals("foo \\&\\& bar", a, "foo \\&\\& bar"); - //assertQueryEquals("foo \\|| bar", a, "foo \\|| bar"); - //assertQueryEquals("foo \\AND bar", a, "foo \\AND bar");*/ assertQueryEquals("a\\-b:c", a, "a-b:c"); assertQueryEquals("a\\+b:c", a, "a+b:c"); @@ -551,44 +513,29 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { 
assertQueryEquals("[ a\\\\ TO a\\* ]", null, "[a\\ TO a*]"); } - public void testTabNewlineCarriageReturn() - throws Exception { - assertQueryEqualsDOA("+weltbank +worlbank", null, - "+weltbank +worlbank"); + public void testTabNewlineCarriageReturn() throws Exception { + assertQueryEqualsDOA("+weltbank +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\n+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \n+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \n +worlbank", null, - "+weltbank +worlbank"); + assertQueryEqualsDOA("+weltbank\n+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \n+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \n +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\r+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r +worlbank", null, - "+weltbank +worlbank"); + assertQueryEqualsDOA("+weltbank\r+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r +worlbank", null, "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r\n+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \r\n +worlbank", null, - "+weltbank +worlbank"); + assertQueryEqualsDOA("+weltbank\r\n+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r\n+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \r\n +worlbank", null, "+weltbank +worlbank"); assertQueryEqualsDOA("weltbank \r \n +worlbank", null, - "+weltbank +worlbank"); + "+weltbank +worlbank"); - assertQueryEqualsDOA("+weltbank\t+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank 
\t+worlbank", null, - "+weltbank +worlbank"); - assertQueryEqualsDOA("weltbank \t +worlbank", null, - "+weltbank +worlbank"); + assertQueryEqualsDOA("+weltbank\t+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \t+worlbank", null, "+weltbank +worlbank"); + assertQueryEqualsDOA("weltbank \t +worlbank", null, "+weltbank +worlbank"); } - public void testSimpleDAO() - throws Exception { + public void testSimpleDAO() throws Exception { assertQueryEqualsDOA("term term term", null, "+term +term +term"); assertQueryEqualsDOA("term +term term", null, "+term +term +term"); assertQueryEqualsDOA("term term +term", null, "+term +term +term"); @@ -596,23 +543,25 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEqualsDOA("-term term term", null, "-term +term +term"); } - public void testBoost() - throws Exception { + public void testBoost() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(BasicAutomata.makeString("on")); Analyzer oneStopAnalyzer = new MockAnalyzer(MockTokenizer.SIMPLE, true, stopSet, true); - PrecedenceQueryParser qp = new PrecedenceQueryParser("field", oneStopAnalyzer); - Query q = qp.parse("on^1.0"); + + PrecedenceQueryParser qp = new PrecedenceQueryParser(); + qp.setAnalyzer(oneStopAnalyzer); + Query q = qp.parse("on^1.0", "field"); assertNotNull(q); - q = qp.parse("\"hello\"^2.0"); + q = qp.parse("\"hello\"^2.0", "field"); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); - q = qp.parse("hello^2.0"); + q = qp.parse("hello^2.0", "field"); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); - q = qp.parse("\"on\"^1.0"); + q = qp.parse("\"on\"^1.0", "field"); assertNotNull(q); - q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3"); + q = getParser(new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)).parse("the^3", + "field"); assertNotNull(q); } @@ 
-620,105 +569,75 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { try { assertQueryEquals("\"some phrase", null, "abc"); fail("ParseException expected, not thrown"); - } catch (ParseException expected) { + } catch (QueryNodeParseException expected) { } } - public void testCustomQueryParserWildcard() { - try { - new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("a?t"); - } catch (ParseException expected) { - return; - } - fail("Wildcard queries should not be allowed"); - } - - public void testCustomQueryParserFuzzy() throws Exception { - try { - new QPTestParser("contents", new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("xunit~"); - } catch (ParseException expected) { - return; - } - fail("Fuzzy queries should not be allowed"); - } - public void testBooleanQuery() throws Exception { BooleanQuery.setMaxClauseCount(2); try { - getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three"); + getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)).parse("one two three", "field"); fail("ParseException expected due to too many boolean clauses"); - } catch (ParseException expected) { + } catch (QueryNodeException expected) { // too many boolean clauses, so ParseException is expected } } /** - * This test differs from the original QueryParser, showing how the - * precedence issue has been corrected. + * This test differs from the original QueryParser, showing how the precedence + * issue has been corrected. 
*/ - // failing tests disabled since PrecedenceQueryParser - // is currently unmaintained - public void _testPrecedence() throws Exception { + public void testPrecedence() throws Exception { PrecedenceQueryParser parser = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - Query query1 = parser.parse("A AND B OR C AND D"); - Query query2 = parser.parse("(A AND B) OR (C AND D)"); + Query query1 = parser.parse("A AND B OR C AND D", "field"); + Query query2 = parser.parse("(A AND B) OR (C AND D)", "field"); assertEquals(query1, query2); - query1 = parser.parse("A OR B C"); - query2 = parser.parse("A B C"); + query1 = parser.parse("A OR B C", "field"); + query2 = parser.parse("(A B) C", "field"); assertEquals(query1, query2); - query1 = parser.parse("A AND B C"); - query2 = parser.parse("(+A +B) C"); + query1 = parser.parse("A AND B C", "field"); + query2 = parser.parse("(+A +B) C", "field"); assertEquals(query1, query2); - query1 = parser.parse("A AND NOT B"); - query2 = parser.parse("+A -B"); + query1 = parser.parse("A AND NOT B", "field"); + query2 = parser.parse("+A -B", "field"); assertEquals(query1, query2); - query1 = parser.parse("A OR NOT B"); - query2 = parser.parse("A -B"); + query1 = parser.parse("A OR NOT B", "field"); + query2 = parser.parse("A -B", "field"); assertEquals(query1, query2); - query1 = parser.parse("A OR NOT B AND C"); - query2 = parser.parse("A (-B +C)"); + query1 = parser.parse("A OR NOT B AND C", "field"); + query2 = parser.parse("A (-B +C)", "field"); assertEquals(query1, query2); - } - - public void testRegexps() throws Exception { - PrecedenceQueryParser qp = getParser(new MockAnalyzer(MockTokenizer.WHITESPACE, false)); - RegexpQuery q = new RegexpQuery(new Term("field", "[a-z][123]")); - assertEquals(q, qp.parse("/[a-z][123]/")); - qp.setLowercaseExpandedTerms(true); - assertEquals(q, qp.parse("/[A-Z][123]/")); - q.setBoost(0.5f); - assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); - 
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); - assertTrue(qp.parse("/[A-Z][123]/^0.5") instanceof RegexpQuery); - assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5")).getRewriteMethod()); - assertEquals(q, qp.parse("/[A-Z][123]/^0.5")); - qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); - Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]")); - assertEquals(escaped, qp.parse("/[a-z]\\/[123]/")); - Query escaped2 = new RegexpQuery(new Term("field", "[a-z]\\*[123]")); - assertEquals(escaped2, qp.parse("/[a-z]\\*[123]/")); + parser.setDefaultOperator(Operator.AND); + query1 = parser.parse("A AND B OR C AND D", "field"); + query2 = parser.parse("(A AND B) OR (C AND D)", "field"); + assertEquals(query1, query2); + + query1 = parser.parse("A AND B C", "field"); + query2 = parser.parse("(A B) C", "field"); + assertEquals(query1, query2); + + query1 = parser.parse("A AND B C", "field"); + query2 = parser.parse("(+A +B) C", "field"); + assertEquals(query1, query2); + + query1 = parser.parse("A AND NOT B", "field"); + query2 = parser.parse("+A -B", "field"); + assertEquals(query1, query2); + + query1 = parser.parse("A AND NOT B OR C", "field"); + query2 = parser.parse("(+A -B) OR C", "field"); + assertEquals(query1, query2); - BooleanQuery complex = new BooleanQuery(); - BooleanQuery inner = new BooleanQuery(); - inner.add(new RegexpQuery(new Term("field", "[a-z]\\/[123]")), Occur.MUST); - inner.add(new TermQuery(new Term("path", "/etc/init.d/")), Occur.MUST); - complex.add(inner, Occur.SHOULD); - complex.add(new TermQuery(new Term("field", "/etc/init[.]d/lucene/")), Occur.SHOULD); - assertEquals(complex, qp.parse("/[a-z]\\/[123]/ AND path:/etc/init.d/ OR /etc\\/init\\[.\\]d/lucene/ ")); } - - @Override - public void tearDown() throws Exception { + public void tearDown() { 
BooleanQuery.setMaxClauseCount(originalMaxClauses); - super.tearDown(); } }