From 3a92e1b93ebce73ab148eecc31776dde07c3d56d Mon Sep 17 00:00:00 2001 From: Dawid Weiss Date: Fri, 18 Sep 2020 09:38:20 +0200 Subject: [PATCH] LUCENE-9528: cleanup of flexible query parser's grammar (#1879) --- .../standard/nodes/RegexpQueryNode.java | 14 +- .../standard/parser/StandardSyntaxParser.java | 1237 ++++++----------- .../standard/parser/StandardSyntaxParser.jj | 793 ++++++----- .../parser/StandardSyntaxParserConstants.java | 24 +- .../StandardSyntaxParserTokenManager.java | 346 ++--- .../precedence/TestPrecedenceQueryParser.java | 1 - .../flexible/standard/TestQPHelper.java | 6 +- .../queryparser/util/QueryParserTestBase.java | 1 - 8 files changed, 965 insertions(+), 1457 deletions(-) diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java index cba2612cb61..6ecbacce0f8 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java @@ -26,10 +26,10 @@ import org.apache.lucene.util.BytesRef; /** * A {@link RegexpQueryNode} represents {@link RegexpQuery} query Examples: /[a-z]|[0-9]/ */ -public class RegexpQueryNode extends QueryNodeImpl implements TextableQueryNode, -FieldableNode { +public class RegexpQueryNode extends QueryNodeImpl implements TextableQueryNode, FieldableNode { private CharSequence text; private CharSequence field; + /** * @param field * - field name @@ -46,6 +46,16 @@ FieldableNode { this.text = text.subSequence(begin, end); } + /** + * @param field + * - field name + * @param text + * - value that contains a regular expression + */ + public RegexpQueryNode(CharSequence field, CharSequence text) { + this(field, text, 0, text.length()); + } + public BytesRef textToBytesRef() { return new BytesRef(text); } diff --git 
a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java index 4a5667f2001..61a323851f0 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java @@ -20,13 +20,15 @@ package org.apache.lucene.queryparser.flexible.standard.parser; */ import java.io.StringReader; -import java.util.Vector; -import java.util.Arrays; +import java.io.Reader; +import java.util.Collections; +import java.util.ArrayList; import org.apache.lucene.queryparser.flexible.messages.Message; import org.apache.lucene.queryparser.flexible.messages.MessageImpl; import org.apache.lucene.queryparser.flexible.core.QueryNodeParseException; import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages; +import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; import org.apache.lucene.queryparser.flexible.core.nodes.AndQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.BooleanQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.BoostQueryNode; @@ -35,86 +37,45 @@ import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.ModifierQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.GroupQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.OrQueryNode; -import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.SlopQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; -import 
org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser; import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; +import org.apache.lucene.queryparser.flexible.standard.nodes.RegexpQueryNode; import org.apache.lucene.queryparser.charstream.CharStream; import org.apache.lucene.queryparser.charstream.FastCharStream; +import static org.apache.lucene.queryparser.flexible.standard.parser.EscapeQuerySyntaxImpl.discardEscapeChar; + /** * Parser for the standard Lucene syntax */ public class StandardSyntaxParser implements SyntaxParser, StandardSyntaxParserConstants { - - - // syntax parser constructor - public StandardSyntaxParser() { - this(new FastCharStream(new StringReader(""))); + public StandardSyntaxParser() { + this(new FastCharStream(Reader.nullReader())); } - /** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. - * @param query the query string to be parsed. - * @throws ParseException if the parsing fails - */ - public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { - ReInit(new FastCharStream(new StringReader(query.toString()))); - try { - // TopLevelQuery is a Query followed by the end-of-input (EOF) - QueryNode querynode = TopLevelQuery(field); - return querynode; - } - catch (ParseException tme) { - tme.setQuery(query); - throw tme; - } - catch (Error tme) { - Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage()); - QueryNodeParseException e = new QueryNodeParseException(tme); - e.setQuery(query); - e.setNonLocalizedMessage(message); - throw e; - } - } - final public ModifierQueryNode.Modifier Modifiers() throws ParseException {ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE; - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case NOT: - case PLUS: - case MINUS:{ - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case PLUS:{ - jj_consume_token(PLUS); -ret = 
ModifierQueryNode.Modifier.MOD_REQ; - break; - } - case MINUS:{ - jj_consume_token(MINUS); -ret = ModifierQueryNode.Modifier.MOD_NOT; - break; - } - case NOT:{ - jj_consume_token(NOT); -ret = ModifierQueryNode.Modifier.MOD_NOT; - break; - } - default: - jj_la1[0] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - break; - } - default: - jj_la1[1] = jj_gen; - ; + /** + * Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}. + * @param query the query string to be parsed. + * @throws ParseException if the parsing fails + */ + public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException { + ReInit(new FastCharStream(new StringReader(query.toString()))); + try { + return TopLevelQuery(field); + } catch (ParseException tme) { + tme.setQuery(query); + throw tme; + } catch (Error tme) { + Message message = new MessageImpl(QueryParserMessages.INVALID_SYNTAX_CANNOT_PARSE, query, tme.getMessage()); + QueryNodeParseException e = new QueryNodeParseException(tme); + e.setQuery(query); + e.setNonLocalizedMessage(message); + throw e; } -{if ("" != null) return ret;} - throw new Error("Missing return statement in function"); -} + } -// This makes sure that there is no garbage after the query string final public QueryNode TopLevelQuery(CharSequence field) throws ParseException {QueryNode q; q = Query(field); jj_consume_token(0); @@ -122,68 +83,50 @@ ret = ModifierQueryNode.Modifier.MOD_NOT; throw new Error("Missing return statement in function"); } -// These changes were made to introduce operator precedence: -// - Clause() now returns a QueryNode. -// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object -// - Query does not consume conjunctions (AND, OR) anymore. 
-// - This is now done by two new non-terminals: ConjClause and DisjClause -// The parse tree looks similar to this: -// Query ::= DisjQuery ( DisjQuery )* -// DisjQuery ::= ConjQuery ( OR ConjQuery )* -// ConjQuery ::= Clause ( AND Clause )* -// Clause ::= [ Modifier ] ... - final public - -QueryNode Query(CharSequence field) throws ParseException {Vector clauses = null; - QueryNode c, first=null; - first = DisjQuery(field); + final private QueryNode Query(CharSequence field) throws ParseException {ArrayList clauses = new ArrayList<>(); + QueryNode node; label_1: while (true) { + node = DisjQuery(field); +clauses.add(node); switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case NOT: case PLUS: case MINUS: case LPAREN: case QUOTED: + case NUMBER: case TERM: case REGEXPTERM: case RANGEIN_START: - case RANGEEX_START: - case NUMBER:{ + case RANGEEX_START:{ ; break; } default: - jj_la1[2] = jj_gen; + jj_la1[0] = jj_gen; break label_1; } - c = DisjQuery(field); -if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); } -if (clauses != null) { - {if ("" != null) return new BooleanQueryNode(clauses);} - } else { - // Handle the case of a "pure" negation query which - // needs to be wrapped as a boolean query, otherwise - // the returned result drops the negation. - if (first instanceof ModifierQueryNode) { - ModifierQueryNode m = (ModifierQueryNode) first; - if (m.getModifier() == ModifierQueryNode.Modifier.MOD_NOT) { - {if ("" != null) return new BooleanQueryNode(Arrays.asList(m));} - } - } - {if ("" != null) return first;} +// Handle the case of a "pure" negation query which + // needs to be wrapped as a boolean query, otherwise + // the returned result drops the negation. 
+ if (clauses.size() == 1) { + QueryNode first = clauses.get(0); + if (first instanceof ModifierQueryNode + && ((ModifierQueryNode) first).getModifier() == ModifierQueryNode.Modifier.MOD_NOT) { + clauses.set(0, new BooleanQueryNode(Collections.singletonList(first))); } + } + + {if ("" != null) return clauses.size() == 1 ? clauses.get(0) : new BooleanQueryNode(clauses);} throw new Error("Missing return statement in function"); } - final public QueryNode DisjQuery(CharSequence field) throws ParseException {QueryNode first, c; - Vector clauses = null; - first = ConjQuery(field); + final private QueryNode DisjQuery(CharSequence field) throws ParseException {ArrayList clauses = new ArrayList<>(); + QueryNode node; + node = ConjQuery(field); +clauses.add(node); label_2: while (true) { switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { @@ -192,28 +135,21 @@ if (clauses != null) { break; } default: - jj_la1[3] = jj_gen; + jj_la1[1] = jj_gen; break label_2; } jj_consume_token(OR); - c = ConjQuery(field); -if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } -if (clauses != null) { - {if ("" != null) return new OrQueryNode(clauses);} - } else { - {if ("" != null) return first;} + node = ConjQuery(field); +clauses.add(node); } +{if ("" != null) return clauses.size() == 1 ? 
clauses.get(0) : new OrQueryNode(clauses);} throw new Error("Missing return statement in function"); } - final public QueryNode ConjQuery(CharSequence field) throws ParseException {QueryNode first, c; - Vector clauses = null; - first = ModClause(field); + final private QueryNode ConjQuery(CharSequence field) throws ParseException {ArrayList clauses = new ArrayList<>(); + QueryNode node; + node = ModClause(field); +clauses.add(node); label_3: while (true) { switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { @@ -222,142 +158,110 @@ if (clauses != null) { break; } default: - jj_la1[4] = jj_gen; + jj_la1[2] = jj_gen; break label_3; } jj_consume_token(AND); - c = ModClause(field); -if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } -if (clauses != null) { - {if ("" != null) return new AndQueryNode(clauses);} - } else { - {if ("" != null) return first;} + node = ModClause(field); +clauses.add(node); } +{if ("" != null) return clauses.size() == 1 ? 
clauses.get(0) : new AndQueryNode(clauses);} throw new Error("Missing return statement in function"); } -// QueryNode Query(CharSequence field) : -// { -// List clauses = new ArrayList(); -// List modifiers = new ArrayList(); -// QueryNode q, firstQuery=null; -// ModifierQueryNode.Modifier mods; -// int conj; -// } -// { -// mods=Modifiers() q=Clause(field) -// { -// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; -// -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// } -// ( -// conj=Conjunction() mods=Modifiers() q=Clause(field) -// { -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// //TODO: figure out what to do with AND and ORs -// } -// )* -// { -// if (clauses.size() == 1 && firstQuery != null) -// return firstQuery; -// else { -// return new BooleanQueryNode(clauses); -// } -// } -// } - final public -QueryNode ModClause(CharSequence field) throws ParseException {QueryNode q; - ModifierQueryNode.Modifier mods; - mods = Modifiers(); - q = Clause(field); -if (mods != ModifierQueryNode.Modifier.MOD_NONE) { - q = new ModifierQueryNode(q, mods); - } - {if ("" != null) return q;} - throw new Error("Missing return statement in function"); -} - - final public QueryNode Clause(CharSequence field) throws ParseException {QueryNode q; - Token fieldToken=null, boost=null, operator=null, term=null; - FieldQueryNode qLower, qUpper; - boolean lowerInclusive, upperInclusive; - - boolean group = false; - if (jj_2_2(3)) { - fieldToken = jj_consume_token(TERM); + final private QueryNode ModClause(CharSequence field) throws ParseException {QueryNode q; + ModifierQueryNode.Modifier modifier = ModifierQueryNode.Modifier.MOD_NONE; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case NOT: + case PLUS: + case MINUS:{ switch 
((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case OP_COLON: - case OP_EQUAL:{ - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case OP_COLON:{ - jj_consume_token(OP_COLON); - break; - } - case OP_EQUAL:{ - jj_consume_token(OP_EQUAL); - break; - } - default: - jj_la1[5] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } -field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image); - q = Term(field); + case PLUS:{ + jj_consume_token(PLUS); +modifier = ModifierQueryNode.Modifier.MOD_REQ; break; } - case OP_LESSTHAN: - case OP_LESSTHANEQ: - case OP_MORETHAN: - case OP_MORETHANEQ:{ + case NOT: + case MINUS:{ switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case OP_LESSTHAN:{ - operator = jj_consume_token(OP_LESSTHAN); + case MINUS:{ + jj_consume_token(MINUS); break; } - case OP_LESSTHANEQ:{ - operator = jj_consume_token(OP_LESSTHANEQ); - break; - } - case OP_MORETHAN:{ - operator = jj_consume_token(OP_MORETHAN); - break; - } - case OP_MORETHANEQ:{ - operator = jj_consume_token(OP_MORETHANEQ); + case NOT:{ + jj_consume_token(NOT); break; } default: - jj_la1[6] = jj_gen; + jj_la1[3] = jj_gen; jj_consume_token(-1); throw new ParseException(); } -field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image); +modifier = ModifierQueryNode.Modifier.MOD_NOT; + break; + } + default: + jj_la1[4] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + break; + } + default: + jj_la1[5] = jj_gen; + ; + } + q = Clause(field); +if (modifier != ModifierQueryNode.Modifier.MOD_NONE) { + q = new ModifierQueryNode(q, modifier); + } + {if ("" != null) return q;} + throw new Error("Missing return statement in function"); +} + + final private QueryNode Clause(CharSequence field) throws ParseException {QueryNode q; + if (jj_2_2(2)) { + q = FieldRangeExpr(field); + } else { + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case LPAREN: + case QUOTED: + case NUMBER: + case TERM: + case REGEXPTERM: + case RANGEIN_START: + case RANGEEX_START:{ + if (jj_2_1(2)) { + field = 
FieldName(); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case OP_COLON:{ + jj_consume_token(OP_COLON); + break; + } + case OP_EQUAL:{ + jj_consume_token(OP_EQUAL); + break; + } + default: + jj_la1[6] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } else { + ; + } switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case TERM:{ - term = jj_consume_token(TERM); + case QUOTED: + case NUMBER: + case TERM: + case REGEXPTERM: + case RANGEIN_START: + case RANGEEX_START:{ + q = Term(field); break; } - case QUOTED:{ - term = jj_consume_token(QUOTED); - break; - } - case NUMBER:{ - term = jj_consume_token(NUMBER); + case LPAREN:{ + q = GroupingExpr(field); break; } default: @@ -365,51 +269,6 @@ field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image); jj_consume_token(-1); throw new ParseException(); } -if (term.kind == QUOTED) { - term.image = term.image.substring(1, term.image.length()-1); - } - switch (operator.kind) { - case OP_LESSTHAN: - lowerInclusive = true; - upperInclusive = false; - - qLower = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - - break; - case OP_LESSTHANEQ: - lowerInclusive = true; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - break; - case OP_MORETHAN: - lowerInclusive = false; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - break; - case OP_MORETHANEQ: - lowerInclusive = true; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, 
term.endColumn); - qUpper = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - break; - default: - {if (true) throw new Error("Unhandled case: operator="+operator.toString());} - } - q = new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive); break; } default: @@ -417,103 +276,156 @@ if (term.kind == QUOTED) { jj_consume_token(-1); throw new ParseException(); } - } else if (jj_2_3(3)) { - if (jj_2_1(3)) { - fieldToken = jj_consume_token(TERM); - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case OP_COLON:{ - jj_consume_token(OP_COLON); - break; - } - case OP_EQUAL:{ - jj_consume_token(OP_EQUAL); - break; - } - default: - jj_la1[9] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } -field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image); - } else { - ; - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case QUOTED: - case TERM: - case REGEXPTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER:{ - q = Term(field); - break; - } - case LPAREN:{ - jj_consume_token(LPAREN); - q = Query(field); - jj_consume_token(RPAREN); - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case CARAT:{ - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - } - default: - jj_la1[10] = jj_gen; - ; - } -group=true; - break; - } - default: - jj_la1[11] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - } else { - jj_consume_token(-1); - throw new ParseException(); } -if (boost != null) { - float f = (float)1.0; - try { - f = Float.parseFloat(boost.image); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ - } - } - if (group) { q = new GroupQueryNode(q);} - {if ("" != null) return q;} +{if ("" != null) return q;} throw new Error("Missing return statement in function"); } - final public QueryNode Term(CharSequence field) throws ParseException {Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean fuzzy = false; - boolean regexp = false; - boolean startInc=false; - boolean endInc=false; - QueryNode q =null; - FieldQueryNode qLower, qUpper; - float defaultMinSimilarity = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits; + final private CharSequence FieldName() throws ParseException {Token name; + name = jj_consume_token(TERM); +{if ("" != null) return discardEscapeChar(name.image);} + throw new Error("Missing return statement in function"); +} + + final private GroupQueryNode GroupingExpr(CharSequence field) throws ParseException {QueryNode q; + Token boost; + jj_consume_token(LPAREN); + q = Query(field); + jj_consume_token(RPAREN); switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case TERM: - case REGEXPTERM: + case CARAT:{ + q = Boost(q); + break; + } + default: + jj_la1[9] = jj_gen; + ; + } +{if ("" != null) return new GroupQueryNode(q);} + throw new Error("Missing return statement in function"); +} + + final private QueryNode Boost(QueryNode node) throws ParseException {Token boost; + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); +{if ("" != null) return node == null ? 
node : new BoostQueryNode(node, Float.parseFloat(boost.image));} + throw new Error("Missing return statement in function"); +} + + final private QueryNode FuzzyOp(CharSequence field, Token term, QueryNode node) throws ParseException {Token similarity = null; + jj_consume_token(TILDE); + if (jj_2_3(2)) { + similarity = jj_consume_token(NUMBER); + } else { + ; + } +float fms = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits; + if (similarity != null) { + fms = Float.parseFloat(similarity.image); + if (fms < 0.0f) { + {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));} + } else if (fms >= 1.0f && fms != (int) fms) { + {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS));} + } + } + {if ("" != null) return new FuzzyQueryNode(field, discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn);} + throw new Error("Missing return statement in function"); +} + + final private TermRangeQueryNode FieldRangeExpr(CharSequence field) throws ParseException {Token operator, term; + FieldQueryNode qLower, qUpper; + boolean lowerInclusive, upperInclusive; + field = FieldName(); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case OP_LESSTHAN:{ + jj_consume_token(OP_LESSTHAN); + break; + } + case OP_LESSTHANEQ:{ + jj_consume_token(OP_LESSTHANEQ); + break; + } + case OP_MORETHAN:{ + jj_consume_token(OP_MORETHAN); + break; + } + case OP_MORETHANEQ:{ + jj_consume_token(OP_MORETHANEQ); + break; + } + default: + jj_la1[10] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +operator = token; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case TERM:{ + jj_consume_token(TERM); + break; + } + case QUOTED:{ + jj_consume_token(QUOTED); + break; + } case NUMBER:{ + jj_consume_token(NUMBER); + break; + } + default: + jj_la1[11] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +term = token; +if (term.kind == QUOTED) { + term.image = 
term.image.substring(1, term.image.length() - 1); + } + switch (operator.kind) { + case OP_LESSTHAN: + lowerInclusive = true; + upperInclusive = false; + qLower = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case OP_LESSTHANEQ: + lowerInclusive = true; + upperInclusive = true; + qLower = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case OP_MORETHAN: + lowerInclusive = false; + upperInclusive = true; + qLower = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + break; + case OP_MORETHANEQ: + lowerInclusive = true; + upperInclusive = true; + qLower = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + break; + default: + {if (true) throw new Error("Unhandled case, operator=" + operator);} + } + {if ("" != null) return new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive);} + throw new Error("Missing return statement in function"); +} + + final private QueryNode Term(CharSequence field) throws ParseException {QueryNode q; + Token term, fuzzySlop=null; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case REGEXPTERM:{ + term = jj_consume_token(REGEXPTERM); +q = new RegexpQueryNode(field, term.image.substring(1, term.image.length() - 1)); + break; + } + case NUMBER: + case TERM:{ switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { case TERM:{ term = jj_consume_token(TERM); -q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - break; - } - case REGEXPTERM:{ - term = jj_consume_token(REGEXPTERM); 
-regexp=true; break; } case NUMBER:{ @@ -525,203 +437,150 @@ regexp=true; jj_consume_token(-1); throw new ParseException(); } +q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case FUZZY_SLOP:{ - fuzzySlop = jj_consume_token(FUZZY_SLOP); -fuzzy=true; + case TILDE:{ + q = FuzzyOp(field, term, q); break; } default: jj_la1[13] = jj_gen; ; } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case CARAT:{ - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case FUZZY_SLOP:{ - fuzzySlop = jj_consume_token(FUZZY_SLOP); -fuzzy=true; - break; - } - default: - jj_la1[14] = jj_gen; - ; - } - break; - } - default: - jj_la1[15] = jj_gen; - ; - } -if (fuzzy) { - float fms = defaultMinSimilarity; - try { - fms = Float.parseFloat(fuzzySlop.image.substring(1)); - } catch (Exception ignored) { } - if(fms < 0.0f){ - {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS));} - } else if (fms >= 1.0f && fms != (int) fms) { - {if (true) throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS));} - } - q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); - } else if (regexp) { - String re = term.image.substring(1, term.image.length()-1); - q = new RegexpQueryNode(field, re, 0, re.length()); - } break; } case RANGEIN_START: case RANGEEX_START:{ - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case RANGEIN_START:{ - jj_consume_token(RANGEIN_START); -startInc=true; - break; - } - case RANGEEX_START:{ - jj_consume_token(RANGEEX_START); - break; - } - default: - jj_la1[16] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case RANGE_GOOP:{ - goop1 = jj_consume_token(RANGE_GOOP); - break; - } - case RANGE_QUOTED:{ - goop1 = 
jj_consume_token(RANGE_QUOTED); - break; - } - case RANGE_TO:{ - goop1 = jj_consume_token(RANGE_TO); - break; - } - default: - jj_la1[17] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - jj_consume_token(RANGE_TO); - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case RANGE_GOOP:{ - goop2 = jj_consume_token(RANGE_GOOP); - break; - } - case RANGE_QUOTED:{ - goop2 = jj_consume_token(RANGE_QUOTED); - break; - } - case RANGE_TO:{ - goop2 = jj_consume_token(RANGE_TO); - break; - } - default: - jj_la1[18] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case RANGEIN_END:{ - jj_consume_token(RANGEIN_END); -endInc=true; - break; - } - case RANGEEX_END:{ - jj_consume_token(RANGEEX_END); - break; - } - default: - jj_la1[19] = jj_gen; - jj_consume_token(-1); - throw new ParseException(); - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case CARAT:{ - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - } - default: - jj_la1[20] = jj_gen; - ; - } -if (goop1.kind == RANGE_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } - if (goop2.kind == RANGE_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); - q = new TermRangeQueryNode(qLower, qUpper, startInc ? true : false, endInc ? 
true : false); + q = TermRangeExpr(field); break; } case QUOTED:{ - term = jj_consume_token(QUOTED); -q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1); - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case FUZZY_SLOP:{ - fuzzySlop = jj_consume_token(FUZZY_SLOP); - break; - } - default: - jj_la1[21] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { - case CARAT:{ - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - } - default: - jj_la1[22] = jj_gen; - ; - } -int phraseSlop = 0; - - if (fuzzySlop != null) { - try { - phraseSlop = (int)Float.parseFloat(fuzzySlop.image.substring(1)); - q = new SlopQueryNode(q, phraseSlop); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no PhraseSlop", if - * slop number is invalid) - */ - } - } + q = QuotedTerm(field); break; } default: - jj_la1[23] = jj_gen; + jj_la1[14] = jj_gen; jj_consume_token(-1); throw new ParseException(); } -if (boost != null) { - float f = (float)1.0; - try { - f = Float.parseFloat(boost.image); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case CARAT:{ + q = Boost(q); + break; } + default: + jj_la1[15] = jj_gen; + ; } - {if ("" != null) return q;} +{if ("" != null) return q;} + throw new Error("Missing return statement in function"); +} + + final private QueryNode QuotedTerm(CharSequence field) throws ParseException {QueryNode q; + Token term, slop; + term = jj_consume_token(QUOTED); +String image = term.image.substring(1, term.image.length() - 1); + q = new QuotedFieldQueryNode(field, discardEscapeChar(image), term.beginColumn + 1, term.endColumn - 1); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case TILDE:{ + jj_consume_token(TILDE); + slop = jj_consume_token(NUMBER); +q = new SlopQueryNode(q, (int) Float.parseFloat(slop.image)); + break; + } + default: + jj_la1[16] = jj_gen; + ; + } +{if ("" != null) return q;} + throw new Error("Missing return statement in function"); +} + + final private TermRangeQueryNode TermRangeExpr(CharSequence field) throws ParseException {Token left, right; + boolean leftInclusive = false; + boolean rightInclusive = false; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case RANGEIN_START:{ + jj_consume_token(RANGEIN_START); +leftInclusive = true; + break; + } + case RANGEEX_START:{ + jj_consume_token(RANGEEX_START); + break; + } + default: + jj_la1[17] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case RANGE_GOOP:{ + jj_consume_token(RANGE_GOOP); + break; + } + case RANGE_QUOTED:{ + jj_consume_token(RANGE_QUOTED); + break; + } + case RANGE_TO:{ + jj_consume_token(RANGE_TO); + break; + } + default: + jj_la1[18] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +left = token; + jj_consume_token(RANGE_TO); + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case RANGE_GOOP:{ + jj_consume_token(RANGE_GOOP); + break; + } + case RANGE_QUOTED:{ + jj_consume_token(RANGE_QUOTED); + break; + } + 
case RANGE_TO:{ + jj_consume_token(RANGE_TO); + break; + } + default: + jj_la1[19] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +right = token; + switch ((jj_ntk==-1)?jj_ntk_f():jj_ntk) { + case RANGEIN_END:{ + jj_consume_token(RANGEIN_END); +rightInclusive = true; + break; + } + case RANGEEX_END:{ + jj_consume_token(RANGEEX_END); + break; + } + default: + jj_la1[20] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } +if (left.kind == RANGE_QUOTED) { + left.image = left.image.substring(1, left.image.length() - 1); + } + if (right.kind == RANGE_QUOTED) { + right.image = right.image.substring(1, right.image.length() - 1); + } + + FieldQueryNode qLower = new FieldQueryNode(field, + discardEscapeChar(left.image), left.beginColumn, left.endColumn); + FieldQueryNode qUpper = new FieldQueryNode(field, + discardEscapeChar(right.image), right.beginColumn, right.endColumn); + + {if ("" != null) return new TermRangeQueryNode(qLower, qUpper, leftInclusive, rightInclusive);} throw new Error("Missing return statement in function"); } @@ -749,192 +608,21 @@ if (boost != null) { finally { jj_save(2, xla); } } - private boolean jj_3R_22() - { - if (jj_scan_token(OR)) return true; - return false; - } - - private boolean jj_3R_13() - { - if (jj_3R_21()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_22()) { jj_scanpos = xsp; break; } - } - return false; - } - - private boolean jj_3R_17() - { - if (jj_scan_token(RANGEIN_START)) return true; - return false; - } - - private boolean jj_3R_23() - { - if (jj_3R_25()) return true; - if (jj_3R_26()) return true; - return false; - } - - private boolean jj_3R_11() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_17()) { - jj_scanpos = xsp; - if (jj_scan_token(27)) return true; - } - xsp = jj_scanpos; - if (jj_scan_token(33)) { - jj_scanpos = xsp; - if (jj_scan_token(32)) { - jj_scanpos = xsp; - if (jj_scan_token(29)) return true; - } - } - if (jj_scan_token(RANGE_TO)) return 
true; - return false; - } - - private boolean jj_3R_30() - { - if (jj_scan_token(NOT)) return true; - return false; - } - - private boolean jj_3R_29() - { - if (jj_scan_token(MINUS)) return true; - return false; - } - - private boolean jj_3R_24() - { - if (jj_scan_token(AND)) return true; - return false; - } - - private boolean jj_3R_7() - { - if (jj_scan_token(LPAREN)) return true; - if (jj_3R_9()) return true; - if (jj_scan_token(RPAREN)) return true; - return false; - } - - private boolean jj_3R_28() - { - if (jj_scan_token(PLUS)) return true; - return false; - } - - private boolean jj_3R_27() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_28()) { - jj_scanpos = xsp; - if (jj_3R_29()) { - jj_scanpos = xsp; - if (jj_3R_30()) return true; - } - } - return false; - } - - private boolean jj_3R_21() - { - if (jj_3R_23()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_24()) { jj_scanpos = xsp; break; } - } - return false; - } - - private boolean jj_3R_19() - { - if (jj_scan_token(CARAT)) return true; - if (jj_scan_token(NUMBER)) return true; - return false; - } - - private boolean jj_3R_6() - { - if (jj_3R_8()) return true; - return false; - } - - private boolean jj_3R_18() - { - if (jj_scan_token(FUZZY_SLOP)) return true; - return false; - } - - private boolean jj_3R_25() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_27()) jj_scanpos = xsp; - return false; - } - - private boolean jj_3R_16() - { - if (jj_scan_token(REGEXPTERM)) return true; - return false; - } - - private boolean jj_3_1() - { - if (jj_scan_token(TERM)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_scan_token(15)) { - jj_scanpos = xsp; - if (jj_scan_token(16)) return true; - } - return false; - } - - private boolean jj_3R_20() - { - if (jj_scan_token(CARAT)) return true; - if (jj_scan_token(NUMBER)) return true; - return false; - } - - private boolean jj_3_3() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3_1()) jj_scanpos = xsp; - xsp = jj_scanpos; - if 
(jj_3R_6()) { - jj_scanpos = xsp; - if (jj_3R_7()) return true; - } - return false; - } - - private boolean jj_3R_15() + private boolean jj_3R_4() { if (jj_scan_token(TERM)) return true; return false; } - private boolean jj_3R_14() + private boolean jj_3_2() { - if (jj_3R_13()) return true; + if (jj_3R_5()) return true; return false; } private boolean jj_3R_5() { + if (jj_3R_4()) return true; Token xsp; xsp = jj_scanpos; if (jj_scan_token(17)) { @@ -947,103 +635,24 @@ if (boost != null) { } } } - xsp = jj_scanpos; - if (jj_scan_token(23)) { - jj_scanpos = xsp; - if (jj_scan_token(22)) { - jj_scanpos = xsp; - if (jj_scan_token(28)) return true; - } - } return false; } - private boolean jj_3R_4() + private boolean jj_3_3() { + if (jj_scan_token(NUMBER)) return true; + return false; + } + + private boolean jj_3_1() + { + if (jj_3R_4()) return true; Token xsp; xsp = jj_scanpos; if (jj_scan_token(15)) { jj_scanpos = xsp; if (jj_scan_token(16)) return true; } - if (jj_3R_8()) return true; - return false; - } - - private boolean jj_3R_10() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3R_15()) { - jj_scanpos = xsp; - if (jj_3R_16()) { - jj_scanpos = xsp; - if (jj_scan_token(28)) return true; - } - } - xsp = jj_scanpos; - if (jj_3R_18()) jj_scanpos = xsp; - xsp = jj_scanpos; - if (jj_3R_19()) jj_scanpos = xsp; - return false; - } - - private boolean jj_3R_12() - { - if (jj_scan_token(QUOTED)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_scan_token(24)) jj_scanpos = xsp; - xsp = jj_scanpos; - if (jj_3R_20()) jj_scanpos = xsp; - return false; - } - - private boolean jj_3R_9() - { - if (jj_3R_13()) return true; - Token xsp; - while (true) { - xsp = jj_scanpos; - if (jj_3R_14()) { jj_scanpos = xsp; break; } - } - return false; - } - - private boolean jj_3_2() - { - if (jj_scan_token(TERM)) return true; - Token xsp; - xsp = jj_scanpos; - if (jj_3R_4()) { - jj_scanpos = xsp; - if (jj_3R_5()) return true; - } - return false; - } - - private boolean jj_3R_8() - { - Token 
xsp; - xsp = jj_scanpos; - if (jj_3R_10()) { - jj_scanpos = xsp; - if (jj_3R_11()) { - jj_scanpos = xsp; - if (jj_3R_12()) return true; - } - } - return false; - } - - private boolean jj_3R_26() - { - Token xsp; - xsp = jj_scanpos; - if (jj_3_2()) { - jj_scanpos = xsp; - if (jj_3_3()) return true; - } return false; } @@ -1057,7 +666,7 @@ if (boost != null) { private Token jj_scanpos, jj_lastpos; private int jj_la; private int jj_gen; - final private int[] jj_la1 = new int[24]; + final private int[] jj_la1 = new int[21]; static private int[] jj_la1_0; static private int[] jj_la1_1; static { @@ -1065,10 +674,10 @@ if (boost != null) { jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x1c00,0x1c00,0x1ec03c00,0x200,0x100,0x18000,0x1e0000,0x10c00000,0x1f8000,0x18000,0x200000,0x1ec02000,0x12800000,0x1000000,0x1000000,0x200000,0xc000000,0x20000000,0x20000000,0xc0000000,0x200000,0x1000000,0x200000,0x1ec00000,}; + jj_la1_0 = new int[] {0x1f803c00,0x200,0x100,0x1400,0x1c00,0x1c00,0x18000,0x1f802000,0x1f802000,0x200000,0x1e0000,0x3800000,0x3000000,0x400000,0x1f800000,0x200000,0x400000,0x18000000,0x20000000,0x20000000,0xc0000000,}; } private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x3,0x0,0x0,0x0,0x0,0x0,}; + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x3,0x3,0x0,}; } final private JJCalls[] jj_2_rtns = new JJCalls[3]; private boolean jj_rescan = false; @@ -1080,7 +689,7 @@ if (boost != null) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < 21; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -1090,7 +699,7 @@ if (boost != null) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < 21; i++) jj_la1[i] = -1; for (int i = 0; i < 
jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -1100,7 +709,7 @@ if (boost != null) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < 21; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -1110,7 +719,7 @@ if (boost != null) { token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 24; i++) jj_la1[i] = -1; + for (int i = 0; i < 21; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -1241,7 +850,7 @@ if (boost != null) { la1tokens[jj_kind] = true; jj_kind = -1; } - for (int i = 0; i < 24; i++) { + for (int i = 0; i < 21; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { if ((jj_la1_0[i] & (1< TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > -// every character that follows a backslash is considered as an escaped character -| <#_ESCAPED_CHAR: "\\" ~[] > -| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", - "<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/" ] - | <_ESCAPED_CHAR> ) > -| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > -| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > -| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > + <#_NUM_CHAR: ["0"-"9"] > + // Every character that follows a backslash is considered as an escaped character + | <#_ESCAPED_CHAR: "\\" ~[] > + | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", + "<", ">", "=", "[", "]", "\"", "{", "}", "~", "\\", "/" ] + | <_ESCAPED_CHAR> ) > + | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > + | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > + | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > } SKIP : { - < <_WHITESPACE>> + < <_WHITESPACE> > } TOKEN : { - -| -| -| -| -| -| -| -| -| " > -| =" > -| : Boost -| )* "\""> -| (<_TERM_CHAR>)* > -| )+ ( "." 
(<_NUM_CHAR>)+ )? )? > -| -| : Range -| : Range -} - - TOKEN : { -)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT + + | + | + | + | + | + | + | + | + | " > + | =" > + | + | + | )* "\""> + | )+ ( "." (<_NUM_CHAR>)+ )? > + | (<_TERM_CHAR>)* > + | + | : Range + | : Range } TOKEN : { - -| : DEFAULT -| : DEFAULT -| -| + + | : DEFAULT + | : DEFAULT + | + | } -ModifierQueryNode.Modifier Modifiers() : { - ModifierQueryNode.Modifier ret = ModifierQueryNode.Modifier.MOD_NONE; -} -{ - [ - { ret = ModifierQueryNode.Modifier.MOD_REQ; } - | { ret = ModifierQueryNode.Modifier.MOD_NOT; } - | { ret = ModifierQueryNode.Modifier.MOD_NOT; } - ] - { return ret; } -} -// This makes sure that there is no garbage after the query string -QueryNode TopLevelQuery(CharSequence field) : + +// Non-terminal production rules. + +/** + * The top-level rule ensures that there is no garbage after the query string. + * + *
{@code
+ * TopLevelQuery ::= Query <EOF>
+ * }
+ */ +public QueryNode TopLevelQuery(CharSequence field) : { QueryNode q; } { - q=Query(field) - { - return q; + q = Query(field) { + return q; } } -// These changes were made to introduce operator precedence: -// - Clause() now returns a QueryNode. -// - The modifiers are consumed by Clause() and returned as part of the QueryNode Object -// - Query does not consume conjunctions (AND, OR) anymore. -// - This is now done by two new non-terminals: ConjClause and DisjClause -// The parse tree looks similar to this: -// Query ::= DisjQuery ( DisjQuery )* -// DisjQuery ::= ConjQuery ( OR ConjQuery )* -// ConjQuery ::= Clause ( AND Clause )* -// Clause ::= [ Modifier ] ... - - -QueryNode Query(CharSequence field) : -{ - Vector clauses = null; - QueryNode c, first=null; +/** + * A query consists of one or more disjunction queries (solves operator precedence). + *
{@code
+ * Query ::= DisjQuery ( DisjQuery )*
+ * DisjQuery ::= ConjQuery ( OR ConjQuery )*
+ * ConjQuery ::= ModClause ( AND ModClause )*
+ * }
+ */ +private QueryNode Query(CharSequence field) : { + ArrayList clauses = new ArrayList(); + QueryNode node; } { - first=DisjQuery(field) - ( - c=DisjQuery(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* - { - if (clauses != null) { - return new BooleanQueryNode(clauses); - } else { - // Handle the case of a "pure" negation query which - // needs to be wrapped as a boolean query, otherwise - // the returned result drops the negation. - if (first instanceof ModifierQueryNode) { - ModifierQueryNode m = (ModifierQueryNode) first; - if (m.getModifier() == ModifierQueryNode.Modifier.MOD_NOT) { - return new BooleanQueryNode(Arrays.asList(m)); - } - } - return first; + ( node = DisjQuery(field) { clauses.add(node); } )+ + { + // Handle the case of a "pure" negation query which + // needs to be wrapped as a boolean query, otherwise + // the returned result drops the negation. + if (clauses.size() == 1) { + QueryNode first = clauses.get(0); + if (first instanceof ModifierQueryNode + && ((ModifierQueryNode) first).getModifier() == ModifierQueryNode.Modifier.MOD_NOT) { + clauses.set(0, new BooleanQueryNode(Collections.singletonList(first))); } } -} -QueryNode DisjQuery(CharSequence field) : { - QueryNode first, c; - Vector clauses = null; -} -{ - first = ConjQuery(field) - ( - c=ConjQuery(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* - { - if (clauses != null) { - return new OrQueryNode(clauses); - } else { - return first; - } + return clauses.size() == 1 ? clauses.get(0) : new BooleanQueryNode(clauses); } } -QueryNode ConjQuery(CharSequence field) : { - QueryNode first, c; - Vector clauses = null; +/** + * A disjoint clause consists of one or more conjunction clauses. + *
{@code
+ * DisjQuery ::= ConjQuery ( OR ConjQuery )*
+ * }
+ */ +private QueryNode DisjQuery(CharSequence field) : { + ArrayList clauses = new ArrayList(); + QueryNode node; } { - first = ModClause(field) - ( - c=ModClause(field) - { - if (clauses == null) { - clauses = new Vector(); - clauses.addElement(first); - } - clauses.addElement(c); - } - )* + node = ConjQuery(field) { clauses.add(node); } + ( node = ConjQuery(field) { clauses.add(node); } )* { - if (clauses != null) { - return new AndQueryNode(clauses); - } else { - return first; - } + return clauses.size() == 1 ? clauses.get(0) : new OrQueryNode(clauses); } } -// QueryNode Query(CharSequence field) : -// { -// List clauses = new ArrayList(); -// List modifiers = new ArrayList(); -// QueryNode q, firstQuery=null; -// ModifierQueryNode.Modifier mods; -// int conj; -// } -// { -// mods=Modifiers() q=Clause(field) -// { -// if (mods == ModifierQueryNode.Modifier.MOD_NONE) firstQuery=q; -// -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// } -// ( -// conj=Conjunction() mods=Modifiers() q=Clause(field) -// { -// // do not create modifier nodes with MOD_NONE -// if (mods != ModifierQueryNode.Modifier.MOD_NONE) { -// q = new ModifierQueryNode(q, mods); -// } -// clauses.add(q); -// //TODO: figure out what to do with AND and ORs -// } -// )* -// { -// if (clauses.size() == 1 && firstQuery != null) -// return firstQuery; -// else { -// return new BooleanQueryNode(clauses); -// } -// } -// } - -QueryNode ModClause(CharSequence field) : { - QueryNode q; - ModifierQueryNode.Modifier mods; +/** + * A conjunction clause consists of one or more modifier-clause pairs. + *
{@code
+ * ConjQuery ::= ModClause ( AND ModClause )*
+ * }
+ */ +private QueryNode ConjQuery(CharSequence field) : { + ArrayList clauses = new ArrayList(); + QueryNode node; } { - mods=Modifiers() q= Clause(field) { - if (mods != ModifierQueryNode.Modifier.MOD_NONE) { - q = new ModifierQueryNode(q, mods); - } - return q; - } + node = ModClause(field) { clauses.add(node); } + ( node = ModClause(field) { clauses.add(node); } )* + { + return clauses.size() == 1 ? clauses.get(0) : new AndQueryNode(clauses); + } } -QueryNode Clause(CharSequence field) : { +/** + * A modifier-atomic clause pair. + *
{@code
+ * ModClause ::= (Modifier)? Clause
+ * }
+ */ +private QueryNode ModClause(CharSequence field) : { QueryNode q; - Token fieldToken=null, boost=null, operator=null, term=null; + ModifierQueryNode.Modifier modifier = ModifierQueryNode.Modifier.MOD_NONE; +} +{ + ( { modifier = ModifierQueryNode.Modifier.MOD_REQ; } + | ( | ) { modifier = ModifierQueryNode.Modifier.MOD_NOT; } + )? + q = Clause(field) + { + if (modifier != ModifierQueryNode.Modifier.MOD_NONE) { + q = new ModifierQueryNode(q, modifier); + } + return q; + } +} + +/** + * An atomic clause consists of a field range expression, a potentially + * field-qualified term or a group. + * + *
{@code
+ * Clause ::= FieldRangeExpr
+ *          | (FieldName (':' | '='))? (Term | GroupingExpr)
+ * }
+ */ +private QueryNode Clause(CharSequence field) : { + QueryNode q; +} +{ + ( + LOOKAHEAD(2) q = FieldRangeExpr(field) + | (LOOKAHEAD(2) field = FieldName() ( | ))? ( q = Term(field) | q = GroupingExpr(field)) + ) + { + return q; + } +} + +/** + * A field name. This utility method strips escape characters from field names. + */ +private CharSequence FieldName() : { + Token name; +} +{ + name = { return discardEscapeChar(name.image); } +} + +/** + * An grouping expression is a Query with potential boost applied to it. + * + *
{@code
+ * GroupingExpr ::= '(' Query ')' ('^' <NUMBER>)?
+ * }
+ */ +private GroupQueryNode GroupingExpr(CharSequence field) : { + QueryNode q; + Token boost; +} +{ + q = Query(field) (q = Boost(q))? + { + return new GroupQueryNode(q); + } +} + +/** + * Score boost modifier. + * + *
{@code
+ * Boost ::= '^' <NUMBER>
+ * }
+ */ +private QueryNode Boost(QueryNode node) : { + Token boost; +} +{ + boost = + { + return node == null ? node : new BoostQueryNode(node, Float.parseFloat(boost.image)); + } +} + +/** + * Fuzzy term modifier. + * + *
{@code
+ * Fuzzy ::= '~' <NUMBER>?
+ * }
+ */ +private QueryNode FuzzyOp(CharSequence field, Token term, QueryNode node) : { + Token similarity = null; +} +{ + (LOOKAHEAD(2) similarity = )? + { + float fms = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits; + if (similarity != null) { + fms = Float.parseFloat(similarity.image); + if (fms < 0.0f) { + throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); + } else if (fms >= 1.0f && fms != (int) fms) { + throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS)); + } + } + return new FuzzyQueryNode(field, discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); + } +} + +/** + * A field range expression selects all field values larger/ smaller (or equal) than a given one. + *
{@code
+ * FieldRangeExpr ::= FieldName ('<' | '>' | '<=' | '>=') (<TERM> | <QUOTED> | <NUMBER>)
+ * }
+ */ +private TermRangeQueryNode FieldRangeExpr(CharSequence field) : { + Token operator, term; FieldQueryNode qLower, qUpper; boolean lowerInclusive, upperInclusive; - - boolean group = false; } { -( - LOOKAHEAD(3) fieldToken= ( - ( | ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} q=Term(field) - | ( operator= | operator= | operator= | operator= ) {field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);}( term= | term= | term= ) - { - if (term.kind == QUOTED) { - term.image = term.image.substring(1, term.image.length()-1); - } - switch (operator.kind) { - case OP_LESSTHAN: - lowerInclusive = true; - upperInclusive = false; - - qLower = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - - break; - case OP_LESSTHANEQ: - lowerInclusive = true; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - break; - case OP_MORETHAN: - lowerInclusive = false; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - break; - case OP_MORETHANEQ: - lowerInclusive = true; - upperInclusive = true; - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); - qUpper = new FieldQueryNode(field, - "*", term.beginColumn, term.endColumn); - break; - default: - throw new Error("Unhandled case: operator="+operator.toString()); - } - q = new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive); - } - ) - | LOOKAHEAD(3) [ - LOOKAHEAD(3) - fieldToken= - ( | ) 
{field=EscapeQuerySyntaxImpl.discardEscapeChar(fieldToken.image);} - ] - ( - (q=Term(field)) - | ( q=Query(field) ( boost=)? {group=true;}) - ) -) - { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.parseFloat(boost.image); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no boost", if - * boost number is invalid) - */ - } - } - if (group) { q = new GroupQueryNode(q);} - return q; - } -} - - -QueryNode Term(CharSequence field) : { - Token term, boost=null, fuzzySlop=null, goop1, goop2; - boolean fuzzy = false; - boolean regexp = false; - boolean startInc=false; - boolean endInc=false; - QueryNode q =null; - FieldQueryNode qLower, qUpper; - float defaultMinSimilarity = org.apache.lucene.search.FuzzyQuery.defaultMaxEdits; -} -{ - ( - ( - term= { q = new FieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), term.beginColumn, term.endColumn); } - | term= { regexp=true; } - | term= - ) - [ fuzzySlop= { fuzzy=true; } ] - [ boost= [ fuzzySlop= { fuzzy=true; } ] ] - { - if (fuzzy) { - float fms = defaultMinSimilarity; - try { - fms = Float.parseFloat(fuzzySlop.image.substring(1)); - } catch (Exception ignored) { } - if(fms < 0.0f){ - throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_LIMITS)); - } else if (fms >= 1.0f && fms != (int) fms) { - throw new ParseException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX_FUZZY_EDITS)); - } - q = new FuzzyQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image), fms, term.beginColumn, term.endColumn); - } else if (regexp) { - String re = term.image.substring(1, term.image.length()-1); - q = new RegexpQueryNode(field, re, 0, re.length()); - } - } - | ( ( {startInc=true;} | ) - ( goop1=|goop1=|goop1= ) - ( ) - ( goop2=|goop2=|goop2= ) - ( {endInc=true;} | )) - [ boost= ] - { - if (goop1.kind 
== RANGE_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } - if (goop2.kind == RANGE_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } - - qLower = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(goop1.image), goop1.beginColumn, goop1.endColumn); - qUpper = new FieldQueryNode(field, - EscapeQuerySyntaxImpl.discardEscapeChar(goop2.image), goop2.beginColumn, goop2.endColumn); - q = new TermRangeQueryNode(qLower, qUpper, startInc ? true : false, endInc ? true : false); - } - | term= {q = new QuotedFieldQueryNode(field, EscapeQuerySyntaxImpl.discardEscapeChar(term.image.substring(1, term.image.length()-1)), term.beginColumn + 1, term.endColumn - 1);} - [ fuzzySlop= ] - [ boost= ] - { - int phraseSlop = 0; - - if (fuzzySlop != null) { - try { - phraseSlop = (int)Float.parseFloat(fuzzySlop.image.substring(1)); - q = new SlopQueryNode(q, phraseSlop); - } - catch (Exception ignored) { - /* Should this be handled somehow? (defaults to "no PhraseSlop", if - * slop number is invalid) - */ - } - } - - } - ) + field = FieldName() + ( | | | ) { operator = token; } + ( | | ) { term = token; } { - if (boost != null) { - float f = (float)1.0; - try { - f = Float.parseFloat(boost.image); - // avoid boosting null queries, such as those caused by stop words - if (q != null) { - q = new BoostQueryNode(q, f); - } - } catch (Exception ignored) { - /* Should this be handled somehow? 
(defaults to "no boost", if - * boost number is invalid) - */ - } + if (term.kind == QUOTED) { + term.image = term.image.substring(1, term.image.length() - 1); } - return q; + switch (operator.kind) { + case OP_LESSTHAN: + lowerInclusive = true; + upperInclusive = false; + qLower = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case OP_LESSTHANEQ: + lowerInclusive = true; + upperInclusive = true; + qLower = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + break; + case OP_MORETHAN: + lowerInclusive = false; + upperInclusive = true; + qLower = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + break; + case OP_MORETHANEQ: + lowerInclusive = true; + upperInclusive = true; + qLower = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); + qUpper = new FieldQueryNode(field, "*", term.beginColumn, term.endColumn); + break; + default: + throw new Error("Unhandled case, operator=" + operator); + } + return new TermRangeQueryNode(qLower, qUpper, lowerInclusive, upperInclusive); + } +} + +/** + * A term expression. + * + *
{@code
+ * Term ::= (<TERM> | <NUMBER>) ('~' <NUMBER>)? ('^' <NUMBER>)?
+ *        | <REGEXPTERM> ('^' <NUMBER>)?
+ *        | TermRangeExpr ('^' <NUMBER>)?
+ *        | QuotedTerm ('^' <NUMBER>)?
+ * }
+ */ +private QueryNode Term(CharSequence field) : { + QueryNode q; + Token term, fuzzySlop=null; +} +{ + ( + term = + { q = new RegexpQueryNode(field, term.image.substring(1, term.image.length() - 1)); } + | (term = | term = ) + { q = new FieldQueryNode(field, discardEscapeChar(term.image), term.beginColumn, term.endColumn); } + ( q = FuzzyOp(field, term, q) )? + | q = TermRangeExpr(field) + | q = QuotedTerm(field) + ) + ( q = Boost(q) )? + { + return q; + } +} + + +/** + * A quoted term (phrase). + * + *
{@code
+ * QuotedTerm ::= <QUOTED> ('~' <NUMBER>)?
+ * }
+ */ +private QueryNode QuotedTerm(CharSequence field) : { + QueryNode q; + Token term, slop; +} +{ + term = + { + String image = term.image.substring(1, term.image.length() - 1); + q = new QuotedFieldQueryNode(field, discardEscapeChar(image), term.beginColumn + 1, term.endColumn - 1); + } + ( slop = { q = new SlopQueryNode(q, (int) Float.parseFloat(slop.image)); } )? + { + return q; + } +} + +/** + * A value range expression. + * + *
{@code
+ * TermRangeExpr ::= ('[' | '{') <RANGE_START> 'TO' <RANGE_END> (']' | '}')
+ * }
+ */ +private TermRangeQueryNode TermRangeExpr(CharSequence field) : { + Token left, right; + boolean leftInclusive = false; + boolean rightInclusive = false; +} +{ + // RANGE_TO can be consumed as range start/end because this needs to be accepted as a valid range: + // [TO TO TO] + ( + ( { leftInclusive = true; } | ) + ( | | ) { left = token; } + + ( | | ) { right = token; } + ( { rightInclusive = true; } | ) + ) + + { + if (left.kind == RANGE_QUOTED) { + left.image = left.image.substring(1, left.image.length() - 1); + } + if (right.kind == RANGE_QUOTED) { + right.image = right.image.substring(1, right.image.length() - 1); + } + + FieldQueryNode qLower = new FieldQueryNode(field, + discardEscapeChar(left.image), left.beginColumn, left.endColumn); + FieldQueryNode qUpper = new FieldQueryNode(field, + discardEscapeChar(right.image), right.beginColumn, right.endColumn); + + return new TermRangeQueryNode(qLower, qUpper, leftInclusive, rightInclusive); } } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java index 27b29853235..1e07b01834b 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java @@ -51,19 +51,19 @@ public interface StandardSyntaxParserConstants { /** RegularExpression Id. */ int CARAT = 21; /** RegularExpression Id. */ - int QUOTED = 22; + int TILDE = 22; /** RegularExpression Id. */ - int TERM = 23; + int QUOTED = 23; /** RegularExpression Id. */ - int FUZZY_SLOP = 24; + int NUMBER = 24; /** RegularExpression Id. */ - int REGEXPTERM = 25; + int TERM = 25; /** RegularExpression Id. */ - int RANGEIN_START = 26; + int REGEXPTERM = 26; /** RegularExpression Id. 
*/ - int RANGEEX_START = 27; + int RANGEIN_START = 27; /** RegularExpression Id. */ - int NUMBER = 28; + int RANGEEX_START = 28; /** RegularExpression Id. */ int RANGE_TO = 29; /** RegularExpression Id. */ @@ -76,11 +76,9 @@ public interface StandardSyntaxParserConstants { int RANGE_GOOP = 33; /** Lexical state. */ - int Boost = 0; + int Range = 0; /** Lexical state. */ - int Range = 1; - /** Lexical state. */ - int DEFAULT = 2; + int DEFAULT = 1; /** Literal token values. */ String[] tokenImage = { @@ -106,13 +104,13 @@ public interface StandardSyntaxParserConstants { "\">\"", "\">=\"", "\"^\"", + "\"~\"", "", + "", "", - "", "", "\"[\"", "\"{\"", - "", "\"TO\"", "\"]\"", "\"}\"", diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java index 2860fae683c..b0ef7fdbb63 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java @@ -40,6 +40,8 @@ package org.apache.lucene.queryparser.flexible.standard.parser; + + @@ -50,15 +52,15 @@ public class StandardSyntaxParserTokenManager implements StandardSyntaxParserCon // (debugStream omitted). /** Set debug output. */ // (setDebugStream omitted). 
-private final int jjStopStringLiteralDfa_2(int pos, long active0){ +private final int jjStopStringLiteralDfa_1(int pos, long active0){ switch (pos) { default : return -1; } } -private final int jjStartNfa_2(int pos, long active0){ - return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); +private final int jjStartNfa_1(int pos, long active0){ + return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); } private int jjStopAtPos(int pos, int kind) { @@ -66,7 +68,7 @@ private int jjStopAtPos(int pos, int kind) jjmatchedPos = pos; return pos + 1; } -private int jjMoveStringLiteralDfa0_2(){ +private int jjMoveStringLiteralDfa0_1(){ switch(curChar) { case 40: @@ -81,26 +83,28 @@ private int jjMoveStringLiteralDfa0_2(){ return jjStopAtPos(0, 15); case 60: jjmatchedKind = 17; - return jjMoveStringLiteralDfa1_2(0x40000L); + return jjMoveStringLiteralDfa1_1(0x40000L); case 61: return jjStopAtPos(0, 16); case 62: jjmatchedKind = 19; - return jjMoveStringLiteralDfa1_2(0x100000L); + return jjMoveStringLiteralDfa1_1(0x100000L); case 91: - return jjStopAtPos(0, 26); + return jjStopAtPos(0, 27); case 94: return jjStopAtPos(0, 21); case 123: - return jjStopAtPos(0, 27); + return jjStopAtPos(0, 28); + case 126: + return jjStopAtPos(0, 22); default : - return jjMoveNfa_2(0, 0); + return jjMoveNfa_1(0, 0); } } -private int jjMoveStringLiteralDfa1_2(long active0){ +private int jjMoveStringLiteralDfa1_1(long active0){ try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { - jjStopStringLiteralDfa_2(0, active0); + jjStopStringLiteralDfa_1(0, active0); return 1; } switch(curChar) @@ -114,7 +118,7 @@ private int jjMoveStringLiteralDfa1_2(long active0){ default : break; } - return jjStartNfa_2(0, active0); + return jjStartNfa_1(0, active0); } static final long[] jjbitVec0 = { 0x1L, 0x0L, 0x0L, 0x0L @@ -128,10 +132,10 @@ static final long[] jjbitVec3 = { static final long[] jjbitVec4 = { 0xfffefffffffffffeL, 0xffffffffffffffffL, 
0xffffffffffffffffL, 0xffffffffffffffffL }; -private int jjMoveNfa_2(int startState, int curPos) +private int jjMoveNfa_1(int startState, int curPos) { int startsAt = 0; - jjnewStateCnt = 33; + jjnewStateCnt = 32; int i = 1; jjstateSet[0] = startState; int kind = 0x7fffffff; @@ -149,9 +153,9 @@ private int jjMoveNfa_2(int startState, int curPos) case 0: if ((0x8bff54f8ffffd9ffL & l) != 0L) { - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } } else if ((0x100002600L & l) != 0L) { @@ -167,7 +171,13 @@ private int jjMoveNfa_2(int startState, int curPos) if (kind > 10) kind = 10; } - if (curChar == 38) + if ((0x3ff000000000000L & l) != 0L) + { + if (kind > 24) + kind = 24; + { jjCheckNAddTwoStates(19, 20); } + } + else if (curChar == 38) jjstateSet[jjnewStateCnt++] = 4; break; case 4: @@ -194,58 +204,58 @@ private int jjMoveNfa_2(int startState, int curPos) { jjCheckNAddStates(3, 5); } break; case 18: - if (curChar == 34 && kind > 22) - kind = 22; + if (curChar == 34 && kind > 23) + kind = 23; break; case 19: - if ((0x8bff54f8ffffd9ffL & l) == 0L) + if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 24) + kind = 24; + { jjCheckNAddTwoStates(19, 20); } break; case 20: - if ((0x8bff7cf8ffffd9ffL & l) == 0L) + if (curChar == 46) + { jjCheckNAdd(21); } + break; + case 21: + if ((0x3ff000000000000L & l) == 0L) break; - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 24) + kind = 24; + { jjCheckNAdd(21); } break; case 22: - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if ((0x8bff54f8ffffd9ffL & l) == 0L) + break; + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } + break; + case 23: + if ((0x8bff7cf8ffffd9ffL & l) == 0L) + break; + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } break; case 25: - if ((0x3ff000000000000L & l) == 0L) - break; - if 
(kind > 24) - kind = 24; - { jjAddStates(6, 7); } - break; - case 26: - if (curChar == 46) - { jjCheckNAdd(27); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } break; case 27: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 24) - kind = 24; - { jjCheckNAdd(27); } - break; - case 28: - case 30: + case 29: if (curChar == 47) { jjCheckNAddStates(0, 2); } break; - case 29: + case 28: if ((0xffff7fffffffffffL & l) != 0L) { jjCheckNAddStates(0, 2); } break; - case 32: - if (curChar == 47 && kind > 25) - kind = 25; + case 31: + if (curChar == 47 && kind > 26) + kind = 26; break; default : break; } @@ -261,18 +271,12 @@ private int jjMoveNfa_2(int startState, int curPos) case 0: if ((0x97ffffff87ffffffL & l) != 0L) { - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } - } - else if (curChar == 126) - { - if (kind > 24) - kind = 24; - jjstateSet[jjnewStateCnt++] = 25; + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } } else if (curChar == 92) - { jjCheckNAdd(22); } + { jjCheckNAdd(25); } if (curChar == 78) jjstateSet[jjnewStateCnt++] = 11; else if (curChar == 124) @@ -333,40 +337,33 @@ private int jjMoveNfa_2(int startState, int curPos) case 17: { jjCheckNAddStates(3, 5); } break; - case 19: - case 20: + case 22: + case 23: if ((0x97ffffff87ffffffL & l) == 0L) break; - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } - break; - case 21: - if (curChar == 92) - { jjCheckNAddTwoStates(22, 22); } - break; - case 22: - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } - break; - case 23: - if (curChar == 92) - { jjCheckNAdd(22); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } break; case 24: - if (curChar != 126) - break; - if (kind > 24) - kind = 24; - jjstateSet[jjnewStateCnt++] = 25; + if (curChar == 92) + { jjCheckNAddTwoStates(25, 25); } break; - case 29: + case 25: + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } + break; + case 26: + if (curChar 
== 92) + { jjCheckNAdd(25); } + break; + case 28: { jjAddStates(0, 2); } break; - case 31: + case 30: if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 30; + jjstateSet[jjnewStateCnt++] = 29; break; default : break; } @@ -391,9 +388,9 @@ private int jjMoveNfa_2(int startState, int curPos) } if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } } break; case 15: @@ -401,22 +398,22 @@ private int jjMoveNfa_2(int startState, int curPos) if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjCheckNAddStates(3, 5); } break; - case 19: - case 20: + case 22: + case 23: if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) break; - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } break; - case 22: + case 25: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) break; - if (kind > 23) - kind = 23; - { jjCheckNAddTwoStates(20, 21); } + if (kind > 25) + kind = 25; + { jjCheckNAddTwoStates(23, 24); } break; - case 29: + case 28: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { jjAddStates(0, 2); } break; @@ -431,96 +428,13 @@ private int jjMoveNfa_2(int startState, int curPos) kind = 0x7fffffff; } ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) + if ((i = jjnewStateCnt) == (startsAt = 32 - (jjnewStateCnt = startsAt))) return curPos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return curPos; } } } -private int jjMoveStringLiteralDfa0_0() -{ - return jjMoveNfa_0(0, 0); -} -private int jjMoveNfa_0(int startState, int curPos) -{ - int startsAt = 0; - jjnewStateCnt = 3; - int i = 1; - jjstateSet[0] = startState; - int kind = 0x7fffffff; - for (;;) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - long l = 1L << curChar; - do - { - switch(jjstateSet[--i]) - { - case 0: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 
28) - kind = 28; - { jjAddStates(8, 9); } - break; - case 1: - if (curChar == 46) - { jjCheckNAdd(2); } - break; - case 2: - if ((0x3ff000000000000L & l) == 0L) - break; - if (kind > 28) - kind = 28; - { jjCheckNAdd(2); } - break; - default : break; - } - } while(i != startsAt); - } - else if (curChar < 128) - { - long l = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - default : break; - } - } while(i != startsAt); - } - else - { - int hiByte = (curChar >> 8); - int i1 = hiByte >> 6; - long l1 = 1L << (hiByte & 077); - int i2 = (curChar & 0xff) >> 6; - long l2 = 1L << (curChar & 077); - do - { - switch(jjstateSet[--i]) - { - default : if (i1 == 0 || l1 == 0 || i2 == 0 || l2 == 0) break; else break; - } - } while(i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) - return curPos; - try { curChar = input_stream.readChar(); } - catch(java.io.IOException e) { return curPos; } - } -} -private final int jjStopStringLiteralDfa_1(int pos, long active0){ +private final int jjStopStringLiteralDfa_0(int pos, long active0){ switch (pos) { case 0: @@ -534,48 +448,48 @@ private final int jjStopStringLiteralDfa_1(int pos, long active0){ return -1; } } -private final int jjStartNfa_1(int pos, long active0){ - return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); +private final int jjStartNfa_0(int pos, long active0){ + return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); } -private int jjMoveStringLiteralDfa0_1(){ +private int jjMoveStringLiteralDfa0_0(){ switch(curChar) { case 84: - return jjMoveStringLiteralDfa1_1(0x20000000L); + return jjMoveStringLiteralDfa1_0(0x20000000L); case 93: return jjStopAtPos(0, 30); case 125: return jjStopAtPos(0, 31); default : - return jjMoveNfa_1(0, 0); + return jjMoveNfa_0(0, 0); } } -private int jjMoveStringLiteralDfa1_1(long active0){ 
+private int jjMoveStringLiteralDfa1_0(long active0){ try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { - jjStopStringLiteralDfa_1(0, active0); + jjStopStringLiteralDfa_0(0, active0); return 1; } switch(curChar) { case 79: if ((active0 & 0x20000000L) != 0L) - return jjStartNfaWithStates_1(1, 29, 6); + return jjStartNfaWithStates_0(1, 29, 6); break; default : break; } - return jjStartNfa_1(0, active0); + return jjStartNfa_0(0, active0); } -private int jjStartNfaWithStates_1(int pos, int kind, int state) +private int jjStartNfaWithStates_0(int pos, int kind, int state) { jjmatchedKind = kind; jjmatchedPos = pos; try { curChar = input_stream.readChar(); } catch(java.io.IOException e) { return pos + 1; } - return jjMoveNfa_1(state, pos + 1); + return jjMoveNfa_0(state, pos + 1); } -private int jjMoveNfa_1(int startState, int curPos) +private int jjMoveNfa_0(int startState, int curPos) { int startsAt = 0; jjnewStateCnt = 7; @@ -614,11 +528,11 @@ private int jjMoveNfa_1(int startState, int curPos) break; case 2: if ((0xfffffffbffffffffL & l) != 0L) - { jjCheckNAddStates(10, 12); } + { jjCheckNAddStates(6, 8); } break; case 3: if (curChar == 34) - { jjCheckNAddStates(10, 12); } + { jjCheckNAddStates(6, 8); } break; case 5: if (curChar == 34 && kind > 32) @@ -651,7 +565,7 @@ private int jjMoveNfa_1(int startState, int curPos) { jjCheckNAdd(6); } break; case 2: - { jjAddStates(10, 12); } + { jjAddStates(6, 8); } break; case 4: if (curChar == 92) @@ -687,7 +601,7 @@ private int jjMoveNfa_1(int startState, int curPos) break; case 2: if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - { jjAddStates(10, 12); } + { jjAddStates(6, 8); } break; case 6: if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) @@ -717,8 +631,8 @@ private int jjMoveNfa_1(int startState, int curPos) /** Token literal values. 
*/ public static final String[] jjstrLiteralImages = { "", null, null, null, null, null, null, null, null, null, null, "\53", "\55", -"\50", "\51", "\72", "\75", "\74", "\74\75", "\76", "\76\75", "\136", null, null, -null, null, "\133", "\173", null, "\124\117", "\135", "\175", null, null, }; +"\50", "\51", "\72", "\75", "\74", "\74\75", "\76", "\76\75", "\136", "\176", null, +null, null, null, "\133", "\173", "\124\117", "\135", "\175", null, null, }; protected Token jjFillToken() { final Token t; @@ -745,7 +659,7 @@ protected Token jjFillToken() return t; } static final int[] jjnextStates = { - 29, 31, 32, 15, 16, 18, 25, 26, 0, 1, 2, 4, 5, + 28, 30, 31, 15, 16, 18, 2, 4, 5, }; private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) { @@ -784,8 +698,8 @@ private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, lo } } -int curLexState = 2; -int defaultLexState = 2; +int curLexState = 1; +int defaultLexState = 1; int jjnewStateCnt; int jjround; int jjmatchedPos; @@ -824,11 +738,6 @@ public Token getNextToken() jjmatchedPos = 0; curPos = jjMoveStringLiteralDfa0_1(); break; - case 2: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_2(); - break; } if (jjmatchedKind != 0x7fffffff) { @@ -954,7 +863,7 @@ private void jjCheckNAddStates(int start, int end) { int i; jjround = 0x80000001; - for (i = 33; i-- > 0;) + for (i = 32; i-- > 0;) jjrounds[i] = 0x80000000; } @@ -969,7 +878,7 @@ private void jjCheckNAddStates(int start, int end) /** Switch to specified lex state. */ public void SwitchTo(int lexState) { - if (lexState >= 3 || lexState < 0) + if (lexState >= 2 || lexState < 0) throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); else curLexState = lexState; @@ -978,15 +887,14 @@ private void jjCheckNAddStates(int start, int end) /** Lexer state names. 
*/ public static final String[] lexStateNames = { - "Boost", "Range", "DEFAULT", }; /** Lex State array. */ public static final int[] jjnewLexState = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, - -1, 1, 1, 2, -1, 2, 2, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 0, 0, -1, 1, 1, -1, -1, }; static final long[] jjtoToken = { 0x3ffffff01L, @@ -1002,8 +910,8 @@ static final long[] jjtoMore = { }; protected CharStream input_stream; - private final int[] jjrounds = new int[33]; - private final int[] jjstateSet = new int[2 * 33]; + private final int[] jjrounds = new int[32]; + private final int[] jjstateSet = new int[2 * 32]; private final StringBuilder jjimage = new StringBuilder(); private StringBuilder image = jjimage; private int jjimageLen; diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java index 9ed7a356678..f7b9bbc4818 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/precedence/TestPrecedenceQueryParser.java @@ -301,7 +301,6 @@ public class TestPrecedenceQueryParser extends LuceneTestCase { assertQueryEquals("term~", null, "term~2"); assertQueryEquals("term~0.7", null, "term~1"); assertQueryEquals("term~^3", null, "(term~2)^3.0"); - assertQueryEquals("term^3~", null, "(term~2)^3.0"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "(term*germ)^3.0"); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java index 
e0fd4baf3bc..bfef03b1910 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/flexible/standard/TestQPHelper.java @@ -567,8 +567,6 @@ public class TestQPHelper extends LuceneTestCase { assertQueryEquals("term~0.7", null, "term~1"); assertQueryEquals("term~^3", null, "(term~2)^3.0"); - - assertQueryEquals("term^3~", null, "(term~2)^3.0"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "(term*germ)^3.0"); @@ -585,6 +583,7 @@ public class TestQPHelper extends LuceneTestCase { assertEquals(FuzzyQuery.defaultPrefixLength, fq.getPrefixLength()); assertQueryNodeException("term~1.1"); // value > 1, throws exception + assertQueryNodeException("term^3~"); // Boost must be applied to FuzzyOp. assertTrue(getQuery("term*germ", null) instanceof WildcardQuery); @@ -1173,6 +1172,9 @@ public class TestQPHelper extends LuceneTestCase { re = new RegexpQuery(new Term("field", "http~0.5")); assertEquals(re, qp.parse("field:/http~0.5/", df)); assertEquals(re, qp.parse("/http~0.5/", df)); + + // fuzzy op doesn't apply to regexps. 
+ assertQueryNodeException("/http/~2"); re = new RegexpQuery(new Term("field", "boo")); assertEquals(re, qp.parse("field:/boo/", df)); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java index b4451a6cd52..4b4e061ea21 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java @@ -459,7 +459,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase { assertQueryEquals("term~1", null, "term~1"); assertQueryEquals("term~0.7", null, "term~1"); assertQueryEquals("term~^3", null, "(term~2)^3.0"); - assertQueryEquals("term^3~", null, "(term~2)^3.0"); assertQueryEquals("term*germ", null, "term*germ"); assertQueryEquals("term*germ^3", null, "(term*germ)^3.0");