mirror of https://github.com/apache/lucene.git
LUCENE-5336: add SimpleQueryParser for human-entered queries
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1541151 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
85a8991beb
commit
3c9e753df6
|
@ -61,6 +61,13 @@ Optimizations
|
|||
on Windows if NIOFSDirectory is used, mmapped files are still locked.
|
||||
(Michael Poindexter, Robert Muir, Uwe Schindler)
|
||||
|
||||
======================= Lucene 4.7.0 =======================
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-5336: Add SimpleQueryParser: parser for human-entered queries.
|
||||
(Jack Conradson via Robert Muir)
|
||||
|
||||
======================= Lucene 4.6.0 =======================
|
||||
|
||||
New Features
|
||||
|
|
|
@ -0,0 +1,517 @@
|
|||
package org.apache.lucene.queryparser.simple;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.util.QueryBuilder;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* SimpleQueryParser is used to parse human readable query syntax.
|
||||
* <p>
|
||||
* The main idea behind this parser is that a person should be able to type
|
||||
* whatever they want to represent a query, and this parser will do its best
|
||||
* to interpret what to search for no matter how poorly composed the request
|
||||
* may be. Tokens are considered to be any of a term, phrase, or subquery for the
|
||||
* operations described below. Whitespace including ' ' '\n' '\r' and '\t'
|
||||
* and certain operators may be used to delimit tokens ( ) + | " .
|
||||
* <p>
|
||||
* Any errors in query syntax will be ignored and the parser will attempt
|
||||
* to decipher what it can; however, this may mean odd or unexpected results.
|
||||
* <h4>Query Operators</h4>
|
||||
* <ul>
|
||||
* <li>'{@code +}' specifies {@code AND} operation: <tt>token1+token2</tt>
|
||||
* <li>'{@code |}' specifies {@code OR} operation: <tt>token1|token2</tt>
|
||||
* <li>'{@code -}' negates a single token: <tt>-token0</tt>
|
||||
* <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt>
|
||||
* <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt>
|
||||
* <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The {@link #setDefaultOperator default operator} is {@code OR} if no other operator is specified.
|
||||
* For example, the following will {@code OR} {@code token1} and {@code token2} together:
|
||||
* <tt>token1 token2</tt>
|
||||
* <p>
|
||||
* Normal operator precedence will be simple order from right to left.
|
||||
* For example, the following will evaluate {@code token1 OR token2} first,
|
||||
* then {@code AND} with {@code token3}:
|
||||
* <blockquote>token1 | token2 + token3</blockquote>
|
||||
* <h4>Escaping</h4>
|
||||
* <p>
|
||||
* An individual term may contain any possible character with certain characters
|
||||
* requiring escaping using a '{@code \}'. The following characters will need to be escaped in
|
||||
* terms and phrases:
|
||||
* {@code + | " ( ) ' \}
|
||||
* <p>
|
||||
* The '{@code -}' operator is a special case. On individual terms (not phrases) the first
|
||||
* character of a term that is {@code -} must be escaped; however, any '{@code -}' characters
|
||||
* beyond the first character do not need to be escaped.
|
||||
* For example:
|
||||
* <ul>
|
||||
* <li>{@code -term1} -- Specifies {@code NOT} operation against {@code term1}
|
||||
* <li>{@code \-term1} -- Searches for the term {@code -term1}.
|
||||
* <li>{@code term-1} -- Searches for the term {@code term-1}.
|
||||
* <li>{@code term\-1} -- Searches for the term {@code term-1}.
|
||||
* </ul>
|
||||
* <p>
|
||||
* The '{@code *}' operator is a special case. On individual terms (not phrases) the last
|
||||
* character of a term that is '{@code *}' must be escaped; however, any '{@code *}' characters
|
||||
* before the last character do not need to be escaped:
|
||||
* <ul>
|
||||
* <li>{@code term1*} -- Searches for the prefix {@code term1}
|
||||
* <li>{@code term1\*} -- Searches for the term {@code term1*}
|
||||
* <li>{@code term*1} -- Searches for the term {@code term*1}
|
||||
* <li>{@code term\*1} -- Searches for the term {@code term*1}
|
||||
* </ul>
|
||||
* <p>
|
||||
* Note that above examples consider the terms before text processing.
|
||||
*/
|
||||
public class SimpleQueryParser extends QueryBuilder {
|
||||
/** Map of fields to query against with their weights */
|
||||
protected final Map<String,Float> weights;
|
||||
/** flags to the parser (to turn features on/off) */
|
||||
protected final int flags;
|
||||
|
||||
/** Enables {@code AND} operator (+) */
|
||||
public static final int AND_OPERATOR = 1<<0;
|
||||
/** Enables {@code NOT} operator (-) */
|
||||
public static final int NOT_OPERATOR = 1<<1;
|
||||
/** Enables {@code OR} operator (|) */
|
||||
public static final int OR_OPERATOR = 1<<2;
|
||||
/** Enables {@code PREFIX} operator (*) */
|
||||
public static final int PREFIX_OPERATOR = 1<<3;
|
||||
/** Enables {@code PHRASE} operator (") */
|
||||
public static final int PHRASE_OPERATOR = 1<<4;
|
||||
/** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */
|
||||
public static final int PRECEDENCE_OPERATORS = 1<<5;
|
||||
/** Enables {@code ESCAPE} operator (\) */
|
||||
public static final int ESCAPE_OPERATOR = 1<<6;
|
||||
/** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
|
||||
public static final int WHITESPACE_OPERATOR = 1<<7;
|
||||
|
||||
private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD;
|
||||
|
||||
/** Creates a new parser searching over a single field. */
|
||||
public SimpleQueryParser(Analyzer analyzer, String field) {
|
||||
this(analyzer, Collections.singletonMap(field, 1.0F));
|
||||
}
|
||||
|
||||
/** Creates a new parser searching over multiple fields with different weights. */
|
||||
public SimpleQueryParser(Analyzer analyzer, Map<String, Float> weights) {
|
||||
this(analyzer, weights, -1);
|
||||
}
|
||||
|
||||
/** Creates a new parser with custom flags used to enable/disable certain features. */
|
||||
public SimpleQueryParser(Analyzer analyzer, Map<String, Float> weights, int flags) {
|
||||
super(analyzer);
|
||||
this.weights = weights;
|
||||
this.flags = flags;
|
||||
}
|
||||
|
||||
/** Parses the query text and returns parsed query (or null if empty) */
|
||||
public Query parse(String queryText) {
|
||||
char data[] = queryText.toCharArray();
|
||||
char buffer[] = new char[data.length];
|
||||
|
||||
State state = new State(data, buffer, 0, data.length);
|
||||
parseSubQuery(state);
|
||||
return state.top;
|
||||
}
|
||||
|
||||
private void parseSubQuery(State state) {
|
||||
while (state.index < state.length) {
|
||||
if (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0) {
|
||||
// the beginning of a subquery has been found
|
||||
consumeSubQuery(state);
|
||||
} else if (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0) {
|
||||
// this is an extraneous character so it is ignored
|
||||
++state.index;
|
||||
} else if (state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0) {
|
||||
// the beginning of a phrase has been found
|
||||
consumePhrase(state);
|
||||
} else if (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0) {
|
||||
// an and operation has been explicitly set
|
||||
// if an operation has already been set this one is ignored
|
||||
// if a term (or phrase or subquery) has not been found yet the
|
||||
// operation is also ignored since there is no previous
|
||||
// term (or phrase or subquery) to and with
|
||||
if (state.currentOperation == null && state.top != null) {
|
||||
state.currentOperation = BooleanClause.Occur.MUST;
|
||||
}
|
||||
|
||||
++state.index;
|
||||
} else if (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0) {
|
||||
// an or operation has been explicitly set
|
||||
// if an operation has already been set this one is ignored
|
||||
// if a term (or phrase or subquery) has not been found yet the
|
||||
// operation is also ignored since there is no previous
|
||||
// term (or phrase or subquery) to or with
|
||||
if (state.currentOperation == null && state.top != null) {
|
||||
state.currentOperation = BooleanClause.Occur.SHOULD;
|
||||
}
|
||||
|
||||
++state.index;
|
||||
} else if (state.data[state.index] == '-' && (flags & NOT_OPERATOR) != 0) {
|
||||
// a not operator has been found, so increase the not count
|
||||
// two not operators in a row negate each other
|
||||
++state.not;
|
||||
++state.index;
|
||||
|
||||
// continue so the not operator is not reset
|
||||
// before the next character is determined
|
||||
continue;
|
||||
} else if ((state.data[state.index] == ' '
|
||||
|| state.data[state.index] == '\t'
|
||||
|| state.data[state.index] == '\n'
|
||||
|| state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0) {
|
||||
// ignore any whitespace found as it may have already been
|
||||
// used a delimiter across a term (or phrase or subquery)
|
||||
// or is simply extraneous
|
||||
++state.index;
|
||||
} else {
|
||||
// the beginning of a token has been found
|
||||
consumeToken(state);
|
||||
}
|
||||
|
||||
// reset the not operator as even whitespace is not allowed when
|
||||
// specifying the not operation for a term (or phrase or subquery)
|
||||
state.not = 0;
|
||||
}
|
||||
}
|
||||
|
||||
private void consumeSubQuery(State state) {
|
||||
assert (flags & PRECEDENCE_OPERATORS) != 0;
|
||||
int start = ++state.index;
|
||||
int precedence = 1;
|
||||
boolean escaped = false;
|
||||
|
||||
while (state.index < state.length) {
|
||||
if (!escaped) {
|
||||
if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
|
||||
// an escape character has been found so
|
||||
// whatever character is next will become
|
||||
// part of the subquery unless the escape
|
||||
// character is the last one in the data
|
||||
escaped = true;
|
||||
++state.index;
|
||||
|
||||
continue;
|
||||
} else if (state.data[state.index] == '(') {
|
||||
// increase the precedence as there is a
|
||||
// subquery in the current subquery
|
||||
++precedence;
|
||||
} else if (state.data[state.index] == ')') {
|
||||
--precedence;
|
||||
|
||||
if (precedence == 0) {
|
||||
// this should be the end of the subquery
|
||||
// all characters found will used for
|
||||
// creating the subquery
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
escaped = false;
|
||||
++state.index;
|
||||
}
|
||||
|
||||
if (state.index == state.length) {
|
||||
// a closing parenthesis was never found so the opening
|
||||
// parenthesis is considered extraneous and will be ignored
|
||||
state.index = start;
|
||||
} else if (state.index == start) {
|
||||
// a closing parenthesis was found immediately after the opening
|
||||
// parenthesis so the current operation is reset since it would
|
||||
// have been applied to this subquery
|
||||
state.currentOperation = null;
|
||||
|
||||
++state.index;
|
||||
} else {
|
||||
// a complete subquery has been found and is recursively parsed by
|
||||
// starting over with a new state object
|
||||
State subState = new State(state.data, state.buffer, start, state.index);
|
||||
parseSubQuery(subState);
|
||||
buildQueryTree(state, subState.top);
|
||||
|
||||
++state.index;
|
||||
}
|
||||
}
|
||||
|
||||
private void consumePhrase(State state) {
|
||||
assert (flags & PHRASE_OPERATOR) != 0;
|
||||
int start = ++state.index;
|
||||
int copied = 0;
|
||||
boolean escaped = false;
|
||||
|
||||
while (state.index < state.length) {
|
||||
if (!escaped) {
|
||||
if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
|
||||
// an escape character has been found so
|
||||
// whatever character is next will become
|
||||
// part of the phrase unless the escape
|
||||
// character is the last one in the data
|
||||
escaped = true;
|
||||
++state.index;
|
||||
|
||||
continue;
|
||||
} else if (state.data[state.index] == '"') {
|
||||
// this should be the end of the phrase
|
||||
// all characters found will used for
|
||||
// creating the phrase query
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
escaped = false;
|
||||
state.buffer[copied++] = state.data[state.index++];
|
||||
}
|
||||
|
||||
if (state.index == state.length) {
|
||||
// a closing double quote was never found so the opening
|
||||
// double quote is considered extraneous and will be ignored
|
||||
state.index = start;
|
||||
} else if (state.index == start) {
|
||||
// a closing double quote was found immediately after the opening
|
||||
// double quote so the current operation is reset since it would
|
||||
// have been applied to this phrase
|
||||
state.currentOperation = null;
|
||||
|
||||
++state.index;
|
||||
} else {
|
||||
// a complete phrase has been found and is parsed through
|
||||
// through the analyzer from the given field
|
||||
String phrase = new String(state.buffer, 0, copied);
|
||||
Query branch = newPhraseQuery(phrase);
|
||||
buildQueryTree(state, branch);
|
||||
|
||||
++state.index;
|
||||
}
|
||||
}
|
||||
|
||||
private void consumeToken(State state) {
|
||||
int copied = 0;
|
||||
boolean escaped = false;
|
||||
boolean prefix = false;
|
||||
|
||||
while (state.index < state.length) {
|
||||
if (!escaped) {
|
||||
if (state.data[state.index] == '\\' && (flags & ESCAPE_OPERATOR) != 0) {
|
||||
// an escape character has been found so
|
||||
// whatever character is next will become
|
||||
// part of the term unless the escape
|
||||
// character is the last one in the data
|
||||
escaped = true;
|
||||
prefix = false;
|
||||
++state.index;
|
||||
|
||||
continue;
|
||||
} else if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0)
|
||||
|| (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0)
|
||||
|| (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0)
|
||||
|| (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
|
||||
|| (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
|
||||
|| ((state.data[state.index] == ' '
|
||||
|| state.data[state.index] == '\t'
|
||||
|| state.data[state.index] == '\n'
|
||||
|| state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) {
|
||||
// this should be the end of the term
|
||||
// all characters found will used for
|
||||
// creating the term query
|
||||
break;
|
||||
}
|
||||
|
||||
// wildcard tracks whether or not the last character
|
||||
// was a '*' operator that hasn't been escaped
|
||||
// there must be at least one valid character before
|
||||
// searching for a prefixed set of terms
|
||||
prefix = copied > 0 && state.data[state.index] == '*' && (flags & PREFIX_OPERATOR) != 0;
|
||||
}
|
||||
|
||||
escaped = false;
|
||||
state.buffer[copied++] = state.data[state.index++];
|
||||
}
|
||||
|
||||
if (copied > 0) {
|
||||
final Query branch;
|
||||
|
||||
if (prefix) {
|
||||
// if a term is found with a closing '*' it is considered to be a prefix query
|
||||
// and will have prefix added as an option
|
||||
String token = new String(state.buffer, 0, copied - 1);
|
||||
branch = newPrefixQuery(token);
|
||||
} else {
|
||||
// a standard term has been found so it will be run through
|
||||
// the entire analysis chain from the specified schema field
|
||||
String token = new String(state.buffer, 0, copied);
|
||||
branch = newDefaultQuery(token);
|
||||
}
|
||||
|
||||
buildQueryTree(state, branch);
|
||||
}
|
||||
}
|
||||
|
||||
// buildQueryTree should be called after a term, phrase, or subquery
|
||||
// is consumed to be added to our existing query tree
|
||||
// this method will only add to the existing tree if the branch contained in state is not null
|
||||
private void buildQueryTree(State state, Query branch) {
|
||||
if (branch != null) {
|
||||
// modify our branch to a BooleanQuery wrapper for not
|
||||
// this is necessary any time a term, phrase, or subquery is negated
|
||||
if (state.not % 2 == 1) {
|
||||
BooleanQuery nq = new BooleanQuery();
|
||||
nq.add(branch, BooleanClause.Occur.MUST_NOT);
|
||||
nq.add(new MatchAllDocsQuery(), BooleanClause.Occur.SHOULD);
|
||||
branch = nq;
|
||||
}
|
||||
|
||||
// first term (or phrase or subquery) found and will begin our query tree
|
||||
if (state.top == null) {
|
||||
state.top = branch;
|
||||
} else {
|
||||
// more than one term (or phrase or subquery) found
|
||||
// set currentOperation to the default if no other operation is explicitly set
|
||||
if (state.currentOperation == null) {
|
||||
state.currentOperation = defaultOperator;
|
||||
}
|
||||
|
||||
// operational change requiring a new parent node
|
||||
// this occurs if the previous operation is not the same as current operation
|
||||
// because the previous operation must be evaluated separately to preserve
|
||||
// the proper precedence and the current operation will take over as the top of the tree
|
||||
if (state.previousOperation != state.currentOperation) {
|
||||
BooleanQuery bq = new BooleanQuery();
|
||||
bq.add(state.top, state.currentOperation);
|
||||
state.top = bq;
|
||||
}
|
||||
|
||||
// reset all of the state for reuse
|
||||
((BooleanQuery)state.top).add(branch, state.currentOperation);
|
||||
state.previousOperation = state.currentOperation;
|
||||
}
|
||||
|
||||
// reset the current operation as it was intended to be applied to
|
||||
// the incoming term (or phrase or subquery) even if branch was null
|
||||
// due to other possible errors
|
||||
state.currentOperation = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory method to generate a standard query (no phrase or prefix operators).
|
||||
*/
|
||||
protected Query newDefaultQuery(String text) {
|
||||
BooleanQuery bq = new BooleanQuery(true);
|
||||
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
||||
Query q = createBooleanQuery(entry.getKey(), text, defaultOperator);
|
||||
if (q != null) {
|
||||
q.setBoost(entry.getValue());
|
||||
bq.add(q, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return simplify(bq);
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory method to generate a phrase query.
|
||||
*/
|
||||
protected Query newPhraseQuery(String text) {
|
||||
BooleanQuery bq = new BooleanQuery(true);
|
||||
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
||||
Query q = createPhraseQuery(entry.getKey(), text);
|
||||
if (q != null) {
|
||||
q.setBoost(entry.getValue());
|
||||
bq.add(q, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
}
|
||||
return simplify(bq);
|
||||
}
|
||||
|
||||
/**
|
||||
* Factory method to generate a prefix query.
|
||||
*/
|
||||
protected Query newPrefixQuery(String text) {
|
||||
BooleanQuery bq = new BooleanQuery(true);
|
||||
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
||||
PrefixQuery prefix = new PrefixQuery(new Term(entry.getKey(), text));
|
||||
prefix.setBoost(entry.getValue());
|
||||
bq.add(prefix, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return simplify(bq);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper to simplify boolean queries with 0 or 1 clause
|
||||
*/
|
||||
protected Query simplify(BooleanQuery bq) {
|
||||
if (bq.clauses().isEmpty()) {
|
||||
return null;
|
||||
} else if (bq.clauses().size() == 1) {
|
||||
return bq.clauses().get(0).getQuery();
|
||||
} else {
|
||||
return bq;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the implicit operator setting, which will be
|
||||
* either {@code SHOULD} or {@code MUST}.
|
||||
*/
|
||||
public BooleanClause.Occur getDefaultOperator() {
|
||||
return defaultOperator;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the implicit operator setting, which must be
|
||||
* either {@code SHOULD} or {@code MUST}.
|
||||
*/
|
||||
public void setDefaultOperator(BooleanClause.Occur operator) {
|
||||
if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) {
|
||||
throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed");
|
||||
}
|
||||
this.defaultOperator = operator;
|
||||
}
|
||||
|
||||
static class State {
|
||||
final char[] data; // the characters in the query string
|
||||
final char[] buffer; // a temporary buffer used to reduce necessary allocations
|
||||
int index;
|
||||
int length;
|
||||
|
||||
BooleanClause.Occur currentOperation;
|
||||
BooleanClause.Occur previousOperation;
|
||||
int not;
|
||||
|
||||
Query top;
|
||||
|
||||
State(char[] data, char[] buffer, int index, int length) {
|
||||
this.data = data;
|
||||
this.buffer = buffer;
|
||||
this.index = index;
|
||||
this.length = length;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<body>
|
||||
A simple query parser for human-entered queries.
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,558 @@
|
|||
package org.apache.lucene.queryparser.simple;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.AND_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.ESCAPE_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.NOT_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.OR_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PHRASE_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PRECEDENCE_OPERATORS;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PREFIX_OPERATOR;
|
||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.WHITESPACE_OPERATOR;
|
||||
|
||||
/** Tests for {@link SimpleQueryParser} */
|
||||
public class TestSimpleQueryParser extends LuceneTestCase {
|
||||
|
||||
/**
|
||||
* helper to parse a query with whitespace+lowercase analyzer across "field",
|
||||
* with default operator of MUST
|
||||
*/
|
||||
private Query parse(String text) {
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
SimpleQueryParser parser = new SimpleQueryParser(analyzer, "field");
|
||||
parser.setDefaultOperator(Occur.MUST);
|
||||
return parser.parse(text);
|
||||
}
|
||||
|
||||
/** test a simple term */
|
||||
public void testTerm() throws Exception {
|
||||
Query expected = new TermQuery(new Term("field", "foobar"));
|
||||
|
||||
assertEquals(expected, parse("foobar"));
|
||||
}
|
||||
|
||||
/** test a simple phrase */
|
||||
public void testPhrase() throws Exception {
|
||||
PhraseQuery expected = new PhraseQuery();
|
||||
expected.add(new Term("field", "foo"));
|
||||
expected.add(new Term("field", "bar"));
|
||||
|
||||
assertEquals(expected, parse("\"foo bar\""));
|
||||
}
|
||||
|
||||
/** test a simple prefix */
|
||||
public void testPrefix() throws Exception {
|
||||
PrefixQuery expected = new PrefixQuery(new Term("field", "foobar"));
|
||||
|
||||
assertEquals(expected, parse("foobar*"));
|
||||
}
|
||||
|
||||
/** test some AND'd terms using '+' operator */
|
||||
public void testAND() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "bar")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("foo+bar"));
|
||||
}
|
||||
|
||||
/** test some AND'd phrases using '+' operator */
|
||||
public void testANDPhrase() throws Exception {
|
||||
PhraseQuery phrase1 = new PhraseQuery();
|
||||
phrase1.add(new Term("field", "foo"));
|
||||
phrase1.add(new Term("field", "bar"));
|
||||
PhraseQuery phrase2 = new PhraseQuery();
|
||||
phrase2.add(new Term("field", "star"));
|
||||
phrase2.add(new Term("field", "wars"));
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(phrase1, Occur.MUST);
|
||||
expected.add(phrase2, Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("\"foo bar\"+\"star wars\""));
|
||||
}
|
||||
|
||||
/** test some AND'd terms (just using whitespace) */
|
||||
public void testANDImplicit() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "bar")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("foo bar"));
|
||||
}
|
||||
|
||||
/** test some OR'd terms */
|
||||
public void testOR() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "foo")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("foo|bar"));
|
||||
assertEquals(expected, parse("foo||bar"));
|
||||
}
|
||||
|
||||
/** test some OR'd terms (just using whitespace) */
|
||||
public void testORImplicit() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "foo")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD);
|
||||
|
||||
SimpleQueryParser parser = new SimpleQueryParser(new MockAnalyzer(random()), "field");
|
||||
assertEquals(expected, parser.parse("foo bar"));
|
||||
}
|
||||
|
||||
/** test some OR'd phrases using '|' operator */
|
||||
public void testORPhrase() throws Exception {
|
||||
PhraseQuery phrase1 = new PhraseQuery();
|
||||
phrase1.add(new Term("field", "foo"));
|
||||
phrase1.add(new Term("field", "bar"));
|
||||
PhraseQuery phrase2 = new PhraseQuery();
|
||||
phrase2.add(new Term("field", "star"));
|
||||
phrase2.add(new Term("field", "wars"));
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(phrase1, Occur.SHOULD);
|
||||
expected.add(phrase2, Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("\"foo bar\"|\"star wars\""));
|
||||
}
|
||||
|
||||
/** test negated term */
|
||||
public void testNOT() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "foo")), Occur.MUST_NOT);
|
||||
expected.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("-foo"));
|
||||
assertEquals(expected, parse("-(foo)"));
|
||||
assertEquals(expected, parse("---foo"));
|
||||
}
|
||||
|
||||
/** test crazy prefixes with multiple asterisks */
|
||||
public void testCrazyPrefixes1() throws Exception {
|
||||
Query expected = new PrefixQuery(new Term("field", "st*ar"));
|
||||
|
||||
assertEquals(expected, parse("st*ar*"));
|
||||
}
|
||||
|
||||
/** test prefixes with some escaping */
|
||||
public void testCrazyPrefixes2() throws Exception {
|
||||
Query expected = new PrefixQuery(new Term("field", "st*ar\\*"));
|
||||
|
||||
assertEquals(expected, parse("st*ar\\\\**"));
|
||||
}
|
||||
|
||||
/** not a prefix query! the prefix operator is escaped */
|
||||
public void testTermInDisguise() throws Exception {
|
||||
Query expected = new TermQuery(new Term("field", "st*ar\\*"));
|
||||
|
||||
assertEquals(expected, parse("sT*Ar\\\\\\*"));
|
||||
}
|
||||
|
||||
// a number of test cases here have garbage/errors in
|
||||
// the syntax passed in to test that the query can
|
||||
// still be interpreted as a guess to what the human
|
||||
// input was trying to be
|
||||
|
||||
public void testGarbageTerm() throws Exception {
|
||||
Query expected = new TermQuery(new Term("field", "star"));
|
||||
|
||||
assertEquals(expected, parse("star"));
|
||||
assertEquals(expected, parse("star\n"));
|
||||
assertEquals(expected, parse("star\r"));
|
||||
assertEquals(expected, parse("star\t"));
|
||||
assertEquals(expected, parse("star("));
|
||||
assertEquals(expected, parse("star)"));
|
||||
assertEquals(expected, parse("star\""));
|
||||
assertEquals(expected, parse("\t \r\n\nstar \n \r \t "));
|
||||
assertEquals(expected, parse("- + \"\" - star \\"));
|
||||
}
|
||||
|
||||
public void testGarbageEmpty() throws Exception {
|
||||
assertNull(parse(""));
|
||||
assertNull(parse(" "));
|
||||
assertNull(parse(" "));
|
||||
assertNull(parse("\\ "));
|
||||
assertNull(parse("\\ \\ "));
|
||||
assertNull(parse("\"\""));
|
||||
assertNull(parse("\" \""));
|
||||
assertNull(parse("\" \"|\" \""));
|
||||
assertNull(parse("(\" \"|\" \")"));
|
||||
assertNull(parse("\" \" \" \""));
|
||||
assertNull(parse("(\" \" \" \")"));
|
||||
}
|
||||
|
||||
public void testGarbageAND() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("star wars"));
|
||||
assertEquals(expected, parse("star+wars"));
|
||||
assertEquals(expected, parse(" star wars "));
|
||||
assertEquals(expected, parse(" star + wars "));
|
||||
assertEquals(expected, parse(" | star + + | wars "));
|
||||
assertEquals(expected, parse(" | star + + | wars \\"));
|
||||
}
|
||||
|
||||
public void testGarbageOR() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("star|wars"));
|
||||
assertEquals(expected, parse(" star | wars "));
|
||||
assertEquals(expected, parse(" | star | + | wars "));
|
||||
assertEquals(expected, parse(" + star | + + wars \\"));
|
||||
}
|
||||
|
||||
public void testGarbageNOT() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.MUST_NOT);
|
||||
expected.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("-star"));
|
||||
assertEquals(expected, parse("---star"));
|
||||
assertEquals(expected, parse("- -star -"));
|
||||
}
|
||||
|
||||
public void testGarbagePhrase() throws Exception {
|
||||
PhraseQuery expected = new PhraseQuery();
|
||||
expected.add(new Term("field", "star"));
|
||||
expected.add(new Term("field", "wars"));
|
||||
|
||||
assertEquals(expected, parse("\"star wars\""));
|
||||
assertEquals(expected, parse("\"star wars\\ \""));
|
||||
assertEquals(expected, parse("\"\" | \"star wars\""));
|
||||
assertEquals(expected, parse(" \"star wars\" \"\"\\"));
|
||||
}
|
||||
|
||||
public void testGarbageSubquery() throws Exception {
|
||||
Query expected = new TermQuery(new Term("field", "star"));
|
||||
|
||||
assertEquals(expected, parse("(star)"));
|
||||
assertEquals(expected, parse("(star))"));
|
||||
assertEquals(expected, parse("((star)"));
|
||||
assertEquals(expected, parse(" -()(star) \n\n\r "));
|
||||
assertEquals(expected, parse("| + - ( + - | star \n ) \n"));
|
||||
}
|
||||
|
||||
public void testCompoundAnd() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("star wars empire"));
|
||||
assertEquals(expected, parse("star+wars + empire"));
|
||||
assertEquals(expected, parse(" | --star wars empire \n\\"));
|
||||
}
|
||||
|
||||
public void testCompoundOr() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("star|wars|empire"));
|
||||
assertEquals(expected, parse("star|wars | empire"));
|
||||
assertEquals(expected, parse(" | --star|wars|empire \n\\"));
|
||||
}
|
||||
|
||||
public void testComplex00() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner = new BooleanQuery();
|
||||
inner.add(new TermQuery(new Term("field", "star")), Occur.SHOULD);
|
||||
inner.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD);
|
||||
expected.add(inner, Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("star|wars empire"));
|
||||
assertEquals(expected, parse("star|wars + empire"));
|
||||
assertEquals(expected, parse("star| + wars + ----empire |"));
|
||||
}
|
||||
|
||||
public void testComplex01() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner = new BooleanQuery();
|
||||
inner.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
expected.add(inner, Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("star wars | empire"));
|
||||
assertEquals(expected, parse("star + wars|empire"));
|
||||
assertEquals(expected, parse("star + | wars | ----empire +"));
|
||||
}
|
||||
|
||||
public void testComplex02() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner = new BooleanQuery();
|
||||
inner.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
expected.add(inner, Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("star wars | empire | strikes"));
|
||||
assertEquals(expected, parse("star + wars|empire | strikes"));
|
||||
assertEquals(expected, parse("star + | wars | ----empire | + --strikes \\"));
|
||||
}
|
||||
|
||||
public void testComplex03() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner = new BooleanQuery();
|
||||
BooleanQuery inner2 = new BooleanQuery();
|
||||
inner2.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
inner2.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
inner.add(inner2, Occur.SHOULD);
|
||||
inner.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
inner.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD);
|
||||
expected.add(inner, Occur.MUST);
|
||||
expected.add(new TermQuery(new Term("field", "back")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("star wars | empire | strikes back"));
|
||||
assertEquals(expected, parse("star + wars|empire | strikes + back"));
|
||||
assertEquals(expected, parse("star + | wars | ----empire | + --strikes + | --back \\"));
|
||||
}
|
||||
|
||||
public void testComplex04() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner = new BooleanQuery();
|
||||
BooleanQuery inner2 = new BooleanQuery();
|
||||
inner.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
inner.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
inner2.add(new TermQuery(new Term("field", "strikes")), Occur.MUST);
|
||||
inner2.add(new TermQuery(new Term("field", "back")), Occur.MUST);
|
||||
expected.add(inner, Occur.SHOULD);
|
||||
expected.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
expected.add(inner2, Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("(star wars) | empire | (strikes back)"));
|
||||
assertEquals(expected, parse("(star + wars) |empire | (strikes + back)"));
|
||||
assertEquals(expected, parse("(star + | wars |) | ----empire | + --(strikes + | --back) \\"));
|
||||
}
|
||||
|
||||
public void testComplex05() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner1 = new BooleanQuery();
|
||||
BooleanQuery inner2 = new BooleanQuery();
|
||||
BooleanQuery inner3 = new BooleanQuery();
|
||||
BooleanQuery inner4 = new BooleanQuery();
|
||||
|
||||
expected.add(inner1, Occur.SHOULD);
|
||||
expected.add(inner2, Occur.SHOULD);
|
||||
|
||||
inner1.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
inner1.add(new TermQuery(new Term("field", "wars")), Occur.MUST);
|
||||
|
||||
inner2.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
inner2.add(inner3, Occur.SHOULD);
|
||||
|
||||
inner3.add(new TermQuery(new Term("field", "strikes")), Occur.MUST);
|
||||
inner3.add(new TermQuery(new Term("field", "back")), Occur.MUST);
|
||||
inner3.add(inner4, Occur.MUST);
|
||||
|
||||
inner4.add(new TermQuery(new Term("field", "jarjar")), Occur.MUST_NOT);
|
||||
inner4.add(new MatchAllDocsQuery(), Occur.SHOULD);
|
||||
|
||||
assertEquals(expected, parse("(star wars) | (empire | (strikes back -jarjar))"));
|
||||
assertEquals(expected, parse("(star + wars) |(empire | (strikes + back -jarjar) () )"));
|
||||
assertEquals(expected, parse("(star + | wars |) | --(--empire | + --(strikes + | --back + -jarjar) \"\" ) \""));
|
||||
}
|
||||
|
||||
public void testComplex06() throws Exception {
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery inner1 = new BooleanQuery();
|
||||
BooleanQuery inner2 = new BooleanQuery();
|
||||
BooleanQuery inner3 = new BooleanQuery();
|
||||
|
||||
expected.add(new TermQuery(new Term("field", "star")), Occur.MUST);
|
||||
expected.add(inner1, Occur.MUST);
|
||||
|
||||
inner1.add(new TermQuery(new Term("field", "wars")), Occur.SHOULD);
|
||||
inner1.add(inner2, Occur.SHOULD);
|
||||
|
||||
inner2.add(inner3, Occur.MUST);
|
||||
inner3.add(new TermQuery(new Term("field", "empire")), Occur.SHOULD);
|
||||
inner3.add(new TermQuery(new Term("field", "strikes")), Occur.SHOULD);
|
||||
inner2.add(new TermQuery(new Term("field", "back")), Occur.MUST);
|
||||
inner2.add(new TermQuery(new Term("field", "jar+|jar")), Occur.MUST);
|
||||
|
||||
assertEquals(expected, parse("star (wars | (empire | strikes back jar\\+\\|jar))"));
|
||||
assertEquals(expected, parse("star + (wars |(empire | strikes + back jar\\+\\|jar) () )"));
|
||||
assertEquals(expected, parse("star + (| wars | | --(--empire | + --strikes + | --back + jar\\+\\|jar) \"\" ) \""));
|
||||
}
|
||||
|
||||
/** test a term with field weights */
|
||||
public void testWeightedTerm() throws Exception {
|
||||
Map<String,Float> weights = new LinkedHashMap<>();
|
||||
weights.put("field0", 5f);
|
||||
weights.put("field1", 10f);
|
||||
|
||||
BooleanQuery expected = new BooleanQuery(true);
|
||||
Query field0 = new TermQuery(new Term("field0", "foo"));
|
||||
field0.setBoost(5f);
|
||||
expected.add(field0, Occur.SHOULD);
|
||||
Query field1 = new TermQuery(new Term("field1", "foo"));
|
||||
field1.setBoost(10f);
|
||||
expected.add(field1, Occur.SHOULD);
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
SimpleQueryParser parser = new SimpleQueryParser(analyzer, weights);
|
||||
assertEquals(expected, parser.parse("foo"));
|
||||
}
|
||||
|
||||
/** test a more complex query with field weights */
|
||||
public void testWeightedOR() throws Exception {
|
||||
Map<String,Float> weights = new LinkedHashMap<>();
|
||||
weights.put("field0", 5f);
|
||||
weights.put("field1", 10f);
|
||||
|
||||
BooleanQuery expected = new BooleanQuery();
|
||||
BooleanQuery foo = new BooleanQuery(true);
|
||||
Query field0 = new TermQuery(new Term("field0", "foo"));
|
||||
field0.setBoost(5f);
|
||||
foo.add(field0, Occur.SHOULD);
|
||||
Query field1 = new TermQuery(new Term("field1", "foo"));
|
||||
field1.setBoost(10f);
|
||||
foo.add(field1, Occur.SHOULD);
|
||||
expected.add(foo, Occur.SHOULD);
|
||||
|
||||
BooleanQuery bar = new BooleanQuery(true);
|
||||
field0 = new TermQuery(new Term("field0", "bar"));
|
||||
field0.setBoost(5f);
|
||||
bar.add(field0, Occur.SHOULD);
|
||||
field1 = new TermQuery(new Term("field1", "bar"));
|
||||
field1.setBoost(10f);
|
||||
bar.add(field1, Occur.SHOULD);
|
||||
expected.add(bar, Occur.SHOULD);
|
||||
|
||||
Analyzer analyzer = new MockAnalyzer(random());
|
||||
SimpleQueryParser parser = new SimpleQueryParser(analyzer, weights);
|
||||
assertEquals(expected, parser.parse("foo|bar"));
|
||||
}
|
||||
|
||||
/** helper to parse a query with keyword analyzer across "field" */
|
||||
private Query parseKeyword(String text, int flags) {
|
||||
Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
|
||||
SimpleQueryParser parser = new SimpleQueryParser(analyzer,
|
||||
Collections.singletonMap("field", 1f),
|
||||
flags);
|
||||
return parser.parse(text);
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable phrase operator */
|
||||
public void testDisablePhrase() {
|
||||
Query expected = new TermQuery(new Term("field", "\"test\""));
|
||||
assertEquals(expected, parseKeyword("\"test\"", ~PHRASE_OPERATOR));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable prefix operator */
|
||||
public void testDisablePrefix() {
|
||||
Query expected = new TermQuery(new Term("field", "test*"));
|
||||
assertEquals(expected, parseKeyword("test*", ~PREFIX_OPERATOR));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable AND operator */
|
||||
public void testDisableAND() {
|
||||
Query expected = new TermQuery(new Term("field", "foo+bar"));
|
||||
assertEquals(expected, parseKeyword("foo+bar", ~AND_OPERATOR));
|
||||
expected = new TermQuery(new Term("field", "+foo+bar"));
|
||||
assertEquals(expected, parseKeyword("+foo+bar", ~AND_OPERATOR));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable OR operator */
|
||||
public void testDisableOR() {
|
||||
Query expected = new TermQuery(new Term("field", "foo|bar"));
|
||||
assertEquals(expected, parseKeyword("foo|bar", ~OR_OPERATOR));
|
||||
expected = new TermQuery(new Term("field", "|foo|bar"));
|
||||
assertEquals(expected, parseKeyword("|foo|bar", ~OR_OPERATOR));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable NOT operator */
|
||||
public void testDisableNOT() {
|
||||
Query expected = new TermQuery(new Term("field", "-foo"));
|
||||
assertEquals(expected, parseKeyword("-foo", ~NOT_OPERATOR));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable precedence operators */
|
||||
public void testDisablePrecedence() {
|
||||
Query expected = new TermQuery(new Term("field", "(foo)"));
|
||||
assertEquals(expected, parseKeyword("(foo)", ~PRECEDENCE_OPERATORS));
|
||||
expected = new TermQuery(new Term("field", ")foo("));
|
||||
assertEquals(expected, parseKeyword(")foo(", ~PRECEDENCE_OPERATORS));
|
||||
}
|
||||
|
||||
/** test the ability to enable/disable escape operators */
|
||||
public void testDisableEscape() {
|
||||
Query expected = new TermQuery(new Term("field", "foo\\bar"));
|
||||
assertEquals(expected, parseKeyword("foo\\bar", ~ESCAPE_OPERATOR));
|
||||
assertEquals(expected, parseKeyword("(foo\\bar)", ~ESCAPE_OPERATOR));
|
||||
assertEquals(expected, parseKeyword("\"foo\\bar\"", ~ESCAPE_OPERATOR));
|
||||
}
|
||||
|
||||
public void testDisableWhitespace() {
|
||||
Query expected = new TermQuery(new Term("field", "foo foo"));
|
||||
assertEquals(expected, parseKeyword("foo foo", ~WHITESPACE_OPERATOR));
|
||||
expected = new TermQuery(new Term("field", " foo foo\n "));
|
||||
assertEquals(expected, parseKeyword(" foo foo\n ", ~WHITESPACE_OPERATOR));
|
||||
expected = new TermQuery(new Term("field", "\t\tfoo foo foo"));
|
||||
assertEquals(expected, parseKeyword("\t\tfoo foo foo", ~WHITESPACE_OPERATOR));
|
||||
}
|
||||
|
||||
// we aren't supposed to barf on any input...
|
||||
public void testRandomQueries() throws Exception {
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
String query = _TestUtil.randomUnicodeString(random());
|
||||
parse(query); // no exception
|
||||
parseKeyword(query, _TestUtil.nextInt(random(), 0, 256)); // no exception
|
||||
}
|
||||
}
|
||||
|
||||
public void testRandomQueries2() throws Exception {
|
||||
char chars[] = new char[] { 'a', '1', '|', '&', ' ', '(', ')', '"', '-' };
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
sb.setLength(0);
|
||||
int queryLength = random().nextInt(20);
|
||||
for (int j = 0; j < queryLength; j++) {
|
||||
sb.append(chars[random().nextInt(chars.length)]);
|
||||
}
|
||||
parse(sb.toString()); // no exception
|
||||
parseKeyword(sb.toString(), _TestUtil.nextInt(random(), 0, 256)); // no exception
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue