mirror of https://github.com/apache/lucene.git
LUCENE-5410: add fuzzy and near to SimpleQueryParser
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1563558 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c70deaf3aa
commit
584fda3bdf
|
@ -125,6 +125,9 @@ New Features
|
||||||
* LUCENE-5320: Add SearcherTaxonomyManager over search and taxonomy index
|
* LUCENE-5320: Add SearcherTaxonomyManager over search and taxonomy index
|
||||||
directories (i.e. not only NRT). (Shai Erera)
|
directories (i.e. not only NRT). (Shai Erera)
|
||||||
|
|
||||||
|
* LUCENE-5410: Add fuzzy and near support via '~' operator to SimpleQueryParser.
|
||||||
|
(Lee Hinman via Robert Muir)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
* LUCENE-5217,LUCENE-5420: Maven config: get dependencies from Ant+Ivy config;
|
||||||
|
|
|
@ -58,7 +58,12 @@ public class MultiPhraseQuery extends Query {
|
||||||
/** Sets the phrase slop for this query.
|
/** Sets the phrase slop for this query.
|
||||||
* @see PhraseQuery#setSlop(int)
|
* @see PhraseQuery#setSlop(int)
|
||||||
*/
|
*/
|
||||||
public void setSlop(int s) { slop = s; }
|
public void setSlop(int s) {
|
||||||
|
if (s < 0) {
|
||||||
|
throw new IllegalArgumentException("slop value cannot be negative");
|
||||||
|
}
|
||||||
|
slop = s;
|
||||||
|
}
|
||||||
|
|
||||||
/** Sets the phrase slop for this query.
|
/** Sets the phrase slop for this query.
|
||||||
* @see PhraseQuery#getSlop()
|
* @see PhraseQuery#getSlop()
|
||||||
|
|
|
@ -68,7 +68,12 @@ public class PhraseQuery extends Query {
|
||||||
results are sorted by exactness.
|
results are sorted by exactness.
|
||||||
|
|
||||||
<p>The slop is zero by default, requiring exact matches.*/
|
<p>The slop is zero by default, requiring exact matches.*/
|
||||||
public void setSlop(int s) { slop = s; }
|
public void setSlop(int s) {
|
||||||
|
if (s < 0) {
|
||||||
|
throw new IllegalArgumentException("slop value cannot be negative");
|
||||||
|
}
|
||||||
|
slop = s;
|
||||||
|
}
|
||||||
/** Returns the slop. See setSlop(). */
|
/** Returns the slop. See setSlop(). */
|
||||||
public int getSlop() { return slop; }
|
public int getSlop() { return slop; }
|
||||||
|
|
||||||
|
|
|
@ -568,4 +568,16 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
|
||||||
return terms;
|
return terms;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNegativeSlop() throws Exception {
|
||||||
|
MultiPhraseQuery query = new MultiPhraseQuery();
|
||||||
|
query.add(new Term("field", "two"));
|
||||||
|
query.add(new Term("field", "one"));
|
||||||
|
try {
|
||||||
|
query.setSlop(-2);
|
||||||
|
fail("didn't get expected exception");
|
||||||
|
} catch (IllegalArgumentException expected) {
|
||||||
|
// expected exception
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -678,4 +678,16 @@ public class TestPhraseQuery extends LuceneTestCase {
|
||||||
reader.close();
|
reader.close();
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testNegativeSlop() throws Exception {
|
||||||
|
PhraseQuery query = new PhraseQuery();
|
||||||
|
query.add(new Term("field", "two"));
|
||||||
|
query.add(new Term("field", "one"));
|
||||||
|
try {
|
||||||
|
query.setSlop(-2);
|
||||||
|
fail("didn't get expected exception");
|
||||||
|
} catch (IllegalArgumentException expected) {
|
||||||
|
// expected exception
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,10 +21,12 @@ import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.PrefixQuery;
|
import org.apache.lucene.search.PrefixQuery;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.util.QueryBuilder;
|
import org.apache.lucene.util.QueryBuilder;
|
||||||
|
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||||
|
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -48,6 +50,8 @@ import java.util.Map;
|
||||||
* <li>'{@code -}' negates a single token: <tt>-token0</tt>
|
* <li>'{@code -}' negates a single token: <tt>-token0</tt>
|
||||||
* <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt>
|
* <li>'{@code "}' creates phrases of terms: <tt>"term1 term2 ..."</tt>
|
||||||
* <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt>
|
* <li>'{@code *}' at the end of terms specifies prefix query: <tt>term*</tt>
|
||||||
|
* <li>'{@code ~}N' at the end of terms specifies fuzzy query: <tt>term~1</tt>
|
||||||
|
* <li>'{@code ~}N' at the end of phrases specifies near query: <tt>"term1 term2"~5</tt>
|
||||||
* <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt>
|
* <li>'{@code (}' and '{@code )}' specifies precedence: <tt>token1 + (token2 | token3)</tt>
|
||||||
* </ul>
|
* </ul>
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -111,6 +115,11 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
public static final int ESCAPE_OPERATOR = 1<<6;
|
public static final int ESCAPE_OPERATOR = 1<<6;
|
||||||
/** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
|
/** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
|
||||||
public static final int WHITESPACE_OPERATOR = 1<<7;
|
public static final int WHITESPACE_OPERATOR = 1<<7;
|
||||||
|
/** Enables {@code FUZZY} operators: (~) on single terms */
|
||||||
|
public static final int FUZZY_OPERATOR = 1<<8;
|
||||||
|
/** Enables {@code NEAR} operators: (~) on phrases */
|
||||||
|
public static final int NEAR_OPERATOR = 1<<9;
|
||||||
|
|
||||||
|
|
||||||
private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD;
|
private BooleanClause.Occur defaultOperator = BooleanClause.Occur.SHOULD;
|
||||||
|
|
||||||
|
@ -266,6 +275,7 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
int start = ++state.index;
|
int start = ++state.index;
|
||||||
int copied = 0;
|
int copied = 0;
|
||||||
boolean escaped = false;
|
boolean escaped = false;
|
||||||
|
boolean hasSlop = false;
|
||||||
|
|
||||||
while (state.index < state.length) {
|
while (state.index < state.length) {
|
||||||
if (!escaped) {
|
if (!escaped) {
|
||||||
|
@ -279,12 +289,25 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
} else if (state.data[state.index] == '"') {
|
} else if (state.data[state.index] == '"') {
|
||||||
|
// if there are still characters after the closing ", check for a
|
||||||
|
// tilde
|
||||||
|
if (state.length > (state.index + 1) &&
|
||||||
|
state.data[state.index+1] == '~' &&
|
||||||
|
(flags & NEAR_OPERATOR) != 0) {
|
||||||
|
state.index++;
|
||||||
|
// check for characters after the tilde
|
||||||
|
if (state.length > (state.index + 1)) {
|
||||||
|
hasSlop = true;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
// this should be the end of the phrase
|
// this should be the end of the phrase
|
||||||
// all characters found will used for
|
// all characters found will used for
|
||||||
// creating the phrase query
|
// creating the phrase query
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
escaped = false;
|
escaped = false;
|
||||||
state.buffer[copied++] = state.data[state.index++];
|
state.buffer[copied++] = state.data[state.index++];
|
||||||
|
@ -305,7 +328,12 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
// a complete phrase has been found and is parsed through
|
// a complete phrase has been found and is parsed through
|
||||||
// through the analyzer from the given field
|
// through the analyzer from the given field
|
||||||
String phrase = new String(state.buffer, 0, copied);
|
String phrase = new String(state.buffer, 0, copied);
|
||||||
Query branch = newPhraseQuery(phrase);
|
Query branch;
|
||||||
|
if (hasSlop) {
|
||||||
|
branch = newPhraseQuery(phrase, parseFuzziness(state));
|
||||||
|
} else {
|
||||||
|
branch = newPhraseQuery(phrase, 0);
|
||||||
|
}
|
||||||
buildQueryTree(state, branch);
|
buildQueryTree(state, branch);
|
||||||
|
|
||||||
++state.index;
|
++state.index;
|
||||||
|
@ -316,6 +344,7 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
int copied = 0;
|
int copied = 0;
|
||||||
boolean escaped = false;
|
boolean escaped = false;
|
||||||
boolean prefix = false;
|
boolean prefix = false;
|
||||||
|
boolean fuzzy = false;
|
||||||
|
|
||||||
while (state.index < state.length) {
|
while (state.index < state.length) {
|
||||||
if (!escaped) {
|
if (!escaped) {
|
||||||
|
@ -329,19 +358,14 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
++state.index;
|
++state.index;
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
} else if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0)
|
} else if (tokenFinished(state)) {
|
||||||
|| (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0)
|
|
||||||
|| (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0)
|
|
||||||
|| (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
|
|
||||||
|| (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
|
|
||||||
|| ((state.data[state.index] == ' '
|
|
||||||
|| state.data[state.index] == '\t'
|
|
||||||
|| state.data[state.index] == '\n'
|
|
||||||
|| state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) {
|
|
||||||
// this should be the end of the term
|
// this should be the end of the term
|
||||||
// all characters found will used for
|
// all characters found will used for
|
||||||
// creating the term query
|
// creating the term query
|
||||||
break;
|
break;
|
||||||
|
} else if (copied > 0 && state.data[state.index] == '~' && (flags & FUZZY_OPERATOR) != 0) {
|
||||||
|
fuzzy = true;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// wildcard tracks whether or not the last character
|
// wildcard tracks whether or not the last character
|
||||||
|
@ -358,7 +382,17 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
if (copied > 0) {
|
if (copied > 0) {
|
||||||
final Query branch;
|
final Query branch;
|
||||||
|
|
||||||
if (prefix) {
|
if (fuzzy && (flags & FUZZY_OPERATOR) != 0) {
|
||||||
|
String token = new String(state.buffer, 0, copied);
|
||||||
|
int fuzziness = parseFuzziness(state);
|
||||||
|
// edit distance has a maximum, limit to the maximum supported
|
||||||
|
fuzziness = Math.min(fuzziness, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
|
||||||
|
if (fuzziness == 0) {
|
||||||
|
branch = newDefaultQuery(token);
|
||||||
|
} else {
|
||||||
|
branch = newFuzzyQuery(token, fuzziness);
|
||||||
|
}
|
||||||
|
} else if (prefix) {
|
||||||
// if a term is found with a closing '*' it is considered to be a prefix query
|
// if a term is found with a closing '*' it is considered to be a prefix query
|
||||||
// and will have prefix added as an option
|
// and will have prefix added as an option
|
||||||
String token = new String(state.buffer, 0, copied - 1);
|
String token = new String(state.buffer, 0, copied - 1);
|
||||||
|
@ -420,6 +454,60 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper parsing fuzziness from parsing state
|
||||||
|
* @return slop/edit distance, 0 in the case of non-parsing slop/edit string
|
||||||
|
*/
|
||||||
|
private int parseFuzziness(State state) {
|
||||||
|
char slopText[] = new char[state.length];
|
||||||
|
int slopLength = 0;
|
||||||
|
|
||||||
|
if (state.data[state.index] == '~') {
|
||||||
|
while (state.index < state.length) {
|
||||||
|
state.index++;
|
||||||
|
// it's possible that the ~ was at the end, so check after incrementing
|
||||||
|
// to make sure we don't go out of bounds
|
||||||
|
if (state.index < state.length) {
|
||||||
|
if (tokenFinished(state)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
slopText[slopLength] = state.data[state.index];
|
||||||
|
slopLength++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int fuzziness = 0;
|
||||||
|
try {
|
||||||
|
fuzziness = Integer.parseInt(new String(slopText, 0, slopLength));
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
// swallow number format exceptions parsing fuzziness
|
||||||
|
}
|
||||||
|
// negative -> 0
|
||||||
|
if (fuzziness < 0) {
|
||||||
|
fuzziness = 0;
|
||||||
|
}
|
||||||
|
return fuzziness;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper returning true if the state has reached the end of token.
|
||||||
|
*/
|
||||||
|
private boolean tokenFinished(State state) {
|
||||||
|
if ((state.data[state.index] == '"' && (flags & PHRASE_OPERATOR) != 0)
|
||||||
|
|| (state.data[state.index] == '|' && (flags & OR_OPERATOR) != 0)
|
||||||
|
|| (state.data[state.index] == '+' && (flags & AND_OPERATOR) != 0)
|
||||||
|
|| (state.data[state.index] == '(' && (flags & PRECEDENCE_OPERATORS) != 0)
|
||||||
|
|| (state.data[state.index] == ')' && (flags & PRECEDENCE_OPERATORS) != 0)
|
||||||
|
|| ((state.data[state.index] == ' '
|
||||||
|
|| state.data[state.index] == '\t'
|
||||||
|
|| state.data[state.index] == '\n'
|
||||||
|
|| state.data[state.index] == '\r') && (flags & WHITESPACE_OPERATOR) != 0)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory method to generate a standard query (no phrase or prefix operators).
|
* Factory method to generate a standard query (no phrase or prefix operators).
|
||||||
*/
|
*/
|
||||||
|
@ -436,12 +524,27 @@ public class SimpleQueryParser extends QueryBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Factory method to generate a phrase query.
|
* Factory method to generate a fuzzy query.
|
||||||
*/
|
*/
|
||||||
protected Query newPhraseQuery(String text) {
|
protected Query newFuzzyQuery(String text, int fuzziness) {
|
||||||
BooleanQuery bq = new BooleanQuery(true);
|
BooleanQuery bq = new BooleanQuery(true);
|
||||||
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
||||||
Query q = createPhraseQuery(entry.getKey(), text);
|
Query q = new FuzzyQuery(new Term(entry.getKey(), text), fuzziness);
|
||||||
|
if (q != null) {
|
||||||
|
q.setBoost(entry.getValue());
|
||||||
|
bq.add(q, BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return simplify(bq);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Factory method to generate a phrase query with slop.
|
||||||
|
*/
|
||||||
|
protected Query newPhraseQuery(String text, int slop) {
|
||||||
|
BooleanQuery bq = new BooleanQuery(true);
|
||||||
|
for (Map.Entry<String,Float> entry : weights.entrySet()) {
|
||||||
|
Query q = createPhraseQuery(entry.getKey(), text, slop);
|
||||||
if (q != null) {
|
if (q != null) {
|
||||||
q.setBoost(entry.getValue());
|
q.setBoost(entry.getValue());
|
||||||
bq.add(q, BooleanClause.Occur.SHOULD);
|
bq.add(q, BooleanClause.Occur.SHOULD);
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.BooleanClause.Occur;
|
import org.apache.lucene.search.BooleanClause.Occur;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.FuzzyQuery;
|
||||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||||
import org.apache.lucene.search.PhraseQuery;
|
import org.apache.lucene.search.PhraseQuery;
|
||||||
import org.apache.lucene.search.PrefixQuery;
|
import org.apache.lucene.search.PrefixQuery;
|
||||||
|
@ -34,14 +35,17 @@ import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TermQuery;
|
import org.apache.lucene.search.TermQuery;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||||
|
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.AND_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.AND_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.ESCAPE_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.ESCAPE_OPERATOR;
|
||||||
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.FUZZY_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.NOT_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.NOT_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.OR_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.OR_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PHRASE_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PHRASE_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PRECEDENCE_OPERATORS;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PRECEDENCE_OPERATORS;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PREFIX_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.PREFIX_OPERATOR;
|
||||||
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.NEAR_OPERATOR;
|
||||||
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.WHITESPACE_OPERATOR;
|
import static org.apache.lucene.queryparser.simple.SimpleQueryParser.WHITESPACE_OPERATOR;
|
||||||
|
|
||||||
/** Tests for {@link SimpleQueryParser} */
|
/** Tests for {@link SimpleQueryParser} */
|
||||||
|
@ -58,6 +62,18 @@ public class TestSimpleQueryParser extends LuceneTestCase {
|
||||||
return parser.parse(text);
|
return parser.parse(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* helper to parse a query with whitespace+lowercase analyzer across "field",
|
||||||
|
* with default operator of MUST
|
||||||
|
*/
|
||||||
|
private Query parse(String text, int flags) {
|
||||||
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
|
SimpleQueryParser parser = new SimpleQueryParser(analyzer,
|
||||||
|
Collections.singletonMap("field", 1f), flags);
|
||||||
|
parser.setDefaultOperator(Occur.MUST);
|
||||||
|
return parser.parse(text);
|
||||||
|
}
|
||||||
|
|
||||||
/** test a simple term */
|
/** test a simple term */
|
||||||
public void testTerm() throws Exception {
|
public void testTerm() throws Exception {
|
||||||
Query expected = new TermQuery(new Term("field", "foobar"));
|
Query expected = new TermQuery(new Term("field", "foobar"));
|
||||||
|
@ -65,6 +81,24 @@ public class TestSimpleQueryParser extends LuceneTestCase {
|
||||||
assertEquals(expected, parse("foobar"));
|
assertEquals(expected, parse("foobar"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** test a fuzzy query */
|
||||||
|
public void testFuzzy() throws Exception {
|
||||||
|
Query regular = new TermQuery(new Term("field", "foobar"));
|
||||||
|
Query expected = new FuzzyQuery(new Term("field", "foobar"), 2);
|
||||||
|
|
||||||
|
assertEquals(expected, parse("foobar~2"));
|
||||||
|
assertEquals(regular, parse("foobar~"));
|
||||||
|
assertEquals(regular, parse("foobar~a"));
|
||||||
|
assertEquals(regular, parse("foobar~1a"));
|
||||||
|
|
||||||
|
BooleanQuery bool = new BooleanQuery();
|
||||||
|
FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "foo"), LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
|
||||||
|
bool.add(fuzzy, Occur.MUST);
|
||||||
|
bool.add(new TermQuery(new Term("field", "bar")), Occur.MUST);
|
||||||
|
|
||||||
|
assertEquals(bool, parse("foo~" + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE + 1 + " bar"));
|
||||||
|
}
|
||||||
|
|
||||||
/** test a simple phrase */
|
/** test a simple phrase */
|
||||||
public void testPhrase() throws Exception {
|
public void testPhrase() throws Exception {
|
||||||
PhraseQuery expected = new PhraseQuery();
|
PhraseQuery expected = new PhraseQuery();
|
||||||
|
@ -74,6 +108,43 @@ public class TestSimpleQueryParser extends LuceneTestCase {
|
||||||
assertEquals(expected, parse("\"foo bar\""));
|
assertEquals(expected, parse("\"foo bar\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** test a simple phrase with various slop settings */
|
||||||
|
public void testPhraseWithSlop() throws Exception {
|
||||||
|
PhraseQuery expectedWithSlop = new PhraseQuery();
|
||||||
|
expectedWithSlop.add(new Term("field", "foo"));
|
||||||
|
expectedWithSlop.add(new Term("field", "bar"));
|
||||||
|
expectedWithSlop.setSlop(2);
|
||||||
|
|
||||||
|
assertEquals(expectedWithSlop, parse("\"foo bar\"~2"));
|
||||||
|
|
||||||
|
PhraseQuery expectedWithMultiDigitSlop = new PhraseQuery();
|
||||||
|
expectedWithMultiDigitSlop.add(new Term("field", "foo"));
|
||||||
|
expectedWithMultiDigitSlop.add(new Term("field", "bar"));
|
||||||
|
expectedWithMultiDigitSlop.setSlop(10);
|
||||||
|
|
||||||
|
assertEquals(expectedWithMultiDigitSlop, parse("\"foo bar\"~10"));
|
||||||
|
|
||||||
|
PhraseQuery expectedNoSlop = new PhraseQuery();
|
||||||
|
expectedNoSlop.add(new Term("field", "foo"));
|
||||||
|
expectedNoSlop.add(new Term("field", "bar"));
|
||||||
|
|
||||||
|
assertEquals("Ignore trailing tilde with no slop", expectedNoSlop, parse("\"foo bar\"~"));
|
||||||
|
assertEquals("Ignore non-numeric trailing slop", expectedNoSlop, parse("\"foo bar\"~a"));
|
||||||
|
assertEquals("Ignore non-numeric trailing slop", expectedNoSlop, parse("\"foo bar\"~1a"));
|
||||||
|
assertEquals("Ignore negative trailing slop", expectedNoSlop, parse("\"foo bar\"~-1"));
|
||||||
|
|
||||||
|
PhraseQuery pq = new PhraseQuery();
|
||||||
|
pq.add(new Term("field", "foo"));
|
||||||
|
pq.add(new Term("field", "bar"));
|
||||||
|
pq.setSlop(12);
|
||||||
|
|
||||||
|
BooleanQuery expectedBoolean = new BooleanQuery();
|
||||||
|
expectedBoolean.add(pq, Occur.MUST);
|
||||||
|
expectedBoolean.add(new TermQuery(new Term("field", "baz")), Occur.MUST);
|
||||||
|
|
||||||
|
assertEquals(expectedBoolean, parse("\"foo bar\"~12 baz"));
|
||||||
|
}
|
||||||
|
|
||||||
/** test a simple prefix */
|
/** test a simple prefix */
|
||||||
public void testPrefix() throws Exception {
|
public void testPrefix() throws Exception {
|
||||||
PrefixQuery expected = new PrefixQuery(new Term("field", "foobar"));
|
PrefixQuery expected = new PrefixQuery(new Term("field", "foobar"));
|
||||||
|
@ -533,17 +604,33 @@ public class TestSimpleQueryParser extends LuceneTestCase {
|
||||||
assertEquals(expected, parseKeyword("\t\tfoo foo foo", ~WHITESPACE_OPERATOR));
|
assertEquals(expected, parseKeyword("\t\tfoo foo foo", ~WHITESPACE_OPERATOR));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testDisableFuzziness() {
|
||||||
|
Query expected = new TermQuery(new Term("field", "foo~1"));
|
||||||
|
assertEquals(expected, parseKeyword("foo~1", ~FUZZY_OPERATOR));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testDisableSlop() {
|
||||||
|
PhraseQuery expectedPhrase = new PhraseQuery();
|
||||||
|
expectedPhrase.add(new Term("field", "foo"));
|
||||||
|
expectedPhrase.add(new Term("field", "bar"));
|
||||||
|
|
||||||
|
BooleanQuery expected = new BooleanQuery();
|
||||||
|
expected.add(expectedPhrase, Occur.MUST);
|
||||||
|
expected.add(new TermQuery(new Term("field", "~2")), Occur.MUST);
|
||||||
|
assertEquals(expected, parse("\"foo bar\"~2", ~NEAR_OPERATOR));
|
||||||
|
}
|
||||||
|
|
||||||
// we aren't supposed to barf on any input...
|
// we aren't supposed to barf on any input...
|
||||||
public void testRandomQueries() throws Exception {
|
public void testRandomQueries() throws Exception {
|
||||||
for (int i = 0; i < 1000; i++) {
|
for (int i = 0; i < 1000; i++) {
|
||||||
String query = _TestUtil.randomUnicodeString(random());
|
String query = _TestUtil.randomUnicodeString(random());
|
||||||
parse(query); // no exception
|
parse(query); // no exception
|
||||||
parseKeyword(query, _TestUtil.nextInt(random(), 0, 256)); // no exception
|
parseKeyword(query, _TestUtil.nextInt(random(), 0, 1024)); // no exception
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRandomQueries2() throws Exception {
|
public void testRandomQueries2() throws Exception {
|
||||||
char chars[] = new char[] { 'a', '1', '|', '&', ' ', '(', ')', '"', '-' };
|
char chars[] = new char[] { 'a', '1', '|', '&', ' ', '(', ')', '"', '-', '~'};
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for (int i = 0; i < 1000; i++) {
|
for (int i = 0; i < 1000; i++) {
|
||||||
sb.setLength(0);
|
sb.setLength(0);
|
||||||
|
@ -552,7 +639,7 @@ public class TestSimpleQueryParser extends LuceneTestCase {
|
||||||
sb.append(chars[random().nextInt(chars.length)]);
|
sb.append(chars[random().nextInt(chars.length)]);
|
||||||
}
|
}
|
||||||
parse(sb.toString()); // no exception
|
parse(sb.toString()); // no exception
|
||||||
parseKeyword(sb.toString(), _TestUtil.nextInt(random(), 0, 256)); // no exception
|
parseKeyword(sb.toString(), _TestUtil.nextInt(random(), 0, 1024)); // no exception
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -18,11 +18,9 @@ package org.apache.solr.search;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.index.Term;
|
|
||||||
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
|
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.lucene.search.PrefixQuery;
|
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
import org.apache.solr.common.params.CommonParams;
|
||||||
import org.apache.solr.common.params.SimpleParams;
|
import org.apache.solr.common.params.SimpleParams;
|
||||||
|
@ -73,36 +71,21 @@ public class SimpleQParserPlugin extends QParserPlugin {
|
||||||
/** The name that can be used to specify this plugin should be used to parse the query. */
|
/** The name that can be used to specify this plugin should be used to parse the query. */
|
||||||
public static String NAME = "simple";
|
public static String NAME = "simple";
|
||||||
|
|
||||||
/** Enables {@code AND} operator (+) */
|
|
||||||
private static final String AND_OPERATOR = "AND";
|
|
||||||
/** Enables {@code NOT} operator (-) */
|
|
||||||
private static final String NOT_OPERATOR = "NOT";
|
|
||||||
/** Enables {@code OR} operator (|) */
|
|
||||||
private static final String OR_OPERATOR = "OR";
|
|
||||||
/** Enables {@code PREFIX} operator (*) */
|
|
||||||
private static final String PREFIX_OPERATOR = "PREFIX";
|
|
||||||
/** Enables {@code PHRASE} operator (") */
|
|
||||||
private static final String PHRASE_OPERATOR = "PHRASE";
|
|
||||||
/** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */
|
|
||||||
private static final String PRECEDENCE_OPERATORS = "PRECEDENCE";
|
|
||||||
/** Enables {@code ESCAPE} operator (\) */
|
|
||||||
private static final String ESCAPE_OPERATOR = "ESCAPE";
|
|
||||||
/** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
|
|
||||||
private static final String WHITESPACE_OPERATOR = "WHITESPACE";
|
|
||||||
|
|
||||||
/** Map of string operators to their int counterparts in SimpleQueryParser. */
|
/** Map of string operators to their int counterparts in SimpleQueryParser. */
|
||||||
private static final Map<String, Integer> OPERATORS = new HashMap<String, Integer>();
|
private static final Map<String, Integer> OPERATORS = new HashMap<String, Integer>();
|
||||||
|
|
||||||
/* Setup the map of possible operators. */
|
/* Setup the map of possible operators. */
|
||||||
static {
|
static {
|
||||||
OPERATORS.put(AND_OPERATOR, SimpleQueryParser.AND_OPERATOR);
|
OPERATORS.put(SimpleParams.AND_OPERATOR, SimpleQueryParser.AND_OPERATOR);
|
||||||
OPERATORS.put(NOT_OPERATOR, SimpleQueryParser.NOT_OPERATOR);
|
OPERATORS.put(SimpleParams.NOT_OPERATOR, SimpleQueryParser.NOT_OPERATOR);
|
||||||
OPERATORS.put(OR_OPERATOR, SimpleQueryParser.OR_OPERATOR);
|
OPERATORS.put(SimpleParams.OR_OPERATOR, SimpleQueryParser.OR_OPERATOR);
|
||||||
OPERATORS.put(PREFIX_OPERATOR, SimpleQueryParser.PREFIX_OPERATOR);
|
OPERATORS.put(SimpleParams.PREFIX_OPERATOR, SimpleQueryParser.PREFIX_OPERATOR);
|
||||||
OPERATORS.put(PHRASE_OPERATOR, SimpleQueryParser.PHRASE_OPERATOR);
|
OPERATORS.put(SimpleParams.PHRASE_OPERATOR, SimpleQueryParser.PHRASE_OPERATOR);
|
||||||
OPERATORS.put(PRECEDENCE_OPERATORS, SimpleQueryParser.PRECEDENCE_OPERATORS);
|
OPERATORS.put(SimpleParams.PRECEDENCE_OPERATORS, SimpleQueryParser.PRECEDENCE_OPERATORS);
|
||||||
OPERATORS.put(ESCAPE_OPERATOR, SimpleQueryParser.ESCAPE_OPERATOR);
|
OPERATORS.put(SimpleParams.ESCAPE_OPERATOR, SimpleQueryParser.ESCAPE_OPERATOR);
|
||||||
OPERATORS.put(WHITESPACE_OPERATOR, SimpleQueryParser.WHITESPACE_OPERATOR);
|
OPERATORS.put(SimpleParams.WHITESPACE_OPERATOR, SimpleQueryParser.WHITESPACE_OPERATOR);
|
||||||
|
OPERATORS.put(SimpleParams.FUZZY_OPERATOR, SimpleQueryParser.FUZZY_OPERATOR);
|
||||||
|
OPERATORS.put(SimpleParams.NEAR_OPERATOR, SimpleQueryParser.NEAR_OPERATOR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** No initialization is necessary so this method is empty. */
|
/** No initialization is necessary so this method is empty. */
|
||||||
|
|
|
@ -26,4 +26,25 @@ public interface SimpleParams {
|
||||||
|
|
||||||
/** Override the currently enabled/disabled query operators. */
|
/** Override the currently enabled/disabled query operators. */
|
||||||
public static String QO = "q.operators";
|
public static String QO = "q.operators";
|
||||||
|
|
||||||
|
/** Enables {@code AND} operator (+) */
|
||||||
|
public static final String AND_OPERATOR = "AND";
|
||||||
|
/** Enables {@code NOT} operator (-) */
|
||||||
|
public static final String NOT_OPERATOR = "NOT";
|
||||||
|
/** Enables {@code OR} operator (|) */
|
||||||
|
public static final String OR_OPERATOR = "OR";
|
||||||
|
/** Enables {@code PREFIX} operator (*) */
|
||||||
|
public static final String PREFIX_OPERATOR = "PREFIX";
|
||||||
|
/** Enables {@code PHRASE} operator (") */
|
||||||
|
public static final String PHRASE_OPERATOR = "PHRASE";
|
||||||
|
/** Enables {@code PRECEDENCE} operators: {@code (} and {@code )} */
|
||||||
|
public static final String PRECEDENCE_OPERATORS = "PRECEDENCE";
|
||||||
|
/** Enables {@code ESCAPE} operator (\) */
|
||||||
|
public static final String ESCAPE_OPERATOR = "ESCAPE";
|
||||||
|
/** Enables {@code WHITESPACE} operators: ' ' '\n' '\r' '\t' */
|
||||||
|
public static final String WHITESPACE_OPERATOR = "WHITESPACE";
|
||||||
|
/** Enables {@code FUZZY} operator (~) */
|
||||||
|
public static final String FUZZY_OPERATOR = "FUZZY";
|
||||||
|
/** Enables {@code NEAR} operator (~) */
|
||||||
|
public static final String NEAR_OPERATOR = "NEAR";
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue