Fix for LUCENE-3578, the ability to specify order for complex phrase queries

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1578113 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erick Erickson 2014-03-16 16:54:04 +00:00
parent c446aec1ed
commit e8c203b545
3 changed files with 40 additions and 7 deletions

View File

@ -98,6 +98,9 @@ New Features
support arbitrary Sort specifications.
(Robert Muir, Mike McCandless, Adrien Grand)
* LUCENE-3758: Allow the ComplexPhraseQueryParser to search order or
un-order proximity queries. (Ahmet Arslan via Erick Erickson)
API Changes
* LUCENE-5454: Add RandomAccessOrds, an optional extension of SortedSetDocValues

View File

@ -66,6 +66,18 @@ public class ComplexPhraseQueryParser extends QueryParser {
private boolean isPass2ResolvingPhrases;
private boolean inOrder = true;
/**
* When <code>inOrder</code> is true, the search terms must
* exists in the documents as the same order as in query.
*
* @param inOrder parameter to choose between ordered or un-ordered proximity search
*/
public void setInOrder(final boolean inOrder) {
this.inOrder = inOrder;
}
private ComplexPhraseQuery currentPhraseQuery = null;
public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) {
@ -74,7 +86,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
@Override
protected Query getFieldQuery(String field, String queryText, int slop) {
ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop);
ComplexPhraseQuery cpq = new ComplexPhraseQuery(field, queryText, slop, inOrder);
complexPhrases.add(cpq); // add to list of phrases to be parsed once
// we
// are through with this pass
@ -202,14 +214,17 @@ public class ComplexPhraseQueryParser extends QueryParser {
int slopFactor;
private final boolean inOrder;
private Query contents;
public ComplexPhraseQuery(String field, String phrasedQueryStringContents,
int slopFactor) {
int slopFactor, boolean inOrder) {
super();
this.field = field;
this.phrasedQueryStringContents = phrasedQueryStringContents;
this.slopFactor = slopFactor;
this.inOrder = inOrder;
}
// Called by ComplexPhraseQueryParser for each phrase after the main
@ -280,7 +295,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
}
if (numNegatives == 0) {
// The simple case - no negative elements in phrase
return new SpanNearQuery(allSpanClauses, slopFactor, true);
return new SpanNearQuery(allSpanClauses, slopFactor, inOrder);
}
// Complex case - we have mixed positives and negatives in the
// sequence.
@ -302,11 +317,11 @@ public class ComplexPhraseQueryParser extends QueryParser {
// need to increase slop factor based on gaps introduced by
// negatives
include = new SpanNearQuery(includeClauses, slopFactor + numNegatives,
true);
inOrder);
}
// Use sequence of positive and negative values as the exclude.
SpanNearQuery exclude = new SpanNearQuery(allSpanClauses, slopFactor,
true);
inOrder);
SpanNotQuery snot = new SpanNotQuery(include, exclude);
return snot;
}

View File

@ -49,6 +49,8 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
String defaultFieldName = "name";
boolean inOrder = true;
public void testComplexPhrases() throws Exception {
checkMatches("\"john smith\"", "1"); // Simple multi-term still works
checkMatches("\"j* smyth~\"", "1,2"); // wildcards and fuzzies are OK in
@ -72,8 +74,20 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
checkBadQuery("\"jo* \"smith\" \""); // phrases inside phrases is bad
}
public void testUnOrderedProximitySearches() throws Exception {
inOrder = true;
checkMatches("\"smith jo*\"~2", ""); // ordered proximity produces empty set
inOrder = false;
checkMatches("\"smith jo*\"~2", "1,2,3"); // un-ordered proximity
}
private void checkBadQuery(String qString) {
QueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
qp.setInOrder(inOrder);
Throwable expected = null;
try {
qp.parse(qString);
@ -86,7 +100,8 @@ public class TestComplexPhraseQuery extends LuceneTestCase {
private void checkMatches(String qString, String expectedVals)
throws Exception {
QueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(TEST_VERSION_CURRENT, defaultFieldName, analyzer);
qp.setInOrder(inOrder);
qp.setFuzzyPrefixLength(1); // usually a good idea
Query q = qp.parse(qString);