LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true when splitOnWhitespace=false (and vice-versa).

This commit is contained in:
Steve Rowe 2016-11-17 18:50:58 -05:00
parent 61a6072573
commit 6d1962a902
6 changed files with 109 additions and 26 deletions

View File

@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
======================= Lucene 6.4.0 =======================
API Changes
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
to be set to true when splitOnWhitespace is false (and vice-versa).
New features
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
@ -16,6 +21,9 @@ Bug Fixes
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
Improvements

View File

@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
init(f, a);
}
/**
 * Controls whether a phrase query is generated automatically whenever the
 * analyzer produces more than one term from a single piece of
 * whitespace-delimited text.
 * NOTE: this behavior may not be suitable for all languages.
 * <p>
 * Pass {@code false} to generate phrase queries only for text that is
 * surrounded by double quotes.
 * <p>
 * Enabling this while splitOnWhitespace is {@code false} is rejected.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param value the new autoGeneratePhraseQueries setting
 * @throws IllegalArgumentException if {@code value} is {@code true} while
 *         splitOnWhitespace is {@code false}
 */
@Override
public void setAutoGeneratePhraseQueries(boolean value) {
  // Only the (splitOnWhitespace=false, autoGeneratePhraseQueries=true) pairing is illegal.
  final boolean conflictsWithSplitSetting = value && !splitOnWhitespace;
  if (conflictsWithSplitSetting) {
    throw new IllegalArgumentException(
        "setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
  }
  this.autoGeneratePhraseQueries = value;
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
/**
 * Sets whether query text is split on whitespace before it is analyzed.
 * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
 * <p>
 * Disabling this while autoGeneratePhraseQueries is {@code true} is rejected.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param splitOnWhitespace the new splitOnWhitespace setting
 * @throws IllegalArgumentException if {@code splitOnWhitespace} is {@code false}
 *         while {@link #getAutoGeneratePhraseQueries()} returns {@code true}
 */
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
  // Only the (splitOnWhitespace=false, autoGeneratePhraseQueries=true) pairing is illegal.
  if (!splitOnWhitespace && getAutoGeneratePhraseQueries()) {
    throw new IllegalArgumentException(
        "setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
  }
  this.splitOnWhitespace = splitOnWhitespace;
}
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
finally { jj_save(2, xla); }
}
// Generated lookahead routine. Returns true as soon as any check below
// reports true, and false once every check has passed.
// NOTE(review): in JavaCC-generated scanners a true result conventionally
// means "lookahead did not match" — confirm against the grammar.
private boolean jj_3R_3() {
// The scan must begin with a TERM token.
if (jj_scan_token(TERM)) return true;
// Semantic lookahead: token 1 must be a TERM and the kind of token 2 must be
// accepted by allowedPostMultiTerm(). jj_lookingAhead is raised only while
// the getToken() calls are evaluated.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// At least one jj_3R_7 expansion is required...
if (jj_3R_7()) return true;
// ...followed by as many more as will scan. When an attempt returns true,
// the scan position saved just before that attempt is restored.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
// Generated lookahead routine with no checks of its own: it always returns
// false, so the "|| jj_3R_6()" test in jj_3R_3 never causes an early return.
private boolean jj_3R_6() {
return false;
}
// Generated lookahead routine: scans a STAR token followed by a COLON token.
// Returns true as soon as either jj_scan_token call returns true (the COLON
// scan is skipped when the STAR scan already returned true), otherwise false.
private boolean jj_3R_5() {
  return jj_scan_token(STAR) || jj_scan_token(COLON);
}
private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
return false;
}
// Generated lookahead routine. Returns true as soon as any check below
// reports true, and false once every check has passed.
// NOTE(review): in JavaCC-generated scanners a true result conventionally
// means "lookahead did not match" — confirm against the grammar.
private boolean jj_3R_3() {
// The scan must begin with a TERM token.
if (jj_scan_token(TERM)) return true;
// Semantic lookahead: token 1 must be a TERM and the kind of token 2 must be
// accepted by allowedPostMultiTerm(). jj_lookingAhead is raised only while
// the getToken() calls are evaluated.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// At least one jj_3R_7 expansion is required...
if (jj_3R_7()) return true;
// ...followed by as many more as will scan. When an attempt returns true,
// the scan position saved just before that attempt is restored.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
// Generated lookahead routine with no checks of its own: it always returns
// false, so the "|| jj_3R_6()" test in jj_3R_3 never causes an early return.
private boolean jj_3R_6() {
return false;
}
// Generated lookahead routine: scans a STAR token followed by a COLON token.
// Returns true as soon as either jj_scan_token call returns true (the COLON
// scan is skipped when the STAR scan already returned true), otherwise false.
private boolean jj_3R_5() {
  return jj_scan_token(STAR) || jj_scan_token(COLON);
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
init(f, a);
}
/**
 * Controls whether a phrase query is generated automatically whenever the
 * analyzer produces more than one term from a single piece of
 * whitespace-delimited text.
 * NOTE: this behavior may not be suitable for all languages.
 * <p>
 * Pass {@code false} to generate phrase queries only for text that is
 * surrounded by double quotes.
 * <p>
 * Enabling this while splitOnWhitespace is {@code false} is rejected.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param value the new autoGeneratePhraseQueries setting
 * @throws IllegalArgumentException if {@code value} is {@code true} while
 *         splitOnWhitespace is {@code false}
 */
@Override
public void setAutoGeneratePhraseQueries(boolean value) {
  // Only the (splitOnWhitespace=false, autoGeneratePhraseQueries=true) pairing is illegal.
  final boolean conflictsWithSplitSetting = value && !splitOnWhitespace;
  if (conflictsWithSplitSetting) {
    throw new IllegalArgumentException(
        "setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
  }
  this.autoGeneratePhraseQueries = value;
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
/**
 * Sets whether query text is split on whitespace before it is analyzed.
 * Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
 * <p>
 * Disabling this while autoGeneratePhraseQueries is {@code true} is rejected.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
 *
 * @param splitOnWhitespace the new splitOnWhitespace setting
 * @throws IllegalArgumentException if {@code splitOnWhitespace} is {@code false}
 *         while {@link #getAutoGeneratePhraseQueries()} returns {@code true}
 */
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
  // Only the (splitOnWhitespace=false, autoGeneratePhraseQueries=true) pairing is illegal.
  if (!splitOnWhitespace && getAutoGeneratePhraseQueries()) {
    throw new IllegalArgumentException(
        "setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
  }
  this.splitOnWhitespace = splitOnWhitespace;
}

View File

@ -146,7 +146,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
* Set to false if phrase queries should only be generated when
* surrounded by double quotes.
*/
public final void setAutoGeneratePhraseQueries(boolean value) {
public void setAutoGeneratePhraseQueries(boolean value) {
this.autoGeneratePhraseQueries = value;
}

View File

@ -702,4 +702,19 @@ public class TestQueryParser extends QueryParserTestBase {
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig");
splitOnWhitespace = oldSplitOnWhitespace;
}
// LUCENE-7533
/**
 * Verifies that the classic QueryParser rejects the combination
 * splitOnWhitespace=false with autoGeneratePhraseQueries=true, whichever
 * setter is the one that would complete the illegal combination.
 * <p>
 * Setup calls are kept outside the expectThrows lambdas so that only the call
 * expected to be rejected can satisfy the assertion; an unexpected exception
 * from a setup call fails the test instead of being counted as a pass.
 */
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
  // Order 1: splitOnWhitespace is turned off first, then enabling
  // autoGeneratePhraseQueries must be rejected.
  final QueryParser qp = new QueryParser("field", new MockAnalyzer(random()));
  qp.setSplitOnWhitespace(false);
  expectThrows(IllegalArgumentException.class, () -> qp.setAutoGeneratePhraseQueries(true));

  // Order 2: autoGeneratePhraseQueries is enabled first (legal while
  // splitOnWhitespace is true), then turning splitOnWhitespace off must be rejected.
  final QueryParser qp2 = new QueryParser("field", new MockAnalyzer(random()));
  qp2.setSplitOnWhitespace(true);
  qp2.setAutoGeneratePhraseQueries(true);
  expectThrows(IllegalArgumentException.class, () -> qp2.setSplitOnWhitespace(false));
}
}

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
//import org.apache.lucene.queryparser.classic.ParseException;
//import org.apache.lucene.queryparser.classic.QueryParser;
//import org.apache.lucene.queryparser.classic.QueryParserBase;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.QueryParserBase;
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
@ -340,6 +341,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
PhraseQuery expected = new PhraseQuery("field", "", "");
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
}
setAutoGeneratePhraseQueries(qp, true);
assertEquals(expected, getQuery("中国",qp));
}