mirror of https://github.com/apache/lucene.git
LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true when splitOnWhitespace=false (and vice-versa).
This commit is contained in:
parent
61a6072573
commit
6d1962a902
|
@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
|
|||
|
||||
======================= Lucene 6.4.0 =======================
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries
|
||||
to be set to true when splitOnWhitespace is false (and vice-versa). (Steve Rowe)
|
||||
|
||||
New features
|
||||
|
||||
* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
|
||||
|
@ -17,6 +22,9 @@ Bug Fixes
|
|||
* LUCENE-7562: CompletionFieldsConsumer sometimes throws
|
||||
NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless)
|
||||
|
||||
* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true
|
||||
when splitOnWhitespace=false (and vice-versa). (Steve Rowe)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7532: Add back lost codec file format documentation
|
||||
|
|
|
@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
finally { jj_save(2, xla); }
|
||||
}
|
||||
|
||||
// Lookahead routine (part of the generated parser code). Returns true as soon as one of
// the scanning steps below reports true; returns false only after the whole sequence
// has been scanned.
// NOTE(review): presumably the usual JavaCC convention (true = lookahead mismatch) - confirm.
private boolean jj_3R_3() {
|
||||
// First step: scan a TERM token.
if (jj_scan_token(TERM)) return true;
|
||||
// Semantic check, evaluated with jj_lookingAhead set: token 1 must be a TERM and
// allowedPostMultiTerm() must accept the kind of token 2.
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
// Give up when the semantic check failed or jj_3R_6() reports true.
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
// At least one jj_3R_7 must scan successfully...
if (jj_3R_7()) return true;
|
||||
// ...then keep scanning further jj_3R_7 occurrences until one fails.
while (true) {
|
||||
// Remember the scan position so a failed attempt can be undone.
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lookahead routine that scans nothing: it unconditionally returns false.
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lookahead routine that scans a STAR token followed by a COLON token.
// Returns true as soon as either scan reports true, false when both have been scanned.
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean jj_3R_4() {
|
||||
if (jj_scan_token(TERM)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
|
@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
|
|||
return false;
|
||||
}
|
||||
|
||||
// Lookahead routine (part of the generated parser code). Returns true as soon as one of
// the scanning steps below reports true; returns false only after the whole sequence
// has been scanned.
// NOTE(review): presumably the usual JavaCC convention (true = lookahead mismatch) - confirm.
private boolean jj_3R_3() {
|
||||
// First step: scan a TERM token.
if (jj_scan_token(TERM)) return true;
|
||||
// Semantic check, evaluated with jj_lookingAhead set: token 1 must be a TERM and
// allowedPostMultiTerm() must accept the kind of token 2.
jj_lookingAhead = true;
|
||||
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
|
||||
jj_lookingAhead = false;
|
||||
// Give up when the semantic check failed or jj_3R_6() reports true.
if (!jj_semLA || jj_3R_6()) return true;
|
||||
Token xsp;
|
||||
// At least one jj_3R_7 must scan successfully...
if (jj_3R_7()) return true;
|
||||
// ...then keep scanning further jj_3R_7 occurrences until one fails.
while (true) {
|
||||
// Remember the scan position so a failed attempt can be undone.
xsp = jj_scanpos;
|
||||
if (jj_3R_7()) { jj_scanpos = xsp; break; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lookahead routine that scans nothing: it unconditionally returns false.
private boolean jj_3R_6() {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Lookahead routine that scans a STAR token followed by a COLON token.
// Returns true as soon as either scan reports true, false when both have been scanned.
private boolean jj_3R_5() {
|
||||
if (jj_scan_token(STAR)) return true;
|
||||
if (jj_scan_token(COLON)) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Generated Token Manager. */
|
||||
public QueryParserTokenManager token_source;
|
||||
/** Current token. */
|
||||
|
|
|
@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase {
|
|||
init(f, a);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set to true if phrase queries will be automatically generated
|
||||
* when the analyzer returns more than one term from whitespace
|
||||
* delimited text.
|
||||
* NOTE: this behavior may not be suitable for all languages.
|
||||
* <p>
|
||||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
@Override
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
if (splitOnWhitespace == false && value == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false");
|
||||
}
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
/**
|
||||
* @see #setSplitOnWhitespace(boolean)
|
||||
*/
|
||||
|
@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase {
|
|||
/**
|
||||
* Whether query text should be split on whitespace prior to analysis.
|
||||
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
|
||||
* <p>
|
||||
* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true
|
||||
* is disallowed. See <a href="https://issues.apache.org/jira/browse/LUCENE-7533">LUCENE-7533</a>.
|
||||
*/
|
||||
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
|
||||
if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) {
|
||||
throw new IllegalArgumentException
|
||||
("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true");
|
||||
}
|
||||
this.splitOnWhitespace = splitOnWhitespace;
|
||||
}
|
||||
|
||||
|
|
|
@ -146,7 +146,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
|
|||
* Set to false if phrase queries should only be generated when
|
||||
* surrounded by double quotes.
|
||||
*/
|
||||
public final void setAutoGeneratePhraseQueries(boolean value) {
|
||||
public void setAutoGeneratePhraseQueries(boolean value) {
|
||||
this.autoGeneratePhraseQueries = value;
|
||||
}
|
||||
|
||||
|
|
|
@ -702,4 +702,19 @@ public class TestQueryParser extends QueryParserTestBase {
|
|||
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig");
|
||||
splitOnWhitespace = oldSplitOnWhitespace;
|
||||
}
|
||||
|
||||
// LUCENE-7533
|
||||
public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() {
|
||||
final QueryParser qp = new QueryParser("field", new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp.setSplitOnWhitespace(false);
|
||||
qp.setAutoGeneratePhraseQueries(true);
|
||||
});
|
||||
final QueryParser qp2 = new QueryParser("field", new MockAnalyzer(random()));
|
||||
expectThrows(IllegalArgumentException.class, () -> {
|
||||
qp2.setSplitOnWhitespace(true);
|
||||
qp2.setAutoGeneratePhraseQueries(true);
|
||||
qp2.setSplitOnWhitespace(false);
|
||||
});
|
||||
}
|
||||
}
|
|
@ -38,6 +38,7 @@ import org.apache.lucene.index.Term;
|
|||
//import org.apache.lucene.queryparser.classic.ParseException;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.queryparser.classic.QueryParserBase;
|
||||
//import org.apache.lucene.queryparser.classic.QueryParserTokenManager;
|
||||
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
|
||||
|
@ -340,6 +341,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
|
|||
|
||||
PhraseQuery expected = new PhraseQuery("field", "中", "国");
|
||||
CommonQueryParserConfiguration qp = getParserConfig(analyzer);
|
||||
if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method
|
||||
((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533
|
||||
}
|
||||
setAutoGeneratePhraseQueries(qp, true);
|
||||
assertEquals(expected, getQuery("中国",qp));
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue