From 6d1962a902bffbf6bea4b81b09524104140d9f73 Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Thu, 17 Nov 2016 18:50:58 -0500 Subject: [PATCH] LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true when splitOnWhitespace=false (and vice-versa). --- lucene/CHANGES.txt | 8 ++ .../queryparser/classic/QueryParser.java | 78 +++++++++++++------ .../lucene/queryparser/classic/QueryParser.jj | 28 +++++++ .../queryparser/classic/QueryParserBase.java | 2 +- .../queryparser/classic/TestQueryParser.java | 15 ++++ .../queryparser/util/QueryParserTestBase.java | 4 + 6 files changed, 109 insertions(+), 26 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6039562ae4b..4a99bce9244 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -5,6 +5,11 @@ http://s.apache.org/luceneversions ======================= Lucene 6.4.0 ======================= +API Changes + +* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries + to be set to true when splitOnWhitespace is false (and vice-versa). + New features * LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand) @@ -16,6 +21,9 @@ Bug Fixes * LUCENE-7562: CompletionFieldsConsumer sometimes throws NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless) + +* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true + when splitOnWhitespace=false (and vice-versa). (Steve Rowe) Improvements diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java index c137d3043b7..2410cb08047 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java @@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants init(f, a); } + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

+ * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. + */ + @Override + public void setAutoGeneratePhraseQueries(boolean value) { + if (splitOnWhitespace == false && value == true) { + throw new IllegalArgumentException + ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); + } + this.autoGeneratePhraseQueries = value; + } + /** * @see #setSplitOnWhitespace(boolean) */ @@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants /** * Whether query text should be split on whitespace prior to analysis. * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. */ public void setSplitOnWhitespace(boolean splitOnWhitespace) { + if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { + throw new IllegalArgumentException + ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); + } this.splitOnWhitespace = splitOnWhitespace; } @@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants finally { jj_save(2, xla); } } + private boolean jj_3R_3() { + if (jj_scan_token(TERM)) return true; + jj_lookingAhead = true; + jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); + jj_lookingAhead = false; + if (!jj_semLA || jj_3R_6()) return true; + Token xsp; + if (jj_3R_7()) return true; + while (true) { + xsp = jj_scanpos; + if (jj_3R_7()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3R_6() { + return false; + } + + private boolean jj_3R_5() { + if (jj_scan_token(STAR)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + private boolean jj_3R_4() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; @@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants return false; } - private boolean jj_3R_3() { - if (jj_scan_token(TERM)) return true; - jj_lookingAhead = true; - jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); - jj_lookingAhead = false; - if (!jj_semLA || jj_3R_6()) return true; - Token xsp; - if (jj_3R_7()) return true; - while (true) { - xsp = jj_scanpos; - if (jj_3R_7()) { jj_scanpos = xsp; break; } - } - return false; - } - - private boolean jj_3R_6() { - return false; - } - - private boolean jj_3R_5() { - if (jj_scan_token(STAR)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj index 71a5c391c27..970a0555d6e 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj @@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase { init(f, a); } + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

+ * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. + */ + @Override + public void setAutoGeneratePhraseQueries(boolean value) { + if (splitOnWhitespace == false && value == true) { + throw new IllegalArgumentException + ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); + } + this.autoGeneratePhraseQueries = value; + } + /** * @see #setSplitOnWhitespace(boolean) */ @@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase { /** * Whether query text should be split on whitespace prior to analysis. * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. */ public void setSplitOnWhitespace(boolean splitOnWhitespace) { + if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { + throw new IllegalArgumentException + ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); + } this.splitOnWhitespace = splitOnWhitespace; } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index 8b0866fc641..7fad3520c92 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -146,7 +146,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer * Set to false if phrase queries should only be generated when * surrounded by double quotes. */ - public final void setAutoGeneratePhraseQueries(boolean value) { + public void setAutoGeneratePhraseQueries(boolean value) { this.autoGeneratePhraseQueries = value; } diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java index c3d7b37f5c1..bc9648b688c 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java @@ -702,4 +702,19 @@ public class TestQueryParser extends QueryParserTestBase { assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig"); splitOnWhitespace = oldSplitOnWhitespace; } + + // LUCENE-7533 + public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() { + final QueryParser qp = new QueryParser("field", new MockAnalyzer(random())); + expectThrows(IllegalArgumentException.class, () -> { + qp.setSplitOnWhitespace(false); + qp.setAutoGeneratePhraseQueries(true); + }); + final QueryParser qp2 = new QueryParser("field", new MockAnalyzer(random())); + expectThrows(IllegalArgumentException.class, () -> { + qp2.setSplitOnWhitespace(true); + qp2.setAutoGeneratePhraseQueries(true); + qp2.setSplitOnWhitespace(false); + }); + } } \ No newline at end of file diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java index 71c65f68197..edfa27ef98b 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.Term; //import org.apache.lucene.queryparser.classic.ParseException; //import org.apache.lucene.queryparser.classic.QueryParser; //import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParserBase; //import org.apache.lucene.queryparser.classic.QueryParserTokenManager; import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; @@ -340,6 +341,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase { PhraseQuery expected = new PhraseQuery("field", "中", "国"); CommonQueryParserConfiguration qp = getParserConfig(analyzer); + if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method + ((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533 + } setAutoGeneratePhraseQueries(qp, true); assertEquals(expected, getQuery("中国",qp)); }