From 8938c9f3d0608544ba6f16832bad7421d4080b7b Mon Sep 17 00:00:00 2001 From: Steve Rowe Date: Thu, 17 Nov 2016 18:50:58 -0500 Subject: [PATCH] LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true when splitOnWhitespace=false (and vice-versa). --- lucene/CHANGES.txt | 8 ++ .../queryparser/classic/QueryParser.java | 78 +++++++++++++------ .../lucene/queryparser/classic/QueryParser.jj | 28 +++++++ .../queryparser/classic/QueryParserBase.java | 2 +- .../queryparser/classic/TestQueryParser.java | 14 ++++ .../queryparser/util/QueryParserTestBase.java | 4 + 6 files changed, 108 insertions(+), 26 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index f5dfd446a2d..051c326ab81 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -56,6 +56,11 @@ Other ======================= Lucene 6.4.0 ======================= +API Changes + +* LUCENE-7533: Classic query parser no longer allows autoGeneratePhraseQueries + to be set to true when splitOnWhitespace is false (and vice-versa). + New features * LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand) @@ -67,6 +72,9 @@ Bug Fixes * LUCENE-7562: CompletionFieldsConsumer sometimes throws NullPointerException on ghost fields (Oliver Eilhard via Mike McCandless) + +* LUCENE-7533: Classic query parser: disallow autoGeneratePhraseQueries=true + when splitOnWhitespace=false (and vice-versa). (Steve Rowe) Improvements diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java index dc43ab14592..5e5d57bf294 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.java @@ -96,6 +96,27 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants init(f, a); } + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

+ * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. + */ + @Override + public void setAutoGeneratePhraseQueries(boolean value) { + if (splitOnWhitespace == false && value == true) { + throw new IllegalArgumentException + ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); + } + this.autoGeneratePhraseQueries = value; + } + /** * @see #setSplitOnWhitespace(boolean) */ @@ -106,8 +127,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants /** * Whether query text should be split on whitespace prior to analysis. * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. */ public void setSplitOnWhitespace(boolean splitOnWhitespace) { + if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { + throw new IllegalArgumentException + ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); + } this.splitOnWhitespace = splitOnWhitespace; } @@ -635,6 +663,31 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants finally { jj_save(2, xla); } } + private boolean jj_3R_3() { + if (jj_scan_token(TERM)) return true; + jj_lookingAhead = true; + jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); + jj_lookingAhead = false; + if (!jj_semLA || jj_3R_6()) return true; + Token xsp; + if (jj_3R_7()) return true; + while (true) { + xsp = jj_scanpos; + if (jj_3R_7()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3R_6() { + return false; + } + + private boolean jj_3R_5() { + if (jj_scan_token(STAR)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + private boolean jj_3R_4() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; @@ -666,31 +719,6 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants return false; } - private boolean jj_3R_3() { - if (jj_scan_token(TERM)) return true; - jj_lookingAhead = true; - jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); - jj_lookingAhead = false; - if (!jj_semLA || jj_3R_6()) return true; - Token xsp; - if (jj_3R_7()) return true; - while (true) { - xsp = jj_scanpos; - if (jj_3R_7()) { jj_scanpos = xsp; break; } - } - return false; - } - - private boolean jj_3R_6() { - return false; - } - - private boolean jj_3R_5() { - if (jj_scan_token(STAR)) return true; - if (jj_scan_token(COLON)) return true; - return false; - } - /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj index ea185881464..35cbe1a1977 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParser.jj @@ -120,6 +120,27 @@ public class QueryParser extends QueryParserBase { init(f, a); } + /** + * Set to true if phrase queries will be automatically generated + * when the analyzer returns more than one term from whitespace + * delimited text. + * NOTE: this behavior may not be suitable for all languages. + *

+ * Set to false if phrase queries should only be generated when + * surrounded by double quotes. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. + */ + @Override + public void setAutoGeneratePhraseQueries(boolean value) { + if (splitOnWhitespace == false && value == true) { + throw new IllegalArgumentException + ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); + } + this.autoGeneratePhraseQueries = value; + } + /** * @see #setSplitOnWhitespace(boolean) */ @@ -130,8 +151,15 @@ public class QueryParser extends QueryParserBase { /** * Whether query text should be split on whitespace prior to analysis. * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. + *

+ * The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true + * is disallowed. See LUCENE-7533. */ public void setSplitOnWhitespace(boolean splitOnWhitespace) { + if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { + throw new IllegalArgumentException + ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); + } this.splitOnWhitespace = splitOnWhitespace; } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index fbe08a90770..41d3764f0ac 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -144,7 +144,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer * Set to false if phrase queries should only be generated when * surrounded by double quotes. */ - public final void setAutoGeneratePhraseQueries(boolean value) { + public void setAutoGeneratePhraseQueries(boolean value) { this.autoGeneratePhraseQueries = value; } diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java index de90e29affe..bb976249bf6 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/classic/TestQueryParser.java @@ -840,6 +840,20 @@ public class TestQueryParser extends QueryParserTestBase { assertTrue(isAHit(qp.parse("เ??"), s, analyzer)); } + // LUCENE-7533 + public void test_splitOnWhitespace_with_autoGeneratePhraseQueries() { + final QueryParser qp = new QueryParser(FIELD, new MockAnalyzer(random())); + expectThrows(IllegalArgumentException.class, () -> { + qp.setSplitOnWhitespace(false); + qp.setAutoGeneratePhraseQueries(true); + }); + final QueryParser qp2 = new QueryParser(FIELD, new MockAnalyzer(random())); + expectThrows(IllegalArgumentException.class, () -> { + qp2.setSplitOnWhitespace(true); + qp2.setAutoGeneratePhraseQueries(true); + qp2.setSplitOnWhitespace(false); + }); + } private boolean isAHit(Query q, String content, Analyzer analyzer) throws IOException{ Directory ramDir = newDirectory(); diff --git a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java index 217019350b7..004110634e1 100644 --- a/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java +++ b/lucene/queryparser/src/test/org/apache/lucene/queryparser/util/QueryParserTestBase.java @@ -38,6 +38,7 @@ import org.apache.lucene.index.Term; //import org.apache.lucene.queryparser.classic.ParseException; //import org.apache.lucene.queryparser.classic.QueryParser; //import org.apache.lucene.queryparser.classic.QueryParserBase; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.queryparser.classic.QueryParserBase; //import org.apache.lucene.queryparser.classic.QueryParserTokenManager; import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration; @@ -328,6 +329,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase { PhraseQuery expected = new PhraseQuery("field", "中", "国"); CommonQueryParserConfiguration qp = getParserConfig(analyzer); + if (qp instanceof QueryParser) { // Always true, since TestStandardQP overrides this method + ((QueryParser)qp).setSplitOnWhitespace(true); // LUCENE-7533 + } setAutoGeneratePhraseQueries(qp, true); assertEquals(expected, getQuery("中国",qp)); }