diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index af151eda840..9a5299ccd29 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -202,6 +202,13 @@ New Features requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that they don't get cached in memory. (David Smiley) +* SOLR-9185: Solr's edismax and "Lucene"/standard query parsers will no longer split on whitespace before sending + terms to analysis, if given the "sow=false" request param ("sow"=>"split on whitespace"). This enables multi-term + source synonyms to match at query-time using SynonymGraphFilterFactory; other analysis components will also now + work at query time, e.g. ShingleFilterFactory. By default, and when the "sow=true" param is specified, these + parsers' behavior remains the same: queries will be split on whitespace before sending individual terms to analysis. + (Steve Rowe) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/parser/QueryParser.java b/solr/core/src/java/org/apache/solr/parser/QueryParser.java index 42f359ee811..d9a64f4f7c0 100644 --- a/solr/core/src/java/org/apache/solr/parser/QueryParser.java +++ b/solr/core/src/java/org/apache/solr/parser/QueryParser.java @@ -3,13 +3,17 @@ package org.apache.solr.parser; import java.io.StringReader; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Query; -import org.apache.lucene.util.Version; -import org.apache.solr.search.QParser; import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QueryParserConfigurationException; public class QueryParser extends SolrQueryParserBase implements QueryParserConstants { @@ -17,9 +21,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst */ static public enum Operator { OR, AND } - public QueryParser(Version matchVersion, String defaultField, QParser parser) { + /** default split on whitespace behavior */ + public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true; + + public QueryParser(String defaultField, QParser parser) { this(new FastCharStream(new StringReader(""))); - init(matchVersion, defaultField, parser); + init(defaultField, parser); + } + + /** + * @see #setSplitOnWhitespace(boolean) + */ + public boolean getSplitOnWhitespace() { + return splitOnWhitespace; + } + + /** + * Whether query text should be split on whitespace prior to analysis. + * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. + */ + public void setSplitOnWhitespace(boolean splitOnWhitespace) { + this.splitOnWhitespace = splitOnWhitespace; + } + + private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; + private static Set disallowedPostMultiTerm + = new HashSet(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); + private static boolean allowedPostMultiTerm(int tokenKind) { + return disallowedPostMultiTerm.contains(tokenKind) == false; + } + + @Override + protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, + boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError { + if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) { + throw new QueryParserConfigurationException + ("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false"); + } + return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries); } // * Query ::= ( Clause )* @@ -96,13 +135,38 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst final public Query Query(String field) throws ParseException, SyntaxError { List clauses = new ArrayList(); - Query q, firstQuery=null; + Query q; int conj, mods; - mods = Modifiers(); - q = Clause(field); - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery=q; + if (jj_2_1(2)) { + MultiTerm(field, clauses); + } else { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case NOT: + case PLUS: + case MINUS: + case BAREOPER: + case LPAREN: + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case REGEXPTERM: + case RANGEIN_START: + case RANGEEX_START: + case LPARAMS: + case FILTER: + case NUMBER: + mods = Modifiers(); + q = Clause(field); + addClause(clauses, CONJ_NONE, mods, q); + break; + default: + jj_la1[4] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } label_1: while (true) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -127,19 +191,50 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst ; break; default: - jj_la1[4] = jj_gen; + jj_la1[5] = jj_gen; break label_1; } - conj = Conjunction(); - mods = Modifiers(); - q = Clause(field); - addClause(clauses, conj, mods, q); - } - if (clauses.size() == 1 && firstQuery != null) - {if (true) return rawToNormal(firstQuery);} - else { - {if (true) return getBooleanQuery(clauses);} + if (jj_2_2(2)) { + MultiTerm(field, clauses); + } else { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case AND: + case OR: + case NOT: + case PLUS: + case MINUS: + case BAREOPER: + case LPAREN: + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case REGEXPTERM: + case RANGEIN_START: + case RANGEEX_START: + case LPARAMS: + case FILTER: + case NUMBER: + conj = Conjunction(); + mods = Modifiers(); + q = Clause(field); + addClause(clauses, conj, mods, q); + break; + default: + jj_la1[6] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } } + } + if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) { + Query firstQuery = clauses.get(0).getQuery(); + if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) { + {if (true) return rawToNormal(firstQuery);} + } + } + {if (true) return getBooleanQuery(clauses);} throw new Error("Missing return statement in function"); } @@ -148,20 +243,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst Token fieldToken=null, boost=null; Token localParams=null; int flags = 0; - if (jj_2_1(2)) { + if (jj_2_3(2)) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case TERM: fieldToken = jj_consume_token(TERM); jj_consume_token(COLON); - field=discardEscapeChar(fieldToken.image); + field = discardEscapeChar(fieldToken.image); break; case STAR: jj_consume_token(STAR); jj_consume_token(COLON); - field="*"; + field = "*"; break; default: - jj_la1[5] = jj_gen; + jj_la1[7] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -191,7 +286,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst boost = jj_consume_token(NUMBER); break; default: - jj_la1[6] = jj_gen; + jj_la1[8] = jj_gen; ; } break; @@ -206,10 +301,10 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst boost = jj_consume_token(NUMBER); break; default: - jj_la1[7] = jj_gen; + jj_la1[9] = jj_gen; ; } - q=getFilter(q); restoreFlags(flags); + q=getFilter(q); restoreFlags(flags); break; case LPARAMS: localParams = jj_consume_token(LPARAMS); @@ -219,17 +314,17 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst boost = jj_consume_token(NUMBER); break; default: - jj_la1[8] = jj_gen; + jj_la1[10] = jj_gen; ; } - q=getLocalParams(field, localParams.image); + q=getLocalParams(field, localParams.image); break; default: - jj_la1[9] = jj_gen; + jj_la1[11] = jj_gen; jj_consume_token(-1); throw new ParseException(); } - {if (true) return handleBoost(q, boost);} + {if (true) return handleBoost(q, boost);} throw new Error("Missing return statement in function"); } @@ -278,35 +373,48 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst term.image = term.image.substring(0,1); break; default: - jj_la1[10] = jj_gen; + jj_la1[12] = jj_gen; jj_consume_token(-1); throw new ParseException(); } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; - break; - default: - jj_la1[11] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); + case FUZZY_SLOP: switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; + break; + default: + jj_la1[13] = jj_gen; + ; + } + break; case FUZZY_SLOP: fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy=true; + fuzzy=true; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[14] = jj_gen; + ; + } break; default: - jj_la1[12] = jj_gen; - ; + jj_la1[15] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); } break; default: - jj_la1[13] = jj_gen; + jj_la1[16] = jj_gen; ; } q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); @@ -316,13 +424,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case RANGEIN_START: jj_consume_token(RANGEIN_START); - startInc=true; + startInc = true; break; case RANGEEX_START: jj_consume_token(RANGEEX_START); break; default: - jj_la1[14] = jj_gen; + jj_la1[17] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -334,7 +442,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst goop1 = jj_consume_token(RANGE_QUOTED); break; default: - jj_la1[15] = jj_gen; + jj_la1[18] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -343,7 +451,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst jj_consume_token(RANGE_TO); break; default: - jj_la1[16] = jj_gen; + jj_la1[19] = jj_gen; ; } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { @@ -354,20 +462,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst goop2 = jj_consume_token(RANGE_QUOTED); break; default: - jj_la1[17] = jj_gen; + jj_la1[20] = jj_gen; jj_consume_token(-1); throw new ParseException(); } switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case RANGEIN_END: jj_consume_token(RANGEIN_END); - endInc=true; + endInc = true; break; case RANGEEX_END: jj_consume_token(RANGEEX_END); break; default: - jj_la1[18] = jj_gen; + jj_la1[21] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -377,46 +485,71 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst boost = jj_consume_token(NUMBER); break; default: - jj_la1[19] = jj_gen; + jj_la1[22] = jj_gen; ; } - boolean startOpen=false; - boolean endOpen=false; - if (goop1.kind == RANGE_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else if ("*".equals(goop1.image)) { - startOpen=true; - } - if (goop2.kind == RANGE_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else if ("*".equals(goop2.image)) { - endOpen=true; - } - q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); + boolean startOpen=false; + boolean endOpen=false; + if (goop1.kind == RANGE_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } else if ("*".equals(goop1.image)) { + startOpen=true; + } + if (goop2.kind == RANGE_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else if ("*".equals(goop2.image)) { + endOpen=true; + } + q = getRangeQuery(getField(field), + startOpen ? null : discardEscapeChar(goop1.image), + endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); break; case QUOTED: term = jj_consume_token(QUOTED); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - break; - default: - jj_la1[20] = jj_gen; - ; - } - switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); + case FUZZY_SLOP: + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; + break; + default: + jj_la1[23] = jj_gen; + ; + } + break; + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy=true; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[24] = jj_gen; + ; + } + break; + default: + jj_la1[25] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } break; default: - jj_la1[21] = jj_gen; + jj_la1[26] = jj_gen; ; } - q = handleQuotedTerm(getField(field), term, fuzzySlop); + q = handleQuotedTerm(getField(field), term, fuzzySlop); break; default: - jj_la1[22] = jj_gen; + jj_la1[27] = jj_gen; jj_consume_token(-1); throw new ParseException(); } @@ -424,6 +557,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst throw new Error("Missing return statement in function"); } + final public void MultiTerm(String field, List clauses) throws ParseException, SyntaxError { + Token text; + List terms = null; + text = jj_consume_token(TERM); + if (splitOnWhitespace) { + Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); + addClause(clauses, CONJ_NONE, MOD_NONE, q); + } else { + terms = new ArrayList(); + terms.add(discardEscapeChar(text.image)); + } + if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) { + + } else { + jj_consume_token(-1); + throw new ParseException(); + } + label_2: + while (true) { + text = jj_consume_token(TERM); + if (splitOnWhitespace) { + Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); + addClause(clauses, CONJ_NONE, MOD_NONE, q); + } else { + terms.add(discardEscapeChar(text.image)); + } + if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) { + ; + } else { + break label_2; + } + } + if (splitOnWhitespace == false) { + Query q = getFieldQuery(getField(field), terms, true); + addMultiTermClause(clauses, q); + } + } + private boolean jj_2_1(int xla) { jj_la = xla; jj_lastpos = jj_scanpos = token; try { return !jj_3_1(); } @@ -431,28 +602,76 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst finally { jj_save(0, xla); } } - private boolean jj_3R_3() { - if (jj_scan_token(STAR)) return true; - if (jj_scan_token(COLON)) return true; + private boolean jj_2_2(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_2(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(1, xla); } + } + + private boolean jj_2_3(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_3(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(2, xla); } + } + + private boolean jj_3R_7() { + if (jj_scan_token(TERM)) return true; return false; } - private boolean jj_3R_2() { + private boolean jj_3R_4() { if (jj_scan_token(TERM)) return true; if (jj_scan_token(COLON)) return true; return false; } private boolean jj_3_1() { + if (jj_3R_3()) return true; + return false; + } + + private boolean jj_3R_6() { + return false; + } + + private boolean jj_3R_3() { + if (jj_scan_token(TERM)) return true; + jj_lookingAhead = true; + jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind); + jj_lookingAhead = false; + if (!jj_semLA || jj_3R_6()) return true; + Token xsp; + if (jj_3R_7()) return true; + while (true) { + xsp = jj_scanpos; + if (jj_3R_7()) { jj_scanpos = xsp; break; } + } + return false; + } + + private boolean jj_3_3() { Token xsp; xsp = jj_scanpos; - if (jj_3R_2()) { + if (jj_3R_4()) { jj_scanpos = xsp; - if (jj_3R_3()) return true; + if (jj_3R_5()) return true; } return false; } + private boolean jj_3_2() { + if (jj_3R_3()) return true; + return false; + } + + private boolean jj_3R_5() { + if (jj_scan_token(STAR)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + /** Generated Token Manager. */ public QueryParserTokenManager token_source; /** Current token. */ @@ -462,8 +681,11 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst private int jj_ntk; private Token jj_scanpos, jj_lastpos; private int jj_la; + /** Whether we are looking ahead. */ + private boolean jj_lookingAhead = false; + private boolean jj_semLA; private int jj_gen; - final private int[] jj_la1 = new int[23]; + final private int[] jj_la1 = new int[28]; static private int[] jj_la1_0; static private int[] jj_la1_1; static { @@ -471,12 +693,12 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst jj_la1_init_1(); } private static void jj_la1_init_0() { - jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x4000000,0x800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0xfb440000,}; + jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4f8000,0xfb4fe000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x800000,0x4800000,0x4800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0x4800000,0x4800000,0xfb440000,}; } private static void jj_la1_init_1() { - jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x4,}; + jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x7,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x0,0x0,0x4,}; } - final private JJCalls[] jj_2_rtns = new JJCalls[1]; + final private JJCalls[] jj_2_rtns = new JJCalls[3]; private boolean jj_rescan = false; private int jj_gc = 0; @@ -486,7 +708,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < 28; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -495,8 +717,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst token_source.ReInit(stream); token = new Token(); jj_ntk = -1; + jj_lookingAhead = false; jj_gen = 0; - for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < 28; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -506,7 +729,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < 28; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -516,7 +739,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst token = new Token(); jj_ntk = -1; jj_gen = 0; - for (int i = 0; i < 23; i++) jj_la1[i] = -1; + for (int i = 0; i < 28; i++) jj_la1[i] = -1; for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); } @@ -579,7 +802,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst /** Get the specific Token. */ final public Token getToken(int index) { - Token t = token; + Token t = jj_lookingAhead ? jj_scanpos : token; for (int i = 0; i < index; i++) { if (t.next != null) t = t.next; else t = t.next = token_source.getNextToken(); @@ -633,7 +856,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst la1tokens[jj_kind] = true; jj_kind = -1; } - for (int i = 0; i < 23; i++) { + for (int i = 0; i < 28; i++) { if (jj_la1[i] == jj_gen) { for (int j = 0; j < 32; j++) { if ((jj_la1_0[i] & (1<{@value #DEFAULT_SPLIT_ON_WHITESPACE}. + */ + public void setSplitOnWhitespace(boolean splitOnWhitespace) { + this.splitOnWhitespace = splitOnWhitespace; + } + + private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; + private static Set disallowedPostMultiTerm + = new HashSet(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); + private static boolean allowedPostMultiTerm(int tokenKind) { + return disallowedPostMultiTerm.contains(tokenKind) == false; + } + + @Override + protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, + boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError { + if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) { + throw new QueryParserConfigurationException + ("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false"); + } + return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries); } } @@ -63,17 +97,15 @@ TOKEN_MGR_DECLS : { /* ***************** */ <*> TOKEN : { - <#_NUM_CHAR: ["0"-"9"] > - // every character that follows a backslash is considered as an escaped character - | <#_ESCAPED_CHAR: "\\" ~[] > - | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", - "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] - | <_ESCAPED_CHAR> ) > - | <#_TERM_CHAR: ( <_TERM_START_CHAR> - | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") > - | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > - | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > - | <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) > + <#_NUM_CHAR: ["0"-"9"] > +| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] + | <_ESCAPED_CHAR> ) > +| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > +| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > +| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) > } SKIP : { @@ -93,44 +125,43 @@ TOKEN_MGR_DECLS : { < <_WHITESPACE>> } - TOKEN : { - - | - | - | - | - | > - | - | - | - | - | : Boost - | )* "\""> - | (<_TERM_CHAR>)* > - | )+ ( "." (<_NUM_CHAR>)+ )? )? > - | (<_TERM_CHAR>)* "*" ) > - | | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > - | - | : Range - | : Range + +| +| +| +| +| > +| +| +| +| +| : Boost +| )* "\""> +| (<_TERM_CHAR>)* > +| )+ ( "." (<_NUM_CHAR>)+ )? )? > +| (<_TERM_CHAR>)* "*" ) > +| | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +| +| : Range +| : Range // TODO: consider using token states instead of inlining SQUOTED -// | )* "'"> -// | )* (~["=","}"])+ ( "=" ( | | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > - | )* (~["=","}"])+ ( "=" ( | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > - | +// | )* "'"> +// | )* (~["=","}"])+ ( "=" ( | | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > +| )* (~["=","}"])+ ( "=" ( | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > +| } TOKEN : { - )+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT + )+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT } TOKEN : { - - | : DEFAULT - | : DEFAULT - | - | + +| : DEFAULT +| : DEFAULT +| +| } // * Query ::= ( Clause )* @@ -160,8 +191,7 @@ int Modifiers() : { } // This makes sure that there is no garbage after the query string -Query TopLevelQuery(String field) throws SyntaxError : -{ +Query TopLevelQuery(String field) throws SyntaxError : { Query q; } { @@ -174,27 +204,31 @@ Query TopLevelQuery(String field) throws SyntaxError : Query Query(String field) throws SyntaxError : { List clauses = new ArrayList(); - Query q, firstQuery=null; + Query q; int conj, mods; } { - mods=Modifiers() q=Clause(field) - { - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery=q; - } ( - conj=Conjunction() mods=Modifiers() q=Clause(field) - { addClause(clauses, conj, mods, q); } + LOOKAHEAD(2) + MultiTerm(field, clauses) + | mods=Modifiers() q=Clause(field) + { addClause(clauses, CONJ_NONE, mods, q); } + ) + ( + LOOKAHEAD(2) + MultiTerm(field, clauses) + | conj=Conjunction() mods=Modifiers() q=Clause(field) + { addClause(clauses, conj, mods, q); } )* - { - if (clauses.size() == 1 && firstQuery != null) + { + if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) { + Query firstQuery = clauses.get(0).getQuery(); + if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) { return rawToNormal(firstQuery); - else { - return getBooleanQuery(clauses); } } + return getBooleanQuery(clauses); + } } Query Clause(String field) throws SyntaxError : { @@ -204,26 +238,22 @@ Query Clause(String field) throws SyntaxError : { int flags = 0; } { - [ LOOKAHEAD(2) ( - fieldToken= {field=discardEscapeChar(fieldToken.image);} - | {field="*";} + fieldToken= { field = discardEscapeChar(fieldToken.image); } + | { field = "*"; } ) ] - - ( q=Term(field) - | q=Query(field) ( boost=)? - | ( { flags=startFilter(); } q=Query(field) ( boost=)? { q=getFilter(q); restoreFlags(flags); } ) - | (localParams = ( boost=)? { q=getLocalParams(field, localParams.image); } ) + | q=Query(field) [ boost= ] + | ( { flags=startFilter(); } q=Query(field) [ boost= ] { q=getFilter(q); restoreFlags(flags); } ) + | (localParams = [ boost= ] { q=getLocalParams(field, localParams.image); } ) ) - { return handleBoost(q, boost); } + { return handleBoost(q, boost); } } - Query Term(String field) throws SyntaxError : { Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; @@ -245,40 +275,78 @@ Query Term(String field) throws SyntaxError : { | term= | term= { term.image = term.image.substring(0,1); } ) - [ fuzzySlop= { fuzzy=true; } ] - [ boost= [ fuzzySlop= { fuzzy=true; } ] ] + [ + boost= [ fuzzySlop= { fuzzy=true; } ] + | fuzzySlop= { fuzzy=true; } [ boost= ] + ] + { q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); } + + | ( { startInc = true; } | ) + ( goop1= | goop1= ) + [ ] + ( goop2= | goop2= ) + ( { endInc = true; } | ) + [ boost= ] { - q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); - } - | ( ( {startInc=true;} | ) - ( goop1=|goop1= ) - [ ] - ( goop2=|goop2= ) - ( {endInc=true;} | )) - [ boost= ] - { - boolean startOpen=false; - boolean endOpen=false; - if (goop1.kind == RANGE_QUOTED) { - goop1.image = goop1.image.substring(1, goop1.image.length()-1); - } else if ("*".equals(goop1.image)) { - startOpen=true; - } - if (goop2.kind == RANGE_QUOTED) { - goop2.image = goop2.image.substring(1, goop2.image.length()-1); - } else if ("*".equals(goop2.image)) { - endOpen=true; - } - q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); - } - | term= - [ fuzzySlop= ] - [ boost= ] - { - q = handleQuotedTerm(getField(field), term, fuzzySlop); + boolean startOpen=false; + boolean endOpen=false; + if (goop1.kind == RANGE_QUOTED) { + goop1.image = goop1.image.substring(1, goop1.image.length()-1); + } else if ("*".equals(goop1.image)) { + startOpen=true; } + if (goop2.kind == RANGE_QUOTED) { + goop2.image = goop2.image.substring(1, goop2.image.length()-1); + } else if ("*".equals(goop2.image)) { + endOpen=true; + } + q = getRangeQuery(getField(field), + startOpen ? null : discardEscapeChar(goop1.image), + endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); + } + | term= + [ + boost= [ fuzzySlop= { fuzzy=true; } ] + | fuzzySlop= { fuzzy=true; } [ boost= ] + ] + { q = handleQuotedTerm(getField(field), term, fuzzySlop); } ) + { return handleBoost(q, boost); } +} + +void MultiTerm(String field, List clauses) throws SyntaxError : { + Token text; + List terms = null; +} +{ + text= { - return handleBoost(q, boost); + if (splitOnWhitespace) { + Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); + addClause(clauses, CONJ_NONE, MOD_NONE, q); + } else { + terms = new ArrayList(); + terms.add(discardEscapeChar(text.image)); + } + } + // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest + LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) + ( + LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) + text= + { + if (splitOnWhitespace) { + Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true); + addClause(clauses, CONJ_NONE, MOD_NONE, q); + } else { + terms.add(discardEscapeChar(text.image)); + } + } + )+ + { + if (splitOnWhitespace == false) { + Query q = getFieldQuery(getField(field), terms, true); + addMultiTermClause(clauses, q); + } } } diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index cb3b1eedf05..08ccdd11b58 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -18,10 +18,12 @@ package org.apache.solr.parser; import java.io.StringReader; import java.util.ArrayList; +import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.reverse.ReverseStringFilter; @@ -41,7 +43,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.QueryBuilder; -import org.apache.lucene.util.Version; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; @@ -59,7 +60,7 @@ import org.apache.solr.search.SolrConstantScoreQuery; import org.apache.solr.search.SyntaxError; /** This class is overridden by QueryParser in QueryParser.jj - * and acts to separate the majority of the Java code from the .jj grammar file. + * and acts to separate the majority of the Java code from the .jj grammar file. */ public abstract class SolrQueryParserBase extends QueryBuilder { @@ -83,7 +84,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { public static final Operator OR_OPERATOR = Operator.OR; /** The default operator that parser uses to combine query terms */ - Operator operator = OR_OPERATOR; + protected Operator operator = OR_OPERATOR; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE; boolean allowLeadingWildcard = true; @@ -133,16 +134,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder { // internal: A simple raw fielded query public static class RawQuery extends Query { final SchemaField sfield; - final String externalVal; + private final List externalVals; public RawQuery(SchemaField sfield, String externalVal) { + this(sfield, Collections.singletonList(externalVal)); + } + + public RawQuery(SchemaField sfield, List externalVals) { this.sfield = sfield; - this.externalVal = externalVal; + this.externalVals = externalVals; + } + + public int getTermCount() { + return externalVals.size(); + } + + public List getExternalVals() { + return externalVals; + } + + public String getJoinedExternalVal() { + return externalVals.size() == 1 ? externalVals.get(0) : String.join(" ", externalVals); } @Override public String toString(String field) { - return "RAW(" + field + "," + externalVal + ")"; + return "RAW(" + field + "," + getJoinedExternalVal() + ")"; } @Override @@ -165,7 +182,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError; - public void init(Version matchVersion, String defaultField, QParser parser) { + public void init(String defaultField, QParser parser) { this.schema = parser.getReq().getSchema(); this.parser = parser; this.flags = parser.getFlags(); @@ -406,17 +423,30 @@ public abstract class SolrQueryParserBase extends QueryBuilder { throw new RuntimeException("Clause cannot be both required and prohibited"); } + /** + * Called from QueryParser's MultiTerm rule. + * Assumption: no conjunction or modifiers (conj == CONJ_NONE and mods == MOD_NONE) + */ + protected void addMultiTermClause(List clauses, Query q) { + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) { + return; + } + clauses.add(newBooleanClause(q, operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD)); + } - - protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError { + protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, + boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; - return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); + return createFieldQuery(analyzer, occur, field, queryText, + quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop); } /** - * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. + * Base implementation delegates to {@link #getFieldQuery(String,String,boolean,boolean)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery. * @@ -440,7 +470,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { query = builder.build(); } else if (query instanceof MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; - + if (slop != mpq.getSlop()) { query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); } @@ -492,7 +522,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder { protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite String text = term.text(); - int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, + int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length())); return new FuzzyQuery(term,numEdits,prefixLength); } @@ -536,14 +566,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder { } SchemaField sfield = null; - List fieldValues = null; + List fieldValues = null; - - boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD; - int clausesAdded = 0; + boolean onlyRawQueries = true; + int allRawQueriesTermCount = 0; + for (BooleanClause clause : clauses) { + if (clause.getQuery() instanceof RawQuery) { + allRawQueriesTermCount += ((RawQuery)clause.getQuery()).getTermCount(); + } else { + onlyRawQueries = false; + } + } + boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD; BooleanQuery.Builder booleanBuilder = newBooleanQuery(); - Map> fmap = new HashMap<>(); + Map> fmap = new HashMap<>(); for (BooleanClause clause : clauses) { Query subq = clause.getQuery(); @@ -563,14 +600,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder { // If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value. // We are currently relying on things like PointField not being marked as indexed in order to bypass // the "useTermQuery" check. - if (fieldValues == null && useTermsQuery || !sfield.indexed()) { + if ((fieldValues == null && useTermsQuery) || !sfield.indexed()) { fieldValues = new ArrayList<>(2); fmap.put(sfield, fieldValues); } } if (fieldValues != null) { - fieldValues.add(rawq.externalVal); + fieldValues.add(rawq); continue; } @@ -578,33 +615,50 @@ public abstract class SolrQueryParserBase extends QueryBuilder { } } - clausesAdded++; booleanBuilder.add(clause); } - for (Map.Entry> entry : fmap.entrySet()) { + for (Map.Entry> entry : fmap.entrySet()) { sfield = entry.getKey(); fieldValues = entry.getValue(); FieldType ft = sfield.getType(); // TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder. - if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) { + int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum(); + if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) { // use boolean query instead - for (String externalVal : fieldValues) { - Query subq = ft.getFieldQuery(this.parser, sfield, externalVal); - clausesAdded++; - booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + for (RawQuery rawq : fieldValues) { + Query subq; + if (ft.isTokenized() && sfield.indexed()) { + boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); + subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(), + false, fieldAutoGenPhraseQueries); + booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + } else { + for (String externalVal : rawq.getExternalVals()) { + subq = ft.getFieldQuery(this.parser, sfield, externalVal); + booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + } + } } } else { - Query subq = ft.getSetQuery(this.parser, sfield, fieldValues); - if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query - clausesAdded++; + List externalVals + = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList()); + Query subq = ft.getSetQuery(this.parser, sfield, externalVals); + if (onlyRawQueries && termCount == allRawQueriesTermCount) return subq; // if this is everything, don't wrap in a boolean query booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); } } - return booleanBuilder.build(); + BooleanQuery bq = booleanBuilder.build(); + if (bq.clauses().size() == 1) { // Unwrap single SHOULD query + BooleanClause clause = bq.clauses().iterator().next(); + if (clause.getOccur() == BooleanClause.Occur.SHOULD) { + return clause.getQuery(); + } + } + return bq; } @@ -835,9 +889,26 @@ public abstract class SolrQueryParserBase extends QueryBuilder { // Create a "normal" query from a RawQuery (or just return the current query if it's not raw) Query rawToNormal(Query q) { - if (!(q instanceof RawQuery)) return q; - RawQuery rq = (RawQuery)q; - return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal); + Query normal = q; + if (q instanceof RawQuery) { + RawQuery rawq = (RawQuery)q; + if (rawq.sfield.getType().isTokenized()) { + normal = rawq.sfield.getType().getFieldQuery(parser, rawq.sfield, rawq.getJoinedExternalVal()); + } else { + FieldType ft = rawq.sfield.getType(); + if (rawq.getTermCount() == 1) { + normal = ft.getFieldQuery(this.parser, rawq.sfield, rawq.getExternalVals().get(0)); + } else { + BooleanQuery.Builder booleanBuilder = newBooleanQuery(); + for (String externalVal : rawq.getExternalVals()) { + Query subq = ft.getFieldQuery(this.parser, rawq.sfield, externalVal); + booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); + } + normal = booleanBuilder.build(); + } + } + } + return normal; } protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError { @@ -877,21 +948,87 @@ public abstract class SolrQueryParserBase extends QueryBuilder { FieldType ft = sf.getType(); // delegate to type for everything except tokenized fields if (ft.isTokenized() && sf.indexed()) { - return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries())); + boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries); } else { if (raw) { return new RawQuery(sf, queryText); } else { - return sf.getType().getFieldQuery(parser, sf, queryText); + return ft.getFieldQuery(parser, sf, queryText); } } } // default to a normal field query - return newFieldQuery(getAnalyzer(), field, queryText, quoted); + return newFieldQuery(getAnalyzer(), field, queryText, quoted, false); } - protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ + // Assumption: quoted is always false + protected Query getFieldQuery(String field, List queryTerms, boolean raw) throws SyntaxError { + checkNullField(field); + + SchemaField sf; + if (field.equals(lastFieldName)) { + // only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields + // and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time. + sf = lastField; + } else { + // intercept magic field name of "_" to use as a hook for our + // own functions. + if (field.charAt(0) == '_' && parser != null) { + MagicFieldName magic = MagicFieldName.get(field); + if (null != magic) { + subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser); + return subQParser.getQuery(); + } + } + + lastFieldName = field; + sf = lastField = schema.getFieldOrNull(field); + } + + if (sf != null) { + FieldType ft = sf.getType(); + // delegate to type for everything except tokenized fields + if (ft.isTokenized() && sf.indexed()) { + String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); + boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries(); + return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries); + } else { + if (raw) { + return new RawQuery(sf, queryTerms); + } else { + if (queryTerms.size() == 1) { + return ft.getFieldQuery(parser, sf, queryTerms.get(0)); + } else { + List subqs = new ArrayList<>(); + for (String queryTerm : queryTerms) { + try { + subqs.add(ft.getFieldQuery(parser, sf, queryTerm)); + } catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery() + // for edismax: ignore parsing failures + } + } + if (subqs.size() == 1) { + return subqs.get(0); + } else { // delay building boolean query until we must + final BooleanClause.Occur occur + = operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; + BooleanQuery.Builder booleanBuilder = newBooleanQuery(); + subqs.forEach(subq -> booleanBuilder.add(subq, occur)); + return booleanBuilder.build(); + } + } + } + } + } + + // default to a normal field query + String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms); + return newFieldQuery(getAnalyzer(), field, queryText, false, false); + } + + protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ checkNullField(field); SchemaField sf = schema.getField(field); diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index ebb6188ec18..c0aee881c93 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -17,6 +17,7 @@ package org.apache.solr.search; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -160,6 +161,8 @@ public class ExtendedDismaxQParser extends QParser { // but always for unstructured implicit bqs created by getFieldQuery up.minShouldMatch = config.minShouldMatch; + + up.setSplitOnWhitespace(config.splitOnWhitespace); parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config); @@ -307,6 +310,8 @@ public class ExtendedDismaxQParser extends QParser { up.setRemoveStopFilter(true); query = up.parse(mainUserQuery); } + } catch (QueryParserConfigurationException e) { + throw e; // Don't ignore configuration exceptions } catch (Exception e) { // ignore failure and reparse later after escaping reserved chars up.exceptions = false; @@ -545,6 +550,7 @@ public class ExtendedDismaxQParser extends QParser { pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields)); pp.setPhraseSlop(slop); pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords + pp.setSplitOnWhitespace(config.splitOnWhitespace); /* :TODO: reevaluate using makeDismax=true vs false... * @@ -976,6 +982,7 @@ public class ExtendedDismaxQParser extends QParser { private String field; private String val; private String val2; + private List vals; private boolean bool; private boolean bool2; private float flt; @@ -1036,6 +1043,7 @@ public class ExtendedDismaxQParser extends QParser { this.type = quoted ? QType.PHRASE : QType.FIELD; this.field = field; this.val = val; + this.vals = null; this.slop = getPhraseSlop(); // unspecified return getAliasedQuery(); } @@ -1045,10 +1053,21 @@ public class ExtendedDismaxQParser extends QParser { this.type = QType.PHRASE; this.field = field; this.val = val; + this.vals = null; this.slop = slop; return getAliasedQuery(); } - + + @Override + protected Query getFieldQuery(String field, List queryTerms, boolean raw) throws SyntaxError { + this.type = QType.FIELD; + this.field = field; + this.val = null; + this.vals = queryTerms; + this.slop = getPhraseSlop(); + return getAliasedMultiTermQuery(queryTerms); + } + @Override protected Query getPrefixQuery(String field, String val) throws SyntaxError { if (val.equals("") && field.equals("*")) { @@ -1057,11 +1076,17 @@ public class ExtendedDismaxQParser extends QParser { this.type = QType.PREFIX; this.field = field; this.val = val; + this.vals = null; return getAliasedQuery(); } @Override - protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError { + protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, + boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError { + if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && getSplitOnWhitespace() == false) { + throw new QueryParserConfigurationException + ("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false"); + } Analyzer actualAnalyzer; if (removeStopFilter) { if (nonStopFilterAnalyzerPerField == null) { @@ -1074,7 +1099,7 @@ public class ExtendedDismaxQParser extends QParser { } else { actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer(); } - return super.newFieldQuery(actualAnalyzer, field, queryText, quoted); + return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries); } @Override @@ -1083,6 +1108,7 @@ public class ExtendedDismaxQParser extends QParser { this.field = field; this.val = a; this.val2 = b; + this.vals = null; this.bool = startInclusive; this.bool2 = endInclusive; return getAliasedQuery(); @@ -1100,6 +1126,7 @@ public class ExtendedDismaxQParser extends QParser { this.type = QType.WILDCARD; this.field = field; this.val = val; + this.vals = null; return getAliasedQuery(); } @@ -1108,6 +1135,7 @@ public class ExtendedDismaxQParser extends QParser { this.type = QType.FUZZY; this.field = field; this.val = val; + this.vals = null; this.flt = minSimilarity; return getAliasedQuery(); } @@ -1157,7 +1185,129 @@ public class ExtendedDismaxQParser extends QParser { return getQuery(); } } - + + /** + * Delegates to the super class unless the field has been specified + * as an alias -- in which case we recurse on each of + * the aliased fields, and the results are composed into a + * DisjunctionMaxQuery. (so yes: aliases which point at other + * aliases should work) + */ + protected Query getAliasedMultiTermQuery(List queryTerms) throws SyntaxError { + Alias a = aliases.get(field); + this.validateCyclicAliasing(field); + if (a != null) { + List lst = getQueries(a); + if (lst == null || lst.size() == 0) { + return getQuery(); + } + + // make a DisjunctionMaxQuery in this case too... it will stop + // the "mm" processing from making everything required in the case + // that the query expanded to multiple clauses. + // DisMaxQuery.rewrite() removes itself if there is just a single clause anyway. + // if (lst.size()==1) return lst.get(0); + if (makeDismax) { + if (lst.get(0) instanceof BooleanQuery && allSameQueryStructure(lst)) { + BooleanQuery.Builder q = new BooleanQuery.Builder(); + List subs = new ArrayList<>(lst.size()); + for (int c = 0 ; c < ((BooleanQuery)lst.get(0)).clauses().size() ; ++c) { + subs.clear(); + // Make a dismax query for each clause position in the boolean per-field queries. + for (int n = 0 ; n < lst.size() ; ++n) { + subs.add(((BooleanQuery)lst.get(n)).clauses().get(c).getQuery()); + } + q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD)); + } + return q.build(); + } else { + return new DisjunctionMaxQuery(lst, a.tie); + } + } else { + BooleanQuery.Builder q = new BooleanQuery.Builder(); + for (Query sub : lst) { + q.add(sub, BooleanClause.Occur.SHOULD); + } + return q.build(); + } + } else { + // verify that a fielded query is actually on a field that exists... if not, + // then throw an exception to get us out of here, and we'll treat it like a + // literal when we try the escape+re-parse. + if (exceptions) { + FieldType ft = schema.getFieldTypeNoEx(field); + if (ft == null && null == MagicFieldName.get(field)) { + throw unknownField; + } + } + return getQuery(); + } + } + + /** Recursively examines the given query list for identical structure in all queries. */ + private boolean allSameQueryStructure(List lst) { + boolean allSame = true; + Query firstQuery = lst.get(0); + for (int n = 1 ; n < lst.size(); ++n) { + Query nthQuery = lst.get(n); + if (nthQuery.getClass() != firstQuery.getClass()) { + allSame = false; + break; + } + if (firstQuery instanceof BooleanQuery) { + List firstBooleanClauses = ((BooleanQuery)firstQuery).clauses(); + List nthBooleanClauses = ((BooleanQuery)nthQuery).clauses(); + if (firstBooleanClauses.size() != nthBooleanClauses.size()) { + allSame = false; + break; + } + for (int c = 0 ; c < firstBooleanClauses.size() ; ++c) { + if (nthBooleanClauses.get(c).getQuery().getClass() != firstBooleanClauses.get(c).getQuery().getClass() + || nthBooleanClauses.get(c).getOccur() != firstBooleanClauses.get(c).getOccur()) { + allSame = false; + break; + } + if (firstBooleanClauses.get(c).getQuery() instanceof BooleanQuery && ! allSameQueryStructure + (Arrays.asList(firstBooleanClauses.get(c).getQuery(), nthBooleanClauses.get(c).getQuery()))) { + allSame = false; + break; + } + } + } + } + return allSame; + } + + @Override + protected void addMultiTermClause(List clauses, Query q) { + // We might have been passed a null query; the terms might have been filtered away by the analyzer. + if (q == null) { + return; + } + + boolean required = operator == AND_OPERATOR; + BooleanClause.Occur occur = required ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; + + if (q instanceof BooleanQuery) { + boolean allOptionalDisMaxQueries = true; + for (BooleanClause c : ((BooleanQuery)q).clauses()) { + if (c.getOccur() != BooleanClause.Occur.SHOULD || ! (c.getQuery() instanceof DisjunctionMaxQuery)) { + allOptionalDisMaxQueries = false; + break; + } + } + if (allOptionalDisMaxQueries) { + // getAliasedMultiTermQuery() constructed a BooleanQuery containing only SHOULD DisjunctionMaxQuery-s. + // Unwrap the query and add a clause for each contained DisMax query. + for (BooleanClause c : ((BooleanQuery)q).clauses()) { + clauses.add(newBooleanClause(c.getQuery(), occur)); + } + return; + } + } + clauses.add(newBooleanClause(q, occur)); + } + /** * Validate there is no cyclic referencing in the aliasing */ @@ -1212,7 +1362,12 @@ public class ExtendedDismaxQParser extends QParser { switch (type) { case FIELD: // fallthrough case PHRASE: - Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false); + Query query; + if (val == null) { + query = super.getFieldQuery(field, vals, false); + } else { + query = super.getFieldQuery(field, val, type == QType.PHRASE, false); + } // Boolean query on a whitespace-separated string // If these were synonyms we would have a SynonymQuery if (query instanceof BooleanQuery) { @@ -1248,6 +1403,8 @@ public class ExtendedDismaxQParser extends QParser { } return null; + } catch (QueryParserConfigurationException e) { + throw e; // Don't ignore configuration exceptions } catch (Exception e) { // an exception here is due to the field query not being compatible with the input text // for example, passing a string to a numeric field. @@ -1442,7 +1599,7 @@ public class ExtendedDismaxQParser extends QParser { */ public class ExtendedDismaxConfiguration { - /** + /** * The field names specified by 'qf' that (most) clauses will * be queried against */ @@ -1478,7 +1635,9 @@ public class ExtendedDismaxQParser extends QParser { protected boolean lowercaseOperators; protected String[] boostFuncs; - + + protected boolean splitOnWhitespace; + public ExtendedDismaxConfiguration(SolrParams localParams, SolrParams params, SolrQueryRequest req) { solrParams = SolrParams.wrapDefaults(localParams, params); @@ -1522,6 +1681,8 @@ public class ExtendedDismaxQParser extends QParser { boostFuncs = solrParams.getParams(DisMaxParams.BF); multBoosts = solrParams.getParams(DMP.MULT_BOOST); + + splitOnWhitespace = solrParams.getBool(QueryParsing.SPLIT_ON_WHITESPACE, SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE); } /** * diff --git a/solr/core/src/java/org/apache/solr/search/LuceneQParser.java b/solr/core/src/java/org/apache/solr/search/LuceneQParser.java index 9ac318b9ad4..9668d8f8e4a 100644 --- a/solr/core/src/java/org/apache/solr/search/LuceneQParser.java +++ b/solr/core/src/java/org/apache/solr/search/LuceneQParser.java @@ -19,6 +19,7 @@ package org.apache.solr.search; import org.apache.lucene.search.Query; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.StrUtils; import org.apache.solr.request.SolrQueryRequest; /** @@ -46,6 +47,8 @@ public class LuceneQParser extends QParser { lparser.setDefaultOperator (QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP))); + lparser.setSplitOnWhitespace(StrUtils.parseBool + (getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE)); return lparser.parse(qstr); } diff --git a/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java index 07b35ade7ef..4e2a4d6cc13 100644 --- a/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/LuceneQParserPlugin.java @@ -28,6 +28,8 @@ import java.util.List; *
Other parameters:
    *
  • q.op - the default operator "OR" or "AND"
  • *
  • df - the default field name
  • + *
  • sow - split on whitespace prior to analysis, boolean, + * default={@value org.apache.solr.search.SolrQueryParser#DEFAULT_SPLIT_ON_WHITESPACE}
  • *
*
Example: {!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz */ diff --git a/solr/core/src/java/org/apache/solr/search/QueryParserConfigurationException.java b/solr/core/src/java/org/apache/solr/search/QueryParserConfigurationException.java new file mode 100644 index 00000000000..0dd2a338a2c --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/QueryParserConfigurationException.java @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search; + +public class QueryParserConfigurationException extends IllegalArgumentException { + public QueryParserConfigurationException(String message) { + super(message); + } +} diff --git a/solr/core/src/java/org/apache/solr/search/QueryParsing.java b/solr/core/src/java/org/apache/solr/search/QueryParsing.java index fb32c6e934d..692de1a7097 100644 --- a/solr/core/src/java/org/apache/solr/search/QueryParsing.java +++ b/solr/core/src/java/org/apache/solr/search/QueryParsing.java @@ -51,6 +51,7 @@ public class QueryParsing { public static final String F = "f"; // field that a query or command pertains to public static final String TYPE = "type";// parser for this query or command public static final String DEFTYPE = "defType"; // default parser for any direct subqueries + public static final String SPLIT_ON_WHITESPACE = "sow"; // Whether to split on whitespace prior to analysis public static final String LOCALPARAM_START = "{!"; public static final char LOCALPARAM_END = '}'; // true if the value was specified by the "v" param (i.e. v=myval, or v=$param) diff --git a/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java b/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java index 49a492b556c..60ef9fb36c0 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java +++ b/solr/core/src/java/org/apache/solr/search/SolrQueryParser.java @@ -25,7 +25,7 @@ import org.apache.solr.parser.QueryParser; public class SolrQueryParser extends QueryParser { public SolrQueryParser(QParser parser, String defaultField) { - super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, parser); + super(defaultField, parser); } } diff --git a/solr/core/src/test-files/solr/collection1/conf/multiword-synonyms.txt b/solr/core/src/test-files/solr/collection1/conf/multiword-synonyms.txt new file mode 100644 index 00000000000..0ef4d78182c --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/multiword-synonyms.txt @@ -0,0 +1,13 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +US, U.S., U S, USA, U.S.A., U S A, United States, United States of America \ No newline at end of file diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-multiword-synonyms.xml b/solr/core/src/test-files/solr/collection1/conf/schema-multiword-synonyms.xml new file mode 100644 index 00000000000..5544e22a639 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-multiword-synonyms.xml @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + id + + + + + + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt index b0e31cb7ec8..340abd7847c 100644 --- a/solr/core/src/test-files/solr/collection1/conf/synonyms.txt +++ b/solr/core/src/test-files/solr/collection1/conf/synonyms.txt @@ -29,3 +29,5 @@ Television, Televisions, TV, TVs # Synonym mappings can be used for spelling correction too pixima => pixma +# multiword synonyms +wi fi => wifi diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index c3b119f1182..27bf40fb818 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -16,7 +16,9 @@ */ package org.apache.solr.search; +import java.util.Arrays; import java.util.HashSet; +import java.util.Map; import java.util.Random; import java.util.Set; @@ -32,9 +34,11 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.TextField; import org.apache.solr.util.SolrPluginUtils; import org.junit.BeforeClass; import org.junit.Test; +import org.noggit.ObjectBuilder; public class TestExtendedDismaxParser extends SolrTestCaseJ4 { @@ -62,7 +66,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { "foo_i", "8" )); assertU(adoc("id", "47", "trait_ss", "Pig", - "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!")); + "text_sw", "line up and fly directly at the enemy death cannons, clogging them with wreckage!")); assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100")); assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100")); assertU(adoc("id", "50", "text_sw", "start new big city end")); @@ -88,98 +92,109 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { assertU(adoc("id", "69", "text_sw", "ties barbie")); assertU(adoc("id", "70", "text_sw", "hair")); assertU(adoc("id", "71", "text_sw", "ties")); + assertU(adoc("id", "72", "text_sw", "wifi ATM")); assertU(commit()); } @Test public void testSyntax() throws Exception { - // a bare * should be treated as *:* - assertJQ(req("defType","edismax", "q","*", "df","doesnotexist_s") - ,"/response/docs/[0]==" // make sure we get something... - ); - assertJQ(req("defType","edismax", "q","doesnotexist_s:*") - ,"/response/numFound==0" // nothing should be found - ); - assertJQ(req("defType","edismax","q","doesnotexist_s:*") - ,"/response/numFound==0" // nothing should be found - ); - assertJQ(req("defType","edismax","q","doesnotexist_s:( * * * )") - ,"/response/numFound==0" // nothing should be found - ); + for (String sow : Arrays.asList("true", "false")) { + // a bare * should be treated as *:* + assertJQ(req("defType", "edismax", "q", "*", "df", "doesnotexist_s", "sow", sow) + , "/response/docs/[0]==" // make sure we get something... + ); + assertJQ(req("defType", "edismax", "q", "doesnotexist_s:*", "sow", sow) + , "/response/numFound==0" // nothing should be found + ); + assertJQ(req("defType", "edismax", "q", "doesnotexist_s:*", "sow", sow) + , "/response/numFound==0" // nothing should be found + ); + assertJQ(req("defType", "edismax", "q", "doesnotexist_s:( * * * )", "sow", sow) + , "/response/numFound==0" // nothing should be found + ); + } } public void testTrailingOperators() throws Exception { - // really just test that exceptions aren't thrown by - // single + - + for (String sow : Arrays.asList("true", "false")) { + // really just test that exceptions aren't thrown by + // single + - - assertJQ(req("defType","edismax", "q","-") - ,"/response=="); + assertJQ(req("defType", "edismax", "q", "-", "df", "text_sw", "sow", sow) + , "/response=="); - assertJQ(req("defType","edismax", "q","+") - ,"/response=="); + assertJQ(req("defType", "edismax", "q", "+", "df", "text_sw", "sow", sow) + , "/response=="); - assertJQ(req("defType","edismax", "q","+ - +") - ,"/response=="); + assertJQ(req("defType", "edismax", "q", "+ - +", "df", "text_sw", "sow", sow) + , "/response=="); - assertJQ(req("defType","edismax", "q","- + -") - ,"/response=="); + assertJQ(req("defType", "edismax", "q", "- + -", "df", "text_sw", "sow", sow) + , "/response=="); - assertJQ(req("defType","edismax", "q","id:47 +") - ,"/response/numFound==1"); + assertJQ(req("defType", "edismax", "q", "id:47 +", "df", "text_sw", "sow", sow) + , "/response/numFound==1"); - assertJQ(req("defType","edismax", "q","id:47 -") - ,"/response/numFound==1"); + assertJQ(req("defType", "edismax", "q", "id:47 -", "df", "text_sw", "sow", sow) + , "/response/numFound==1"); - Random r = random(); - for (int i=0; i<100; i++) { - StringBuilder sb = new StringBuilder(); - for (int j=0; j 0 hits)", req("q", "(line notfound) OR notfound", - "qf", "text", + "qf", "text_sw", "q.op", "AND", "mm", "0%", "defType", "edismax") , "*[count(//doc)=0]"); assertQ("test default operator with mm (OR + 0% => 1 hit)", req("q", "line notfound OR notfound", - "qf", "text", + "qf", "text_sw", "q.op", "OR", "mm", "0%", "defType", "edismax") , "*[count(//doc)=1]"); assertQ("test default operator with mm (OR + 100% => 0 hits)", req("q", "line notfound OR notfound", - "qf", "text", + "qf", "text_sw", "q.op", "OR", "mm", "100%", "defType", "edismax") , "*[count(//doc)=0]"); assertQ("test default operator with mm (OR + 35% => 1 hit)", req("q", "line notfound notfound2 OR notfound", - "qf", "text", + "qf", "text_sw", "q.op", "OR", "mm", "35%", "defType", "edismax") , "*[count(//doc)=1]"); assertQ("test default operator with mm (OR + 75% => 0 hits)", req("q", "line notfound notfound2 OR notfound3", - "qf", "text", + "qf", "text_sw", "q.op", "OR", "mm", "75%", "defType", "edismax") , "*[count(//doc)=0]"); assertQ("test default operator with mm (AND + 0% => 1 hit)", req("q", "(line enemy) OR notfound", - "qf", "text", + "qf", "text_sw", "q.op", "AND", "mm", "0%", "defType", "edismax") , "*[count(//doc)=1]"); assertQ("test default operator with mm (AND + 50% => 1 hit)", req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)", - "qf", "text", + "qf", "text_sw", "q.op", "AND", "mm", "50%", "defType", "edismax") , "*[count(//doc)=1]"); assertQ("test default operator with mm (AND + 75% => 0 hits)", req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)", - "qf", "text", + "qf", "text_sw", "q.op", "AND", "mm", "75%", "defType", "edismax") @@ -1092,214 +1107,257 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { * Test that minShouldMatch applies to Optional terms only */ public void testMinShouldMatchOptional() throws Exception { - assertQ("test minShouldMatch (top level optional terms only)", - req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1) - "qf", "text_sw", - "mm", "50%", - "defType", "edismax") - , "*[count(//doc)=4]"); - - assertQ("test minShouldMatch (top level optional and negative terms mm=50%)", - req("q", "stocks oil gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~1) - "qf", "text_sw", - "mm", "50%", - "defType", "edismax") - , "*[count(//doc)=3]"); + for (String sow : Arrays.asList("true", "false")) { + assertQ("test minShouldMatch (top level optional terms only)", + req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1) + "qf", "text_sw", + "mm", "50%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); - assertQ("test minShouldMatch (top level optional and negative terms mm=100%)", - req("q", "stocks gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~2) - "qf", "text_sw", - "mm", "100%", - "defType", "edismax") - , "*[count(//doc)=1]"); + assertQ("test minShouldMatch (top level optional terms only and sow=false)", + req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1) + "qf", "text_sw", + "mm", "50%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); - assertQ("test minShouldMatch (top level required terms only)", - req("q", "stocks AND oil", // +(+(text_sw:stock) +(text_sw:oil)) - "qf", "text_sw", - "mm", "50%", - "defType", "edismax") - , "*[count(//doc)=1]"); + assertQ("test minShouldMatch (top level optional and negative terms mm=50%)", + req("q", "stocks oil gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~1) + "qf", "text_sw", + "mm", "50%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); - assertQ("test minShouldMatch (top level optional and required terms)", - req("q", "oil gold +stocks", // +(((text_sw:oil) (text_sw:gold) +(text_sw:stock))~1) - "qf", "text_sw", - "mm", "50%", - "defType", "edismax") - , "*[count(//doc)=3]"); + assertQ("test minShouldMatch (top level optional and negative terms mm=100%)", + req("q", "stocks gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~2) + "qf", "text_sw", + "mm", "100%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); - assertQ("test minShouldMatch (top level optional with explicit OR and parens)", - req("q", "(snake OR stocks) oil", - "qf", "text_sw", - "mm", "100%", - "defType", "edismax") - , "*[count(//doc)=2]"); + assertQ("test minShouldMatch (top level required terms only)", + req("q", "stocks AND oil", // +(+(text_sw:stock) +(text_sw:oil)) + "qf", "text_sw", + "mm", "50%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); - // The results for these two appear odd, but are correct as per BooleanQuery processing. - // See: http://searchhub.org/2011/12/28/why-not-and-or-and-not/ - // Non-parenthesis OR/AND precedence is not true to abstract boolean logic in solr when q.op = AND - // and when q.op = OR all three clauses are top-level and optional so mm takes over - assertQ("test minShouldMatch (top level optional with explicit OR without parens)", - req("q", "snake OR stocks oil", - "qf", "text_sw", - "q.op", "OR", - "mm", "100%", - "defType", "edismax") - , "*[count(//doc)=0]"); - assertQ("test minShouldMatch (top level optional with explicit OR without parens)", - req("q", "snake OR stocks oil", - "qf", "text_sw", - "q.op", "AND", - "mm", "100%", - "defType", "edismax") - , "*[count(//doc)=0]"); + assertQ("test minShouldMatch (top level optional and required terms)", + req("q", "oil gold +stocks", // +(((text_sw:oil) (text_sw:gold) +(text_sw:stock))~1) + "qf", "text_sw", + "mm", "50%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); - // SOLR-9174 - assertQ("test minShouldMatch=1<-1 with explicit OR, one impossible clause, and no explicit q.op", - req("q", "barbie OR (hair AND nonexistentword)", - "qf", "text_sw", - "mm", "1<-1", - "defType", "edismax") - , "*[count(//doc)=3]"); + assertQ("test minShouldMatch (top level optional with explicit OR and parens)", + req("q", "(snake OR stocks) oil", + "qf", "text_sw", + "mm", "100%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=2]"); + + // The results for these two appear odd, but are correct as per BooleanQuery processing. + // See: http://searchhub.org/2011/12/28/why-not-and-or-and-not/ + // Non-parenthesis OR/AND precedence is not true to abstract boolean logic in solr when q.op = AND + // and when q.op = OR all three clauses are top-level and optional so mm takes over + assertQ("test minShouldMatch (top level optional with explicit OR without parens)", + req("q", "snake OR stocks oil", + "qf", "text_sw", + "q.op", "OR", + "mm", "100%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=0]"); + assertQ("test minShouldMatch (top level optional with explicit OR without parens)", + req("q", "snake OR stocks oil", + "qf", "text_sw", + "q.op", "AND", + "mm", "100%", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=0]"); + + // SOLR-9174 + assertQ("test minShouldMatch=1<-1 with explicit OR, one impossible clause, and no explicit q.op", + req("q", "barbie OR (hair AND nonexistentword)", + "qf", "text_sw", + "mm", "1<-1", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); + } } /* SOLR-8812 */ @Test public void testDefaultMM() throws Exception { // Ensure MM is off when explicit operators (+/-/OR/NOT) are used and no explicit mm spec is specified. - assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%", - req("q", "oil OR stocks", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=4]"); - assertQ("Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%", - req("q", "oil or stocks", - "qf", "text_sw", - "q.op", "AND", - "lowercaseOperators", "true", - "defType", "edismax") - , "*[count(//doc)=4]"); - assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "oil OR stocks", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=4]"); - assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "oil stocks", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=4]"); - assertQ("No operator in query with no explicit mm and q.op=AND => mm = 100%", - req("q", "oil stocks", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=1]"); - assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "oil stocks", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=4]"); + for (String sow : Arrays.asList("true", "false")) { + assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%", + req("q", "oil OR stocks", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); + assertQ("Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%", + req("q", "oil or stocks", + "qf", "text_sw", + "q.op", "AND", + "lowercaseOperators", "true", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); + assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "oil OR stocks", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); + assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "oil stocks", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); + assertQ("No operator in query with no explicit mm and q.op=AND => mm = 100%", + req("q", "oil stocks", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); + assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "oil stocks", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=4]"); - assertQ("Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "hair ties -barbie", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=3]"); - assertQ("Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "hair ties NOT barbie", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=3]"); + assertQ("Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "hair ties -barbie", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); + assertQ("Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "hair ties NOT barbie", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); - assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair ties -barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=3]"); - assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair ties NOT barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=3]"); + assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair ties -barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); + assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair ties NOT barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); - assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair AND ties -barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=1]"); - assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair AND ties -barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=1]"); + assertQ("Explicit '-' operator in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair AND ties -barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); + assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair AND ties -barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); - assertQ("No explicit non-AND operator in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair AND ties barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=2]"); - assertQ("No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%", - req("q", "hair AND ties barbie", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=1]"); - assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "hair AND ties barbie", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=2]"); - assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "hair and ties barbie", - "qf", "text_sw", - "lowercaseOperators", "true", - "defType", "edismax") - , "*[count(//doc)=2]"); + assertQ("No explicit non-AND operator in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair AND ties barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=2]"); + assertQ("No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%", + req("q", "hair AND ties barbie", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); + assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "hair AND ties barbie", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=2]"); + assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "hair and ties barbie", + "qf", "text_sw", + "lowercaseOperators", "true", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=2]"); - assertQ("Explicit '-' operator in query with no explicit mm and q.op=AND => mm = 100%", - req("q", "hair ties -barbie", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=1]"); - assertQ("Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%", - req("q", "hair ties NOT barbie", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=1]"); + assertQ("Explicit '-' operator in query with no explicit mm and q.op=AND => mm = 100%", + req("q", "hair ties -barbie", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); + assertQ("Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%", + req("q", "hair ties NOT barbie", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); - assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%", - req("q", "hair OR ties barbie", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=3]"); - assertQ("Explicit OR in query with no explicit mm and q.op=OR => mm = 0%", - req("q", "hair OR ties barbie", - "qf", "text_sw", - "q.op", "OR", - "defType", "edismax") - , "*[count(//doc)=6]"); - assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", - req("q", "hair OR ties barbie", - "qf", "text_sw", - "defType", "edismax") - , "*[count(//doc)=6]"); + assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%", + req("q", "hair OR ties barbie", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=3]"); + assertQ("Explicit OR in query with no explicit mm and q.op=OR => mm = 0%", + req("q", "hair OR ties barbie", + "qf", "text_sw", + "q.op", "OR", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=6]"); + assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", + req("q", "hair OR ties barbie", + "qf", "text_sw", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=6]"); - assertQ("Explicit '+' operator in query with no explicit mm and q.op=AND => mm = 0%", - req("q", "hair ties +barbie", - "qf", "text_sw", - "q.op", "AND", - "defType", "edismax") - , "*[count(//doc)=1]"); + assertQ("Explicit '+' operator in query with no explicit mm and q.op=AND => mm = 0%", + req("q", "hair ties +barbie", + "qf", "text_sw", + "q.op", "AND", + "sow", sow, + "defType", "edismax") + , "*[count(//doc)=1]"); + } } public void testEdismaxSimpleExtension() throws SyntaxError { @@ -1336,6 +1394,380 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { } + // LUCENE-7533 + public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception { + assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries()); + + try (SolrQueryRequest req = req()) { + final QParser qparser = QParser.getParser("{!edismax sow=false fq=text}blah blah)", req); + expectThrows(IllegalArgumentException.class, qparser::getQuery); + } + } + + @Test + public void testSplitOnWhitespace_Basic() throws Exception { + // The "text_sw" field has synonyms loaded from synonyms.txt + + // retrieve the single document containing literal "wifi" + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wifi", "sow","true") + , "/response/numFound==1" + , "/response/docs/[0]/id=='72'" + ); + + // trigger the "wi fi => wifi" synonym + assertJQ(req("qf", "text_sw title", "defType","edismax", "q","wi fi", "sow","false") + , "/response/numFound==1" + , "/response/docs/[0]/id=='72'" + ); + assertJQ(req("qf", "text_sw title", "defType","edismax", "q","wi fi", "sow","true") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi") // default sow=true + , "/response/numFound==0" + ); + + assertJQ(req("qf","text_sw title", "q","{!edismax sow=false}wi fi") + , "/response/numFound==1" + , "/response/docs/[0]/id=='72'" + ); + assertJQ(req("df", "text_sw title", "q","{!edismax sow=true}wi fi") + , "/response/numFound==0" + ); + assertJQ(req("df", "text_sw title", "q", "{!edismax}wi fi") // default sow=true + , "/response/numFound==0" + ); + + assertQ(req("qf", "name title", + "q", "barking curds of stigma", + "defType", "edismax", + "sow", "false", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]" + ); + assertQ(req("qf", "name title", + "q", "barking curds of stigma", + "defType", "edismax", + "sow", "true", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]" + ); + assertQ(req("qf", "name title", + "q", "barking curds of stigma", + "defType", "edismax", + "debugQuery", "true"), // Default sow=true + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]" + ); + } + + public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception { + // When the *structure* of produced queries is different in each field, + // sow=true produces boolean-of-dismax query structure, + // and sow=false produces dismax-of-boolean query structure. + assertQ(req("qf", "text_sw title", + "q", "olive the other", + "defType", "edismax", + "sow", "true", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((text_sw:oliv | title:olive))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((title:the))')]", + "//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((text_sw:other | title:other))')]" + ); + assertQ(req("qf", "text_sw title", + "q", "olive the other", + "defType", "edismax", + "sow", "false", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'+DisjunctionMaxQuery(((text_sw:oliv text_sw:other) | (title:olive title:the title:other)))')]" + ); + + // When field's analysis produce different query structures, mm processing is always done on the boolean query. + // sow=true produces (boolean-of-dismax)~ query structure, + // and sow=false produces dismax-of-(boolean)~ query structure. + assertQ(req("qf", "text_sw title", + "q", "olive the other", + "defType", "edismax", + "sow", "true", + "mm", "100%", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'+(DisjunctionMaxQuery((text_sw:oliv | title:olive)) DisjunctionMaxQuery((title:the)) DisjunctionMaxQuery((text_sw:other | title:other)))~3')]" + ); + assertQ(req("qf", "text_sw title", + "q", "olive the other", + "defType", "edismax", + "sow", "false", + "mm", "100%", + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'+DisjunctionMaxQuery((((text_sw:oliv text_sw:other)~2) | ((title:olive title:the title:other)~3)))')]" + ); + + + // When the *structure* of produced queries is the same in each field, + // sow=false/true produce the same boolean-of-dismax query structure + for (String sow : Arrays.asList("true", "false")) { + assertQ(req("qf", "text_sw title", + "q", "olive blah other", + "defType", "edismax", + "sow", sow, + "debugQuery", "true"), + "//str[@name='parsedquery'][contains(.,'" + + "+(DisjunctionMaxQuery((text_sw:oliv | title:olive))" + + " DisjunctionMaxQuery((text_sw:blah | title:blah))" + + " DisjunctionMaxQuery((text_sw:other | title:other)))')]" + ); + } + } + + public void testOperatorsAndMultiWordSynonyms() throws Exception { + // The "text_sw" field has synonyms loaded from synonyms.txt + + // retrieve the single document containing literal "wifi" + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wifi", "sow","true") + , "/response/numFound==1" + , "/response/docs/[0]/id=='72'" + ); + // trigger the "wi fi => wifi" synonym + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi", "sow","false") + , "/response/numFound==1" + , "/response/docs/[0]/id=='72'" + ); + + assertJQ(req("qf","text_sw title", "defType","edismax", "q","+wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","-wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","!wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi* fi", "sow","false") + , "/response/numFound==2" // matches because wi* matches "wifi" in one doc and "with" in another + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","w? fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi~1 fi", "sow","false") + , "/response/numFound==4" // matches because wi~1 matches ti (stemmed "ties") + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^2 fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^=2 fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi +fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi -fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi !fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi*", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi?", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi~1", "sow","false") + , "/response/numFound==4" // matches because fi~1 matches ti (stemmed "ties") + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^2", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^=2", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi text_sw:fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi NOT fi", "sow","false") + , "/response/numFound==0" + ); + + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AND ATM", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi && ATM", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) AND ATM", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND (wi fi)", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) && ATM", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && (wi fi)", "sow","false") + , "/response/numFound==1" + ); + + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi OR NotThereAtAll", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi || NotThereAtAll", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) OR NotThereAtAll", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR (wi fi)", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) || NotThereAtAll", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || (wi fi)", "sow","false") + , "/response/numFound==1" + ); + + assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi\" fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi \"fi\"", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi) fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi (fi)", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","/wi/ fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi /fi/", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","+(wi fi)", "sow","false") + , "/response/numFound==1" + ); + + Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json"))); + int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString()); + int allDocsExceptOne = totalDocs - 1; + + assertJQ(req("qf","text_sw title", "defType","edismax", "q","-(wi fi)", "sow","false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","!(wi fi)", "sow","false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT (wi fi)", "sow","false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^2", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^=2", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:(wi fi)", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","+ATM wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","-ATM wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","-NotThereAtAll wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","!ATM wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","!NotThereAtAll wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT ATM wi fi", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT NotThereAtAll wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT* wi fi", "sow","false") + , "/response/numFound==2" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT? wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"ATM\" wi fi", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi +ATM", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -ATM", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -NotThereAtAll", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !ATM", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !NotThereAtAll", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT ATM", "sow","false") + , "/response/numFound==0" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT NotThereAtAll", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT*", "sow","false") + , "/response/numFound==2" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT?", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi \"ATM\"", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi fi\"~2", "sow","false") + , "/response/numFound==1" + ); + assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:\"wi fi\"", "sow","false") + , "/response/numFound==1" + ); + } + + + private boolean containsClause(Query query, String field, String value, int boost, boolean fuzzy) { diff --git a/solr/core/src/test/org/apache/solr/search/TestMultiWordSynonyms.java b/solr/core/src/test/org/apache/solr/search/TestMultiWordSynonyms.java new file mode 100644 index 00000000000..ecc80c398a8 --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestMultiWordSynonyms.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.solr.search; + +import java.util.Arrays; + +import org.apache.solr.SolrTestCaseJ4; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestMultiWordSynonyms extends SolrTestCaseJ4 { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig.xml", "schema-multiword-synonyms.xml"); + index(); + } + + private static void index() throws Exception { + assertU(adoc("id","1", "text","USA Today")); + assertU(adoc("id","2", "text","A dynamic US economy")); + assertU(adoc("id","3", "text","The United States of America's 50 states")); + assertU(adoc("id","4", "text","Party in the U.S.A.")); + assertU(adoc("id","5", "text","These United States")); + + assertU(adoc("id","6", "text","America United of States")); + assertU(adoc("id","7", "text","States United")); + + assertU(commit()); + } + + @Test + public void testNonPhrase() throws Exception { + // Don't split on whitespace (sow=false) + for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.", "United States", "United States of America")) { + for (String defType : Arrays.asList("lucene", "edismax")) { + assertJQ(req("q", q, + "defType", defType, + "df", "text", + "sow", "false") + , "/response/numFound==7" + ); + } + } + + // Split on whitespace (sow=true) + for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.")) { + for (String defType : Arrays.asList("lucene", "edismax")) { + assertJQ(req("q", q, + "defType", defType, + "df", "text", + "sow", "true") + , "/response/numFound==7" + ); + } + } + for (String q : Arrays.asList("United States", "United States of America")) { + for (String defType : Arrays.asList("lucene", "edismax")) { + assertJQ(req("q", q, + "defType", defType, + "df", "text", + "sow", "true") + , "/response/numFound==4" + ); + } + } + } + + @Test + public void testPhrase() throws Exception { + for (String q : Arrays.asList + ("\"US\"", "\"U.S.\"", "\"USA\"", "\"U.S.A.\"", "\"United States\"", "\"United States of America\"")) { + for (String defType : Arrays.asList("lucene", "edismax")) { + for (String sow : Arrays.asList("true", "false")) { + assertJQ(req("q", q, + "defType", defType, + "df", "text", + "sow", sow) + , "/response/numFound==5" + ); + } + } + } + } +} diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java index 8195c058a31..92bd6c0e2d6 100644 --- a/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestSolrQueryParser.java @@ -16,7 +16,12 @@ */ package org.apache.solr.search; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Random; import org.apache.lucene.search.BooleanClause; @@ -28,12 +33,15 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.params.MapSolrParams; import org.apache.solr.core.SolrInfoMBean; import org.apache.solr.parser.QueryParser; import org.apache.solr.query.FilterQuery; import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.TextField; import org.junit.BeforeClass; import org.junit.Test; +import org.noggit.ObjectBuilder; public class TestSolrQueryParser extends SolrTestCaseJ4 { @@ -57,6 +65,8 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { assertU(adoc("id", "12", "eee_s", "X")); assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash")); + assertU(adoc("id", "20", "syn", "wifi ATM")); + assertU(commit()); } @@ -208,86 +218,105 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { QParser qParser; Query q,qq; - // relevance query should not be a filter - qParser = QParser.getParser("foo_s:(a b c)", req); - q = qParser.getQuery(); - assertEquals(3, ((BooleanQuery)q).clauses().size()); + Map sowFalseParamsMap = new HashMap<>(); + sowFalseParamsMap.put("sow", "false"); + Map sowTrueParamsMap = new HashMap<>(); + sowTrueParamsMap.put("sow", "true"); + List paramMaps = Arrays.asList + (new MapSolrParams(Collections.emptyMap()), // no sow param (i.e. the default sow value) + new MapSolrParams(sowFalseParamsMap), + new MapSolrParams(sowTrueParamsMap)); - // small filter query should still use BooleanQuery - if (QueryParser.TERMS_QUERY_THRESHOLD > 3) { + for (MapSolrParams params : paramMaps) { + // relevance query should not be a filter qParser = QParser.getParser("foo_s:(a b c)", req); - qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); q = qParser.getQuery(); assertEquals(3, ((BooleanQuery) q).clauses().size()); + + // small filter query should still use BooleanQuery + if (QueryParser.TERMS_QUERY_THRESHOLD > 3) { + qParser = QParser.getParser("foo_s:(a b c)", req); + qParser.setParams(params); + qParser.setIsFilter(true); // this may change in the future + q = qParser.getQuery(); + assertEquals(3, ((BooleanQuery) q).clauses().size()); + } + + // large relevancy query should use BooleanQuery + // TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so + qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(26, ((BooleanQuery)q).clauses().size()); + + // large filter query should use TermsQuery + qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); + qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(26, ((TermInSetQuery)q).getTermData().size()); + + // large numeric filter query should use TermsQuery (for trie fields) + qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req); + qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(20, ((TermInSetQuery)q).getTermData().size()); + + // for point fields large filter query should use PointInSetQuery + qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req); + qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); + q = qParser.getQuery(); + assertTrue(q instanceof PointInSetQuery); + assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size()); + + // a filter() clause inside a relevancy query should be able to use a TermsQuery + qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req); + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(2, ((BooleanQuery)q).clauses().size()); + qq = ((BooleanQuery)q).clauses().get(0).getQuery(); + if (qq instanceof TermQuery) { + qq = ((BooleanQuery)q).clauses().get(1).getQuery(); + } + + if (qq instanceof FilterQuery) { + qq = ((FilterQuery)qq).getQuery(); + } + + assertEquals(26, ((TermInSetQuery) qq).getTermData().size()); + + // test mixed boolean query, including quotes (which shouldn't matter) + qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req); + qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(4, ((BooleanQuery)q).clauses().size()); + qq = null; + for (BooleanClause clause : ((BooleanQuery)q).clauses()) { + qq = clause.getQuery(); + if (qq instanceof TermInSetQuery) break; + } + assertEquals(26, ((TermInSetQuery)qq).getTermData().size()); + + // test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field) + StringBuilder sb = new StringBuilder(); + for (int i=0; i<17; i++) { + char letter = (char)('a'+i); + sb.append("foo_s:" + letter + " bar_s:" + letter + " "); + } + qParser = QParser.getParser(sb.toString(), req); + qParser.setIsFilter(true); // this may change in the future + qParser.setParams(params); + q = qParser.getQuery(); + assertEquals(2, ((BooleanQuery)q).clauses().size()); + for (BooleanClause clause : ((BooleanQuery)q).clauses()) { + qq = clause.getQuery(); + assertEquals(17, ((TermInSetQuery)qq).getTermData().size()); + } } - - // large relevancy query should use BooleanQuery - // TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so - qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); - q = qParser.getQuery(); - assertEquals(26, ((BooleanQuery)q).clauses().size()); - - // large filter query should use TermsQuery - qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req); - qParser.setIsFilter(true); // this may change in the future - q = qParser.getQuery(); - assertEquals(26, ((TermInSetQuery)q).getTermData().size()); - - // large numeric filter query should use TermsQuery (for trie fields) - qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req); - qParser.setIsFilter(true); // this may change in the future - q = qParser.getQuery(); - assertEquals(20, ((TermInSetQuery)q).getTermData().size()); - - // for point fields large filter query should use PointInSetQuery - qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req); - qParser.setIsFilter(true); // this may change in the future - q = qParser.getQuery(); - assertTrue(q instanceof PointInSetQuery); - assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size()); - - // a filter() clause inside a relevancy query should be able to use a TermsQuery - qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req); - q = qParser.getQuery(); - assertEquals(2, ((BooleanQuery)q).clauses().size()); - qq = ((BooleanQuery)q).clauses().get(0).getQuery(); - if (qq instanceof TermQuery) { - qq = ((BooleanQuery)q).clauses().get(1).getQuery(); - } - - if (qq instanceof FilterQuery) { - qq = ((FilterQuery)qq).getQuery(); - } - - assertEquals(26, ((TermInSetQuery)qq).getTermData().size()); - - // test mixed boolean query, including quotes (which shouldn't matter) - qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req); - qParser.setIsFilter(true); // this may change in the future - q = qParser.getQuery(); - assertEquals(4, ((BooleanQuery)q).clauses().size()); - qq = null; - for (BooleanClause clause : ((BooleanQuery)q).clauses()) { - qq = clause.getQuery(); - if (qq instanceof TermInSetQuery) break; - } - assertEquals(26, ((TermInSetQuery)qq).getTermData().size()); - - // test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field) - StringBuilder sb = new StringBuilder(); - for (int i=0; i<17; i++) { - char letter = (char)('a'+i); - sb.append("foo_s:" + letter + " bar_s:" + letter + " "); - } - qParser = QParser.getParser(sb.toString(), req); - qParser.setIsFilter(true); // this may change in the future - q = qParser.getQuery(); - assertEquals(2, ((BooleanQuery)q).clauses().size()); - for (BooleanClause clause : ((BooleanQuery)q).clauses()) { - qq = clause.getQuery(); - assertEquals(17, ((TermInSetQuery)qq).getTermData().size()); - } - req.close(); } @@ -306,6 +335,10 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { // This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used. assertJQ(req("q","*:*", "fq", q) ,"/response/numFound==6"); + assertJQ(req("q","*:*", "fq", q, "sow", "false") + ,"/response/numFound==6"); + assertJQ(req("q","*:*", "fq", q, "sow", "true") + ,"/response/numFound==6"); } @Test @@ -540,4 +573,400 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 { req.close(); } + // LUCENE-7533 + public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception { + assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries()); + + try (SolrQueryRequest req = req()) { + final QParser qparser = QParser.getParser("{!lucene sow=false qf=text}blah blah", req); + expectThrows(QueryParserConfigurationException.class, qparser::getQuery); + } + } + + @Test + public void testSplitOnWhitespace_Basic() throws Exception { + // The "syn" field has synonyms loaded from synonyms.txt + + assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi" + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + + assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + + assertJQ(req("df", "syn", "q", "wi fi", "sow", "true") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi") // default sow=true + , "/response/numFound==0" + ); + + assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi") + , "/response/numFound==0" + ); + + assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true + , "/response/numFound==0" + ); + } + + public void testSplitOnWhitespace_Comments() throws Exception { + // The "syn" field has synonyms loaded from synonyms.txt + + assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi" + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", " /* foo */ wi fi /* bar */", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", " /* foo */ wi /* bar */ fi /* baz */", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + + assertJQ(req("df", "syn", "q", "wi fi", "sow", "true") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "true") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "true") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */", "sow", "true") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */", "sow", "true") + , "/response/numFound==0" + ); + + assertJQ(req("df", "syn", "q", "wi fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */") // default sow=true + , "/response/numFound==0" + ); + + + assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ fi") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ /* bar */ fi") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi fi /* bar */") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi /* bar */ fi /* baz */") + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + + assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ fi") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ /* bar */ fi") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi fi /* bar */") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi /* bar */ fi /* baz */") + , "/response/numFound==0" + ); + + assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ /* bar */ fi") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi fi /* bar */") // default sow=true + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi /* bar */ fi /* baz */") // default sow=true + , "/response/numFound==0" + ); + } + + public void testOperatorsAndMultiWordSynonyms() throws Exception { + // The "syn" field has synonyms loaded from synonyms.txt + + assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi" + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym + , "/response/numFound==1" + , "/response/docs/[0]/id=='20'" + ); + + assertJQ(req("df", "syn", "q", "+wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "-wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "!wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi* fi", "sow", "false") // matches because wi* matches wifi + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "w? fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi~1 fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi^2 fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi^=2 fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi +fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi -fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi !fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi*", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi?", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi~1", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi^2", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi^=2", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "syn:wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi syn:fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "NOT wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi NOT fi", "sow", "false") + , "/response/numFound==0" + ); + + assertJQ(req("df", "syn", "q", "wi fi AND ATM", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "ATM AND wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi && ATM", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "ATM && wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "(wi fi) AND ATM", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "ATM AND (wi fi)", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "(wi fi) && ATM", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "ATM && (wi fi)", "sow", "false") + , "/response/numFound==1" + ); + + assertJQ(req("df", "syn", "q", "wi fi OR NotThereAtAll", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "NotThereAtAll OR wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi || NotThereAtAll", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "NotThereAtAll || wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "(wi fi) OR NotThereAtAll", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "NotThereAtAll OR (wi fi)", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "(wi fi) || NotThereAtAll", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "NotThereAtAll || (wi fi)", "sow", "false") + , "/response/numFound==1" + ); + + assertJQ(req("df", "syn", "q", "\"wi\" fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi \"fi\"", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "(wi) fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi (fi)", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "/wi/ fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi /fi/", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "(wi fi)", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "+(wi fi)", "sow", "false") + , "/response/numFound==1" + ); + + Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json"))); + int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString()); + int allDocsExceptOne = totalDocs - 1; + + assertJQ(req("df", "syn", "q", "-(wi fi)", "sow", "false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field + ); + assertJQ(req("df", "syn", "q", "!(wi fi)", "sow", "false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field + ); + assertJQ(req("df", "syn", "q", "NOT (wi fi)", "sow", "false") + , "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field + ); + assertJQ(req("df", "syn", "q", "(wi fi)^2", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "(wi fi)^=2", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "syn:(wi fi)", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "+ATM wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "-ATM wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "-NotThereAtAll wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "!ATM wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "!NotThereAtAll wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "NOT ATM wi fi", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "NOT NotThereAtAll wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "AT* wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "AT? wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "\"ATM\" wi fi", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi +ATM", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi -ATM", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi -NotThereAtAll", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi !ATM", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi !NotThereAtAll", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi NOT ATM", "sow", "false") + , "/response/numFound==0" + ); + assertJQ(req("df", "syn", "q", "wi fi NOT NotThereAtAll", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi AT*", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi AT?", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "wi fi \"ATM\"", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "\"wi fi\"~2", "sow", "false") + , "/response/numFound==1" + ); + assertJQ(req("df", "syn", "q", "syn:\"wi fi\"", "sow", "false") + , "/response/numFound==1" + ); + } } \ No newline at end of file