SOLR-9185: Solr's edismax and Lucene/standard query parsers should optionally not split on whitespace before sending terms to analysis

Steve Rowe 2017-03-16 19:41:37 -04:00
parent 4ee7fc3890
commit d1b2fb33ef
16 changed files with 2257 additions and 603 deletions

View File

@ -202,6 +202,13 @@ New Features
requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that
they don't get cached in memory. (David Smiley) they don't get cached in memory. (David Smiley)
* SOLR-9185: Solr's edismax and "Lucene"/standard query parsers will no longer split on whitespace before sending
terms to analysis, if given the "sow=false" request param ("sow"=>"split on whitespace"). This enables multi-term
source synonyms to match at query-time using SynonymGraphFilterFactory; other analysis components will also now
work at query time, e.g. ShingleFilterFactory. By default, and when the "sow=true" param is specified, these
parsers' behavior remains the same: queries will be split on whitespace before sending individual terms to analysis.
(Steve Rowe)
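As an illustrative sketch (not part of this commit), a SolrJ request opting into the new behavior could look like the following; the base URL, the "techproducts" collection, and the assumption that the queried field's analyzer applies a "wi fi" => "wifi" synonym via SynonymGraphFilterFactory are all hypothetical.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class SowFalseExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery query = new SolrQuery("wi fi");  // multi-term source synonym, e.g. "wi fi" => "wifi"
      query.set("defType", "edismax");
      query.set("sow", "false");                 // do not pre-split on whitespace before analysis
      QueryResponse rsp = client.query(query);
      System.out.println(rsp.getResults().getNumFound());
    }
  }
}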
Bug Fixes
----------------------

View File

@ -3,13 +3,17 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants { public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
@ -17,9 +21,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
*/ */
static public enum Operator { OR, AND } static public enum Operator { OR, AND }
public QueryParser(Version matchVersion, String defaultField, QParser parser) { /** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
public QueryParser(String defaultField, QParser parser) {
this(new FastCharStream(new StringReader(""))); this(new FastCharStream(new StringReader("")));
init(matchVersion, defaultField, parser); init(defaultField, parser);
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}
/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
} }
// * Query ::= ( Clause )* // * Query ::= ( Clause )*
@ -96,13 +135,38 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
final public Query Query(String field) throws ParseException, SyntaxError { final public Query Query(String field) throws ParseException, SyntaxError {
List<BooleanClause> clauses = new ArrayList<BooleanClause>(); List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null; Query q;
int conj, mods; int conj, mods;
if (jj_2_1(2)) {
MultiTerm(field, clauses);
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case NOT:
case PLUS:
case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case LPARAMS:
case FILTER:
case NUMBER:
mods = Modifiers(); mods = Modifiers();
q = Clause(field); q = Clause(field);
addClause(clauses, CONJ_NONE, mods, q); addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE) break;
firstQuery=q; default:
jj_la1[4] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
label_1: label_1:
while (true) { while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -127,19 +191,50 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
; ;
break; break;
default: default:
jj_la1[4] = jj_gen; jj_la1[5] = jj_gen;
break label_1; break label_1;
} }
if (jj_2_2(2)) {
MultiTerm(field, clauses);
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case AND:
case OR:
case NOT:
case PLUS:
case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case LPARAMS:
case FILTER:
case NUMBER:
conj = Conjunction(); conj = Conjunction();
mods = Modifiers(); mods = Modifiers();
q = Clause(field); q = Clause(field);
addClause(clauses, conj, mods, q); addClause(clauses, conj, mods, q);
break;
default:
jj_la1[6] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
} }
if (clauses.size() == 1 && firstQuery != null) }
}
if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
Query firstQuery = clauses.get(0).getQuery();
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
{if (true) return rawToNormal(firstQuery);} {if (true) return rawToNormal(firstQuery);}
else {
{if (true) return getBooleanQuery(clauses);}
} }
}
{if (true) return getBooleanQuery(clauses);}
throw new Error("Missing return statement in function"); throw new Error("Missing return statement in function");
} }
@ -148,20 +243,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
Token fieldToken=null, boost=null; Token fieldToken=null, boost=null;
Token localParams=null; Token localParams=null;
int flags = 0; int flags = 0;
if (jj_2_1(2)) { if (jj_2_3(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM: case TERM:
fieldToken = jj_consume_token(TERM); fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON); jj_consume_token(COLON);
field=discardEscapeChar(fieldToken.image); field = discardEscapeChar(fieldToken.image);
break; break;
case STAR: case STAR:
jj_consume_token(STAR); jj_consume_token(STAR);
jj_consume_token(COLON); jj_consume_token(COLON);
field="*"; field = "*";
break; break;
default: default:
jj_la1[5] = jj_gen; jj_la1[7] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -191,7 +286,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[6] = jj_gen; jj_la1[8] = jj_gen;
; ;
} }
break; break;
@ -206,7 +301,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[7] = jj_gen; jj_la1[9] = jj_gen;
; ;
} }
q=getFilter(q); restoreFlags(flags); q=getFilter(q); restoreFlags(flags);
@ -219,13 +314,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[8] = jj_gen; jj_la1[10] = jj_gen;
; ;
} }
q=getLocalParams(field, localParams.image); q=getLocalParams(field, localParams.image);
break; break;
default: default:
jj_la1[9] = jj_gen; jj_la1[11] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -278,19 +373,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
term.image = term.image.substring(0,1); term.image = term.image.substring(0,1);
break; break;
default: default:
jj_la1[10] = jj_gen; jj_la1[12] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
case FUZZY_SLOP: case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[11] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT: case CARAT:
jj_consume_token(CARAT); jj_consume_token(CARAT);
@ -301,12 +390,31 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
fuzzy=true; fuzzy=true;
break; break;
default: default:
jj_la1[12] = jj_gen; jj_la1[13] = jj_gen;
;
}
break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[14] = jj_gen;
; ;
} }
break; break;
default: default:
jj_la1[13] = jj_gen; jj_la1[15] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
jj_la1[16] = jj_gen;
; ;
} }
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
@ -316,13 +424,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_START: case RANGEIN_START:
jj_consume_token(RANGEIN_START); jj_consume_token(RANGEIN_START);
startInc=true; startInc = true;
break; break;
case RANGEEX_START: case RANGEEX_START:
jj_consume_token(RANGEEX_START); jj_consume_token(RANGEEX_START);
break; break;
default: default:
jj_la1[14] = jj_gen; jj_la1[17] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -334,7 +442,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
goop1 = jj_consume_token(RANGE_QUOTED); goop1 = jj_consume_token(RANGE_QUOTED);
break; break;
default: default:
jj_la1[15] = jj_gen; jj_la1[18] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -343,7 +451,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_consume_token(RANGE_TO); jj_consume_token(RANGE_TO);
break; break;
default: default:
jj_la1[16] = jj_gen; jj_la1[19] = jj_gen;
; ;
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -354,20 +462,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
goop2 = jj_consume_token(RANGE_QUOTED); goop2 = jj_consume_token(RANGE_QUOTED);
break; break;
default: default:
jj_la1[17] = jj_gen; jj_la1[20] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_END: case RANGEIN_END:
jj_consume_token(RANGEIN_END); jj_consume_token(RANGEIN_END);
endInc=true; endInc = true;
break; break;
case RANGEEX_END: case RANGEEX_END:
jj_consume_token(RANGEEX_END); jj_consume_token(RANGEEX_END);
break; break;
default: default:
jj_la1[18] = jj_gen; jj_la1[21] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -377,7 +485,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[19] = jj_gen; jj_la1[22] = jj_gen;
; ;
} }
boolean startOpen=false; boolean startOpen=false;
@ -392,31 +500,56 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
} else if ("*".equals(goop2.image)) { } else if ("*".equals(goop2.image)) {
endOpen=true; endOpen=true;
} }
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); q = getRangeQuery(getField(field),
startOpen ? null : discardEscapeChar(goop1.image),
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break; break;
case QUOTED: case QUOTED:
term = jj_consume_token(QUOTED); term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
case FUZZY_SLOP:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP: case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break; break;
default: default:
jj_la1[20] = jj_gen; jj_la1[23] = jj_gen;
; ;
} }
break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT: case CARAT:
jj_consume_token(CARAT); jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[21] = jj_gen; jj_la1[24] = jj_gen;
;
}
break;
default:
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
jj_la1[26] = jj_gen;
; ;
} }
q = handleQuotedTerm(getField(field), term, fuzzySlop); q = handleQuotedTerm(getField(field), term, fuzzySlop);
break; break;
default: default:
jj_la1[22] = jj_gen; jj_la1[27] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -424,6 +557,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
throw new Error("Missing return statement in function"); throw new Error("Missing return statement in function");
} }
final public void MultiTerm(String field, List<BooleanClause> clauses) throws ParseException, SyntaxError {
Token text;
List<String> terms = null;
text = jj_consume_token(TERM);
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms = new ArrayList<String>();
terms.add(discardEscapeChar(text.image));
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
} else {
jj_consume_token(-1);
throw new ParseException();
}
label_2:
while (true) {
text = jj_consume_token(TERM);
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms.add(discardEscapeChar(text.image));
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
;
} else {
break label_2;
}
}
if (splitOnWhitespace == false) {
Query q = getFieldQuery(getField(field), terms, true);
addMultiTermClause(clauses, q);
}
}
private boolean jj_2_1(int xla) { private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token; jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); } try { return !jj_3_1(); }
@ -431,28 +602,76 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
finally { jj_save(0, xla); } finally { jj_save(0, xla); }
} }
private boolean jj_3R_3() { private boolean jj_2_2(int xla) {
if (jj_scan_token(STAR)) return true; jj_la = xla; jj_lastpos = jj_scanpos = token;
if (jj_scan_token(COLON)) return true; try { return !jj_3_2(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(1, xla); }
}
private boolean jj_2_3(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_3(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(2, xla); }
}
private boolean jj_3R_7() {
if (jj_scan_token(TERM)) return true;
return false; return false;
} }
private boolean jj_3R_2() { private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true; if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true; if (jj_scan_token(COLON)) return true;
return false; return false;
} }
private boolean jj_3_1() { private boolean jj_3_1() {
if (jj_3R_3()) return true;
return false;
}
private boolean jj_3R_6() {
return false;
}
private boolean jj_3R_3() {
if (jj_scan_token(TERM)) return true;
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
if (jj_3R_7()) return true;
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
private boolean jj_3_3() {
Token xsp; Token xsp;
xsp = jj_scanpos; xsp = jj_scanpos;
if (jj_3R_2()) { if (jj_3R_4()) {
jj_scanpos = xsp; jj_scanpos = xsp;
if (jj_3R_3()) return true; if (jj_3R_5()) return true;
} }
return false; return false;
} }
private boolean jj_3_2() {
if (jj_3R_3()) return true;
return false;
}
private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */ /** Generated Token Manager. */
public QueryParserTokenManager token_source; public QueryParserTokenManager token_source;
/** Current token. */ /** Current token. */
@ -462,8 +681,11 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
private int jj_ntk; private int jj_ntk;
private Token jj_scanpos, jj_lastpos; private Token jj_scanpos, jj_lastpos;
private int jj_la; private int jj_la;
/** Whether we are looking ahead. */
private boolean jj_lookingAhead = false;
private boolean jj_semLA;
private int jj_gen; private int jj_gen;
final private int[] jj_la1 = new int[23]; final private int[] jj_la1 = new int[28];
static private int[] jj_la1_0; static private int[] jj_la1_0;
static private int[] jj_la1_1; static private int[] jj_la1_1;
static { static {
@ -471,12 +693,12 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la1_init_1(); jj_la1_init_1();
} }
private static void jj_la1_init_0() { private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x4000000,0x800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0xfb440000,}; jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4f8000,0xfb4fe000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x800000,0x4800000,0x4800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0x4800000,0x4800000,0xfb440000,};
} }
private static void jj_la1_init_1() { private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x4,}; jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x7,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x0,0x0,0x4,};
} }
final private JJCalls[] jj_2_rtns = new JJCalls[1]; final private JJCalls[] jj_2_rtns = new JJCalls[3];
private boolean jj_rescan = false; private boolean jj_rescan = false;
private int jj_gc = 0; private int jj_gc = 0;
@ -486,7 +708,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -495,8 +717,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token_source.ReInit(stream); token_source.ReInit(stream);
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_lookingAhead = false;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -506,7 +729,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -516,7 +739,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -579,7 +802,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
/** Get the specific Token. */
final public Token getToken(int index) { final public Token getToken(int index) {
Token t = token; Token t = jj_lookingAhead ? jj_scanpos : token;
for (int i = 0; i < index; i++) { for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next; if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken(); else t = t.next = token_source.getNextToken();
@ -633,7 +856,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
la1tokens[jj_kind] = true; la1tokens[jj_kind] = true;
jj_kind = -1; jj_kind = -1;
} }
for (int i = 0; i < 23; i++) { for (int i = 0; i < 28; i++) {
if (jj_la1[i] == jj_gen) { if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) { for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) { if ((jj_la1_0[i] & (1<<j)) != 0) {
@ -672,7 +895,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
private void jj_rescan_token() { private void jj_rescan_token() {
jj_rescan = true; jj_rescan = true;
for (int i = 0; i < 1; i++) { for (int i = 0; i < 3; i++) {
try { try {
JJCalls p = jj_2_rtns[i]; JJCalls p = jj_2_rtns[i];
do { do {
@ -680,6 +903,8 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
switch (i) { switch (i) {
case 0: jj_3_1(); break; case 0: jj_3_1(); break;
case 1: jj_3_2(); break;
case 2: jj_3_3(); break;
} }
} }
p = p.next; p = p.next;

View File

@ -27,18 +27,17 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase { public class QueryParser extends SolrQueryParserBase {
@ -46,9 +45,44 @@ public class QueryParser extends SolrQueryParserBase {
*/ */
static public enum Operator { OR, AND } static public enum Operator { OR, AND }
public QueryParser(Version matchVersion, String defaultField, QParser parser) { /** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
public QueryParser(String defaultField, QParser parser) {
this(new FastCharStream(new StringReader(""))); this(new FastCharStream(new StringReader("")));
init(matchVersion, defaultField, parser); init(defaultField, parser);
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}
/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
} }
} }
@ -64,16 +98,14 @@ TOKEN_MGR_DECLS : {
<*> TOKEN : { <*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] > <#_NUM_CHAR: ["0"-"9"] >
// every character that follows a backslash is considered as an escaped character | <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
| <#_ESCAPED_CHAR: "\\" ~[] > | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
| <_ESCAPED_CHAR> ) > | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
| <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") > | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > | <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
} }
<DEFAULT, COMMENT> SKIP : { <DEFAULT, COMMENT> SKIP : {
@ -93,32 +125,31 @@ TOKEN_MGR_DECLS : {
< <_WHITESPACE>> < <_WHITESPACE>>
} }
<DEFAULT> TOKEN : { <DEFAULT> TOKEN : {
<AND: ("AND" | "&&") > <AND: ("AND" | "&&") >
| <OR: ("OR" | "||") > | <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") > | <NOT: ("NOT" | "!") >
| <PLUS: "+" > | <PLUS: "+" >
| <MINUS: "-" > | <MINUS: "-" >
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> > | <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
| <LPAREN: "(" > | <LPAREN: "(" >
| <RPAREN: ")" > | <RPAREN: ")" >
| <COLON: ":" > | <COLON: ":" >
| <STAR: "*" > | <STAR: "*" >
| <CARAT: "^" > : Boost | <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\""> | <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* > | <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? > | <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) > | <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > | <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" > | <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range | <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range | <RANGEEX_START: "{" > : Range
// TODO: consider using token states instead of inlining SQUOTED // TODO: consider using token states instead of inlining SQUOTED
// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'"> // | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > // | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* > | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
| <FILTER: "filter(" > | <FILTER: "filter(" >
} }
<Boost> TOKEN : { <Boost> TOKEN : {
@ -127,10 +158,10 @@ TOKEN_MGR_DECLS : {
<Range> TOKEN : { <Range> TOKEN : {
<RANGE_TO: "TO"> <RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT | <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT | <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\""> | <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ > | <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
} }
// * Query ::= ( Clause )* // * Query ::= ( Clause )*
@ -160,8 +191,7 @@ int Modifiers() : {
} }
// This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) throws SyntaxError : Query TopLevelQuery(String field) throws SyntaxError : {
{
Query q; Query q;
} }
{ {
@ -174,27 +204,31 @@ Query TopLevelQuery(String field) throws SyntaxError :
Query Query(String field) throws SyntaxError : Query Query(String field) throws SyntaxError :
{ {
List<BooleanClause> clauses = new ArrayList<BooleanClause>(); List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null; Query q;
int conj, mods; int conj, mods;
} }
{ {
mods=Modifiers() q=Clause(field)
{
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE)
firstQuery=q;
}
( (
conj=Conjunction() mods=Modifiers() q=Clause(field) LOOKAHEAD(2)
MultiTerm(field, clauses)
| mods=Modifiers() q=Clause(field)
{ addClause(clauses, CONJ_NONE, mods, q); }
)
(
LOOKAHEAD(2)
MultiTerm(field, clauses)
| conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); } { addClause(clauses, conj, mods, q); }
)* )*
{ {
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
Query firstQuery = clauses.get(0).getQuery();
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
return rawToNormal(firstQuery); return rawToNormal(firstQuery);
else {
return getBooleanQuery(clauses);
} }
} }
return getBooleanQuery(clauses);
}
} }
Query Clause(String field) throws SyntaxError : { Query Clause(String field) throws SyntaxError : {
@ -204,26 +238,22 @@ Query Clause(String field) throws SyntaxError : {
int flags = 0; int flags = 0;
} }
{ {
[ [
LOOKAHEAD(2) LOOKAHEAD(2)
( (
fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);} fieldToken=<TERM> <COLON> { field = discardEscapeChar(fieldToken.image); }
| <STAR> <COLON> {field="*";} | <STAR> <COLON> { field = "*"; }
) )
] ]
( (
q=Term(field) q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } ) | (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] { q=getFilter(q); restoreFlags(flags); } )
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } ) | (localParams = <LPARAMS> [ <CARAT> boost=<NUMBER> ] { q=getLocalParams(field, localParams.image); } )
) )
{ return handleBoost(q, boost); } { return handleBoost(q, boost); }
} }
Query Term(String field) throws SyntaxError : { Query Term(String field) throws SyntaxError : {
Token term, boost=null, fuzzySlop=null, goop1, goop2; Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false; boolean prefix = false;
@ -245,16 +275,17 @@ Query Term(String field) throws SyntaxError : {
| term=<NUMBER> | term=<NUMBER>
| term=<BAREOPER> { term.image = term.image.substring(0,1); } | term=<BAREOPER> { term.image = term.image.substring(0,1); }
) )
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] [
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ] <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
{ | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); ]
} { q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> ) | ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
[ <RANGE_TO> ] [ <RANGE_TO> ]
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> ) ( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>)) ( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ] [ <CARAT> boost=<NUMBER> ]
{ {
boolean startOpen=false; boolean startOpen=false;
@ -269,16 +300,53 @@ Query Term(String field) throws SyntaxError : {
} else if ("*".equals(goop2.image)) { } else if ("*".equals(goop2.image)) {
endOpen=true; endOpen=true;
} }
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); q = getRangeQuery(getField(field),
startOpen ? null : discardEscapeChar(goop1.image),
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
} }
| term=<QUOTED> | term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ] [
[ <CARAT> boost=<NUMBER> ] <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
{ | fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
q = handleQuotedTerm(getField(field), term, fuzzySlop); ]
} { q = handleQuotedTerm(getField(field), term, fuzzySlop); }
) )
{ return handleBoost(q, boost); }
}
void MultiTerm(String field, List<BooleanClause> clauses) throws SyntaxError : {
Token text;
List<String> terms = null;
}
{
text=<TERM>
{ {
return handleBoost(q, boost); if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms = new ArrayList<String>();
terms.add(discardEscapeChar(text.image));
}
}
// Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
(
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
text=<TERM>
{
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms.add(discardEscapeChar(text.image));
}
}
)+
{
if (splitOnWhitespace == false) {
Query q = getFieldQuery(getField(field), terms, true);
addMultiTermClause(clauses, q);
}
} }
} }
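To make the motivation concrete, here is a small self-contained Lucene sketch (an illustration, not code from this commit): only when a whitespace-separated run such as "wi fi network" reaches the analyzer as a single string, which is what sow=false allows the MultiTerm rule above to do, can SynonymGraphFilter match the multi-word synonym "wi fi" => "wifi". The synonym entry and field name here are assumptions.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class MultiTermSynonymSketch {
  public static void main(String[] args) throws Exception {
    // Register the multi-word synonym "wi fi" => "wifi".
    SynonymMap.Builder builder = new SynonymMap.Builder(true);
    builder.add(SynonymMap.Builder.join(new String[]{"wi", "fi"}, new CharsRefBuilder()),
                new CharsRef("wifi"), true);
    SynonymMap synonyms = builder.build();

    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
        return new TokenStreamComponents(tokenizer, new SynonymGraphFilter(tokenizer, synonyms, true));
      }
    };

    // Analyzing the whole run at once (what sow=false enables) lets the graph filter emit "wifi";
    // analyzing "wi" and "fi" as separate one-term strings (sow=true) never can.
    try (TokenStream ts = analyzer.tokenStream("text", "wi fi network")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        System.out.println(term.toString());
      }
      ts.end();
    }
  }
}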

View File

@ -18,10 +18,12 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.reverse.ReverseStringFilter;
@ -41,7 +43,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
@ -83,7 +84,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public static final Operator OR_OPERATOR = Operator.OR; public static final Operator OR_OPERATOR = Operator.OR;
/** The default operator that parser uses to combine query terms */
Operator operator = OR_OPERATOR; protected Operator operator = OR_OPERATOR;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
boolean allowLeadingWildcard = true; boolean allowLeadingWildcard = true;
@ -133,16 +134,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// internal: A simple raw fielded query
public static class RawQuery extends Query { public static class RawQuery extends Query {
final SchemaField sfield; final SchemaField sfield;
final String externalVal; private final List<String> externalVals;
public RawQuery(SchemaField sfield, String externalVal) { public RawQuery(SchemaField sfield, String externalVal) {
this(sfield, Collections.singletonList(externalVal));
}
public RawQuery(SchemaField sfield, List<String> externalVals) {
this.sfield = sfield; this.sfield = sfield;
this.externalVal = externalVal; this.externalVals = externalVals;
}
public int getTermCount() {
return externalVals.size();
}
public List<String> getExternalVals() {
return externalVals;
}
public String getJoinedExternalVal() {
return externalVals.size() == 1 ? externalVals.get(0) : String.join(" ", externalVals);
} }
@Override @Override
public String toString(String field) { public String toString(String field) {
return "RAW(" + field + "," + externalVal + ")"; return "RAW(" + field + "," + getJoinedExternalVal() + ")";
} }
@Override @Override
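A minimal sketch of the new multi-valued RawQuery accessors shown above (getTermCount, getExternalVals, getJoinedExternalVal); the field name and the StrField-backed SchemaField are purely illustrative assumptions.

import java.util.Arrays;
import org.apache.solr.parser.SolrQueryParserBase.RawQuery;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;

public class RawQuerySketch {
  public static void main(String[] args) {
    SchemaField field = new SchemaField("manu_exact", new StrField()); // illustrative field
    RawQuery rq = new RawQuery(field, Arrays.asList("wi", "fi"));      // one MultiTerm run, two raw values
    System.out.println(rq.getTermCount());          // 2
    System.out.println(rq.getJoinedExternalVal());  // wi fi
    System.out.println(rq.toString("manu_exact"));  // RAW(manu_exact,wi fi)
  }
}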
@ -165,7 +182,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError; public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError;
public void init(Version matchVersion, String defaultField, QParser parser) { public void init(String defaultField, QParser parser) {
this.schema = parser.getReq().getSchema(); this.schema = parser.getReq().getSchema();
this.parser = parser; this.parser = parser;
this.flags = parser.getFlags(); this.flags = parser.getFlags();
@ -406,17 +423,30 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
throw new RuntimeException("Clause cannot be both required and prohibited"); throw new RuntimeException("Clause cannot be both required and prohibited");
} }
/**
* Called from QueryParser's MultiTerm rule.
* Assumption: no conjunction or modifiers (conj == CONJ_NONE and mods == MOD_NONE)
*/
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
// We might have been passed a null query; the term might have been
// filtered away by the analyzer.
if (q == null) {
return;
}
clauses.add(newBooleanClause(q, operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
}
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError { boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); return createFieldQuery(analyzer, occur, field, queryText,
quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop);
} }
/** /**
* Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * Base implementation delegates to {@link #getFieldQuery(String,String,boolean,boolean)}.
* This method may be overridden, for example, to return * This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery. * a SpanNearQuery instead of a PhraseQuery.
* *
@ -536,14 +566,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
SchemaField sfield = null; SchemaField sfield = null;
List<String> fieldValues = null; List<RawQuery> fieldValues = null;
boolean onlyRawQueries = true;
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD; int allRawQueriesTermCount = 0;
int clausesAdded = 0; for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof RawQuery) {
allRawQueriesTermCount += ((RawQuery)clause.getQuery()).getTermCount();
} else {
onlyRawQueries = false;
}
}
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery(); BooleanQuery.Builder booleanBuilder = newBooleanQuery();
Map<SchemaField, List<String>> fmap = new HashMap<>(); Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
for (BooleanClause clause : clauses) { for (BooleanClause clause : clauses) {
Query subq = clause.getQuery(); Query subq = clause.getQuery();
@ -563,14 +600,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
// We are currently relying on things like PointField not being marked as indexed in order to bypass
// the "useTermQuery" check.
if (fieldValues == null && useTermsQuery || !sfield.indexed()) { if ((fieldValues == null && useTermsQuery) || !sfield.indexed()) {
fieldValues = new ArrayList<>(2); fieldValues = new ArrayList<>(2);
fmap.put(sfield, fieldValues); fmap.put(sfield, fieldValues);
} }
} }
if (fieldValues != null) { if (fieldValues != null) {
fieldValues.add(rawq.externalVal); fieldValues.add(rawq);
continue; continue;
} }
@ -578,33 +615,50 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
} }
clausesAdded++;
booleanBuilder.add(clause); booleanBuilder.add(clause);
} }
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) { for (Map.Entry<SchemaField,List<RawQuery>> entry : fmap.entrySet()) {
sfield = entry.getKey(); sfield = entry.getKey();
fieldValues = entry.getValue(); fieldValues = entry.getValue();
FieldType ft = sfield.getType(); FieldType ft = sfield.getType();
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) { int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
// use boolean query instead // use boolean query instead
for (String externalVal : fieldValues) { for (RawQuery rawq : fieldValues) {
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal); Query subq;
clausesAdded++; if (ft.isTokenized() && sfield.indexed()) {
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(),
false, fieldAutoGenPhraseQueries);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} else {
for (String externalVal : rawq.getExternalVals()) {
subq = ft.getFieldQuery(this.parser, sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} }
}
}
} else { } else {
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues); List<String> externalVals
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
clausesAdded++; Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
if (onlyRawQueries && termCount == allRawQueriesTermCount) return subq; // if this is everything, don't wrap in a boolean query
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} }
} }
return booleanBuilder.build(); BooleanQuery bq = booleanBuilder.build();
if (bq.clauses().size() == 1) { // Unwrap single SHOULD query
BooleanClause clause = bq.clauses().iterator().next();
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
return clause.getQuery();
}
}
return bq;
} }
@ -835,9 +889,26 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw) // Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
Query rawToNormal(Query q) { Query rawToNormal(Query q) {
if (!(q instanceof RawQuery)) return q; Query normal = q;
RawQuery rq = (RawQuery)q; if (q instanceof RawQuery) {
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal); RawQuery rawq = (RawQuery)q;
if (rawq.sfield.getType().isTokenized()) {
normal = rawq.sfield.getType().getFieldQuery(parser, rawq.sfield, rawq.getJoinedExternalVal());
} else {
FieldType ft = rawq.sfield.getType();
if (rawq.getTermCount() == 1) {
normal = ft.getFieldQuery(this.parser, rawq.sfield, rawq.getExternalVals().get(0));
} else {
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
for (String externalVal : rawq.getExternalVals()) {
Query subq = ft.getFieldQuery(this.parser, rawq.sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
normal = booleanBuilder.build();
}
}
}
return normal;
} }
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError { protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
@ -877,18 +948,84 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
FieldType ft = sf.getType(); FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) { if (ft.isTokenized() && sf.indexed()) {
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries())); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries);
} else { } else {
if (raw) { if (raw) {
return new RawQuery(sf, queryText); return new RawQuery(sf, queryText);
} else { } else {
return sf.getType().getFieldQuery(parser, sf, queryText); return ft.getFieldQuery(parser, sf, queryText);
} }
} }
} }
// default to a normal field query
return newFieldQuery(getAnalyzer(), field, queryText, quoted); return newFieldQuery(getAnalyzer(), field, queryText, quoted, false);
}
// Assumption: quoted is always false
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
checkNullField(field);
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
return subQParser.getQuery();
}
}
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
if (sf != null) {
FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) {
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries);
} else {
if (raw) {
return new RawQuery(sf, queryTerms);
} else {
if (queryTerms.size() == 1) {
return ft.getFieldQuery(parser, sf, queryTerms.get(0));
} else {
List<Query> subqs = new ArrayList<>();
for (String queryTerm : queryTerms) {
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
// for edismax: ignore parsing failures
}
}
if (subqs.size() == 1) {
return subqs.get(0);
} else { // delay building boolean query until we must
final BooleanClause.Occur occur
= operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
subqs.forEach(subq -> booleanBuilder.add(subq, occur));
return booleanBuilder.build();
}
}
}
}
}
// default to a normal field query
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
return newFieldQuery(getAnalyzer(), field, queryText, false, false);
} }
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){

View File

@ -17,6 +17,7 @@
package org.apache.solr.search; package org.apache.solr.search;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@ -161,6 +162,8 @@ public class ExtendedDismaxQParser extends QParser {
// but always for unstructured implicit bqs created by getFieldQuery
up.minShouldMatch = config.minShouldMatch; up.minShouldMatch = config.minShouldMatch;
up.setSplitOnWhitespace(config.splitOnWhitespace);
parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config); parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
if (parsedUserQuery == null) { if (parsedUserQuery == null) {
@ -307,6 +310,8 @@ public class ExtendedDismaxQParser extends QParser {
up.setRemoveStopFilter(true); up.setRemoveStopFilter(true);
query = up.parse(mainUserQuery); query = up.parse(mainUserQuery);
} }
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) { } catch (Exception e) {
// ignore failure and reparse later after escaping reserved chars
up.exceptions = false; up.exceptions = false;
@ -545,6 +550,7 @@ public class ExtendedDismaxQParser extends QParser {
pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields)); pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
pp.setPhraseSlop(slop); pp.setPhraseSlop(slop);
pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
pp.setSplitOnWhitespace(config.splitOnWhitespace);
/* :TODO: reevaluate using makeDismax=true vs false...
*
@ -976,6 +982,7 @@ public class ExtendedDismaxQParser extends QParser {
private String field; private String field;
private String val; private String val;
private String val2; private String val2;
private List<String> vals;
private boolean bool; private boolean bool;
private boolean bool2; private boolean bool2;
private float flt; private float flt;
@ -1036,6 +1043,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = quoted ? QType.PHRASE : QType.FIELD; this.type = quoted ? QType.PHRASE : QType.FIELD;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.vals = null;
this.slop = getPhraseSlop(); // unspecified this.slop = getPhraseSlop(); // unspecified
return getAliasedQuery(); return getAliasedQuery();
} }
@ -1045,10 +1053,21 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.PHRASE; this.type = QType.PHRASE;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.vals = null;
this.slop = slop; this.slop = slop;
return getAliasedQuery(); return getAliasedQuery();
} }
@Override
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
this.type = QType.FIELD;
this.field = field;
this.val = null;
this.vals = queryTerms;
this.slop = getPhraseSlop();
return getAliasedMultiTermQuery(queryTerms);
}
@Override @Override
protected Query getPrefixQuery(String field, String val) throws SyntaxError { protected Query getPrefixQuery(String field, String val) throws SyntaxError {
if (val.equals("") && field.equals("*")) { if (val.equals("") && field.equals("*")) {
@ -1057,11 +1076,17 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.PREFIX; this.type = QType.PREFIX;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.vals = null;
return getAliasedQuery(); return getAliasedQuery();
} }
@Override @Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError { protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && getSplitOnWhitespace() == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
Analyzer actualAnalyzer; Analyzer actualAnalyzer;
if (removeStopFilter) { if (removeStopFilter) {
if (nonStopFilterAnalyzerPerField == null) { if (nonStopFilterAnalyzerPerField == null) {
@ -1074,7 +1099,7 @@ public class ExtendedDismaxQParser extends QParser {
} else { } else {
actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer(); actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
} }
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted); return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
} }
@Override @Override
@ -1083,6 +1108,7 @@ public class ExtendedDismaxQParser extends QParser {
this.field = field; this.field = field;
this.val = a; this.val = a;
this.val2 = b; this.val2 = b;
this.vals = null;
this.bool = startInclusive; this.bool = startInclusive;
this.bool2 = endInclusive; this.bool2 = endInclusive;
return getAliasedQuery(); return getAliasedQuery();
@ -1100,6 +1126,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.WILDCARD; this.type = QType.WILDCARD;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.vals = null;
return getAliasedQuery(); return getAliasedQuery();
} }
@ -1108,6 +1135,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.FUZZY; this.type = QType.FUZZY;
this.field = field; this.field = field;
this.val = val; this.val = val;
this.vals = null;
this.flt = minSimilarity; this.flt = minSimilarity;
return getAliasedQuery(); return getAliasedQuery();
} }
@ -1158,6 +1186,128 @@ public class ExtendedDismaxQParser extends QParser {
} }
} }
/**
* Delegates to the super class unless the field has been specified
* as an alias -- in which case we recurse on each of
* the aliased fields, and the results are composed into a
* DisjunctionMaxQuery. (so yes: aliases which point at other
* aliases should work)
*/
protected Query getAliasedMultiTermQuery(List<String> queryTerms) throws SyntaxError {
Alias a = aliases.get(field);
this.validateCyclicAliasing(field);
if (a != null) {
List<Query> lst = getQueries(a);
if (lst == null || lst.size() == 0) {
return getQuery();
}
// make a DisjunctionMaxQuery in this case too... it will stop
// the "mm" processing from making everything required in the case
// that the query expanded to multiple clauses.
// DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
// if (lst.size()==1) return lst.get(0);
if (makeDismax) {
if (lst.get(0) instanceof BooleanQuery && allSameQueryStructure(lst)) {
BooleanQuery.Builder q = new BooleanQuery.Builder();
List<Query> subs = new ArrayList<>(lst.size());
for (int c = 0 ; c < ((BooleanQuery)lst.get(0)).clauses().size() ; ++c) {
subs.clear();
// Make a dismax query for each clause position in the boolean per-field queries.
for (int n = 0 ; n < lst.size() ; ++n) {
subs.add(((BooleanQuery)lst.get(n)).clauses().get(c).getQuery());
}
q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD));
}
return q.build();
} else {
return new DisjunctionMaxQuery(lst, a.tie);
}
} else {
BooleanQuery.Builder q = new BooleanQuery.Builder();
for (Query sub : lst) {
q.add(sub, BooleanClause.Occur.SHOULD);
}
return q.build();
}
} else {
// verify that a fielded query is actually on a field that exists... if not,
// then throw an exception to get us out of here, and we'll treat it like a
// literal when we try the escape+re-parse.
if (exceptions) {
FieldType ft = schema.getFieldTypeNoEx(field);
if (ft == null && null == MagicFieldName.get(field)) {
throw unknownField;
}
}
return getQuery();
}
}
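Illustrative sketch, not part of the patch (field names "a"/"b", terms "wi"/"fi", and the 0.1f tiebreaker are made-up values): when the per-field queries share the same structure, the dismax branch above recombines them clause by clause, so "mm" still sees one top-level clause per original query term.
import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
public class AliasedMultiTermSketch {
  public static void main(String[] args) {
    // Per-field queries produced by analysis, both with the same structure: (a:wi a:fi) and (b:wi b:fi)
    Query aWi = new TermQuery(new Term("a", "wi")), aFi = new TermQuery(new Term("a", "fi"));
    Query bWi = new TermQuery(new Term("b", "wi")), bFi = new TermQuery(new Term("b", "fi"));
    // Clause-by-clause recombination: one DisjunctionMaxQuery per clause position.
    BooleanQuery.Builder combined = new BooleanQuery.Builder();
    combined.add(new DisjunctionMaxQuery(Arrays.asList(aWi, bWi), 0.1f), BooleanClause.Occur.SHOULD);
    combined.add(new DisjunctionMaxQuery(Arrays.asList(aFi, bFi), 0.1f), BooleanClause.Occur.SHOULD);
    // In query notation the result is ((a:wi | b:wi) (a:fi | b:fi)) -- rather than
    // ((a:wi a:fi) | (b:wi b:fi)), which is what the whole-query dismax fallback produces.
    System.out.println(combined.build());
  }
}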
/** Recursively examines the given query list for identical structure in all queries. */
private boolean allSameQueryStructure(List<Query> lst) {
boolean allSame = true;
Query firstQuery = lst.get(0);
for (int n = 1 ; n < lst.size(); ++n) {
Query nthQuery = lst.get(n);
if (nthQuery.getClass() != firstQuery.getClass()) {
allSame = false;
break;
}
if (firstQuery instanceof BooleanQuery) {
List<BooleanClause> firstBooleanClauses = ((BooleanQuery)firstQuery).clauses();
List<BooleanClause> nthBooleanClauses = ((BooleanQuery)nthQuery).clauses();
if (firstBooleanClauses.size() != nthBooleanClauses.size()) {
allSame = false;
break;
}
for (int c = 0 ; c < firstBooleanClauses.size() ; ++c) {
if (nthBooleanClauses.get(c).getQuery().getClass() != firstBooleanClauses.get(c).getQuery().getClass()
|| nthBooleanClauses.get(c).getOccur() != firstBooleanClauses.get(c).getOccur()) {
allSame = false;
break;
}
if (firstBooleanClauses.get(c).getQuery() instanceof BooleanQuery && ! allSameQueryStructure
(Arrays.asList(firstBooleanClauses.get(c).getQuery(), nthBooleanClauses.get(c).getQuery()))) {
allSame = false;
break;
}
}
}
}
return allSame;
}
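Roughly, two queries count as having "identical structure" for the helper above when they have the same number of clauses, the same Occur at each position, and the same query class at each position (checked recursively for nested boolean queries). Continuing the sketch above, illustrative only:
    // Same structure as (a:wi a:fi): two SHOULD clauses, both TermQuery.
    BooleanQuery sameShape = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("b", "wi")), BooleanClause.Occur.SHOULD)
        .add(new TermQuery(new Term("b", "fi")), BooleanClause.Occur.SHOULD)
        .build();
    // Different structure: a single clause (e.g. the other field's analyzer dropped a term),
    // so the dismax branch falls back to one DisjunctionMaxQuery over the whole per-field queries.
    BooleanQuery differentShape = new BooleanQuery.Builder()
        .add(new TermQuery(new Term("c", "wi")), BooleanClause.Occur.SHOULD)
        .build();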
@Override
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
// We might have been passed a null query; the terms might have been filtered away by the analyzer.
if (q == null) {
return;
}
boolean required = operator == AND_OPERATOR;
BooleanClause.Occur occur = required ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
if (q instanceof BooleanQuery) {
boolean allOptionalDisMaxQueries = true;
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
if (c.getOccur() != BooleanClause.Occur.SHOULD || ! (c.getQuery() instanceof DisjunctionMaxQuery)) {
allOptionalDisMaxQueries = false;
break;
}
}
if (allOptionalDisMaxQueries) {
// getAliasedMultiTermQuery() constructed a BooleanQuery containing only SHOULD DisjunctionMaxQuery-s.
// Unwrap the query and add a clause for each contained DisMax query.
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
clauses.add(newBooleanClause(c.getQuery(), occur));
}
return;
}
}
clauses.add(newBooleanClause(q, occur));
}
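Continuing the same sketch for the override above (illustrative only; also needs java.util.ArrayList and java.util.List): with q.op=AND, the BooleanQuery of SHOULD DisjunctionMaxQuery clauses built by getAliasedMultiTermQuery() is unwrapped so that each per-term dismax becomes its own MUST clause, keeping mm/q.op counting per original query term.
    // "combined" is the builder from the AliasedMultiTermSketch above: ((a:wi | b:wi) (a:fi | b:fi))
    List<BooleanClause> outer = new ArrayList<>();
    for (BooleanClause c : combined.build().clauses()) {
      outer.add(new BooleanClause(c.getQuery(), BooleanClause.Occur.MUST));
    }
    // outer now holds +(a:wi | b:wi) and +(a:fi | b:fi), instead of a single clause
    // +((a:wi | b:wi) (a:fi | b:fi)) wrapping the whole group.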
/** /**
* Validate there is no cyclic referencing in the aliasing * Validate there is no cyclic referencing in the aliasing
*/ */
@ -1212,7 +1362,12 @@ public class ExtendedDismaxQParser extends QParser {
switch (type) { switch (type) {
case FIELD: // fallthrough case FIELD: // fallthrough
case PHRASE: case PHRASE:
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false); Query query;
if (val == null) {
query = super.getFieldQuery(field, vals, false);
} else {
query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
}
// Boolean query on a whitespace-separated string // Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery // If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) { if (query instanceof BooleanQuery) {
@ -1248,6 +1403,8 @@ public class ExtendedDismaxQParser extends QParser {
} }
return null; return null;
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) { } catch (Exception e) {
// an exception here is due to the field query not being compatible with the input text // an exception here is due to the field query not being compatible with the input text
// for example, passing a string to a numeric field. // for example, passing a string to a numeric field.
@ -1479,6 +1636,8 @@ public class ExtendedDismaxQParser extends QParser {
protected String[] boostFuncs; protected String[] boostFuncs;
protected boolean splitOnWhitespace;
public ExtendedDismaxConfiguration(SolrParams localParams, public ExtendedDismaxConfiguration(SolrParams localParams,
SolrParams params, SolrQueryRequest req) { SolrParams params, SolrQueryRequest req) {
solrParams = SolrParams.wrapDefaults(localParams, params); solrParams = SolrParams.wrapDefaults(localParams, params);
@ -1522,6 +1681,8 @@ public class ExtendedDismaxQParser extends QParser {
boostFuncs = solrParams.getParams(DisMaxParams.BF); boostFuncs = solrParams.getParams(DisMaxParams.BF);
multBoosts = solrParams.getParams(DMP.MULT_BOOST); multBoosts = solrParams.getParams(DMP.MULT_BOOST);
splitOnWhitespace = solrParams.getBool(QueryParsing.SPLIT_ON_WHITESPACE, SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE);
} }
/** /**
* *


@ -19,6 +19,7 @@ package org.apache.solr.search;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
/** /**
@ -46,6 +47,8 @@ public class LuceneQParser extends QParser {
lparser.setDefaultOperator lparser.setDefaultOperator
(QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), (QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(),
getParam(QueryParsing.OP))); getParam(QueryParsing.OP)));
lparser.setSplitOnWhitespace(StrUtils.parseBool
(getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE));
return lparser.parse(qstr); return lparser.parse(qstr);
} }
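A hedged request-side sketch of the new param, in the SolrTestCaseJ4 style used by the tests in this commit (the field "text_sw" and the query text are assumptions; the enclosing test method would declare throws Exception):
try (SolrQueryRequest req = req("df", "text_sw", QueryParsing.SPLIT_ON_WHITESPACE, "false")) {
  QParser parser = QParser.getParser("wi fi", "lucene", req);
  Query query = parser.getQuery();  // whole string reaches analysis, so "wi fi => wifi" can match
}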


@ -28,6 +28,8 @@ import java.util.List;
* <br>Other parameters:<ul> * <br>Other parameters:<ul>
* <li>q.op - the default operator "OR" or "AND"</li> * <li>q.op - the default operator "OR" or "AND"</li>
* <li>df - the default field name</li> * <li>df - the default field name</li>
* <li>sow - split on whitespace prior to analysis, boolean,
* default=<code>{@value org.apache.solr.search.SolrQueryParser#DEFAULT_SPLIT_ON_WHITESPACE}</code></li>
* </ul> * </ul>
* <br>Example: <code>{!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz</code> * <br>Example: <code>{!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz</code>
*/ */


@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
public class QueryParserConfigurationException extends IllegalArgumentException {
public QueryParserConfigurationException(String message) {
super(message);
}
}


@ -51,6 +51,7 @@ public class QueryParsing {
public static final String F = "f"; // field that a query or command pertains to public static final String F = "f"; // field that a query or command pertains to
public static final String TYPE = "type";// parser for this query or command public static final String TYPE = "type";// parser for this query or command
public static final String DEFTYPE = "defType"; // default parser for any direct subqueries public static final String DEFTYPE = "defType"; // default parser for any direct subqueries
public static final String SPLIT_ON_WHITESPACE = "sow"; // Whether to split on whitespace prior to analysis
public static final String LOCALPARAM_START = "{!"; public static final String LOCALPARAM_START = "{!";
public static final char LOCALPARAM_END = '}'; public static final char LOCALPARAM_END = '}';
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param) // true if the value was specified by the "v" param (i.e. v=myval, or v=$param)


@ -25,7 +25,7 @@ import org.apache.solr.parser.QueryParser;
public class SolrQueryParser extends QueryParser { public class SolrQueryParser extends QueryParser {
public SolrQueryParser(QParser parser, String defaultField) { public SolrQueryParser(QParser parser, String defaultField) {
super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, parser); super(defaultField, parser);
} }
} }
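And a direct programmatic sketch of the same toggle on the parser itself (illustrative; assumes an existing QParser instance "qparser", e.g. from QParser.getParser, whose request uses a schema containing the text_sw field):
SolrQueryParser lparser = new SolrQueryParser(qparser, "text_sw");
lparser.setSplitOnWhitespace(false);   // default is true, i.e. the pre-existing split-then-analyze behavior
Query q = lparser.parse("wi fi");      // throws SyntaxError; "wi fi" is analyzed as one string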


@ -0,0 +1,13 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
US, U.S., U S, USA, U.S.A., U S A, United States, United States of America


@ -0,0 +1,50 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test-multi-word-synonyms" version="1.6">
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SynonymGraphFilterFactory"
synonyms="multiword-synonyms.txt"
tokenizerFactory="solr.StandardTokenizerFactory"
ignoreCase="true"
expand="true"/>
</analyzer>
</fieldType>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<uniqueKey>id</uniqueKey>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<field name="text" type="text" indexed="true" stored="true"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</schema>


@ -29,3 +29,5 @@ Television, Televisions, TV, TVs
# Synonym mappings can be used for spelling correction too # Synonym mappings can be used for spelling correction too
pixima => pixma pixima => pixma
# multiword synonyms
wi fi => wifi


@ -16,7 +16,9 @@
*/ */
package org.apache.solr.search; package org.apache.solr.search;
import java.util.Arrays;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.Set; import java.util.Set;
@ -32,9 +34,11 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TextField;
import org.apache.solr.util.SolrPluginUtils; import org.apache.solr.util.SolrPluginUtils;
import org.junit.BeforeClass; import org.junit.BeforeClass;
import org.junit.Test; import org.junit.Test;
import org.noggit.ObjectBuilder;
public class TestExtendedDismaxParser extends SolrTestCaseJ4 { public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
@ -62,7 +66,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"foo_i", "8" "foo_i", "8"
)); ));
assertU(adoc("id", "47", "trait_ss", "Pig", assertU(adoc("id", "47", "trait_ss", "Pig",
"text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!")); "text_sw", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100")); assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100")); assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
assertU(adoc("id", "50", "text_sw", "start new big city end")); assertU(adoc("id", "50", "text_sw", "start new big city end"));
@ -88,48 +92,52 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
assertU(adoc("id", "69", "text_sw", "ties barbie")); assertU(adoc("id", "69", "text_sw", "ties barbie"));
assertU(adoc("id", "70", "text_sw", "hair")); assertU(adoc("id", "70", "text_sw", "hair"));
assertU(adoc("id", "71", "text_sw", "ties")); assertU(adoc("id", "71", "text_sw", "ties"));
assertU(adoc("id", "72", "text_sw", "wifi ATM"));
assertU(commit()); assertU(commit());
} }
@Test @Test
public void testSyntax() throws Exception { public void testSyntax() throws Exception {
for (String sow : Arrays.asList("true", "false")) {
// a bare * should be treated as *:* // a bare * should be treated as *:*
assertJQ(req("defType","edismax", "q","*", "df","doesnotexist_s") assertJQ(req("defType", "edismax", "q", "*", "df", "doesnotexist_s", "sow", sow)
,"/response/docs/[0]==" // make sure we get something... , "/response/docs/[0]==" // make sure we get something...
); );
assertJQ(req("defType","edismax", "q","doesnotexist_s:*") assertJQ(req("defType", "edismax", "q", "doesnotexist_s:*", "sow", sow)
,"/response/numFound==0" // nothing should be found , "/response/numFound==0" // nothing should be found
); );
assertJQ(req("defType","edismax","q","doesnotexist_s:*") assertJQ(req("defType", "edismax", "q", "doesnotexist_s:*", "sow", sow)
,"/response/numFound==0" // nothing should be found , "/response/numFound==0" // nothing should be found
); );
assertJQ(req("defType","edismax","q","doesnotexist_s:( * * * )") assertJQ(req("defType", "edismax", "q", "doesnotexist_s:( * * * )", "sow", sow)
,"/response/numFound==0" // nothing should be found , "/response/numFound==0" // nothing should be found
); );
} }
}
public void testTrailingOperators() throws Exception { public void testTrailingOperators() throws Exception {
for (String sow : Arrays.asList("true", "false")) {
// really just test that exceptions aren't thrown by // really just test that exceptions aren't thrown by
// single + - // single + -
assertJQ(req("defType","edismax", "q","-") assertJQ(req("defType", "edismax", "q", "-", "df", "text_sw", "sow", sow)
,"/response=="); , "/response==");
assertJQ(req("defType","edismax", "q","+") assertJQ(req("defType", "edismax", "q", "+", "df", "text_sw", "sow", sow)
,"/response=="); , "/response==");
assertJQ(req("defType","edismax", "q","+ - +") assertJQ(req("defType", "edismax", "q", "+ - +", "df", "text_sw", "sow", sow)
,"/response=="); , "/response==");
assertJQ(req("defType","edismax", "q","- + -") assertJQ(req("defType", "edismax", "q", "- + -", "df", "text_sw", "sow", sow)
,"/response=="); , "/response==");
assertJQ(req("defType","edismax", "q","id:47 +") assertJQ(req("defType", "edismax", "q", "id:47 +", "df", "text_sw", "sow", sow)
,"/response/numFound==1"); , "/response/numFound==1");
assertJQ(req("defType","edismax", "q","id:47 -") assertJQ(req("defType", "edismax", "q", "id:47 -", "df", "text_sw", "sow", sow)
,"/response/numFound==1"); , "/response/numFound==1");
Random r = random(); Random r = random();
for (int i=0; i<100; i++) { for (int i=0; i<100; i++) {
@ -144,42 +152,49 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
} }
String q = sb.toString(); String q = sb.toString();
assertJQ(req("defType","edismax", "q",q) assertJQ(req("defType", "edismax", "q", q, "df", "text_sw", "sow", sow)
,"/response=="); , "/response==");
}
} }
} }
public void testLowercaseOperators() { public void testLowercaseOperators() {
for (String sow : Arrays.asList("true", "false")) {
assertQ("Upper case operator", assertQ("Upper case operator",
req("q","Zapp AND Brannigan", req("q", "Zapp AND Brannigan",
"qf", "name", "qf", "name",
"lowercaseOperators", "false", "lowercaseOperators", "false",
"defType","edismax") "sow", sow,
,"*[count(//doc)=1]"); "defType", "edismax")
, "*[count(//doc)=1]");
assertQ("Upper case operator, allow lowercase", assertQ("Upper case operator, allow lowercase",
req("q","Zapp AND Brannigan", req("q", "Zapp AND Brannigan",
"qf", "name", "qf", "name",
"lowercaseOperators", "true", "lowercaseOperators", "true",
"defType","edismax") "sow", sow,
,"*[count(//doc)=1]"); "defType", "edismax")
, "*[count(//doc)=1]");
assertQ("Lower case operator, don't allow lowercase operators", assertQ("Lower case operator, don't allow lowercase operators",
req("q","Zapp and Brannigan", req("q", "Zapp and Brannigan",
"qf", "name", "qf", "name",
"q.op", "AND", "q.op", "AND",
"lowercaseOperators", "false", "lowercaseOperators", "false",
"defType","edismax") "sow", sow,
,"*[count(//doc)=0]"); "defType", "edismax")
, "*[count(//doc)=0]");
assertQ("Lower case operator, allow lower case operators", assertQ("Lower case operator, allow lower case operators",
req("q","Zapp and Brannigan", req("q", "Zapp and Brannigan",
"qf", "name", "qf", "name",
"q.op", "AND", "q.op", "AND",
"lowercaseOperators", "true", "lowercaseOperators", "true",
"defType","edismax") "sow", sow,
,"*[count(//doc)=1]"); "defType", "edismax")
, "*[count(//doc)=1]");
}
} }
// test the edismax query parser based on the dismax parser // test the edismax query parser based on the dismax parser
@ -249,42 +264,42 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, twor , twor
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","op"), twor "q","op"), twor
); );
assertQ(req("defType", "edismax", assertQ(req("defType", "edismax",
"qf", "name title subject text", "qf", "name title subject text_sw",
"q.op", "AND", "q.op", "AND",
"q","Order op"), oner "q","Order op"), oner
); );
assertQ(req("defType", "edismax", assertQ(req("defType", "edismax",
"qf", "name title subject text", "qf", "name title subject text_sw",
"q.op", "OR", "q.op", "OR",
"q","Order op"), twor "q","Order op"), twor
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","Order AND op"), oner "q","Order AND op"), oner
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","Order and op"), oner "q","Order and op"), oner
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","+Order op"), oner "q","+Order op"), oner
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","Order OR op"), twor "q","Order OR op"), twor
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","Order or op"), twor "q","Order or op"), twor
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","*:*"), allr "q","*:*"), allr
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","star OR (-star)"), allr "q","star OR (-star)"), allr
); );
assertQ(req("defType", "edismax", "qf", "name title subject text", assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
"q","id:42 OR (-id:42)"), allr "q","id:42 OR (-id:42)"), allr
); );
@ -536,7 +551,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"//str[@name='parsedquery_toString'][.='+(id:42)^5.0']"); "//str[@name='parsedquery_toString'][.='+(id:42)^5.0']");
assertQ(req("defType","edismax", "uf","-*", "q","cannons"), assertQ(req("defType","edismax", "uf","-*", "q","cannons", "qf","text_sw"),
oner); oner);
assertQ(req("defType","edismax", "uf","* -id", "q","42", "qf", "id"), oner); assertQ(req("defType","edismax", "uf","* -id", "q","42", "qf", "id"), oner);
@ -870,7 +885,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"*[count(//doc)=3]"); "*[count(//doc)=3]");
assertQ( assertQ(
"Might be double-escaping a client-escaped colon", "Might be double-escaping a client-escaped colon",
req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "text"), req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "text_sw"),
"*[count(//doc)=3]"); "*[count(//doc)=3]");
} }
@ -1032,56 +1047,56 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
// "line up and fly directly at the enemy death cannons, clogging them with wreckage!" // "line up and fly directly at the enemy death cannons, clogging them with wreckage!"
assertQ("test default operator with mm (AND + 0% => 0 hits)", assertQ("test default operator with mm (AND + 0% => 0 hits)",
req("q", "(line notfound) OR notfound", req("q", "(line notfound) OR notfound",
"qf", "text", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"mm", "0%", "mm", "0%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=0]"); , "*[count(//doc)=0]");
assertQ("test default operator with mm (OR + 0% => 1 hit)", assertQ("test default operator with mm (OR + 0% => 1 hit)",
req("q", "line notfound OR notfound", req("q", "line notfound OR notfound",
"qf", "text", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"mm", "0%", "mm", "0%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("test default operator with mm (OR + 100% => 0 hits)", assertQ("test default operator with mm (OR + 100% => 0 hits)",
req("q", "line notfound OR notfound", req("q", "line notfound OR notfound",
"qf", "text", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"mm", "100%", "mm", "100%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=0]"); , "*[count(//doc)=0]");
assertQ("test default operator with mm (OR + 35% => 1 hit)", assertQ("test default operator with mm (OR + 35% => 1 hit)",
req("q", "line notfound notfound2 OR notfound", req("q", "line notfound notfound2 OR notfound",
"qf", "text", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"mm", "35%", "mm", "35%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("test default operator with mm (OR + 75% => 0 hits)", assertQ("test default operator with mm (OR + 75% => 0 hits)",
req("q", "line notfound notfound2 OR notfound3", req("q", "line notfound notfound2 OR notfound3",
"qf", "text", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"mm", "75%", "mm", "75%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=0]"); , "*[count(//doc)=0]");
assertQ("test default operator with mm (AND + 0% => 1 hit)", assertQ("test default operator with mm (AND + 0% => 1 hit)",
req("q", "(line enemy) OR notfound", req("q", "(line enemy) OR notfound",
"qf", "text", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"mm", "0%", "mm", "0%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("test default operator with mm (AND + 50% => 1 hit)", assertQ("test default operator with mm (AND + 50% => 1 hit)",
req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)", req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)",
"qf", "text", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"mm", "50%", "mm", "50%",
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("test default operator with mm (AND + 75% => 0 hits)", assertQ("test default operator with mm (AND + 75% => 0 hits)",
req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)", req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)",
"qf", "text", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"mm", "75%", "mm", "75%",
"defType", "edismax") "defType", "edismax")
@ -1092,10 +1107,20 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
* Test that minShouldMatch applies to Optional terms only * Test that minShouldMatch applies to Optional terms only
*/ */
public void testMinShouldMatchOptional() throws Exception { public void testMinShouldMatchOptional() throws Exception {
for (String sow : Arrays.asList("true", "false")) {
assertQ("test minShouldMatch (top level optional terms only)", assertQ("test minShouldMatch (top level optional terms only)",
req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1) req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1)
"qf", "text_sw", "qf", "text_sw",
"mm", "50%", "mm", "50%",
"sow", sow,
"defType", "edismax")
, "*[count(//doc)=4]");
assertQ("test minShouldMatch (top level optional terms only and sow=false)",
req("q", "stocks oil gold", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold))~1)
"qf", "text_sw",
"mm", "50%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
@ -1103,6 +1128,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "stocks oil gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~1) req("q", "stocks oil gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~1)
"qf", "text_sw", "qf", "text_sw",
"mm", "50%", "mm", "50%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
@ -1110,6 +1136,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "stocks gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~2) req("q", "stocks gold -stockade", // +(((text_sw:stock) (text_sw:oil) (text_sw:gold) -(text_sw:stockad))~2)
"qf", "text_sw", "qf", "text_sw",
"mm", "100%", "mm", "100%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
@ -1117,6 +1144,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "stocks AND oil", // +(+(text_sw:stock) +(text_sw:oil)) req("q", "stocks AND oil", // +(+(text_sw:stock) +(text_sw:oil))
"qf", "text_sw", "qf", "text_sw",
"mm", "50%", "mm", "50%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
@ -1124,6 +1152,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "oil gold +stocks", // +(((text_sw:oil) (text_sw:gold) +(text_sw:stock))~1) req("q", "oil gold +stocks", // +(((text_sw:oil) (text_sw:gold) +(text_sw:stock))~1)
"qf", "text_sw", "qf", "text_sw",
"mm", "50%", "mm", "50%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
@ -1131,6 +1160,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "(snake OR stocks) oil", req("q", "(snake OR stocks) oil",
"qf", "text_sw", "qf", "text_sw",
"mm", "100%", "mm", "100%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=2]"); , "*[count(//doc)=2]");
@ -1143,6 +1173,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"mm", "100%", "mm", "100%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=0]"); , "*[count(//doc)=0]");
assertQ("test minShouldMatch (top level optional with explicit OR without parens)", assertQ("test minShouldMatch (top level optional with explicit OR without parens)",
@ -1150,6 +1181,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"mm", "100%", "mm", "100%",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=0]"); , "*[count(//doc)=0]");
@ -1158,18 +1190,22 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "barbie OR (hair AND nonexistentword)", req("q", "barbie OR (hair AND nonexistentword)",
"qf", "text_sw", "qf", "text_sw",
"mm", "1<-1", "mm", "1<-1",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
} }
}
/* SOLR-8812 */ /* SOLR-8812 */
@Test @Test
public void testDefaultMM() throws Exception { public void testDefaultMM() throws Exception {
// Ensure MM is off when explicit operators (+/-/OR/NOT) are used and no explicit mm spec is specified. // Ensure MM is off when explicit operators (+/-/OR/NOT) are used and no explicit mm spec is specified.
for (String sow : Arrays.asList("true", "false")) {
assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%", assertQ("Explicit OR in query with no explicit mm and q.op=AND => mm = 0%",
req("q", "oil OR stocks", req("q", "oil OR stocks",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
assertQ("Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%", assertQ("Explicit 'or' in query with lowercaseOperators=true, no explicit mm and q.op=AND => mm = 0%",
@ -1177,39 +1213,46 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"lowercaseOperators", "true", "lowercaseOperators", "true",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "oil OR stocks", req("q", "oil OR stocks",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%",
req("q", "oil stocks", req("q", "oil stocks",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
assertQ("No operator in query with no explicit mm and q.op=AND => mm = 100%", assertQ("No operator in query with no explicit mm and q.op=AND => mm = 100%",
req("q", "oil stocks", req("q", "oil stocks",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%", assertQ("No operator in query with no explicit mm and q.op=OR => mm = 0%",
req("q", "oil stocks", req("q", "oil stocks",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=4]"); , "*[count(//doc)=4]");
assertQ("Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("Explicit '-' operator in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "hair ties -barbie", req("q", "hair ties -barbie",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
assertQ("Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("Explicit NOT in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "hair ties NOT barbie", req("q", "hair ties NOT barbie",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
@ -1217,12 +1260,14 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair ties -barbie", req("q", "hair ties -barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
req("q", "hair ties NOT barbie", req("q", "hair ties NOT barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
@ -1230,12 +1275,14 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair AND ties -barbie", req("q", "hair AND ties -barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%", assertQ("Explicit NOT in query with no explicit mm and q.op=OR => mm = 0%",
req("q", "hair AND ties -barbie", req("q", "hair AND ties -barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
@ -1243,23 +1290,27 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair AND ties barbie", req("q", "hair AND ties barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=2]"); , "*[count(//doc)=2]");
assertQ("No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%", assertQ("No explicit non-AND operator in query with no explicit mm and q.op=AND => mm = 100%",
req("q", "hair AND ties barbie", req("q", "hair AND ties barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "hair AND ties barbie", req("q", "hair AND ties barbie",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=2]"); , "*[count(//doc)=2]");
assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("No explicit non-AND operator in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "hair and ties barbie", req("q", "hair and ties barbie",
"qf", "text_sw", "qf", "text_sw",
"lowercaseOperators", "true", "lowercaseOperators", "true",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=2]"); , "*[count(//doc)=2]");
@ -1267,12 +1318,14 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair ties -barbie", req("q", "hair ties -barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
assertQ("Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%", assertQ("Explicit NOT in query with no explicit mm and q.op=AND => mm = 100%",
req("q", "hair ties NOT barbie", req("q", "hair ties NOT barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
@ -1280,17 +1333,20 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair OR ties barbie", req("q", "hair OR ties barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=3]"); , "*[count(//doc)=3]");
assertQ("Explicit OR in query with no explicit mm and q.op=OR => mm = 0%", assertQ("Explicit OR in query with no explicit mm and q.op=OR => mm = 0%",
req("q", "hair OR ties barbie", req("q", "hair OR ties barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "OR", "q.op", "OR",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=6]"); , "*[count(//doc)=6]");
assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%", assertQ("Explicit OR in query with no explicit mm and no explicit q.op => mm = 0%",
req("q", "hair OR ties barbie", req("q", "hair OR ties barbie",
"qf", "text_sw", "qf", "text_sw",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=6]"); , "*[count(//doc)=6]");
@ -1298,9 +1354,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
req("q", "hair ties +barbie", req("q", "hair ties +barbie",
"qf", "text_sw", "qf", "text_sw",
"q.op", "AND", "q.op", "AND",
"sow", sow,
"defType", "edismax") "defType", "edismax")
, "*[count(//doc)=1]"); , "*[count(//doc)=1]");
} }
}
public void testEdismaxSimpleExtension() throws SyntaxError { public void testEdismaxSimpleExtension() throws SyntaxError {
ModifiableSolrParams params = new ModifiableSolrParams(); ModifiableSolrParams params = new ModifiableSolrParams();
@ -1336,6 +1394,380 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
} }
// LUCENE-7533
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries());
try (SolrQueryRequest req = req()) {
final QParser qparser = QParser.getParser("{!edismax sow=false fq=text}blah blah)", req);
expectThrows(IllegalArgumentException.class, qparser::getQuery);
}
}
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
// The "text_sw" field has synonyms loaded from synonyms.txt
// retrieve the single document containing literal "wifi"
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wifi", "sow","true")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
// trigger the "wi fi => wifi" synonym
assertJQ(req("qf", "text_sw title", "defType","edismax", "q","wi fi", "sow","false")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
assertJQ(req("qf", "text_sw title", "defType","edismax", "q","wi fi", "sow","true")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "q","{!edismax sow=false}wi fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
assertJQ(req("df", "text_sw title", "q","{!edismax sow=true}wi fi")
, "/response/numFound==0"
);
assertJQ(req("df", "text_sw title", "q", "{!edismax}wi fi") // default sow=true
, "/response/numFound==0"
);
assertQ(req("qf", "name title",
"q", "barking curds of stigma",
"defType", "edismax",
"sow", "false",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]"
);
assertQ(req("qf", "name title",
"q", "barking curds of stigma",
"defType", "edismax",
"sow", "true",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]"
);
assertQ(req("qf", "name title",
"q", "barking curds of stigma",
"defType", "edismax",
"debugQuery", "true"), // Default sow=true
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:barking | title:barking))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:curds | title:curds))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:of | title:of))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((name:stigma | title:stigma))')]"
);
}
public void testSplitOnWhitespace_Different_Field_Analysis() throws Exception {
// When the *structure* of produced queries is different in each field,
// sow=true produces boolean-of-dismax query structure,
// and sow=false produces dismax-of-boolean query structure.
assertQ(req("qf", "text_sw title",
"q", "olive the other",
"defType", "edismax",
"sow", "true",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((text_sw:oliv | title:olive))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((title:the))')]",
"//str[@name='parsedquery'][contains(.,'DisjunctionMaxQuery((text_sw:other | title:other))')]"
);
assertQ(req("qf", "text_sw title",
"q", "olive the other",
"defType", "edismax",
"sow", "false",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'+DisjunctionMaxQuery(((text_sw:oliv text_sw:other) | (title:olive title:the title:other)))')]"
);
// When fields' analyses produce different query structures, mm processing is always done on the boolean query.
// sow=true produces (boolean-of-dismax)~<mm> query structure,
// and sow=false produces dismax-of-(boolean)~<mm> query structure.
assertQ(req("qf", "text_sw title",
"q", "olive the other",
"defType", "edismax",
"sow", "true",
"mm", "100%",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'+(DisjunctionMaxQuery((text_sw:oliv | title:olive)) DisjunctionMaxQuery((title:the)) DisjunctionMaxQuery((text_sw:other | title:other)))~3')]"
);
assertQ(req("qf", "text_sw title",
"q", "olive the other",
"defType", "edismax",
"sow", "false",
"mm", "100%",
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'+DisjunctionMaxQuery((((text_sw:oliv text_sw:other)~2) | ((title:olive title:the title:other)~3)))')]"
);
// When the *structure* of produced queries is the same in each field,
// sow=false/true produce the same boolean-of-dismax query structure
for (String sow : Arrays.asList("true", "false")) {
assertQ(req("qf", "text_sw title",
"q", "olive blah other",
"defType", "edismax",
"sow", sow,
"debugQuery", "true"),
"//str[@name='parsedquery'][contains(.,'"
+ "+(DisjunctionMaxQuery((text_sw:oliv | title:olive))"
+ " DisjunctionMaxQuery((text_sw:blah | title:blah))"
+ " DisjunctionMaxQuery((text_sw:other | title:other)))')]"
);
}
}
public void testOperatorsAndMultiWordSynonyms() throws Exception {
// The "text_sw" field has synonyms loaded from synonyms.txt
// retrieve the single document containing literal "wifi"
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wifi", "sow","true")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
// trigger the "wi fi => wifi" synonym
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi", "sow","false")
, "/response/numFound==1"
, "/response/docs/[0]/id=='72'"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi* fi", "sow","false")
, "/response/numFound==2" // matches because wi* matches "wifi" in one doc and "with" in another
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","w? fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi~1 fi", "sow","false")
, "/response/numFound==4" // matches because wi~1 matches ti (stemmed "ties")
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^2 fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi^=2 fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi +fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi -fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi !fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi*", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi?", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi~1", "sow","false")
, "/response/numFound==4" // matches because fi~1 matches ti (stemmed "ties")
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^2", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi^=2", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi text_sw:fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi NOT fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AND ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi && ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) AND ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM AND (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) && ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","ATM && (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi OR NotThereAtAll", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi || NotThereAtAll", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) OR NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll OR (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi) || NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NotThereAtAll || (wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi\" fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi \"fi\"", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi) fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi (fi)", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","/wi/ fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi /fi/", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+(wi fi)", "sow","false")
, "/response/numFound==1"
);
Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json")));
int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString());
int allDocsExceptOne = totalDocs - 1;
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-(wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!(wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT (wi fi)", "sow","false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the text_sw field
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","(wi fi)^=2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:(wi fi)", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","+ATM wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","-NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","!NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT ATM wi fi", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","NOT NotThereAtAll wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT* wi fi", "sow","false")
, "/response/numFound==2"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT? wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"ATM\" wi fi", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi +ATM", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi -NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi !NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT ATM", "sow","false")
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi NOT NotThereAtAll", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT*", "sow","false")
, "/response/numFound==2"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT?", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi \"ATM\"", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","\"wi fi\"~2", "sow","false")
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","text_sw:\"wi fi\"", "sow","false")
, "/response/numFound==1"
);
}
private boolean containsClause(Query query, String field, String value, private boolean containsClause(Query query, String field, String value,
int boost, boolean fuzzy) { int boost, boolean fuzzy) {


@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Arrays;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
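/**
 * Tests multi-word synonym matching for the "lucene" and edismax query parsers under the
 * sow (split-on-whitespace) request param. Assumes schema-multiword-synonyms.xml maps
 * US, U.S., USA, U.S.A., United States, and United States of America to one another,
 * presumably via a query-time graph-aware synonym filter.
 */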
public class TestMultiWordSynonyms extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml", "schema-multiword-synonyms.xml");
index();
}
private static void index() throws Exception {
assertU(adoc("id","1", "text","USA Today"));
assertU(adoc("id","2", "text","A dynamic US economy"));
assertU(adoc("id","3", "text","The United States of America's 50 states"));
assertU(adoc("id","4", "text","Party in the U.S.A."));
assertU(adoc("id","5", "text","These United States"));
assertU(adoc("id","6", "text","America United of States"));
assertU(adoc("id","7", "text","States United"));
assertU(commit());
}
@Test
public void testNonPhrase() throws Exception {
// Don't split on whitespace (sow=false)
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.", "United States", "United States of America")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "false")
, "/response/numFound==7"
);
}
}
// Split on whitespace (sow=true)
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "true")
, "/response/numFound==7"
);
}
}
for (String q : Arrays.asList("United States", "United States of America")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "true")
, "/response/numFound==4"
);
}
}
}
@Test
public void testPhrase() throws Exception {
for (String q : Arrays.asList
("\"US\"", "\"U.S.\"", "\"USA\"", "\"U.S.A.\"", "\"United States\"", "\"United States of America\"")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
for (String sow : Arrays.asList("true", "false")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", sow)
, "/response/numFound==5"
);
}
}
}
}
}

View File

@ -16,7 +16,12 @@
 */
package org.apache.solr.search;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.search.BooleanClause;
@ -28,12 +33,15 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TextField;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
public class TestSolrQueryParser extends SolrTestCaseJ4 {
@ -57,6 +65,8 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
assertU(adoc("id", "12", "eee_s", "X")); assertU(adoc("id", "12", "eee_s", "X"));
assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash")); assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash"));
assertU(adoc("id", "20", "syn", "wifi ATM"));
assertU(commit());
}
@ -208,14 +218,26 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
QParser qParser;
Query q,qq;
Map<String, String> sowFalseParamsMap = new HashMap<>();
sowFalseParamsMap.put("sow", "false");
Map<String, String> sowTrueParamsMap = new HashMap<>();
sowTrueParamsMap.put("sow", "true");
List<MapSolrParams> paramMaps = Arrays.asList
(new MapSolrParams(Collections.emptyMap()), // no sow param (i.e. the default sow value)
new MapSolrParams(sowFalseParamsMap),
new MapSolrParams(sowTrueParamsMap));
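// The filter/terms-query optimizations below should not depend on the sow setting,
// so the same assertions are run for each params variant.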
for (MapSolrParams params : paramMaps) {
// relevance query should not be a filter
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
// small filter query should still use BooleanQuery
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setParams(params);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
@ -224,30 +246,35 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
// large relevancy query should use BooleanQuery
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(26, ((BooleanQuery)q).clauses().size());
// large filter query should use TermsQuery
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
// large numeric filter query should use TermsQuery (for trie fields)
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(20, ((TermInSetQuery)q).getTermData().size());
// for point fields large filter query should use PointInSetQuery
qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertTrue(q instanceof PointInSetQuery);
assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size());
// a filter() clause inside a relevancy query should be able to use a TermsQuery
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
@ -259,11 +286,12 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
qq = ((FilterQuery)qq).getQuery();
}
assertEquals(26, ((TermInSetQuery) qq).getTermData().size());
// test mixed boolean query, including quotes (which shouldn't matter)
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(4, ((BooleanQuery)q).clauses().size());
qq = null;
@ -281,13 +309,14 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
}
qParser = QParser.getParser(sb.toString(), req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
}
}
req.close();
}
@ -306,6 +335,10 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
assertJQ(req("q","*:*", "fq", q)
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "false")
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "true")
,"/response/numFound==6");
}
@Test
@ -540,4 +573,400 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
req.close();
}
// LUCENE-7533
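// sow=false requires graph-aware query construction, which this parser does not support in
// combination with autoGeneratePhraseQueries=true on the field type, so getQuery() is
// expected to reject the combination below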
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries());
try (SolrQueryRequest req = req()) {
final QParser qparser = QParser.getParser("{!lucene sow=false qf=text}blah blah", req);
expectThrows(QueryParserConfigurationException.class, qparser::getQuery);
}
}
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
, "/response/numFound==0"
);
}
public void testSplitOnWhitespace_Comments() throws Exception {
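// Embedded C-style comments in the query text should not break multi-word synonym
// matching when sow=false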
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", " /* foo */ wi fi /* bar */", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", " /* foo */ wi /* bar */ fi /* baz */", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ /* bar */ fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi fi /* bar */")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi /* bar */ fi /* baz */")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ /* bar */ fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi fi /* bar */")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi /* bar */ fi /* baz */")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ /* bar */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi fi /* bar */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi /* bar */ fi /* baz */") // default sow=true
, "/response/numFound==0"
);
}
public void testOperatorsAndMultiWordSynonyms() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "+wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "-wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "!wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi* fi", "sow", "false") // matches because wi* matches wifi
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "w? fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi~1 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi^2 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi^=2 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi +fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi -fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi !fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi*", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi?", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi~1", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi^2", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi^=2", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "syn:wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi syn:fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NOT wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi NOT fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi AND ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "ATM AND wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi && ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "ATM && wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi) AND ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "ATM AND (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi) && ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "ATM && (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi OR NotThereAtAll", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll OR wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi || NotThereAtAll", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll || wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi) OR NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll OR (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi) || NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll || (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"wi\" fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi \"fi\"", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi) fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi (fi)", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/wi/ fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /fi/", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "+(wi fi)", "sow", "false")
, "/response/numFound==1"
);
Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json")));
int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString());
int allDocsExceptOne = totalDocs - 1;
assertJQ(req("df", "syn", "q", "-(wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "!(wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "NOT (wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "(wi fi)^2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi)^=2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "syn:(wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "+ATM wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "-ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "-NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "!ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "!NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NOT ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NOT NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "AT* wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "AT? wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"ATM\" wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi +ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi -ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi -NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi !ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi !NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi NOT ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi NOT NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi AT*", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi AT?", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi \"ATM\"", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"wi fi\"~2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "syn:\"wi fi\"", "sow", "false")
, "/response/numFound==1"
);
}
}