SOLR-9185: Solr's edismax and Lucene/standard query parsers should optionally not split on whitespace before sending terms to analysis

Author: Steve Rowe
Date: 2017-03-16 19:41:37 -04:00
Parent: 4ee7fc3890
Commit: d1b2fb33ef
16 changed files with 2257 additions and 603 deletions

File: solr/CHANGES.txt

@ -202,6 +202,13 @@ New Features
requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that requires "stored" and must not be multiValued. It's intended for fields that might have very large values so that
they don't get cached in memory. (David Smiley) they don't get cached in memory. (David Smiley)
* SOLR-9185: Solr's edismax and "Lucene"/standard query parsers will no longer split on whitespace before sending
terms to analysis, if given the "sow=false" request param ("sow"=>"split on whitespace"). This enables multi-term
source synonyms to match at query-time using SynonymGraphFilterFactory; other analysis components will also now
work at query time, e.g. ShingleFilterFactory. By default, and when the "sow=true" param is specified, these
parsers' behavior remains the same: queries will be split on whitespace before sending individual terms to analysis.
(Steve Rowe)
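
For illustration (not part of this commit): a minimal SolrJ sketch of the new request param. The collection ("techproducts") and field ("name") are placeholders, and it assumes the field's query analyzer includes SynonymGraphFilterFactory with a multi-term rule such as "wi fi => wifi"; with sow=false the whole string "wi fi network" reaches that analyzer in one pass, so the rule can fire.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class SowFalseExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build()) {
      SolrQuery query = new SolrQuery("wi fi network");
      query.set("defType", "edismax");
      query.set("qf", "name");
      query.set("sow", "false");  // do not pre-split on whitespace; let the query analyzer see the full string
      QueryResponse rsp = client.query(query);
      System.out.println("numFound: " + rsp.getResults().getNumFound());
    }
  }
}
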
Bug Fixes
----------------------

File: solr/core/src/java/org/apache/solr/parser/QueryParser.java (JavaCC-generated from QueryParser.jj)

@ -3,13 +3,17 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.apache.solr.search.QParser;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants { public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
@ -17,9 +21,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
*/ */
static public enum Operator { OR, AND } static public enum Operator { OR, AND }
public QueryParser(Version matchVersion, String defaultField, QParser parser) { /** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
public QueryParser(String defaultField, QParser parser) {
this(new FastCharStream(new StringReader(""))); this(new FastCharStream(new StringReader("")));
init(matchVersion, defaultField, parser); init(defaultField, parser);
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}
/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
} }
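
A note on the new newFieldQuery() override above: when either the global autoGeneratePhraseQueries setting or the per-field flag is true while splitOnWhitespace is false, the parser fails fast with QueryParserConfigurationException. Auto-phrasing was designed around per-whitespace-chunk analysis, so its meaning is unclear once the whole multi-word input is analyzed in a single pass; rejecting the combination, rather than silently producing a surprising query, appears to be the intent here.
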
// * Query ::= ( Clause )* // * Query ::= ( Clause )*
@ -96,13 +135,38 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
final public Query Query(String field) throws ParseException, SyntaxError { final public Query Query(String field) throws ParseException, SyntaxError {
List<BooleanClause> clauses = new ArrayList<BooleanClause>(); List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null; Query q;
int conj, mods; int conj, mods;
mods = Modifiers(); if (jj_2_1(2)) {
q = Clause(field); MultiTerm(field, clauses);
addClause(clauses, CONJ_NONE, mods, q); } else {
if (mods == MOD_NONE) switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
firstQuery=q; case NOT:
case PLUS:
case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case LPARAMS:
case FILTER:
case NUMBER:
mods = Modifiers();
q = Clause(field);
addClause(clauses, CONJ_NONE, mods, q);
break;
default:
jj_la1[4] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
label_1: label_1:
while (true) { while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -127,19 +191,50 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
; ;
break; break;
default: default:
jj_la1[4] = jj_gen; jj_la1[5] = jj_gen;
break label_1; break label_1;
} }
conj = Conjunction(); if (jj_2_2(2)) {
mods = Modifiers(); MultiTerm(field, clauses);
q = Clause(field); } else {
addClause(clauses, conj, mods, q); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
} case AND:
if (clauses.size() == 1 && firstQuery != null) case OR:
{if (true) return rawToNormal(firstQuery);} case NOT:
else { case PLUS:
{if (true) return getBooleanQuery(clauses);} case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case LPARAMS:
case FILTER:
case NUMBER:
conj = Conjunction();
mods = Modifiers();
q = Clause(field);
addClause(clauses, conj, mods, q);
break;
default:
jj_la1[6] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
} }
}
if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
Query firstQuery = clauses.get(0).getQuery();
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
{if (true) return rawToNormal(firstQuery);}
}
}
{if (true) return getBooleanQuery(clauses);}
throw new Error("Missing return statement in function"); throw new Error("Missing return statement in function");
} }
@ -148,20 +243,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
Token fieldToken=null, boost=null; Token fieldToken=null, boost=null;
Token localParams=null; Token localParams=null;
int flags = 0; int flags = 0;
if (jj_2_1(2)) { if (jj_2_3(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM: case TERM:
fieldToken = jj_consume_token(TERM); fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON); jj_consume_token(COLON);
field=discardEscapeChar(fieldToken.image); field = discardEscapeChar(fieldToken.image);
break; break;
case STAR: case STAR:
jj_consume_token(STAR); jj_consume_token(STAR);
jj_consume_token(COLON); jj_consume_token(COLON);
field="*"; field = "*";
break; break;
default: default:
jj_la1[5] = jj_gen; jj_la1[7] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -191,7 +286,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[6] = jj_gen; jj_la1[8] = jj_gen;
; ;
} }
break; break;
@ -206,10 +301,10 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[7] = jj_gen; jj_la1[9] = jj_gen;
; ;
} }
q=getFilter(q); restoreFlags(flags); q=getFilter(q); restoreFlags(flags);
break; break;
case LPARAMS: case LPARAMS:
localParams = jj_consume_token(LPARAMS); localParams = jj_consume_token(LPARAMS);
@ -219,17 +314,17 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[8] = jj_gen; jj_la1[10] = jj_gen;
; ;
} }
q=getLocalParams(field, localParams.image); q=getLocalParams(field, localParams.image);
break; break;
default: default:
jj_la1[9] = jj_gen; jj_la1[11] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
{if (true) return handleBoost(q, boost);} {if (true) return handleBoost(q, boost);}
throw new Error("Missing return statement in function"); throw new Error("Missing return statement in function");
} }
@ -278,35 +373,48 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
term.image = term.image.substring(0,1); term.image = term.image.substring(0,1);
break; break;
default: default:
jj_la1[10] = jj_gen; jj_la1[12] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[11] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT: case CARAT:
jj_consume_token(CARAT); case FUZZY_SLOP:
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[13] = jj_gen;
;
}
break;
case FUZZY_SLOP: case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP); fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true; fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[14] = jj_gen;
;
}
break; break;
default: default:
jj_la1[12] = jj_gen; jj_la1[15] = jj_gen;
; jj_consume_token(-1);
throw new ParseException();
} }
break; break;
default: default:
jj_la1[13] = jj_gen; jj_la1[16] = jj_gen;
; ;
} }
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
@ -316,13 +424,13 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_START: case RANGEIN_START:
jj_consume_token(RANGEIN_START); jj_consume_token(RANGEIN_START);
startInc=true; startInc = true;
break; break;
case RANGEEX_START: case RANGEEX_START:
jj_consume_token(RANGEEX_START); jj_consume_token(RANGEEX_START);
break; break;
default: default:
jj_la1[14] = jj_gen; jj_la1[17] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -334,7 +442,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
goop1 = jj_consume_token(RANGE_QUOTED); goop1 = jj_consume_token(RANGE_QUOTED);
break; break;
default: default:
jj_la1[15] = jj_gen; jj_la1[18] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -343,7 +451,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_consume_token(RANGE_TO); jj_consume_token(RANGE_TO);
break; break;
default: default:
jj_la1[16] = jj_gen; jj_la1[19] = jj_gen;
; ;
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@ -354,20 +462,20 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
goop2 = jj_consume_token(RANGE_QUOTED); goop2 = jj_consume_token(RANGE_QUOTED);
break; break;
default: default:
jj_la1[17] = jj_gen; jj_la1[20] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_END: case RANGEIN_END:
jj_consume_token(RANGEIN_END); jj_consume_token(RANGEIN_END);
endInc=true; endInc = true;
break; break;
case RANGEEX_END: case RANGEEX_END:
jj_consume_token(RANGEEX_END); jj_consume_token(RANGEEX_END);
break; break;
default: default:
jj_la1[18] = jj_gen; jj_la1[21] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -377,46 +485,71 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
boost = jj_consume_token(NUMBER); boost = jj_consume_token(NUMBER);
break; break;
default: default:
jj_la1[19] = jj_gen; jj_la1[22] = jj_gen;
; ;
} }
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); q = getRangeQuery(getField(field),
startOpen ? null : discardEscapeChar(goop1.image),
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break; break;
case QUOTED: case QUOTED:
term = jj_consume_token(QUOTED); term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
break;
default:
jj_la1[20] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT: case CARAT:
jj_consume_token(CARAT); case FUZZY_SLOP:
boost = jj_consume_token(NUMBER); switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[23] = jj_gen;
;
}
break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[24] = jj_gen;
;
}
break;
default:
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break; break;
default: default:
jj_la1[21] = jj_gen; jj_la1[26] = jj_gen;
; ;
} }
q = handleQuotedTerm(getField(field), term, fuzzySlop); q = handleQuotedTerm(getField(field), term, fuzzySlop);
break; break;
default: default:
jj_la1[22] = jj_gen; jj_la1[27] = jj_gen;
jj_consume_token(-1); jj_consume_token(-1);
throw new ParseException(); throw new ParseException();
} }
@ -424,6 +557,44 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
throw new Error("Missing return statement in function"); throw new Error("Missing return statement in function");
} }
final public void MultiTerm(String field, List<BooleanClause> clauses) throws ParseException, SyntaxError {
Token text;
List<String> terms = null;
text = jj_consume_token(TERM);
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms = new ArrayList<String>();
terms.add(discardEscapeChar(text.image));
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
} else {
jj_consume_token(-1);
throw new ParseException();
}
label_2:
while (true) {
text = jj_consume_token(TERM);
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms.add(discardEscapeChar(text.image));
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
;
} else {
break label_2;
}
}
if (splitOnWhitespace == false) {
Query q = getFieldQuery(getField(field), terms, true);
addMultiTermClause(clauses, q);
}
}
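
The MultiTerm() production above is the heart of the change: for each run of adjacent TERM tokens it either emits one clause per term (splitOnWhitespace == true, the historical behavior) or buffers the terms and makes a single getFieldQuery(field, List<String>, raw) call so the analyzer sees the whole run at once. A minimal standalone sketch of that decision, using an invented input string and no Solr classes:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Illustrative only; not part of the generated parser.
public class MultiTermSketch {
  static List<String> analysisInputs(String adjacentTerms, boolean splitOnWhitespace) {
    String[] terms = adjacentTerms.split("\\s+");
    List<String> inputs = new ArrayList<>();
    if (splitOnWhitespace) {
      inputs.addAll(Arrays.asList(terms));        // one getFieldQuery()/analysis call per term
    } else {
      inputs.add(String.join(" ", terms));        // one call; the analyzer sees the whole run of terms
    }
    return inputs;
  }

  public static void main(String[] args) {
    System.out.println(analysisInputs("wi fi network", true));   // [wi, fi, network]
    System.out.println(analysisInputs("wi fi network", false));  // [wi fi network]
  }
}
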
private boolean jj_2_1(int xla) { private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token; jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); } try { return !jj_3_1(); }
@ -431,28 +602,76 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
finally { jj_save(0, xla); } finally { jj_save(0, xla); }
} }
private boolean jj_3R_3() { private boolean jj_2_2(int xla) {
if (jj_scan_token(STAR)) return true; jj_la = xla; jj_lastpos = jj_scanpos = token;
if (jj_scan_token(COLON)) return true; try { return !jj_3_2(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(1, xla); }
}
private boolean jj_2_3(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_3(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(2, xla); }
}
private boolean jj_3R_7() {
if (jj_scan_token(TERM)) return true;
return false; return false;
} }
private boolean jj_3R_2() { private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true; if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true; if (jj_scan_token(COLON)) return true;
return false; return false;
} }
private boolean jj_3_1() { private boolean jj_3_1() {
if (jj_3R_3()) return true;
return false;
}
private boolean jj_3R_6() {
return false;
}
private boolean jj_3R_3() {
if (jj_scan_token(TERM)) return true;
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
if (jj_3R_7()) return true;
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
private boolean jj_3_3() {
Token xsp; Token xsp;
xsp = jj_scanpos; xsp = jj_scanpos;
if (jj_3R_2()) { if (jj_3R_4()) {
jj_scanpos = xsp; jj_scanpos = xsp;
if (jj_3R_3()) return true; if (jj_3R_5()) return true;
} }
return false; return false;
} }
private boolean jj_3_2() {
if (jj_3R_3()) return true;
return false;
}
private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */ /** Generated Token Manager. */
public QueryParserTokenManager token_source; public QueryParserTokenManager token_source;
/** Current token. */ /** Current token. */
@ -462,8 +681,11 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
private int jj_ntk; private int jj_ntk;
private Token jj_scanpos, jj_lastpos; private Token jj_scanpos, jj_lastpos;
private int jj_la; private int jj_la;
/** Whether we are looking ahead. */
private boolean jj_lookingAhead = false;
private boolean jj_semLA;
private int jj_gen; private int jj_gen;
final private int[] jj_la1 = new int[23]; final private int[] jj_la1 = new int[28];
static private int[] jj_la1_0; static private int[] jj_la1_0;
static private int[] jj_la1_1; static private int[] jj_la1_1;
static { static {
@ -471,12 +693,12 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la1_init_1(); jj_la1_init_1();
} }
private static void jj_la1_init_0() { private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x4000000,0x800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0xfb440000,}; jj_la1_0 = new int[] {0x6000,0x6000,0x38000,0x38000,0xfb4f8000,0xfb4fe000,0xfb4fe000,0x2400000,0x800000,0x800000,0x800000,0xfb4c0000,0x3a440000,0x4000000,0x800000,0x4800000,0x4800000,0xc0000000,0x0,0x0,0x0,0x0,0x800000,0x4000000,0x800000,0x4800000,0x4800000,0xfb440000,};
} }
private static void jj_la1_init_1() { private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x4,}; jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x7,0x7,0x7,0x0,0x0,0x0,0x0,0x7,0x4,0x0,0x0,0x0,0x0,0x0,0xc0,0x8,0xc0,0x30,0x0,0x0,0x0,0x0,0x0,0x4,};
} }
final private JJCalls[] jj_2_rtns = new JJCalls[1]; final private JJCalls[] jj_2_rtns = new JJCalls[3];
private boolean jj_rescan = false; private boolean jj_rescan = false;
private int jj_gc = 0; private int jj_gc = 0;
@ -486,7 +708,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -495,8 +717,9 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token_source.ReInit(stream); token_source.ReInit(stream);
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_lookingAhead = false;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -506,7 +729,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -516,7 +739,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
token = new Token(); token = new Token();
jj_ntk = -1; jj_ntk = -1;
jj_gen = 0; jj_gen = 0;
for (int i = 0; i < 23; i++) jj_la1[i] = -1; for (int i = 0; i < 28; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
} }
@ -579,7 +802,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
/** Get the specific Token. */ /** Get the specific Token. */
final public Token getToken(int index) { final public Token getToken(int index) {
Token t = token; Token t = jj_lookingAhead ? jj_scanpos : token;
for (int i = 0; i < index; i++) { for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next; if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken(); else t = t.next = token_source.getNextToken();
@ -633,7 +856,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
la1tokens[jj_kind] = true; la1tokens[jj_kind] = true;
jj_kind = -1; jj_kind = -1;
} }
for (int i = 0; i < 23; i++) { for (int i = 0; i < 28; i++) {
if (jj_la1[i] == jj_gen) { if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) { for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) { if ((jj_la1_0[i] & (1<<j)) != 0) {
@ -672,7 +895,7 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
private void jj_rescan_token() { private void jj_rescan_token() {
jj_rescan = true; jj_rescan = true;
for (int i = 0; i < 1; i++) { for (int i = 0; i < 3; i++) {
try { try {
JJCalls p = jj_2_rtns[i]; JJCalls p = jj_2_rtns[i];
do { do {
@ -680,6 +903,8 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
switch (i) { switch (i) {
case 0: jj_3_1(); break; case 0: jj_3_1(); break;
case 1: jj_3_2(); break;
case 2: jj_3_3(); break;
} }
} }
p = p.next; p = p.next;

File: solr/core/src/java/org/apache/solr/parser/QueryParser.jj

@ -27,18 +27,17 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Locale; import java.util.Set;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser; import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase { public class QueryParser extends SolrQueryParserBase {
@ -46,9 +45,44 @@ public class QueryParser extends SolrQueryParserBase {
*/ */
static public enum Operator { OR, AND } static public enum Operator { OR, AND }
public QueryParser(Version matchVersion, String defaultField, QParser parser) { /** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;
public QueryParser(String defaultField, QParser parser) {
this(new FastCharStream(new StringReader(""))); this(new FastCharStream(new StringReader("")));
init(matchVersion, defaultField, parser); init(defaultField, parser);
}
/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}
/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}
private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
} }
} }
@ -63,17 +97,15 @@ TOKEN_MGR_DECLS : {
/* ***************** */
<*> TOKEN : {
  <#_NUM_CHAR: ["0"-"9"] >
| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
                         | <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" | "/" | "!") >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
| <#_SQUOTED_CHAR: ( ~[ "'", "\\" ] | <_ESCAPED_CHAR> ) >
}

<DEFAULT, COMMENT> SKIP : {
@ -93,44 +125,43 @@ TOKEN_MGR_DECLS : {
< <_WHITESPACE>>
}

<DEFAULT> TOKEN : {
  <AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <STAR: "*" >
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~["*","/"] | "\\/" ) (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
// TODO: consider using token states instead of inlining SQUOTED
// | <SQUOTED: "'" (<_SQUOTED_CHAR>)* "'">
// | <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | <SQUOTED> | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
| <LPARAMS: ("{!" ( (<_WHITESPACE>)* (~["=","}"])+ ( "=" (<QUOTED> | ("'" (<_SQUOTED_CHAR>)* "'") | (~[" ","}"])+ )? )? )* "}")+ (~[")"," ","\t","\n","{","^"])* >
| <FILTER: "filter(" >
}

<Boost> TOKEN : {
  <NUMBER: ("=")?("-")? (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<Range> TOKEN : {
  <RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
}
// * Query ::= ( Clause )* // * Query ::= ( Clause )*
@ -160,8 +191,7 @@ int Modifiers() : {
} }
// This makes sure that there is no garbage after the query string // This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) throws SyntaxError : Query TopLevelQuery(String field) throws SyntaxError : {
{
Query q; Query q;
} }
{ {
@ -174,27 +204,31 @@ Query TopLevelQuery(String field) throws SyntaxError :
Query Query(String field) throws SyntaxError : Query Query(String field) throws SyntaxError :
{ {
List<BooleanClause> clauses = new ArrayList<BooleanClause>(); List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null; Query q;
int conj, mods; int conj, mods;
} }
{ {
mods=Modifiers() q=Clause(field)
{
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE)
firstQuery=q;
}
( (
conj=Conjunction() mods=Modifiers() q=Clause(field) LOOKAHEAD(2)
{ addClause(clauses, conj, mods, q); } MultiTerm(field, clauses)
| mods=Modifiers() q=Clause(field)
{ addClause(clauses, CONJ_NONE, mods, q); }
)
(
LOOKAHEAD(2)
MultiTerm(field, clauses)
| conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); }
)* )*
{ {
if (clauses.size() == 1 && firstQuery != null) if (clauses.size() == 1 && clauses.get(0).getOccur() == BooleanClause.Occur.SHOULD) {
Query firstQuery = clauses.get(0).getQuery();
if ( ! (firstQuery instanceof RawQuery) || ((RawQuery)firstQuery).getTermCount() == 1) {
return rawToNormal(firstQuery); return rawToNormal(firstQuery);
else {
return getBooleanQuery(clauses);
} }
} }
return getBooleanQuery(clauses);
}
} }
Query Clause(String field) throws SyntaxError : { Query Clause(String field) throws SyntaxError : {
@ -204,26 +238,22 @@ Query Clause(String field) throws SyntaxError : {
int flags = 0; int flags = 0;
} }
{ {
[ [
LOOKAHEAD(2) LOOKAHEAD(2)
( (
fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);} fieldToken=<TERM> <COLON> { field = discardEscapeChar(fieldToken.image); }
| <STAR> <COLON> {field="*";} | <STAR> <COLON> { field = "*"; }
) )
] ]
( (
q=Term(field) q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? | <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
| (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)? { q=getFilter(q); restoreFlags(flags); } ) | (<FILTER> { flags=startFilter(); } q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ] { q=getFilter(q); restoreFlags(flags); } )
| (localParams = <LPARAMS> (<CARAT> boost=<NUMBER>)? { q=getLocalParams(field, localParams.image); } ) | (localParams = <LPARAMS> [ <CARAT> boost=<NUMBER> ] { q=getLocalParams(field, localParams.image); } )
) )
{ return handleBoost(q, boost); } { return handleBoost(q, boost); }
} }
Query Term(String field) throws SyntaxError : { Query Term(String field) throws SyntaxError : {
Token term, boost=null, fuzzySlop=null, goop1, goop2; Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false; boolean prefix = false;
@ -245,40 +275,78 @@ Query Term(String field) throws SyntaxError : {
| term=<NUMBER> | term=<NUMBER>
| term=<BAREOPER> { term.image = term.image.substring(0,1); } | term=<BAREOPER> { term.image = term.image.substring(0,1); }
) )
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] [
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ] <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
]
{ q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }
| ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
[ <RANGE_TO> ]
( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{ {
q = handleBareTokenQuery(getField(field), term, fuzzySlop, prefix, wildcard, fuzzy, regexp); boolean startOpen=false;
} boolean endOpen=false;
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> ) if (goop1.kind == RANGE_QUOTED) {
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> ) goop1.image = goop1.image.substring(1, goop1.image.length()-1);
[ <RANGE_TO> ] } else if ("*".equals(goop1.image)) {
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> ) startOpen=true;
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
[ <CARAT> boost=<NUMBER> ]
{
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(getField(field), startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{
q = handleQuotedTerm(getField(field), term, fuzzySlop);
} }
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(getField(field),
startOpen ? null : discardEscapeChar(goop1.image),
endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}
| term=<QUOTED>
[
<CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
]
{ q = handleQuotedTerm(getField(field), term, fuzzySlop); }
) )
{ return handleBoost(q, boost); }
}
void MultiTerm(String field, List<BooleanClause> clauses) throws SyntaxError : {
Token text;
List<String> terms = null;
}
{
text=<TERM>
{ {
return handleBoost(q, boost); if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms = new ArrayList<String>();
terms.add(discardEscapeChar(text.image));
}
}
// Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
(
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
text=<TERM>
{
if (splitOnWhitespace) {
Query q = getFieldQuery(getField(field), discardEscapeChar(text.image), false, true);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else {
terms.add(discardEscapeChar(text.image));
}
}
)+
{
if (splitOnWhitespace == false) {
Query q = getFieldQuery(getField(field), terms, true);
addMultiTermClause(clauses, q);
}
} }
} }
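
On the two LOOKAHEAD predicates in MultiTerm(): a further TERM token is appended to the run only if the token that follows it is outside the disallowedPostMultiTerm set (COLON, STAR, FUZZY_SLOP, CARAT, AND, OR). That keeps field prefixes ("title:foo"), boosts ("foo^2"), fuzzy/slop suffixes ("foo~2") and explicit boolean operators out of the run, so those terms are still handled by the Clause/Term productions; only plain whitespace-adjacent bare terms are grouped.
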

File: solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java

@ -18,10 +18,12 @@ package org.apache.solr.parser;
import java.io.StringReader; import java.io.StringReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.reverse.ReverseStringFilter;
@ -41,7 +43,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations; import org.apache.lucene.util.automaton.Operations;
@ -59,7 +60,7 @@ import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SyntaxError; import org.apache.solr.search.SyntaxError;
/** This class is overridden by QueryParser in QueryParser.jj
 * and acts to separate the majority of the Java code from the .jj grammar file.
 */
public abstract class SolrQueryParserBase extends QueryBuilder { public abstract class SolrQueryParserBase extends QueryBuilder {
@ -83,7 +84,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public static final Operator OR_OPERATOR = Operator.OR; public static final Operator OR_OPERATOR = Operator.OR;
/** The default operator that parser uses to combine query terms */ /** The default operator that parser uses to combine query terms */
Operator operator = OR_OPERATOR; protected Operator operator = OR_OPERATOR;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE; MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
boolean allowLeadingWildcard = true; boolean allowLeadingWildcard = true;
@ -133,16 +134,32 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// internal: A simple raw fielded query // internal: A simple raw fielded query
public static class RawQuery extends Query { public static class RawQuery extends Query {
final SchemaField sfield; final SchemaField sfield;
final String externalVal; private final List<String> externalVals;
public RawQuery(SchemaField sfield, String externalVal) { public RawQuery(SchemaField sfield, String externalVal) {
this(sfield, Collections.singletonList(externalVal));
}
public RawQuery(SchemaField sfield, List<String> externalVals) {
this.sfield = sfield; this.sfield = sfield;
this.externalVal = externalVal; this.externalVals = externalVals;
}
public int getTermCount() {
return externalVals.size();
}
public List<String> getExternalVals() {
return externalVals;
}
public String getJoinedExternalVal() {
return externalVals.size() == 1 ? externalVals.get(0) : String.join(" ", externalVals);
} }
@Override @Override
public String toString(String field) { public String toString(String field) {
return "RAW(" + field + "," + externalVal + ")"; return "RAW(" + field + "," + getJoinedExternalVal() + ")";
} }
@Override @Override
@ -165,7 +182,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError; public abstract Query TopLevelQuery(String field) throws ParseException, SyntaxError;
public void init(Version matchVersion, String defaultField, QParser parser) { public void init(String defaultField, QParser parser) {
this.schema = parser.getReq().getSchema(); this.schema = parser.getReq().getSchema();
this.parser = parser; this.parser = parser;
this.flags = parser.getFlags(); this.flags = parser.getFlags();
@ -406,17 +423,30 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
throw new RuntimeException("Clause cannot be both required and prohibited"); throw new RuntimeException("Clause cannot be both required and prohibited");
} }
/**
* Called from QueryParser's MultiTerm rule.
* Assumption: no conjunction or modifiers (conj == CONJ_NONE and mods == MOD_NONE)
*/
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
// We might have been passed a null query; the term might have been
// filtered away by the analyzer.
if (q == null) {
return;
}
clauses.add(newBooleanClause(q, operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD));
}
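
addMultiTermClause() applies the parser's default operator to the clause built from a whitespace-adjacent run of terms: Occur.MUST under the AND default operator (q.op=AND), Occur.SHOULD otherwise; a null query (for example, when every term in the run was removed by the analyzer as a stopword) is simply skipped.
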
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError { boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); return createFieldQuery(analyzer, occur, field, queryText,
quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop);
} }
/** /**
* Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * Base implementation delegates to {@link #getFieldQuery(String,String,boolean,boolean)}.
* This method may be overridden, for example, to return * This method may be overridden, for example, to return
* a SpanNearQuery instead of a PhraseQuery. * a SpanNearQuery instead of a PhraseQuery.
* *
@ -440,7 +470,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
query = builder.build(); query = builder.build();
} else if (query instanceof MultiPhraseQuery) { } else if (query instanceof MultiPhraseQuery) {
MultiPhraseQuery mpq = (MultiPhraseQuery)query; MultiPhraseQuery mpq = (MultiPhraseQuery)query;
if (slop != mpq.getSlop()) { if (slop != mpq.getSlop()) {
query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
} }
@ -492,7 +522,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
// FuzzyQuery doesn't yet allow constant score rewrite // FuzzyQuery doesn't yet allow constant score rewrite
String text = term.text(); String text = term.text();
int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity,
text.codePointCount(0, text.length())); text.codePointCount(0, text.length()));
return new FuzzyQuery(term,numEdits,prefixLength); return new FuzzyQuery(term,numEdits,prefixLength);
} }
@ -536,14 +566,21 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
SchemaField sfield = null; SchemaField sfield = null;
List<String> fieldValues = null; List<RawQuery> fieldValues = null;
boolean onlyRawQueries = true;
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && clauses.size() > TERMS_QUERY_THRESHOLD; int allRawQueriesTermCount = 0;
int clausesAdded = 0; for (BooleanClause clause : clauses) {
if (clause.getQuery() instanceof RawQuery) {
allRawQueriesTermCount += ((RawQuery)clause.getQuery()).getTermCount();
} else {
onlyRawQueries = false;
}
}
boolean useTermsQuery = (flags & QParser.FLAG_FILTER)!=0 && allRawQueriesTermCount > TERMS_QUERY_THRESHOLD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery(); BooleanQuery.Builder booleanBuilder = newBooleanQuery();
Map<SchemaField, List<String>> fmap = new HashMap<>(); Map<SchemaField, List<RawQuery>> fmap = new HashMap<>();
for (BooleanClause clause : clauses) { for (BooleanClause clause : clauses) {
Query subq = clause.getQuery(); Query subq = clause.getQuery();
@ -563,14 +600,14 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// If this field isn't indexed, or if it is indexed and we want to use TermsQuery, then collect this value.
// We are currently relying on things like PointField not being marked as indexed in order to bypass
// the "useTermQuery" check.
if (fieldValues == null && useTermsQuery || !sfield.indexed()) { if ((fieldValues == null && useTermsQuery) || !sfield.indexed()) {
fieldValues = new ArrayList<>(2); fieldValues = new ArrayList<>(2);
fmap.put(sfield, fieldValues); fmap.put(sfield, fieldValues);
} }
} }
if (fieldValues != null) { if (fieldValues != null) {
fieldValues.add(rawq.externalVal); fieldValues.add(rawq);
continue; continue;
} }
@ -578,33 +615,50 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
} }
} }
clausesAdded++;
booleanBuilder.add(clause); booleanBuilder.add(clause);
} }
for (Map.Entry<SchemaField,List<String>> entry : fmap.entrySet()) { for (Map.Entry<SchemaField,List<RawQuery>> entry : fmap.entrySet()) {
sfield = entry.getKey(); sfield = entry.getKey();
fieldValues = entry.getValue(); fieldValues = entry.getValue();
FieldType ft = sfield.getType(); FieldType ft = sfield.getType();
// TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder. // TODO: pull more of this logic out to FieldType? We would need to be able to add clauses to our existing booleanBuilder.
if (sfield.indexed() && fieldValues.size() < TERMS_QUERY_THRESHOLD || fieldValues.size() == 1) { int termCount = fieldValues.stream().mapToInt(RawQuery::getTermCount).sum();
if ((sfield.indexed() && termCount < TERMS_QUERY_THRESHOLD) || termCount == 1) {
// use boolean query instead // use boolean query instead
for (String externalVal : fieldValues) { for (RawQuery rawq : fieldValues) {
Query subq = ft.getFieldQuery(this.parser, sfield, externalVal); Query subq;
clausesAdded++; if (ft.isTokenized() && sfield.indexed()) {
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(),
false, fieldAutoGenPhraseQueries);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} else {
for (String externalVal : rawq.getExternalVals()) {
subq = ft.getFieldQuery(this.parser, sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
}
} }
} else { } else {
Query subq = ft.getSetQuery(this.parser, sfield, fieldValues); List<String> externalVals
if (fieldValues.size() == clauses.size()) return subq; // if this is everything, don't wrap in a boolean query = fieldValues.stream().flatMap(rawq -> rawq.getExternalVals().stream()).collect(Collectors.toList());
clausesAdded++; Query subq = ft.getSetQuery(this.parser, sfield, externalVals);
if (onlyRawQueries && termCount == allRawQueriesTermCount) return subq; // if this is everything, don't wrap in a boolean query
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD); booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
} }
} }
return booleanBuilder.build(); BooleanQuery bq = booleanBuilder.build();
if (bq.clauses().size() == 1) { // Unwrap single SHOULD query
BooleanClause clause = bq.clauses().iterator().next();
if (clause.getOccur() == BooleanClause.Occur.SHOULD) {
return clause.getQuery();
}
}
return bq;
} }
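
Two behavioral details in the reworked getBooleanQuery(): the filter()-context TermsQuery optimization now counts individual raw terms (allRawQueriesTermCount) rather than clauses when deciding whether to build a terms query, so a single multi-term RawQuery can push a field past TERMS_QUERY_THRESHOLD; and a resulting boolean query with exactly one SHOULD clause is unwrapped and returned directly.
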
@ -835,9 +889,26 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
// Create a "normal" query from a RawQuery (or just return the current query if it's not raw) // Create a "normal" query from a RawQuery (or just return the current query if it's not raw)
Query rawToNormal(Query q) { Query rawToNormal(Query q) {
if (!(q instanceof RawQuery)) return q; Query normal = q;
RawQuery rq = (RawQuery)q; if (q instanceof RawQuery) {
return rq.sfield.getType().getFieldQuery(parser, rq.sfield, rq.externalVal); RawQuery rawq = (RawQuery)q;
if (rawq.sfield.getType().isTokenized()) {
normal = rawq.sfield.getType().getFieldQuery(parser, rawq.sfield, rawq.getJoinedExternalVal());
} else {
FieldType ft = rawq.sfield.getType();
if (rawq.getTermCount() == 1) {
normal = ft.getFieldQuery(this.parser, rawq.sfield, rawq.getExternalVals().get(0));
} else {
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
for (String externalVal : rawq.getExternalVals()) {
Query subq = ft.getFieldQuery(this.parser, rawq.sfield, externalVal);
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
}
normal = booleanBuilder.build();
}
}
}
return normal;
} }
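
rawToNormal() now has to cope with a RawQuery holding several external values: for tokenized fields the values are re-joined with spaces and analyzed as one string, while for non-tokenized fields each value becomes its own field query, combined as SHOULD clauses of a BooleanQuery when there is more than one.
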
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError { protected Query getFieldQuery(String field, String queryText, boolean quoted) throws SyntaxError {
@ -877,21 +948,87 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
FieldType ft = sf.getType(); FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields // delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) { if (ft.isTokenized() && sf.indexed()) {
return newFieldQuery(getAnalyzer(), field, queryText, quoted || (ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries())); boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries);
} else { } else {
if (raw) { if (raw) {
return new RawQuery(sf, queryText); return new RawQuery(sf, queryText);
} else { } else {
return sf.getType().getFieldQuery(parser, sf, queryText); return ft.getFieldQuery(parser, sf, queryText);
} }
} }
} }
// default to a normal field query // default to a normal field query
return newFieldQuery(getAnalyzer(), field, queryText, quoted); return newFieldQuery(getAnalyzer(), field, queryText, quoted, false);
} }
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){ // Assumption: quoted is always false
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
checkNullField(field);
SchemaField sf;
if (field.equals(lastFieldName)) {
// only look up the SchemaField on a field change... this helps with memory allocation of dynamic fields
// and large queries like foo_i:(1 2 3 4 5 6 7 8 9 10) when we are passed "foo_i" each time.
sf = lastField;
} else {
// intercept magic field name of "_" to use as a hook for our
// own functions.
if (field.charAt(0) == '_' && parser != null) {
MagicFieldName magic = MagicFieldName.get(field);
if (null != magic) {
subQParser = parser.subQuery(String.join(" ", queryTerms), magic.subParser);
return subQParser.getQuery();
}
}
lastFieldName = field;
sf = lastField = schema.getFieldOrNull(field);
}
if (sf != null) {
FieldType ft = sf.getType();
// delegate to type for everything except tokenized fields
if (ft.isTokenized() && sf.indexed()) {
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
return newFieldQuery(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries);
} else {
if (raw) {
return new RawQuery(sf, queryTerms);
} else {
if (queryTerms.size() == 1) {
return ft.getFieldQuery(parser, sf, queryTerms.get(0));
} else {
List<Query> subqs = new ArrayList<>();
for (String queryTerm : queryTerms) {
try {
subqs.add(ft.getFieldQuery(parser, sf, queryTerm));
} catch (Exception e) { // assumption: raw = false only when called from ExtendedDismaxQueryParser.getQuery()
// for edismax: ignore parsing failures
}
}
if (subqs.size() == 1) {
return subqs.get(0);
} else { // delay building boolean query until we must
final BooleanClause.Occur occur
= operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
BooleanQuery.Builder booleanBuilder = newBooleanQuery();
subqs.forEach(subq -> booleanBuilder.add(subq, occur));
return booleanBuilder.build();
}
}
}
}
}
// default to a normal field query
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
return newFieldQuery(getAnalyzer(), field, queryText, false, false);
}
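
This new getFieldQuery(String, List<String>, boolean) overload is the sow=false entry point fed by MultiTerm(). Note the asymmetry called out in its inline comments: with raw=true the values are wrapped in a RawQuery for later expansion, while the raw=false path, reached from the edismax parser per the comment, builds per-term queries directly and deliberately ignores parse failures on individual terms, matching edismax's lenient treatment of user input.
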
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
checkNullField(field); checkNullField(field);
SchemaField sf = schema.getField(field); SchemaField sf = schema.getField(field);

File: solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java

@ -17,6 +17,7 @@
package org.apache.solr.search; package org.apache.solr.search;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
@ -160,6 +161,8 @@ public class ExtendedDismaxQParser extends QParser {
// but always for unstructured implicit bqs created by getFieldQuery // but always for unstructured implicit bqs created by getFieldQuery
up.minShouldMatch = config.minShouldMatch; up.minShouldMatch = config.minShouldMatch;
up.setSplitOnWhitespace(config.splitOnWhitespace);
parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config); parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
@ -307,6 +310,8 @@ public class ExtendedDismaxQParser extends QParser {
up.setRemoveStopFilter(true); up.setRemoveStopFilter(true);
query = up.parse(mainUserQuery); query = up.parse(mainUserQuery);
} }
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) { } catch (Exception e) {
// ignore failure and reparse later after escaping reserved chars // ignore failure and reparse later after escaping reserved chars
up.exceptions = false; up.exceptions = false;
@ -545,6 +550,7 @@ public class ExtendedDismaxQParser extends QParser {
pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
pp.setPhraseSlop(slop);
pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
pp.setSplitOnWhitespace(config.splitOnWhitespace);
/* :TODO: reevaluate using makeDismax=true vs false...
*
@ -976,6 +982,7 @@ public class ExtendedDismaxQParser extends QParser {
private String field;
private String val;
private String val2;
private List<String> vals;
private boolean bool;
private boolean bool2;
private float flt;
@ -1036,6 +1043,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = quoted ? QType.PHRASE : QType.FIELD;
this.field = field;
this.val = val;
this.vals = null;
this.slop = getPhraseSlop(); // unspecified
return getAliasedQuery();
}
@ -1045,10 +1053,21 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.PHRASE;
this.field = field;
this.val = val;
this.vals = null;
this.slop = slop;
return getAliasedQuery();
}
@Override
protected Query getFieldQuery(String field, List<String> queryTerms, boolean raw) throws SyntaxError {
this.type = QType.FIELD;
this.field = field;
this.val = null;
this.vals = queryTerms;
this.slop = getPhraseSlop();
return getAliasedMultiTermQuery(queryTerms);
}
@Override
protected Query getPrefixQuery(String field, String val) throws SyntaxError {
if (val.equals("") && field.equals("*")) {
@ -1057,11 +1076,17 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.PREFIX;
this.field = field;
this.val = val;
this.vals = null;
return getAliasedQuery();
}
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws SyntaxError {
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && getSplitOnWhitespace() == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
Analyzer actualAnalyzer;
if (removeStopFilter) {
if (nonStopFilterAnalyzerPerField == null) {
@ -1074,7 +1099,7 @@ public class ExtendedDismaxQParser extends QParser {
} else {
actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
}
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted);
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
}
@Override
@ -1083,6 +1108,7 @@ public class ExtendedDismaxQParser extends QParser {
this.field = field;
this.val = a;
this.val2 = b;
this.vals = null;
this.bool = startInclusive;
this.bool2 = endInclusive;
return getAliasedQuery();
@ -1100,6 +1126,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.WILDCARD;
this.field = field;
this.val = val;
this.vals = null;
return getAliasedQuery();
}
@ -1108,6 +1135,7 @@ public class ExtendedDismaxQParser extends QParser {
this.type = QType.FUZZY;
this.field = field;
this.val = val;
this.vals = null;
this.flt = minSimilarity;
return getAliasedQuery();
}
@ -1157,7 +1185,129 @@ public class ExtendedDismaxQParser extends QParser {
return getQuery();
}
}
/**
* Delegates to the super class unless the field has been specified
* as an alias -- in which case we recurse on each of
* the aliased fields, and the results are composed into a
* DisjunctionMaxQuery. (so yes: aliases which point at other
* aliases should work)
*/
protected Query getAliasedMultiTermQuery(List<String> queryTerms) throws SyntaxError {
Alias a = aliases.get(field);
this.validateCyclicAliasing(field);
if (a != null) {
List<Query> lst = getQueries(a);
if (lst == null || lst.size() == 0) {
return getQuery();
}
// make a DisjunctionMaxQuery in this case too... it will stop
// the "mm" processing from making everything required in the case
// that the query expanded to multiple clauses.
// DisMaxQuery.rewrite() removes itself if there is just a single clause anyway.
// if (lst.size()==1) return lst.get(0);
if (makeDismax) {
if (lst.get(0) instanceof BooleanQuery && allSameQueryStructure(lst)) {
BooleanQuery.Builder q = new BooleanQuery.Builder();
List<Query> subs = new ArrayList<>(lst.size());
for (int c = 0 ; c < ((BooleanQuery)lst.get(0)).clauses().size() ; ++c) {
subs.clear();
// Make a dismax query for each clause position in the boolean per-field queries.
for (int n = 0 ; n < lst.size() ; ++n) {
subs.add(((BooleanQuery)lst.get(n)).clauses().get(c).getQuery());
}
q.add(newBooleanClause(new DisjunctionMaxQuery(subs, a.tie), BooleanClause.Occur.SHOULD));
}
return q.build();
} else {
return new DisjunctionMaxQuery(lst, a.tie);
}
} else {
BooleanQuery.Builder q = new BooleanQuery.Builder();
for (Query sub : lst) {
q.add(sub, BooleanClause.Occur.SHOULD);
}
return q.build();
}
} else {
// verify that a fielded query is actually on a field that exists... if not,
// then throw an exception to get us out of here, and we'll treat it like a
// literal when we try the escape+re-parse.
if (exceptions) {
FieldType ft = schema.getFieldTypeNoEx(field);
if (ft == null && null == MagicFieldName.get(field)) {
throw unknownField;
}
}
return getQuery();
}
}
/** Recursively examines the given query list for identical structure in all queries. */
private boolean allSameQueryStructure(List<Query> lst) {
boolean allSame = true;
Query firstQuery = lst.get(0);
for (int n = 1 ; n < lst.size(); ++n) {
Query nthQuery = lst.get(n);
if (nthQuery.getClass() != firstQuery.getClass()) {
allSame = false;
break;
}
if (firstQuery instanceof BooleanQuery) {
List<BooleanClause> firstBooleanClauses = ((BooleanQuery)firstQuery).clauses();
List<BooleanClause> nthBooleanClauses = ((BooleanQuery)nthQuery).clauses();
if (firstBooleanClauses.size() != nthBooleanClauses.size()) {
allSame = false;
break;
}
for (int c = 0 ; c < firstBooleanClauses.size() ; ++c) {
if (nthBooleanClauses.get(c).getQuery().getClass() != firstBooleanClauses.get(c).getQuery().getClass()
|| nthBooleanClauses.get(c).getOccur() != firstBooleanClauses.get(c).getOccur()) {
allSame = false;
break;
}
if (firstBooleanClauses.get(c).getQuery() instanceof BooleanQuery && ! allSameQueryStructure
(Arrays.asList(firstBooleanClauses.get(c).getQuery(), nthBooleanClauses.get(c).getQuery()))) {
allSame = false;
break;
}
}
}
}
return allSame;
}
@Override
protected void addMultiTermClause(List<BooleanClause> clauses, Query q) {
// We might have been passed a null query; the terms might have been filtered away by the analyzer.
if (q == null) {
return;
}
boolean required = operator == AND_OPERATOR;
BooleanClause.Occur occur = required ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
if (q instanceof BooleanQuery) {
boolean allOptionalDisMaxQueries = true;
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
if (c.getOccur() != BooleanClause.Occur.SHOULD || ! (c.getQuery() instanceof DisjunctionMaxQuery)) {
allOptionalDisMaxQueries = false;
break;
}
}
if (allOptionalDisMaxQueries) {
// getAliasedMultiTermQuery() constructed a BooleanQuery containing only SHOULD DisjunctionMaxQuery-s.
// Unwrap the query and add a clause for each contained DisMax query.
for (BooleanClause c : ((BooleanQuery)q).clauses()) {
clauses.add(newBooleanClause(c.getQuery(), occur));
}
return;
}
}
clauses.add(newBooleanClause(q, occur));
}
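
A rough illustration (not part of this patch; fields f1/f2 and terms wi/fi are hypothetical) of why addMultiTermClause() unwraps the BooleanQuery produced by getAliasedMultiTermQuery(): adding the group as a single clause under q.op=AND would only require the group as a whole, while unwrapping makes the per-term DisjunctionMaxQuery at each position individually required.

import java.util.Arrays;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.TermQuery;

class UnwrapSketch {
  public static void main(String[] args) {
    // One DisMax per query term, each spanning the aliased fields.
    DisjunctionMaxQuery wi = new DisjunctionMaxQuery(
        Arrays.asList(new TermQuery(new Term("f1", "wi")), new TermQuery(new Term("f2", "wi"))), 0.0f);
    DisjunctionMaxQuery fi = new DisjunctionMaxQuery(
        Arrays.asList(new TermQuery(new Term("f1", "fi")), new TermQuery(new Term("f2", "fi"))), 0.0f);

    // Unwrapped form under q.op=AND: both term positions are required.
    BooleanQuery.Builder unwrapped = new BooleanQuery.Builder();
    unwrapped.add(wi, BooleanClause.Occur.MUST);
    unwrapped.add(fi, BooleanClause.Occur.MUST);
    System.out.println(unwrapped.build()); // roughly: +(f1:wi | f2:wi) +(f1:fi | f2:fi)
  }
}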
/**
* Validate there is no cyclic referencing in the aliasing
*/
@ -1212,7 +1362,12 @@ public class ExtendedDismaxQParser extends QParser {
switch (type) {
case FIELD: // fallthrough
case PHRASE:
Query query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
Query query;
if (val == null) {
query = super.getFieldQuery(field, vals, false);
} else {
query = super.getFieldQuery(field, val, type == QType.PHRASE, false);
}
// Boolean query on a whitespace-separated string
// If these were synonyms we would have a SynonymQuery
if (query instanceof BooleanQuery) {
@ -1248,6 +1403,8 @@ public class ExtendedDismaxQParser extends QParser {
}
return null;
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) {
// an exception here is due to the field query not being compatible with the input text
// for example, passing a string to a numeric field.
@ -1442,7 +1599,7 @@ public class ExtendedDismaxQParser extends QParser {
*/
public class ExtendedDismaxConfiguration {
/**
* The field names specified by 'qf' that (most) clauses will
* be queried against
*/
@ -1478,7 +1635,9 @@ public class ExtendedDismaxQParser extends QParser {
protected boolean lowercaseOperators;
protected String[] boostFuncs;
protected boolean splitOnWhitespace;
public ExtendedDismaxConfiguration(SolrParams localParams,
SolrParams params, SolrQueryRequest req) {
solrParams = SolrParams.wrapDefaults(localParams, params);
@ -1522,6 +1681,8 @@ public class ExtendedDismaxQParser extends QParser {
boostFuncs = solrParams.getParams(DisMaxParams.BF);
multBoosts = solrParams.getParams(DMP.MULT_BOOST);
splitOnWhitespace = solrParams.getBool(QueryParsing.SPLIT_ON_WHITESPACE, SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE);
}
/**
*

View File

@ -19,6 +19,7 @@ package org.apache.solr.search;
import org.apache.lucene.search.Query;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.request.SolrQueryRequest;
/**
@ -46,6 +47,8 @@ public class LuceneQParser extends QParser {
lparser.setDefaultOperator
(QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(),
getParam(QueryParsing.OP)));
lparser.setSplitOnWhitespace(StrUtils.parseBool
(getParam(QueryParsing.SPLIT_ON_WHITESPACE), SolrQueryParser.DEFAULT_SPLIT_ON_WHITESPACE));
return lparser.parse(qstr);
}
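
A hedged SolrJ sketch (not part of this patch; the base URL, collection name, and the "syn" field are assumptions borrowed from the tests further down) showing the two ways the new parameter can be passed: as a top-level sow request parameter, or as a local param on the query string.

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;

public class SowParamExample {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/collection1").build()) {
      // Top-level request parameter: the whole "wi fi" string reaches query analysis,
      // so a multi-word synonym such as "wi fi => wifi" can match.
      SolrQuery q = new SolrQuery("wi fi");
      q.set("df", "syn");
      q.set("sow", "false");
      QueryResponse rsp = client.query(q);
      System.out.println(rsp.getResults().getNumFound());

      // Equivalent local-param form on the query string itself.
      QueryResponse rsp2 = client.query(new SolrQuery("{!lucene sow=false df=syn}wi fi"));
      System.out.println(rsp2.getResults().getNumFound());
    }
  }
}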

View File

@ -28,6 +28,8 @@ import java.util.List;
* <br>Other parameters:<ul>
* <li>q.op - the default operator "OR" or "AND"</li>
* <li>df - the default field name</li>
* <li>sow - split on whitespace prior to analysis, boolean,
* default=<code>{@value org.apache.solr.search.SolrQueryParser#DEFAULT_SPLIT_ON_WHITESPACE}</code></li>
* </ul>
* <br>Example: <code>{!lucene q.op=AND df=text sort='price asc'}myfield:foo +bar -baz</code>
*/

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
public class QueryParserConfigurationException extends IllegalArgumentException {
public QueryParserConfigurationException(String message) {
super(message);
}
}

View File

@ -51,6 +51,7 @@ public class QueryParsing {
public static final String F = "f"; // field that a query or command pertains to
public static final String TYPE = "type";// parser for this query or command
public static final String DEFTYPE = "defType"; // default parser for any direct subqueries
public static final String SPLIT_ON_WHITESPACE = "sow"; // Whether to split on whitespace prior to analysis
public static final String LOCALPARAM_START = "{!";
public static final char LOCALPARAM_END = '}';
// true if the value was specified by the "v" param (i.e. v=myval, or v=$param)

View File

@ -25,7 +25,7 @@ import org.apache.solr.parser.QueryParser;
public class SolrQueryParser extends QueryParser {
public SolrQueryParser(QParser parser, String defaultField) {
super(parser.getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, parser);
super(defaultField, parser);
}
}

View File

@ -0,0 +1,13 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
US, U.S., U S, USA, U.S.A., U S A, United States, United States of America

View File

@ -0,0 +1,50 @@
<?xml version="1.0" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<schema name="test-multi-word-synonyms" version="1.6">
<fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
<fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
<field name="signatureField" type="string" indexed="true" stored="false"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SynonymGraphFilterFactory"
synonyms="multiword-synonyms.txt"
tokenizerFactory="solr.StandardTokenizerFactory"
ignoreCase="true"
expand="true"/>
</analyzer>
</fieldType>
<field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
<uniqueKey>id</uniqueKey>
<field name="_version_" type="long" indexed="false" stored="false" docValues="true"/>
<field name="text" type="text" indexed="true" stored="true"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
</schema>
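
A rough standalone sketch (not part of this patch) of what the query-time analyzer defined above produces for an un-split multi-word input. It assumes Lucene's analysis-common module and multiword-synonyms.txt are on the classpath; CustomAnalyzer is only used here to approximate the field type's query analyzer.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.custom.CustomAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;

public class SynonymGraphDemo {
  public static void main(String[] args) throws IOException {
    Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer("standard")
        .addTokenFilter("lowercase")
        .addTokenFilter("synonymGraph",
            "synonyms", "multiword-synonyms.txt",
            "ignoreCase", "true",
            "expand", "true")
        .build();

    try (TokenStream ts = analyzer.tokenStream("text", "united states")) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      PositionLengthAttribute posLen = ts.addAttribute(PositionLengthAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        // Multi-word synonyms come out as tokens spanning several positions,
        // which is why the query string must reach the analyzer un-split (sow=false).
        System.out.println(term.toString() + " (posLength=" + posLen.getPositionLength() + ")");
      }
      ts.end();
    }
  }
}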

View File

@ -29,3 +29,5 @@ Television, Televisions, TV, TVs
# Synonym mappings can be used for spelling correction too
pixima => pixma
# multiword synonyms
wi fi => wifi

View File

@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Arrays;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestMultiWordSynonyms extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig.xml", "schema-multiword-synonyms.xml");
index();
}
private static void index() throws Exception {
assertU(adoc("id","1", "text","USA Today"));
assertU(adoc("id","2", "text","A dynamic US economy"));
assertU(adoc("id","3", "text","The United States of America's 50 states"));
assertU(adoc("id","4", "text","Party in the U.S.A."));
assertU(adoc("id","5", "text","These United States"));
assertU(adoc("id","6", "text","America United of States"));
assertU(adoc("id","7", "text","States United"));
assertU(commit());
}
@Test
public void testNonPhrase() throws Exception {
// Don't split on whitespace (sow=false)
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.", "United States", "United States of America")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "false")
, "/response/numFound==7"
);
}
}
// Split on whitespace (sow=true)
for (String q : Arrays.asList("US", "U.S.", "USA", "U.S.A.")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "true")
, "/response/numFound==7"
);
}
}
for (String q : Arrays.asList("United States", "United States of America")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", "true")
, "/response/numFound==4"
);
}
}
}
@Test
public void testPhrase() throws Exception {
for (String q : Arrays.asList
("\"US\"", "\"U.S.\"", "\"USA\"", "\"U.S.A.\"", "\"United States\"", "\"United States of America\"")) {
for (String defType : Arrays.asList("lucene", "edismax")) {
for (String sow : Arrays.asList("true", "false")) {
assertJQ(req("q", q,
"defType", defType,
"df", "text",
"sow", sow)
, "/response/numFound==5"
);
}
}
}
}
}

View File

@ -16,7 +16,12 @@
*/
package org.apache.solr.search;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.search.BooleanClause;
@ -28,12 +33,15 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TextField;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
public class TestSolrQueryParser extends SolrTestCaseJ4 {
@ -57,6 +65,8 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
assertU(adoc("id", "12", "eee_s", "X")); assertU(adoc("id", "12", "eee_s", "X"));
assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash")); assertU(adoc("id", "13", "eee_s", "'balance'", "rrr_s", "/leading_slash"));
assertU(adoc("id", "20", "syn", "wifi ATM"));
assertU(commit()); assertU(commit());
} }
@ -208,86 +218,105 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
QParser qParser;
Query q,qq;
// relevance query should not be a filter
qParser = QParser.getParser("foo_s:(a b c)", req);
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery)q).clauses().size());
// small filter query should still use BooleanQuery
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
Map<String, String> sowFalseParamsMap = new HashMap<>();
sowFalseParamsMap.put("sow", "false");
Map<String, String> sowTrueParamsMap = new HashMap<>();
sowTrueParamsMap.put("sow", "true");
List<MapSolrParams> paramMaps = Arrays.asList
(new MapSolrParams(Collections.emptyMap()), // no sow param (i.e. the default sow value)
new MapSolrParams(sowFalseParamsMap),
new MapSolrParams(sowTrueParamsMap));
for (MapSolrParams params : paramMaps) {
// relevance query should not be a filter
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
// small filter query should still use BooleanQuery
if (QueryParser.TERMS_QUERY_THRESHOLD > 3) {
qParser = QParser.getParser("foo_s:(a b c)", req);
qParser.setParams(params);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(3, ((BooleanQuery) q).clauses().size());
}
// large relevancy query should use BooleanQuery
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(26, ((BooleanQuery)q).clauses().size());
// large filter query should use TermsQuery
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
// large numeric filter query should use TermsQuery (for trie fields)
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(20, ((TermInSetQuery)q).getTermData().size());
// for point fields large filter query should use PointInSetQuery
qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertTrue(q instanceof PointInSetQuery);
assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size());
// a filter() clause inside a relevancy query should be able to use a TermsQuery
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
if (qq instanceof TermQuery) {
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
}
if (qq instanceof FilterQuery) {
qq = ((FilterQuery)qq).getQuery();
}
assertEquals(26, ((TermInSetQuery) qq).getTermData().size());
// test mixed boolean query, including quotes (which shouldn't matter)
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(4, ((BooleanQuery)q).clauses().size());
qq = null;
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
if (qq instanceof TermInSetQuery) break;
}
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
// test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field)
StringBuilder sb = new StringBuilder();
for (int i=0; i<17; i++) {
char letter = (char)('a'+i);
sb.append("foo_s:" + letter + " bar_s:" + letter + " ");
}
qParser = QParser.getParser(sb.toString(), req);
qParser.setIsFilter(true); // this may change in the future
qParser.setParams(params);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
}
}
}
// large relevancy query should use BooleanQuery
// TODO: we may decide that string fields shouldn't have relevance in the future... change to a text field w/o a stop filter if so
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
q = qParser.getQuery();
assertEquals(26, ((BooleanQuery)q).clauses().size());
// large filter query should use TermsQuery
qParser = QParser.getParser("foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(26, ((TermInSetQuery)q).getTermData().size());
// large numeric filter query should use TermsQuery (for trie fields)
qParser = QParser.getParser("foo_ti:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(20, ((TermInSetQuery)q).getTermData().size());
// for point fields large filter query should use PointInSetQuery
qParser = QParser.getParser("foo_pi:(1 2 3 4 5 6 7 8 9 10 20 19 18 17 16 15 14 13 12 11)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertTrue(q instanceof PointInSetQuery);
assertEquals(20, ((PointInSetQuery)q).getPackedPoints().size());
// a filter() clause inside a relevancy query should be able to use a TermsQuery
qParser = QParser.getParser("foo_s:aaa filter(foo_s:(a b c d e f g h i j k l m n o p q r s t u v w x y z))", req);
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
qq = ((BooleanQuery)q).clauses().get(0).getQuery();
if (qq instanceof TermQuery) {
qq = ((BooleanQuery)q).clauses().get(1).getQuery();
}
if (qq instanceof FilterQuery) {
qq = ((FilterQuery)qq).getQuery();
}
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
// test mixed boolean query, including quotes (which shouldn't matter)
qParser = QParser.getParser("foo_s:(a +aaa b -bbb c d e f bar_s:(qqq www) g h i j k l m n o p q r s t u v w x y z)", req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(4, ((BooleanQuery)q).clauses().size());
qq = null;
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
if (qq instanceof TermInSetQuery) break;
}
assertEquals(26, ((TermInSetQuery)qq).getTermData().size());
// test terms queries of two different fields (LUCENE-7637 changed to require all terms be in the same field)
StringBuilder sb = new StringBuilder();
for (int i=0; i<17; i++) {
char letter = (char)('a'+i);
sb.append("foo_s:" + letter + " bar_s:" + letter + " ");
}
qParser = QParser.getParser(sb.toString(), req);
qParser.setIsFilter(true); // this may change in the future
q = qParser.getQuery();
assertEquals(2, ((BooleanQuery)q).clauses().size());
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
qq = clause.getQuery();
assertEquals(17, ((TermInSetQuery)qq).getTermData().size());
}
req.close();
}
@ -306,6 +335,10 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
// This will still fail when used as the main query, but will pass in a filter query since TermsQuery can be used.
assertJQ(req("q","*:*", "fq", q)
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "false")
,"/response/numFound==6");
assertJQ(req("q","*:*", "fq", q, "sow", "true")
,"/response/numFound==6");
}
@Test
@ -540,4 +573,400 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
req.close();
}
// LUCENE-7533
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
assertTrue(((TextField)h.getCore().getLatestSchema().getField("text").getType()).getAutoGeneratePhraseQueries());
try (SolrQueryRequest req = req()) {
final QParser qparser = QParser.getParser("{!lucene sow=false qf=text}blah blah", req);
expectThrows(QueryParserConfigurationException.class, qparser::getQuery);
}
}
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
, "/response/numFound==0"
);
}
public void testSplitOnWhitespace_Comments() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", " /* foo */ wi fi /* bar */", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", " /* foo */ wi /* bar */ fi /* baz */", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */", "sow", "true")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /* foo */ /* bar */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi fi /* bar */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/* foo */ wi /* bar */ fi /* baz */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}wi /* foo */ /* bar */ fi")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi fi /* bar */")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=false}/* foo */ wi /* bar */ fi /* baz */")
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}wi /* foo */ /* bar */ fi")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi fi /* bar */")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene sow=true}/* foo */ wi /* bar */ fi /* baz */")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}wi /* foo */ /* bar */ fi") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi fi /* bar */") // default sow=true
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "{!lucene}/* foo */ wi /* bar */ fi /* baz */") // default sow=true
, "/response/numFound==0"
);
}
public void testOperatorsAndMultiWordSynonyms() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
assertJQ(req("df", "syn", "q", "wifi", "sow", "true") // retrieve the single document containing literal "wifi"
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "wi fi", "sow", "false") // trigger the "wi fi => wifi" synonym
, "/response/numFound==1"
, "/response/docs/[0]/id=='20'"
);
assertJQ(req("df", "syn", "q", "+wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "-wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "!wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi* fi", "sow", "false") // matches because wi* matches wifi
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "w? fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi~1 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi^2 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi^=2 fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi +fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi -fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi !fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi*", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi?", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi~1", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi^2", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi^=2", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "syn:wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi syn:fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NOT wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi NOT fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi AND ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "ATM AND wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi && ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "ATM && wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi) AND ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "ATM AND (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi) && ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "ATM && (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi OR NotThereAtAll", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll OR wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi || NotThereAtAll", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll || wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi) OR NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll OR (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi) || NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NotThereAtAll || (wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"wi\" fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi \"fi\"", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi) fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi (fi)", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "/wi/ fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi /fi/", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "(wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "+(wi fi)", "sow", "false")
, "/response/numFound==1"
);
Map all = (Map)ObjectBuilder.fromJSON(h.query(req("q", "*:*", "rows", "0", "wt", "json")));
int totalDocs = Integer.parseInt(((Map)all.get("response")).get("numFound").toString());
int allDocsExceptOne = totalDocs - 1;
assertJQ(req("df", "syn", "q", "-(wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "!(wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "NOT (wi fi)", "sow", "false")
, "/response/numFound==" + allDocsExceptOne // one doc contains "wifi" in the syn field
);
assertJQ(req("df", "syn", "q", "(wi fi)^2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "(wi fi)^=2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "syn:(wi fi)", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "+ATM wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "-ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "-NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "!ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "!NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "NOT ATM wi fi", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "NOT NotThereAtAll wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "AT* wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "AT? wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"ATM\" wi fi", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi +ATM", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi -ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi -NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi !ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi !NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi NOT ATM", "sow", "false")
, "/response/numFound==0"
);
assertJQ(req("df", "syn", "q", "wi fi NOT NotThereAtAll", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi AT*", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi AT?", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "wi fi \"ATM\"", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "\"wi fi\"~2", "sow", "false")
, "/response/numFound==1"
);
assertJQ(req("df", "syn", "q", "syn:\"wi fi\"", "sow", "false")
, "/response/numFound==1"
);
}
}